diff --git a/configs/_base_/datasets/openimages_detection.py b/configs/_base_/datasets/openimages_detection.py new file mode 100644 index 0000000..a65d306 --- /dev/null +++ b/configs/_base_/datasets/openimages_detection.py @@ -0,0 +1,65 @@ +# dataset settings +dataset_type = 'OpenImagesDataset' +data_root = 'data/OpenImages/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, denorm_bbox=True), + dict(type='Resize', img_scale=(1024, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1024, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ], + ), +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=0, # workers_per_gpu > 0 may occur out of memory + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/oidv6-train-annotations-bbox.csv', + img_prefix=data_root + 'OpenImages/train/', + label_file=data_root + 'annotations/class-descriptions-boxable.csv', + hierarchy_file=data_root + + 'annotations/bbox_labels_600_hierarchy.json', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/validation-annotations-bbox.csv', + img_prefix=data_root + 'OpenImages/validation/', + label_file=data_root + 'annotations/class-descriptions-boxable.csv', + hierarchy_file=data_root + + 'annotations/bbox_labels_600_hierarchy.json', + meta_file=data_root + 'annotations/validation-image-metas.pkl', + image_level_ann_file=data_root + + 'annotations/validation-annotations-human-imagelabels-boxable.csv', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/validation-annotations-bbox.csv', + img_prefix=data_root + 'OpenImages/validation/', + label_file=data_root + 'annotations/class-descriptions-boxable.csv', + hierarchy_file=data_root + + 'annotations/bbox_labels_600_hierarchy.json', + meta_file=data_root + 'annotations/validation-image-metas.pkl', + image_level_ann_file=data_root + + 'annotations/validation-annotations-human-imagelabels-boxable.csv', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric='mAP') diff --git a/configs/_base_/default_runtime.py b/configs/_base_/default_runtime.py index 55097c5..5b0b145 100644 --- a/configs/_base_/default_runtime.py +++ b/configs/_base_/default_runtime.py @@ -14,3 +14,14 @@ load_from = None resume_from = None workflow = [('train', 1)] + +# disable opencv multithreading to avoid system being overloaded +opencv_num_threads = 0 +# set multi-process start method as `fork` to speed up the training +mp_start_method = 'fork' + +# Default setting for scaling LR automatically +# - `enable` means enable scaling LR automatically +# or not by default. +# - `base_batch_size` = (8 GPUs) x (2 samples per GPU). +auto_scale_lr = dict(enable=False, base_batch_size=16) diff --git a/configs/_base_/models/faster_rcnn_r50_caffe_c4.py b/configs/_base_/models/faster_rcnn_r50_caffe_c4.py index 51b5db4..dbf965a 100644 --- a/configs/_base_/models/faster_rcnn_r50_caffe_c4.py +++ b/configs/_base_/models/faster_rcnn_r50_caffe_c4.py @@ -42,7 +42,10 @@ dilation=1, style='caffe', norm_cfg=norm_cfg, - norm_eval=True), + norm_eval=True, + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe')), bbox_roi_extractor=dict( type='SingleRoIExtractor', roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), @@ -78,7 +81,7 @@ pos_fraction=0.5, neg_pos_ub=-1, add_gt_as_proposals=False), - allowed_border=0, + allowed_border=-1, pos_weight=-1, debug=False), rpn_proposal=dict( diff --git a/configs/albu_example/README.md b/configs/albu_example/README.md index 7f0eff5..9a180f0 100644 --- a/configs/albu_example/README.md +++ b/configs/albu_example/README.md @@ -1,24 +1,26 @@ # Albu Example -## Abstract +> [Albumentations: fast and flexible image augmentations](https://arxiv.org/abs/1809.06839) + + - +## Abstract Data augmentation is a commonly used technique for increasing both the size and the diversity of labeled training sets by leveraging input transformations that preserve output labels. In computer vision domain, image augmentations have become a common implicit regularization technique to combat overfitting in deep convolutional neural networks and are ubiquitously used to improve performance. While most deep learning frameworks implement basic image transformations, the list is typically limited to some variations and combinations of flipping, rotating, scaling, and cropping. Moreover, the image processing speed varies in existing tools for image augmentation. We present Albumentations, a fast and flexible library for image augmentations with many various image transform operations available, that is also an easy-to-use wrapper around other augmentation libraries. We provide examples of image augmentations for different computer vision tasks and show that Albumentations is faster than other commonly used image augmentation tools on the most of commonly used image transformations. -
- - +## Results and Models -## Citation +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :--------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | pytorch | 1x | 4.4 | 16.6 | 38.0 | 34.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/albu_example/mask_rcnn_r50_fpn_albu_1x_coco/mask_rcnn_r50_fpn_albu_1x_coco_20200208-ab203bcd.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/albu_example/mask_rcnn_r50_fpn_albu_1x_coco/mask_rcnn_r50_fpn_albu_1x_coco_20200208_225520.log.json) | - +## Citation -``` +```latex @article{2018arXiv180906839B, author = {A. Buslaev, A. Parinov, E. Khvedchenya, V.~I. Iglovikov and A.~A. Kalinin}, title = "{Albumentations: fast and flexible image augmentations}", @@ -27,9 +29,3 @@ Data augmentation is a commonly used technique for increasing both the size and year = 2018 } ``` - -## Results and Models - -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -|:---------:|:-------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| -| R-50 | pytorch | 1x | 4.4 | 16.6 | 38.0 | 34.5 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/albu_example/mask_rcnn_r50_fpn_albu_1x_coco/mask_rcnn_r50_fpn_albu_1x_coco_20200208-ab203bcd.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/albu_example/mask_rcnn_r50_fpn_albu_1x_coco/mask_rcnn_r50_fpn_albu_1x_coco_20200208_225520.log.json) | diff --git a/configs/atss/README.md b/configs/atss/README.md index 035964f..055ed05 100644 --- a/configs/atss/README.md +++ b/configs/atss/README.md @@ -1,22 +1,25 @@ -# Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection +# ATSS -## Abstract +> [Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection](https://arxiv.org/abs/1912.02424) + + - +## Abstract Object detection has been dominated by anchor-based detectors for several years. Recently, anchor-free detectors have become popular due to the proposal of FPN and Focal Loss. In this paper, we first point out that the essential difference between anchor-based and anchor-free detection is actually how to define positive and negative training samples, which leads to the performance gap between them. If they adopt the same definition of positive and negative samples during training, there is no obvious difference in the final performance, no matter regressing from a box or a point. This shows that how to select positive and negative training samples is important for current object detectors. Then, we propose an Adaptive Training Sample Selection (ATSS) to automatically select positive and negative samples according to statistical characteristics of object. It significantly improves the performance of anchor-based and anchor-free detectors and bridges the gap between them. Finally, we discuss the necessity of tiling multiple anchors per location on the image to detect objects. Extensive experiments conducted on MS COCO support our aforementioned analysis and conclusions. With the newly introduced ATSS, we improve state-of-the-art detectors by a large margin to 50.7% AP without introducing any overhead. -
- - +## Results and Models -## Citation +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :------: | :-----: | :-----: | :------: | :------------: | :----: | :---------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | pytorch | 1x | 3.7 | 19.7 | 39.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss/atss_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r50_fpn_1x_coco/atss_r50_fpn_1x_coco_20200209-985f7bd0.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r50_fpn_1x_coco/atss_r50_fpn_1x_coco_20200209_102539.log.json) | +| R-101 | pytorch | 1x | 5.6 | 12.3 | 41.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss/atss_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r101_fpn_1x_coco/atss_r101_fpn_1x_20200825-dfcadd6f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r101_fpn_1x_coco/atss_r101_fpn_1x_20200825-dfcadd6f.log.json) | - +## Citation ```latex @article{zhang2019bridging, @@ -26,10 +29,3 @@ Object detection has been dominated by anchor-based detectors for several years. year = {2019} } ``` - -## Results and Models - -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -|:---------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| -| R-50 | pytorch | 1x | 3.7 | 19.7 | 39.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss/atss_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r50_fpn_1x_coco/atss_r50_fpn_1x_coco_20200209-985f7bd0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r50_fpn_1x_coco/atss_r50_fpn_1x_coco_20200209_102539.log.json) | -| R-101 | pytorch | 1x | 5.6 | 12.3 | 41.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss/atss_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r101_fpn_1x_coco/atss_r101_fpn_1x_20200825-dfcadd6f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r101_fpn_1x_coco/atss_r101_fpn_1x_20200825-dfcadd6f.log.json) | diff --git a/configs/autoassign/README.md b/configs/autoassign/README.md index 172071d..1297206 100644 --- a/configs/autoassign/README.md +++ b/configs/autoassign/README.md @@ -1,24 +1,31 @@ -# AutoAssign: Differentiable Label Assignment for Dense Object Detection +# AutoAssign -## Abstract +> [AutoAssign: Differentiable Label Assignment for Dense Object Detection](https://arxiv.org/abs/2007.03496) + + - +## Abstract Determining positive/negative samples for object detection is known as label assignment. Here we present an anchor-free detector named AutoAssign. It requires little human knowledge and achieves appearance-aware through a fully differentiable weighting mechanism. During training, to both satisfy the prior distribution of data and adapt to category characteristics, we present Center Weighting to adjust the category-specific prior distributions. To adapt to object appearances, Confidence Weighting is proposed to adjust the specific assign strategy of each instance. The two weighting modules are then combined to generate positive and negative weights to adjust each location's confidence. Extensive experiments on the MS COCO show that our method steadily surpasses other best sampling strategies by large margins with various backbones. Moreover, our best model achieves 52.1% AP, outperforming all existing one-stage detectors. Besides, experiments on other datasets, e.g., PASCAL VOC, Objects365, and WiderFace, demonstrate the broad applicability of AutoAssign. -
- - +## Results and Models -## Citation +| Backbone | Style | Lr schd | Mem (GB) | box AP | Config | Download | +| :------: | :---: | :-----: | :------: | :----: | :------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | caffe | 1x | 4.08 | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/autoassign/autoassign_r50_fpn_8x2_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/autoassign/auto_assign_r50_fpn_1x_coco/auto_assign_r50_fpn_1x_coco_20210413_115540-5e17991f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/autoassign/auto_assign_r50_fpn_1x_coco/auto_assign_r50_fpn_1x_coco_20210413_115540-5e17991f.log.json) | - +**Note**: -``` +1. We find that the performance is unstable with 1x setting and may fluctuate by about 0.3 mAP. mAP 40.3 ~ 40.6 is acceptable. Such fluctuation can also be found in the original implementation. +2. You can get a more stable results ~ mAP 40.6 with a schedule total 13 epoch, and learning rate is divided by 10 at 10th and 13th epoch. + +## Citation + +```latex @article{zhu2020autoassign, title={AutoAssign: Differentiable Label Assignment for Dense Object Detection}, author={Zhu, Benjin and Wang, Jianfeng and Jiang, Zhengkai and Zong, Fuhang and Liu, Songtao and Li, Zeming and Sun, Jian}, @@ -26,14 +33,3 @@ Determining positive/negative samples for object detection is known as label ass year={2020} } ``` - -## Results and Models - -| Backbone | Style | Lr schd | Mem (GB) | box AP | Config | Download | -|:---------:|:-------:|:-------:|:--------:|:------:|:------:|:--------:| -| R-50 | caffe | 1x | 4.08 | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/autoassign/autoassign_r50_fpn_8x2_1x_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/autoassign/auto_assign_r50_fpn_1x_coco/auto_assign_r50_fpn_1x_coco_20210413_115540-5e17991f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/autoassign/auto_assign_r50_fpn_1x_coco/auto_assign_r50_fpn_1x_coco_20210413_115540-5e17991f.log.json) | - -**Note**: - -1. We find that the performance is unstable with 1x setting and may fluctuate by about 0.3 mAP. mAP 40.3 ~ 40.6 is acceptable. Such fluctuation can also be found in the original implementation. -2. You can get a more stable results ~ mAP 40.6 with a schedule total 13 epoch, and learning rate is divided by 10 at 10th and 13th epoch. diff --git a/configs/carafe/README.md b/configs/carafe/README.md index dca52e6..803abe0 100644 --- a/configs/carafe/README.md +++ b/configs/carafe/README.md @@ -1,26 +1,37 @@ -# CARAFE: Content-Aware ReAssembly of FEatures +# CARAFE -## Abstract +> [CARAFE: Content-Aware ReAssembly of FEatures](https://arxiv.org/abs/1905.02188) + + - +## Abstract Feature upsampling is a key operation in a number of modern convolutional network architectures, e.g. feature pyramids. Its design is critical for dense prediction tasks such as object detection and semantic/instance segmentation. In this work, we propose Content-Aware ReAssembly of FEatures (CARAFE), a universal, lightweight and highly effective operator to fulfill this goal. CARAFE has several appealing properties: (1) Large field of view. Unlike previous works (e.g. bilinear interpolation) that only exploit sub-pixel neighborhood, CARAFE can aggregate contextual information within a large receptive field. (2) Content-aware handling. Instead of using a fixed kernel for all samples (e.g. deconvolution), CARAFE enables instance-specific content-aware handling, which generates adaptive kernels on-the-fly. (3) Lightweight and fast to compute. CARAFE introduces little computational overhead and can be readily integrated into modern network architectures. We conduct comprehensive evaluations on standard benchmarks in object detection, instance/semantic segmentation and inpainting. CARAFE shows consistent and substantial gains across all the tasks (1.2%, 1.3%, 1.8%, 1.1db respectively) with negligible computational overhead. It has great potential to serve as a strong building block for future research. It has great potential to serve as a strong building block for future research. -
- - +## Results and Models -## Citation +The results on COCO 2017 val is shown in the below table. - +| Method | Backbone | Style | Lr schd | Test Proposal Num | Inf time (fps) | Box AP | Mask AP | Config | Download | +| :--------------------: | :------: | :-----: | :-----: | :---------------: | :------------: | :----: | :-----: | :------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Faster R-CNN w/ CARAFE | R-50-FPN | pytorch | 1x | 1000 | 16.5 | 38.6 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/carafe/faster_rcnn_r50_fpn_carafe_1x_coco/faster_rcnn_r50_fpn_carafe_1x_coco_bbox_mAP-0.386_20200504_175733-385a75b7.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/carafe/faster_rcnn_r50_fpn_carafe_1x_coco/faster_rcnn_r50_fpn_carafe_1x_coco_20200504_175733.log.json) | +| - | - | - | - | 2000 | | | | | | +| Mask R-CNN w/ CARAFE | R-50-FPN | pytorch | 1x | 1000 | 14.0 | 39.3 | 35.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/carafe/mask_rcnn_r50_fpn_carafe_1x_coco/mask_rcnn_r50_fpn_carafe_1x_coco_bbox_mAP-0.393__segm_mAP-0.358_20200503_135957-8687f195.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/carafe/mask_rcnn_r50_fpn_carafe_1x_coco/mask_rcnn_r50_fpn_carafe_1x_coco_20200503_135957.log.json) | +| - | - | - | - | 2000 | | | | | | + +## Implementation + +The CUDA implementation of CARAFE can be find at https://github.com/myownskyW7/CARAFE. + +## Citation We provide config files to reproduce the object detection & instance segmentation results in the ICCV 2019 Oral paper for [CARAFE: Content-Aware ReAssembly of FEatures](https://arxiv.org/abs/1905.02188). -``` +```latex @inproceedings{Wang_2019_ICCV, title = {CARAFE: Content-Aware ReAssembly of FEatures}, author = {Wang, Jiaqi and Chen, Kai and Xu, Rui and Liu, Ziwei and Loy, Chen Change and Lin, Dahua}, @@ -29,18 +40,3 @@ We provide config files to reproduce the object detection & instance segmentatio year = {2019} } ``` - -## Results and Models - -The results on COCO 2017 val is shown in the below table. - -| Method | Backbone | Style | Lr schd | Test Proposal Num | Inf time (fps) | Box AP | Mask AP | Config | Download | -|:--------------------:|:--------:|:-------:|:-------:|:-----------------:|:--------------:|:------:|:-------:|:------:|:--------:| -| Faster R-CNN w/ CARAFE | R-50-FPN | pytorch | 1x | 1000 | 16.5 | 38.6 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/carafe/faster_rcnn_r50_fpn_carafe_1x_coco/faster_rcnn_r50_fpn_carafe_1x_coco_bbox_mAP-0.386_20200504_175733-385a75b7.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/carafe/faster_rcnn_r50_fpn_carafe_1x_coco/faster_rcnn_r50_fpn_carafe_1x_coco_20200504_175733.log.json) | -| - | - | - | - | 2000 | | | | | -| Mask R-CNN w/ CARAFE | R-50-FPN | pytorch | 1x | 1000 | 14.0 | 39.3 | 35.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/carafe/mask_rcnn_r50_fpn_carafe_1x_coco/mask_rcnn_r50_fpn_carafe_1x_coco_bbox_mAP-0.393__segm_mAP-0.358_20200503_135957-8687f195.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/carafe/mask_rcnn_r50_fpn_carafe_1x_coco/mask_rcnn_r50_fpn_carafe_1x_coco_20200503_135957.log.json) | -| - | - | - | - | 2000 | | | | | - -## Implementation - -The CUDA implementation of CARAFE can be find at https://github.com/myownskyW7/CARAFE. diff --git a/configs/cascade_rcnn/README.md b/configs/cascade_rcnn/README.md index a88cfd7..5a9e817 100644 --- a/configs/cascade_rcnn/README.md +++ b/configs/cascade_rcnn/README.md @@ -1,22 +1,68 @@ -# Cascade R-CNN: High Quality Object Detection and Instance Segmentation +# Cascade R-CNN -## Abstract +> [Cascade R-CNN: High Quality Object Detection and Instance Segmentation](https://arxiv.org/abs/1906.09756) - + + +## Abstract In object detection, the intersection over union (IoU) threshold is frequently used to define positives/negatives. The threshold used to train a detector defines its quality. While the commonly used threshold of 0.5 leads to noisy (low-quality) detections, detection performance frequently degrades for larger thresholds. This paradox of high-quality detection has two causes: 1) overfitting, due to vanishing positive samples for large thresholds, and 2) inference-time quality mismatch between detector and test hypotheses. A multi-stage object detection architecture, the Cascade R-CNN, composed of a sequence of detectors trained with increasing IoU thresholds, is proposed to address these problems. The detectors are trained sequentially, using the output of a detector as training set for the next. This resampling progressively improves hypotheses quality, guaranteeing a positive training set of equivalent size for all detectors and minimizing overfitting. The same cascade is applied at inference, to eliminate quality mismatches between hypotheses and detectors. An implementation of the Cascade R-CNN without bells or whistles achieves state-of-the-art performance on the COCO dataset, and significantly improves high-quality detection on generic and specific object detection datasets, including VOC, KITTI, CityPerson, and WiderFace. Finally, the Cascade R-CNN is generalized to instance segmentation, with nontrivial improvements over the Mask R-CNN. -
- - +## Results and Models -## Citation +### Cascade R-CNN - +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :--------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | caffe | 1x | 4.2 | | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco/cascade_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.404_20200504_174853-b857be87.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco/cascade_rcnn_r50_caffe_fpn_1x_coco_20200504_174853.log.json) | +| R-50-FPN | pytorch | 1x | 4.4 | 16.1 | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco/cascade_rcnn_r50_fpn_1x_coco_20200316-3dc56deb.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco/cascade_rcnn_r50_fpn_1x_coco_20200316_214748.log.json) | +| R-50-FPN | pytorch | 20e | - | - | 41.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco/cascade_rcnn_r50_fpn_20e_coco_bbox_mAP-0.41_20200504_175131-e9872a90.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco/cascade_rcnn_r50_fpn_20e_coco_20200504_175131.log.json) | +| R-101-FPN | caffe | 1x | 6.2 | | 42.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco/cascade_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.423_20200504_175649-cab8dbd5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco/cascade_rcnn_r101_caffe_fpn_1x_coco_20200504_175649.log.json) | +| R-101-FPN | pytorch | 1x | 6.4 | 13.5 | 42.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco/cascade_rcnn_r101_fpn_1x_coco_20200317-0b6a2fbf.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco/cascade_rcnn_r101_fpn_1x_coco_20200317_101744.log.json) | +| R-101-FPN | pytorch | 20e | - | - | 42.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco/cascade_rcnn_r101_fpn_20e_coco_bbox_mAP-0.425_20200504_231812-5057dcc5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco/cascade_rcnn_r101_fpn_20e_coco_20200504_231812.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 7.6 | 10.9 | 43.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco/cascade_rcnn_x101_32x4d_fpn_1x_coco_20200316-95c2deb6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco/cascade_rcnn_x101_32x4d_fpn_1x_coco_20200316_055608.log.json) | +| X-101-32x4d-FPN | pytorch | 20e | 7.6 | | 43.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco/cascade_rcnn_x101_32x4d_fpn_20e_coco_20200906_134608-9ae0a720.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco/cascade_rcnn_x101_32x4d_fpn_20e_coco_20200906_134608.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 10.7 | | 44.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco/cascade_rcnn_x101_64x4d_fpn_1x_coco_20200515_075702-43ce6a30.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco/cascade_rcnn_x101_64x4d_fpn_1x_coco_20200515_075702.log.json) | +| X-101-64x4d-FPN | pytorch | 20e | 10.7 | | 44.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357-051557b1.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357.log.json) | + +### Cascade Mask R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :-------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | caffe | 1x | 5.9 | | 41.2 | 36.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco/cascade_mask_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.412__segm_mAP-0.36_20200504_174659-5004b251.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco/cascade_mask_rcnn_r50_caffe_fpn_1x_coco_20200504_174659.log.json) | +| R-50-FPN | pytorch | 1x | 6.0 | 11.2 | 41.2 | 35.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco/cascade_mask_rcnn_r50_fpn_1x_coco_20200203-9d4dcb24.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco/cascade_mask_rcnn_r50_fpn_1x_coco_20200203_170449.log.json) | +| R-50-FPN | pytorch | 20e | - | - | 41.9 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco/cascade_mask_rcnn_r50_fpn_20e_coco_bbox_mAP-0.419__segm_mAP-0.365_20200504_174711-4af8e66e.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco/cascade_mask_rcnn_r50_fpn_20e_coco_20200504_174711.log.json) | +| R-101-FPN | caffe | 1x | 7.8 | | 43.2 | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco/cascade_mask_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.432__segm_mAP-0.376_20200504_174813-5c1e9599.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco/cascade_mask_rcnn_r101_caffe_fpn_1x_coco_20200504_174813.log.json) | +| R-101-FPN | pytorch | 1x | 7.9 | 9.8 | 42.9 | 37.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco/cascade_mask_rcnn_r101_fpn_1x_coco_20200203-befdf6ee.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco/cascade_mask_rcnn_r101_fpn_1x_coco_20200203_092521.log.json) | +| R-101-FPN | pytorch | 20e | - | - | 43.4 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco/cascade_mask_rcnn_r101_fpn_20e_coco_bbox_mAP-0.434__segm_mAP-0.378_20200504_174836-005947da.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco/cascade_mask_rcnn_r101_fpn_20e_coco_20200504_174836.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 9.2 | 8.6 | 44.3 | 38.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco_20200201-0f411b1f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco_20200201_052416.log.json) | +| X-101-32x4d-FPN | pytorch | 20e | 9.2 | - | 45.0 | 39.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco_20200528_083917-ed1f4751.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco_20200528_083917.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 12.2 | 6.7 | 45.3 | 39.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco_20200203-9a2db89d.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco_20200203_044059.log.json) | +| X-101-64x4d-FPN | pytorch | 20e | 12.2 | | 45.6 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco_20200512_161033-bdb5126a.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco_20200512_161033.log.json) | + +**Notes:** + +- The `20e` schedule in Cascade (Mask) R-CNN indicates decreasing the lr at 16 and 19 epochs, with a total of 20 epochs. + +## Pre-trained Models + +We also train some models with longer schedules and multi-scale training for Cascade Mask R-CNN. The users could finetune them for downstream tasks. + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :--------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | caffe | 3x | 5.7 | | 44.0 | 38.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210707_002651-6e29b3a6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210707_002651.log.json) | +| R-50-FPN | pytorch | 3x | 5.9 | | 44.3 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco_20210628_164719-5bdc3824.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco_20210628_164719.log.json) | +| R-101-FPN | caffe | 3x | 7.7 | | 45.4 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco_20210707_002620-a5bd2389.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco_20210707_002620.log.json) | +| R-101-FPN | pytorch | 3x | 7.8 | | 45.5 | 39.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco_20210628_165236-51a2d363.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco_20210628_165236.log.json) | +| X-101-32x4d-FPN | pytorch | 3x | 9.0 | | 46.3 | 40.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco_20210706_225234-40773067.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco_20210706_225234.log.json) | +| X-101-32x8d-FPN | pytorch | 3x | 12.1 | | 46.1 | 39.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco_20210719_180640-9ff7e76f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco_20210719_180640.log.json) | +| X-101-64x4d-FPN | pytorch | 3x | 12.0 | | 46.6 | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210719_210311-d3e64ba0.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210719_210311.log.json) | + +## Citation ```latex @article{Cai_2019, @@ -31,53 +77,3 @@ In object detection, the intersection over union (IoU) threshold is frequently u pages={1–1} } ``` - -## Results and models - -### Cascade R-CNN - -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: |:------:|:--------:| -| R-50-FPN | caffe | 1x | 4.2 | | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco/cascade_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.404_20200504_174853-b857be87.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco/cascade_rcnn_r50_caffe_fpn_1x_coco_20200504_174853.log.json) | -| R-50-FPN | pytorch | 1x | 4.4 | 16.1 | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco/cascade_rcnn_r50_fpn_1x_coco_20200316-3dc56deb.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco/cascade_rcnn_r50_fpn_1x_coco_20200316_214748.log.json) | -| R-50-FPN | pytorch | 20e | - | - | 41.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco/cascade_rcnn_r50_fpn_20e_coco_bbox_mAP-0.41_20200504_175131-e9872a90.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco/cascade_rcnn_r50_fpn_20e_coco_20200504_175131.log.json) | -| R-101-FPN | caffe | 1x | 6.2 | | 42.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco/cascade_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.423_20200504_175649-cab8dbd5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco/cascade_rcnn_r101_caffe_fpn_1x_coco_20200504_175649.log.json) | -| R-101-FPN | pytorch | 1x | 6.4 | 13.5 | 42.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco/cascade_rcnn_r101_fpn_1x_coco_20200317-0b6a2fbf.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco/cascade_rcnn_r101_fpn_1x_coco_20200317_101744.log.json) | -| R-101-FPN | pytorch | 20e | - | - | 42.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco/cascade_rcnn_r101_fpn_20e_coco_bbox_mAP-0.425_20200504_231812-5057dcc5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco/cascade_rcnn_r101_fpn_20e_coco_20200504_231812.log.json) | -| X-101-32x4d-FPN | pytorch | 1x | 7.6 | 10.9 | 43.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco/cascade_rcnn_x101_32x4d_fpn_1x_coco_20200316-95c2deb6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco/cascade_rcnn_x101_32x4d_fpn_1x_coco_20200316_055608.log.json) | -| X-101-32x4d-FPN | pytorch | 20e | 7.6 | | 43.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco/cascade_rcnn_x101_32x4d_fpn_20e_coco_20200906_134608-9ae0a720.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco/cascade_rcnn_x101_32x4d_fpn_20e_coco_20200906_134608.log.json) | -| X-101-64x4d-FPN | pytorch | 1x | 10.7 | | 44.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco/cascade_rcnn_x101_64x4d_fpn_1x_coco_20200515_075702-43ce6a30.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco/cascade_rcnn_x101_64x4d_fpn_1x_coco_20200515_075702.log.json) | -| X-101-64x4d-FPN | pytorch | 20e | 10.7 | | 44.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357-051557b1.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357.log.json)| - -### Cascade Mask R-CNN - -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | -| R-50-FPN | caffe | 1x | 5.9 | | 41.2 | 36.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco/cascade_mask_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.412__segm_mAP-0.36_20200504_174659-5004b251.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco/cascade_mask_rcnn_r50_caffe_fpn_1x_coco_20200504_174659.log.json) | -| R-50-FPN | pytorch | 1x | 6.0 | 11.2 | 41.2 | 35.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco/cascade_mask_rcnn_r50_fpn_1x_coco_20200203-9d4dcb24.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco/cascade_mask_rcnn_r50_fpn_1x_coco_20200203_170449.log.json) | -| R-50-FPN | pytorch | 20e | - | - | 41.9 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco/cascade_mask_rcnn_r50_fpn_20e_coco_bbox_mAP-0.419__segm_mAP-0.365_20200504_174711-4af8e66e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco/cascade_mask_rcnn_r50_fpn_20e_coco_20200504_174711.log.json)| -| R-101-FPN | caffe | 1x | 7.8 | | 43.2 | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco/cascade_mask_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.432__segm_mAP-0.376_20200504_174813-5c1e9599.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco/cascade_mask_rcnn_r101_caffe_fpn_1x_coco_20200504_174813.log.json)| -| R-101-FPN | pytorch | 1x | 7.9 | 9.8 | 42.9 | 37.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco/cascade_mask_rcnn_r101_fpn_1x_coco_20200203-befdf6ee.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco/cascade_mask_rcnn_r101_fpn_1x_coco_20200203_092521.log.json) | -| R-101-FPN | pytorch | 20e | - | - | 43.4 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco/cascade_mask_rcnn_r101_fpn_20e_coco_bbox_mAP-0.434__segm_mAP-0.378_20200504_174836-005947da.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco/cascade_mask_rcnn_r101_fpn_20e_coco_20200504_174836.log.json)| -| X-101-32x4d-FPN | pytorch | 1x | 9.2 | 8.6 | 44.3 | 38.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco_20200201-0f411b1f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco_20200201_052416.log.json) | -| X-101-32x4d-FPN | pytorch | 20e | 9.2 | - | 45.0 | 39.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco_20200528_083917-ed1f4751.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco_20200528_083917.log.json) | -| X-101-64x4d-FPN | pytorch | 1x | 12.2 | 6.7 | 45.3 | 39.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco_20200203-9a2db89d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco_20200203_044059.log.json) | -| X-101-64x4d-FPN | pytorch | 20e | 12.2 | | 45.6 |39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco_20200512_161033-bdb5126a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco_20200512_161033.log.json)| - -**Notes:** - -- The `20e` schedule in Cascade (Mask) R-CNN indicates decreasing the lr at 16 and 19 epochs, with a total of 20 epochs. - -## Pre-trained Models - -We also train some models with longer schedules and multi-scale training for Cascade Mask R-CNN. The users could finetune them for downstream tasks. - -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :----------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | -| R-50-FPN | caffe | 3x | 5.7 | | 44.0 | 38.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210707_002651-6e29b3a6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210707_002651.log.json) -| R-50-FPN | pytorch| 3x | 5.9 | | 44.3 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco_20210628_164719-5bdc3824.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco_20210628_164719.log.json) -| R-101-FPN | caffe | 3x | 7.7 | | 45.4 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco_20210707_002620-a5bd2389.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco_20210707_002620.log.json) -| R-101-FPN | pytorch| 3x | 7.8 | | 45.5 | 39.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco_20210628_165236-51a2d363.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco_20210628_165236.log.json) -| X-101-32x4d-FPN | pytorch| 3x | 9.0 | | 46.3 | 40.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco_20210706_225234-40773067.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco_20210706_225234.log.json) -| X-101-32x8d-FPN | pytorch| 3x | 12.1 | | 46.1 | 39.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco_20210719_180640-9ff7e76f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco_20210719_180640.log.json) -| X-101-64x4d-FPN | pytorch| 3x | 12.0 | | 46.6 | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210719_210311-d3e64ba0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210719_210311.log.json) diff --git a/configs/cascade_rpn/README.md b/configs/cascade_rpn/README.md index 06b25a5..fb2b482 100644 --- a/configs/cascade_rpn/README.md +++ b/configs/cascade_rpn/README.md @@ -1,26 +1,37 @@ -# Cascade RPN: Delving into High-Quality Region Proposal Network with Adaptive Convolution +# Cascade RPN -## Abstract +> [Cascade RPN: Delving into High-Quality Region Proposal Network with Adaptive Convolution](https://arxiv.org/abs/1909.06720) - + + +## Abstract This paper considers an architecture referred to as Cascade Region Proposal Network (Cascade RPN) for improving the region-proposal quality and detection performance by systematically addressing the limitation of the conventional RPN that heuristically defines the anchors and aligns the features to the anchors. First, instead of using multiple anchors with predefined scales and aspect ratios, Cascade RPN relies on a single anchor per location and performs multi-stage refinement. Each stage is progressively more stringent in defining positive samples by starting out with an anchor-free metric followed by anchor-based metrics in the ensuing stages. Second, to attain alignment between the features and the anchors throughout the stages, adaptive convolution is proposed that takes the anchors in addition to the image features as its input and learns the sampled features guided by the anchors. A simple implementation of a two-stage Cascade RPN achieves AR 13.4 points higher than that of the conventional RPN, surpassing any existing region proposal methods. When adopting to Fast R-CNN and Faster R-CNN, Cascade RPN can improve the detection mAP by 3.1 and 3.5 points, respectively. -
- - +## Results and Models -## Citation +### Region proposal performance - +| Method | Backbone | Style | Mem (GB) | Train time (s/iter) | Inf time (fps) | AR 1000 | Config | Download | +| :----: | :------: | :---: | :------: | :-----------------: | :------------: | :-----: | :---------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------: | +| CRPN | R-50-FPN | caffe | - | - | - | 72.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rpn/crpn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rpn/crpn_r50_caffe_fpn_1x_coco/cascade_rpn_r50_caffe_fpn_1x_coco-7aa93cef.pth) | + +### Detection performance + +| Method | Proposal | Backbone | Style | Schedule | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Config | Download | +| :----------: | :---------: | :------: | :---: | :------: | :------: | :-----------------: | :------------: | :----: | :---------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Fast R-CNN | Cascade RPN | R-50-FPN | caffe | 1x | - | - | - | 39.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco/crpn_fast_rcnn_r50_caffe_fpn_1x_coco-cb486e66.pth) | +| Faster R-CNN | Cascade RPN | R-50-FPN | caffe | 1x | - | - | - | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco/crpn_faster_rcnn_r50_caffe_fpn_1x_coco-c8283cca.pth) | + +## Citation We provide the code for reproducing experiment results of [Cascade RPN](https://arxiv.org/abs/1909.06720). -``` +```latex @inproceedings{vu2019cascade, title={Cascade RPN: Delving into High-Quality Region Proposal Network with Adaptive Convolution}, author={Vu, Thang and Jang, Hyunjun and Pham, Trung X and Yoo, Chang D}, @@ -28,18 +39,3 @@ We provide the code for reproducing experiment results of [Cascade RPN](https:// year={2019} } ``` - -## Benchmark - -### Region proposal performance - -| Method | Backbone | Style | Mem (GB) | Train time (s/iter) | Inf time (fps) | AR 1000 | Config | Download | -|:------:|:--------:|:-----:|:--------:|:-------------------:|:--------------:|:-------:|:-------:|:--------------------------------------:| -| CRPN | R-50-FPN | caffe | - | - | - | 72.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rpn/crpn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rpn/crpn_r50_caffe_fpn_1x_coco/cascade_rpn_r50_caffe_fpn_1x_coco-7aa93cef.pth) | - -### Detection performance - -| Method | Proposal | Backbone | Style | Schedule | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Config | Download | -|:-------------:|:-----------:|:--------:|:-------:|:--------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------------------------------------------:| -| Fast R-CNN | Cascade RPN | R-50-FPN | caffe | 1x | - | - | - | 39.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco/crpn_fast_rcnn_r50_caffe_fpn_1x_coco-cb486e66.pth) | -| Faster R-CNN | Cascade RPN | R-50-FPN | caffe | 1x | - | - | - | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco/crpn_faster_rcnn_r50_caffe_fpn_1x_coco-c8283cca.pth) | diff --git a/configs/cascade_rpn/metafile.yml b/configs/cascade_rpn/metafile.yml new file mode 100644 index 0000000..335b2bc --- /dev/null +++ b/configs/cascade_rpn/metafile.yml @@ -0,0 +1,44 @@ +Collections: + - Name: Cascade RPN + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - Cascade RPN + - FPN + - ResNet + Paper: + URL: https://arxiv.org/abs/1909.06720 + Title: 'Cascade RPN: Delving into High-Quality Region Proposal Network with Adaptive Convolution' + README: configs/cascade_rpn/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.8.0/mmdet/models/dense_heads/cascade_rpn_head.py#L538 + Version: v2.8.0 + +Models: + - Name: crpn_fast_rcnn_r50_caffe_fpn_1x_coco + In Collection: Cascade RPN + Config: configs/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco/crpn_fast_rcnn_r50_caffe_fpn_1x_coco-cb486e66.pth + + - Name: crpn_faster_rcnn_r50_caffe_fpn_1x_coco + In Collection: Cascade RPN + Config: configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco/crpn_faster_rcnn_r50_caffe_fpn_1x_coco-c8283cca.pth diff --git a/configs/centernet/README.md b/configs/centernet/README.md index 37c18e7..0f951a0 100644 --- a/configs/centernet/README.md +++ b/configs/centernet/README.md @@ -1,22 +1,34 @@ -# Objects as Points +# CenterNet -## Abstract +> [Objects as Points](https://arxiv.org/abs/1904.07850) + + - +## Abstract Detection identifies objects as axis-aligned boxes in an image. Most successful object detectors enumerate a nearly exhaustive list of potential object locations and classify each. This is wasteful, inefficient, and requires additional post-processing. In this paper, we take a different approach. We model an object as a single point --- the center point of its bounding box. Our detector uses keypoint estimation to find center points and regresses to all other object properties, such as size, 3D location, orientation, and even pose. Our center point based approach, CenterNet, is end-to-end differentiable, simpler, faster, and more accurate than corresponding bounding box based detectors. CenterNet achieves the best speed-accuracy trade-off on the MS COCO dataset, with 28.1% AP at 142 FPS, 37.4% AP at 52 FPS, and 45.1% AP with multi-scale testing at 1.4 FPS. We use the same approach to estimate 3D bounding box in the KITTI benchmark and human pose on the COCO keypoint dataset. Our method performs competitively with sophisticated multi-stage methods and runs in real-time. -
- - +## Results and Models -## Citation +| Backbone | DCN | Mem (GB) | Box AP | Flip box AP | Config | Download | +| :-------: | :-: | :------: | :----: | :---------: | :---------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| ResNet-18 | N | 3.45 | 25.9 | 27.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/centernet/centernet_resnet18_140e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_140e_coco/centernet_resnet18_140e_coco_20210705_093630-bb5b3bf7.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_140e_coco/centernet_resnet18_140e_coco_20210705_093630.log.json) | +| ResNet-18 | Y | 3.47 | 29.5 | 30.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/centernet/centernet_resnet18_dcnv2_140e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_dcnv2_140e_coco/centernet_resnet18_dcnv2_140e_coco_20210702_155131-c8cd631f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_dcnv2_140e_coco/centernet_resnet18_dcnv2_140e_coco_20210702_155131.log.json) | - +Note: + +- Flip box AP setting is single-scale and `flip=True`. +- Due to complex data enhancement, we find that the performance is unstable and may fluctuate by about 0.4 mAP. mAP 29.4 ~ 29.8 is acceptable in ResNet-18-DCNv2. +- Compared to the source code, we refer to [CenterNet-Better](https://github.com/FateScript/CenterNet-better), and make the following changes + - fix wrong image mean and variance in image normalization to be compatible with the pre-trained backbone. + - Use SGD rather than ADAM optimizer and add warmup and grad clip. + - Use DistributedDataParallel as other models in MMDetection rather than using DataParallel. + +## Citation ```latex @article{zhou2019objects, @@ -26,19 +38,3 @@ Detection identifies objects as axis-aligned boxes in an image. Most successful year={2019} } ``` - -## Results and models - -| Backbone | DCN | Mem (GB) | Box AP | Flip box AP| Config | Download | -| :-------------: | :--------: |:----------------: | :------: | :------------: | :----: | :----: | -| ResNet-18 | N | 3.45 | 25.9 | 27.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/centernet/centernet_resnet18_140e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_140e_coco/centernet_resnet18_140e_coco_20210705_093630-bb5b3bf7.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_140e_coco/centernet_resnet18_140e_coco_20210705_093630.log.json) | -| ResNet-18 | Y | 3.47 | 29.5 | 30.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/centernet/centernet_resnet18_dcnv2_140e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_dcnv2_140e_coco/centernet_resnet18_dcnv2_140e_coco_20210702_155131-c8cd631f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_dcnv2_140e_coco/centernet_resnet18_dcnv2_140e_coco_20210702_155131.log.json) | - -Note: - -- Flip box AP setting is single-scale and `flip=True`. -- Due to complex data enhancement, we find that the performance is unstable and may fluctuate by about 0.4 mAP. mAP 29.4 ~ 29.8 is acceptable in ResNet-18-DCNv2. -- Compared to the source code, we refer to [CenterNet-Better](https://github.com/FateScript/CenterNet-better), and make the following changes - - fix wrong image mean and variance in image normalization to be compatible with the pre-trained backbone. - - Use SGD rather than ADAM optimizer and add warmup and grad clip. - - Use DistributedDataParallel as other models in MMDetection rather than using DataParallel. diff --git a/configs/centernet/centernet_resnet18_dcnv2_140e_coco.py b/configs/centernet/centernet_resnet18_dcnv2_140e_coco.py index 9eb1db7..b8a0bb1 100644 --- a/configs/centernet/centernet_resnet18_dcnv2_140e_coco.py +++ b/configs/centernet/centernet_resnet18_dcnv2_140e_coco.py @@ -78,9 +78,9 @@ dict(type='DefaultFormatBundle'), dict( type='Collect', - meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape', - 'scale_factor', 'flip', 'flip_direction', - 'img_norm_cfg', 'border'), + meta_keys=('filename', 'ori_filename', 'ori_shape', + 'img_shape', 'pad_shape', 'scale_factor', 'flip', + 'flip_direction', 'img_norm_cfg', 'border'), keys=['img']) ]) ] @@ -120,3 +120,8 @@ warmup_ratio=1.0 / 1000, step=[18, 24]) # the real step is [18*5, 24*5] runner = dict(max_epochs=28) # the real epoch is 28*5=140 + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (16 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/configs/centripetalnet/README.md b/configs/centripetalnet/README.md index f3d22a5..b01b00a 100644 --- a/configs/centripetalnet/README.md +++ b/configs/centripetalnet/README.md @@ -1,22 +1,29 @@ -# CentripetalNet: Pursuing High-quality Keypoint Pairs for Object Detection +# CentripetalNet -## Abstract +> [CentripetalNet: Pursuing High-quality Keypoint Pairs for Object Detection](https://arxiv.org/abs/2003.09119) - + + +## Abstract Keypoint-based detectors have achieved pretty-well performance. However, incorrect keypoint matching is still widespread and greatly affects the performance of the detector. In this paper, we propose CentripetalNet which uses centripetal shift to pair corner keypoints from the same instance. CentripetalNet predicts the position and the centripetal shift of the corner points and matches corners whose shifted results are aligned. Combining position information, our approach matches corner points more accurately than the conventional embedding approaches do. Corner pooling extracts information inside the bounding boxes onto the border. To make this information more aware at the corners, we design a cross-star deformable convolution network to conduct feature adaption. Furthermore, we explore instance segmentation on anchor-free detectors by equipping our CentripetalNet with a mask prediction module. On MS-COCO test-dev, our CentripetalNet not only outperforms all existing anchor-free detectors with an AP of 48.0% but also achieves comparable performance to the state-of-the-art instance segmentation approaches with a 40.2% MaskAP. -
- - +## Results and Models -## Citation +| Backbone | Batch Size | Step/Total Epochs | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :--------------: | :--------------------------------------------------------------: | :---------------: | :------: | :------------: | :----: | :-----------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| HourglassNet-104 | [16 x 6](./centripetalnet_hourglass104_mstest_16x6_210e_coco.py) | 190/210 | 16.7 | 3.7 | 44.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco/centripetalnet_hourglass104_mstest_16x6_210e_coco_20200915_204804-3ccc61e5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco/centripetalnet_hourglass104_mstest_16x6_210e_coco_20200915_204804.log.json) | - +Note: + +- TTA setting is single-scale and `flip=True`. +- The model we released is the best checkpoint rather than the latest checkpoint (box AP 44.8 vs 44.6 in our experiment). + +## Citation ```latex @InProceedings{Dong_2020_CVPR, @@ -27,14 +34,3 @@ month = {June}, year = {2020} } ``` - -## Results and models - -| Backbone | Batch Size | Step/Total Epochs | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :-------------: | :--------: |:----------------: | :------: | :------------: | :----: | :------: | :--------: | -| HourglassNet-104 | [16 x 6](./centripetalnet_hourglass104_mstest_16x6_210e_coco.py) | 190/210 | 16.7 | 3.7 | 44.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco/centripetalnet_hourglass104_mstest_16x6_210e_coco_20200915_204804-3ccc61e5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco/centripetalnet_hourglass104_mstest_16x6_210e_coco_20200915_204804.log.json) | - -Note: - -- TTA setting is single-scale and `flip=True`. -- The model we released is the best checkpoint rather than the latest checkpoint (box AP 44.8 vs 44.6 in our experiment). diff --git a/configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py b/configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py index e9c5def..5281c5b 100644 --- a/configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py +++ b/configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py @@ -103,3 +103,8 @@ warmup_ratio=1.0 / 3, step=[190]) runner = dict(type='EpochBasedRunner', max_epochs=210) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (16 GPUs) x (6 samples per GPU) +auto_scale_lr = dict(base_batch_size=96) diff --git a/configs/cityscapes/README.md b/configs/cityscapes/README.md index 28310f1..c52a79f 100644 --- a/configs/cityscapes/README.md +++ b/configs/cityscapes/README.md @@ -1,33 +1,18 @@ -# The Cityscapes Dataset for Semantic Urban Scene Understanding +# Cityscapes -## Abstract +> [The Cityscapes Dataset for Semantic Urban Scene Understanding](https://arxiv.org/abs/1604.01685) + + - +## Abstract Visual understanding of complex urban street scenes is an enabling factor for a wide range of applications. Object detection has benefited enormously from large-scale datasets, especially in the context of deep learning. For semantic urban scene understanding, however, no current dataset adequately captures the complexity of real-world urban scenes. To address this, we introduce Cityscapes, a benchmark suite and large-scale dataset to train and test approaches for pixel-level and instance-level semantic labeling. Cityscapes is comprised of a large, diverse set of stereo video sequences recorded in streets from 50 different cities. 5000 of these images have high quality pixel-level annotations; 20000 additional images have coarse annotations to enable methods that leverage large volumes of weakly-labeled data. Crucially, our effort exceeds previous attempts in terms of dataset size, annotation richness, scene variability, and complexity. Our accompanying empirical study provides an in-depth analysis of the dataset characteristics, as well as a performance evaluation of several state-of-the-art approaches based on our benchmark. -
- - - -## Citation - - - -``` -@inproceedings{Cordts2016Cityscapes, - title={The Cityscapes Dataset for Semantic Urban Scene Understanding}, - author={Cordts, Marius and Omran, Mohamed and Ramos, Sebastian and Rehfeld, Timo and Enzweiler, Markus and Benenson, Rodrigo and Franke, Uwe and Roth, Stefan and Schiele, Bernt}, - booktitle={Proc. of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, - year={2016} -} -``` - ## Common settings - All baselines were trained using 8 GPU with a batch size of 8 (1 images per GPU) using the [linear scaling rule](https://arxiv.org/abs/1706.02677) to scale the learning rate. @@ -39,12 +24,23 @@ To address this, we introduce Cityscapes, a benchmark suite and large-scale data ### Faster R-CNN -| Backbone | Style | Lr schd | Scale | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :-------------: | :-----: | :-----: | :---: | :------: | :------------: | :----: | :------: | :--------: | -| R-50-FPN | pytorch | 1x | 800-1024 | 5.2 | - | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes_20200502-829424c0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes_20200502_114915.log.json) | +| Backbone | Style | Lr schd | Scale | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :------: | :-----: | :-----: | :------: | :------: | :------------: | :----: | :---------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | pytorch | 1x | 800-1024 | 5.2 | - | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes_20200502-829424c0.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes_20200502_114915.log.json) | ### Mask R-CNN -| Backbone | Style | Lr schd | Scale | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------: | :------------: | :----: | :-----: | :------: | :------: | -| R-50-FPN | pytorch | 1x | 800-1024 | 5.3 | - | 40.9 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes/mask_rcnn_r50_fpn_1x_cityscapes_20201211_133733-d2858245.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes/mask_rcnn_r50_fpn_1x_cityscapes_20201211_133733.log.json) | +| Backbone | Style | Lr schd | Scale | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :------: | :-----: | :-----: | :------: | :------: | :------------: | :----: | :-----: | :-------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | pytorch | 1x | 800-1024 | 5.3 | - | 40.9 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes/mask_rcnn_r50_fpn_1x_cityscapes_20201211_133733-d2858245.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes/mask_rcnn_r50_fpn_1x_cityscapes_20201211_133733.log.json) | + +## Citation + +```latex +@inproceedings{Cordts2016Cityscapes, + title={The Cityscapes Dataset for Semantic Urban Scene Understanding}, + author={Cordts, Marius and Omran, Mohamed and Ramos, Sebastian and Rehfeld, Timo and Enzweiler, Markus and Benenson, Rodrigo and Franke, Uwe and Roth, Stefan and Schiele, Bernt}, + booktitle={Proc. of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year={2016} +} +``` diff --git a/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py b/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py index c6da80c..ca636bd 100644 --- a/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py +++ b/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py @@ -37,3 +37,8 @@ log_config = dict(interval=100) # For better, more stable performance initialize from COCO load_from = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth' # noqa + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (1 samples per GPU) +auto_scale_lr = dict(base_batch_size=8) diff --git a/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py b/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py index 679890d..83ea058 100644 --- a/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py +++ b/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py @@ -44,3 +44,8 @@ log_config = dict(interval=100) # For better, more stable performance initialize from COCO load_from = 'https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth' # noqa + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (1 samples per GPU) +auto_scale_lr = dict(base_batch_size=8) diff --git a/configs/common/ssj_270k_coco_instance.py b/configs/common/ssj_270k_coco_instance.py new file mode 100644 index 0000000..851098f --- /dev/null +++ b/configs/common/ssj_270k_coco_instance.py @@ -0,0 +1,91 @@ +_base_ = '../_base_/default_runtime.py' +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +image_size = (1024, 1024) + +file_client_args = dict(backend='disk') + +# Standard Scale Jittering (SSJ) resizes and crops an image +# with a resize range of 0.8 to 1.25 of the original image size. +train_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=image_size, + ratio_range=(0.8, 1.25), + multiscale_mode='range', + keep_ratio=True), + dict( + type='RandomCrop', + crop_type='absolute_range', + crop_size=image_size, + recompute_bbox=True, + allow_negative_crop=True), + dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=image_size), # padding to image_size leads 0.5+ mAP + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) + +evaluation = dict(interval=6000, metric=['bbox', 'segm']) + +# optimizer assumes batch_size = (32 GPUs) x (2 samples per GPU) +optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.00004) +optimizer_config = dict(grad_clip=None) + +# lr steps at [0.9, 0.95, 0.975] of the maximum iterations +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.001, + step=[243000, 256500, 263250]) +checkpoint_config = dict(interval=6000) +# The model is trained by 270k iterations with batch_size 64, +# which is roughly equivalent to 144 epochs. +runner = dict(type='IterBasedRunner', max_iters=270000) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (32 GPUs) x (2 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/common/ssj_scp_270k_coco_instance.py b/configs/common/ssj_scp_270k_coco_instance.py new file mode 100644 index 0000000..540839f --- /dev/null +++ b/configs/common/ssj_scp_270k_coco_instance.py @@ -0,0 +1,97 @@ +_base_ = '../_base_/default_runtime.py' +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +image_size = (1024, 1024) + +file_client_args = dict(backend='disk') + +# Standard Scale Jittering (SSJ) resizes and crops an image +# with a resize range of 0.8 to 1.25 of the original image size. +load_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=image_size, + ratio_range=(0.8, 1.25), + multiscale_mode='range', + keep_ratio=True), + dict( + type='RandomCrop', + crop_type='absolute_range', + crop_size=image_size, + recompute_bbox=True, + allow_negative_crop=True), + dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Pad', size=image_size), +] +train_pipeline = [ + dict(type='CopyPaste', max_num_pasted=100), + dict(type='Normalize', **img_norm_cfg), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='MultiImageMixDataset', + dataset=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=load_pipeline), + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) + +evaluation = dict(interval=6000, metric=['bbox', 'segm']) + +# optimizer assumes batch_size = (32 GPUs) x (2 samples per GPU) +optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.00004) +optimizer_config = dict(grad_clip=None) + +# lr steps at [0.9, 0.95, 0.975] of the maximum iterations +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.001, + step=[243000, 256500, 263250]) +checkpoint_config = dict(interval=6000) +# The model is trained by 270k iterations with batch_size 64, +# which is roughly equivalent to 144 epochs. +runner = dict(type='IterBasedRunner', max_iters=270000) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (32 GPUs) x (2 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/convnext/README.md b/configs/convnext/README.md new file mode 100644 index 0000000..edf72e8 --- /dev/null +++ b/configs/convnext/README.md @@ -0,0 +1,40 @@ +# ConvNeXt + +> [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) + +## Abstract + +The "Roaring 20s" of visual recognition began with the introduction of Vision Transformers (ViTs), which quickly superseded ConvNets as the state-of-the-art image classification model. A vanilla ViT, on the other hand, faces difficulties when applied to general computer vision tasks such as object detection and semantic segmentation. It is the hierarchical Transformers (e.g., Swin Transformers) that reintroduced several ConvNet priors, making Transformers practically viable as a generic vision backbone and demonstrating remarkable performance on a wide variety of vision tasks. However, the effectiveness of such hybrid approaches is still largely credited to the intrinsic superiority of Transformers, rather than the inherent inductive biases of convolutions. In this work, we reexamine the design spaces and test the limits of what a pure ConvNet can achieve. We gradually "modernize" a standard ResNet toward the design of a vision Transformer, and discover several key components that contribute to the performance difference along the way. The outcome of this exploration is a family of pure ConvNet models dubbed ConvNeXt. Constructed entirely from standard ConvNet modules, ConvNeXts compete favorably with Transformers in terms of accuracy and scalability, achieving 87.8% ImageNet top-1 accuracy and outperforming Swin Transformers on COCO detection and ADE20K segmentation, while maintaining the simplicity and efficiency of standard ConvNets. + +
+ +
+ +## Results and models + +| Method | Backbone | Pretrain | Lr schd | Multi-scale crop | FP16 | Mem (GB) | box AP | mask AP | Config | Download | +| :----------------: | :--------: | :---------: | :-----: | :--------------: | :--: | :------: | :----: | :-----: | :-------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Mask R-CNN | ConvNeXt-T | ImageNet-1K | 3x | yes | yes | 7.3 | 46.2 | 41.7 | [config](./mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/convnext/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco_20220426_154953-050731f4.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/convnext/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco_20220426_154953.log.json) | +| Cascade Mask R-CNN | ConvNeXt-T | ImageNet-1K | 3x | yes | yes | 9.0 | 50.3 | 43.6 | [config](./cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/convnext/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco_20220509_204200-8f07c40b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/convnext/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco_20220509_204200.log.json) | +| Cascade Mask R-CNN | ConvNeXt-S | ImageNet-1K | 3x | yes | yes | 12.3 | 51.8 | 44.8 | [config](./cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/convnext/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco_20220510_201004-3d24f5a4.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/convnext/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco_20220510_201004.log.json) | + +**Note**: + +- ConvNeXt backbone needs to install [MMClassification](https://github.com/open-mmlab/mmclassification) first, which has abundant backbones for downstream tasks. + +```shell +pip install mmcls>=0.22.0 +``` + +- The performance is unstable. `Cascade Mask R-CNN` may fluctuate about 0.2 mAP. + +## Citation + +```bibtex +@article{liu2022convnet, + title={A ConvNet for the 2020s}, + author={Liu, Zhuang and Mao, Hanzi and Wu, Chao-Yuan and Feichtenhofer, Christoph and Darrell, Trevor and Xie, Saining}, + journal={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + year={2022} +} +``` diff --git a/configs/convnext/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py b/configs/convnext/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py new file mode 100644 index 0000000..0ccc31d --- /dev/null +++ b/configs/convnext/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py @@ -0,0 +1,32 @@ +_base_ = './cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py' # noqa + +# please install mmcls>=0.22.0 +# import mmcls.models to trigger register_module in mmcls +custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False) +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-small_3rdparty_32xb128-noema_in1k_20220301-303e75e3.pth' # noqa + +model = dict( + backbone=dict( + _delete_=True, + type='mmcls.ConvNeXt', + arch='small', + out_indices=[0, 1, 2, 3], + drop_path_rate=0.6, + layer_scale_init_value=1.0, + gap_before_final_norm=False, + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.'))) + +optimizer = dict( + _delete_=True, + constructor='LearningRateDecayOptimizerConstructor', + type='AdamW', + lr=0.0002, + betas=(0.9, 0.999), + weight_decay=0.05, + paramwise_cfg={ + 'decay_rate': 0.7, + 'decay_type': 'layer_wise', + 'num_layers': 12 + }) diff --git a/configs/convnext/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py b/configs/convnext/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py new file mode 100644 index 0000000..93304c0 --- /dev/null +++ b/configs/convnext/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py @@ -0,0 +1,149 @@ +_base_ = [ + '../_base_/models/cascade_mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +# please install mmcls>=0.22.0 +# import mmcls.models to trigger register_module in mmcls +custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False) +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth' # noqa + +model = dict( + backbone=dict( + _delete_=True, + type='mmcls.ConvNeXt', + arch='tiny', + out_indices=[0, 1, 2, 3], + drop_path_rate=0.4, + layer_scale_init_value=1.0, + gap_before_final_norm=False, + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.')), + neck=dict(in_channels=[96, 192, 384, 768]), + roi_head=dict(bbox_head=[ + dict( + type='ConvFCBBoxHead', + num_shared_convs=4, + num_shared_fcs=1, + in_channels=256, + conv_out_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + reg_decoded_bbox=True, + norm_cfg=dict(type='SyncBN', requires_grad=True), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=10.0)), + dict( + type='ConvFCBBoxHead', + num_shared_convs=4, + num_shared_fcs=1, + in_channels=256, + conv_out_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.05, 0.05, 0.1, 0.1]), + reg_class_agnostic=False, + reg_decoded_bbox=True, + norm_cfg=dict(type='SyncBN', requires_grad=True), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=10.0)), + dict( + type='ConvFCBBoxHead', + num_shared_convs=4, + num_shared_fcs=1, + in_channels=256, + conv_out_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.033, 0.033, 0.067, 0.067]), + reg_class_agnostic=False, + reg_decoded_bbox=True, + norm_cfg=dict(type='SyncBN', requires_grad=True), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=10.0)) + ])) + +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +# augmentation strategy originates from DETR / Sparse RCNN +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict( + type='AutoAugment', + policies=[[ + dict( + type='Resize', + img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), + (608, 1333), (640, 1333), (672, 1333), (704, 1333), + (736, 1333), (768, 1333), (800, 1333)], + multiscale_mode='value', + keep_ratio=True) + ], + [ + dict( + type='Resize', + img_scale=[(400, 1333), (500, 1333), (600, 1333)], + multiscale_mode='value', + keep_ratio=True), + dict( + type='RandomCrop', + crop_type='absolute_range', + crop_size=(384, 600), + allow_negative_crop=True), + dict( + type='Resize', + img_scale=[(480, 1333), (512, 1333), (544, 1333), + (576, 1333), (608, 1333), (640, 1333), + (672, 1333), (704, 1333), (736, 1333), + (768, 1333), (800, 1333)], + multiscale_mode='value', + override=True, + keep_ratio=True) + ]]), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +data = dict(train=dict(pipeline=train_pipeline), persistent_workers=True) + +optimizer = dict( + _delete_=True, + constructor='LearningRateDecayOptimizerConstructor', + type='AdamW', + lr=0.0002, + betas=(0.9, 0.999), + weight_decay=0.05, + paramwise_cfg={ + 'decay_rate': 0.7, + 'decay_type': 'layer_wise', + 'num_layers': 6 + }) + +lr_config = dict(warmup_iters=1000, step=[27, 33]) +runner = dict(max_epochs=36) + +# you need to set mode='dynamic' if you are using pytorch<=1.5.0 +fp16 = dict(loss_scale=dict(init_scale=512)) diff --git a/configs/convnext/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco.py b/configs/convnext/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco.py new file mode 100644 index 0000000..e8a283f --- /dev/null +++ b/configs/convnext/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco.py @@ -0,0 +1,90 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +# please install mmcls>=0.22.0 +# import mmcls.models to trigger register_module in mmcls +custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False) +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth' # noqa + +model = dict( + backbone=dict( + _delete_=True, + type='mmcls.ConvNeXt', + arch='tiny', + out_indices=[0, 1, 2, 3], + drop_path_rate=0.4, + layer_scale_init_value=1.0, + gap_before_final_norm=False, + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.')), + neck=dict(in_channels=[96, 192, 384, 768])) + +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +# augmentation strategy originates from DETR / Sparse RCNN +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict( + type='AutoAugment', + policies=[[ + dict( + type='Resize', + img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), + (608, 1333), (640, 1333), (672, 1333), (704, 1333), + (736, 1333), (768, 1333), (800, 1333)], + multiscale_mode='value', + keep_ratio=True) + ], + [ + dict( + type='Resize', + img_scale=[(400, 1333), (500, 1333), (600, 1333)], + multiscale_mode='value', + keep_ratio=True), + dict( + type='RandomCrop', + crop_type='absolute_range', + crop_size=(384, 600), + allow_negative_crop=True), + dict( + type='Resize', + img_scale=[(480, 1333), (512, 1333), (544, 1333), + (576, 1333), (608, 1333), (640, 1333), + (672, 1333), (704, 1333), (736, 1333), + (768, 1333), (800, 1333)], + multiscale_mode='value', + override=True, + keep_ratio=True) + ]]), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +data = dict(train=dict(pipeline=train_pipeline), persistent_workers=True) + +optimizer = dict( + _delete_=True, + constructor='LearningRateDecayOptimizerConstructor', + type='AdamW', + lr=0.0001, + betas=(0.9, 0.999), + weight_decay=0.05, + paramwise_cfg={ + 'decay_rate': 0.95, + 'decay_type': 'layer_wise', + 'num_layers': 6 + }) + +lr_config = dict(warmup_iters=1000, step=[27, 33]) +runner = dict(max_epochs=36) + +# you need to set mode='dynamic' if you are using pytorch<=1.5.0 +fp16 = dict(loss_scale=dict(init_scale=512)) diff --git a/configs/convnext/metafile.yml b/configs/convnext/metafile.yml new file mode 100644 index 0000000..20425bf --- /dev/null +++ b/configs/convnext/metafile.yml @@ -0,0 +1,93 @@ +Models: + - Name: mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco + In Collection: Mask R-CNN + Config: configs/convnext/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco + Metadata: + Training Memory (GB): 7.3 + Epochs: 36 + Training Data: COCO + Training Techniques: + - AdamW + - Mixed Precision Training + Training Resources: 8x A100 GPUs + Architecture: + - ConvNeXt + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 46.2 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 41.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/convnext/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco_20220426_154953-050731f4.pth + Paper: + URL: https://arxiv.org/abs/2201.03545 + Title: 'A ConvNet for the 2020s' + README: configs/convnext/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465 + Version: v2.16.0 + + - Name: cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco + In Collection: Cascade Mask R-CNN + Config: configs/convnext/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py + Metadata: + Training Memory (GB): 9.0 + Epochs: 36 + Training Data: COCO + Training Techniques: + - AdamW + - Mixed Precision Training + Training Resources: 8x A100 GPUs + Architecture: + - ConvNeXt + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 50.3 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 43.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/convnext/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco_20220509_204200-8f07c40b.pth + Paper: + URL: https://arxiv.org/abs/2201.03545 + Title: 'A ConvNet for the 2020s' + README: configs/convnext/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465 + Version: v2.25.0 + + - Name: cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco + In Collection: Cascade Mask R-CNN + Config: configs/convnext/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py + Metadata: + Training Memory (GB): 12.3 + Epochs: 36 + Training Data: COCO + Training Techniques: + - AdamW + - Mixed Precision Training + Training Resources: 8x A100 GPUs + Architecture: + - ConvNeXt + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 51.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 44.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/convnext/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco_20220510_201004-3d24f5a4.pth + Paper: + URL: https://arxiv.org/abs/2201.03545 + Title: 'A ConvNet for the 2020s' + README: configs/convnext/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465 + Version: v2.25.0 diff --git a/configs/cornernet/README.md b/configs/cornernet/README.md index d7dc08c..d0b9e98 100644 --- a/configs/cornernet/README.md +++ b/configs/cornernet/README.md @@ -1,22 +1,35 @@ -# Cornernet: Detecting objects as paired keypoints +# CornerNet -## Abstract +> [Cornernet: Detecting objects as paired keypoints](https://arxiv.org/abs/1808.01244) + + - +## Abstract We propose CornerNet, a new approach to object detection where we detect an object bounding box as a pair of keypoints, the top-left corner and the bottom-right corner, using a single convolution neural network. By detecting objects as paired keypoints, we eliminate the need for designing a set of anchor boxes commonly used in prior single-stage detectors. In addition to our novel formulation, we introduce corner pooling, a new type of pooling layer that helps the network better localize corners. Experiments show that CornerNet achieves a 42.2% AP on MS COCO, outperforming all existing one-stage detectors. -
- - +## Results and Models -## Citation +| Backbone | Batch Size | Step/Total Epochs | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :--------------: | :---------------------------------------------------------: | :---------------: | :------: | :------------: | :----: | :-------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| HourglassNet-104 | [10 x 5](./cornernet_hourglass104_mstest_10x5_210e_coco.py) | 180/210 | 13.9 | 4.2 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco/cornernet_hourglass104_mstest_10x5_210e_coco_20200824_185720-5fefbf1c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco/cornernet_hourglass104_mstest_10x5_210e_coco_20200824_185720.log.json) | +| HourglassNet-104 | [8 x 6](./cornernet_hourglass104_mstest_8x6_210e_coco.py) | 180/210 | 15.9 | 4.2 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco/cornernet_hourglass104_mstest_8x6_210e_coco_20200825_150618-79b44c30.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco/cornernet_hourglass104_mstest_8x6_210e_coco_20200825_150618.log.json) | +| HourglassNet-104 | [32 x 3](./cornernet_hourglass104_mstest_32x3_210e_coco.py) | 180/210 | 9.5 | 3.9 | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco/cornernet_hourglass104_mstest_32x3_210e_coco_20200819_203110-1efaea91.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco/cornernet_hourglass104_mstest_32x3_210e_coco_20200819_203110.log.json) | - +Note: + +- TTA setting is single-scale and `flip=True`. +- Experiments with `images_per_gpu=6` are conducted on Tesla V100-SXM2-32GB, `images_per_gpu=3` are conducted on GeForce GTX 1080 Ti. +- Here are the descriptions of each experiment setting: + - 10 x 5: 10 GPUs with 5 images per gpu. This is the same setting as that reported in the original paper. + - 8 x 6: 8 GPUs with 6 images per gpu. The total batchsize is similar to paper and only need 1 node to train. + - 32 x 3: 32 GPUs with 3 images per gpu. The default setting for 1080TI and need 4 nodes to train. + +## Citation ```latex @inproceedings{law2018cornernet, @@ -28,20 +41,3 @@ We propose CornerNet, a new approach to object detection where we detect an obje organization={Springer Verlag} } ``` - -## Results and models - -| Backbone | Batch Size | Step/Total Epochs | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :-------------: | :--------: |:----------------: | :------: | :------------: | :----: | :------: | :--------: | -| HourglassNet-104 | [10 x 5](./cornernet_hourglass104_mstest_10x5_210e_coco.py) | 180/210 | 13.9 | 4.2 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco/cornernet_hourglass104_mstest_10x5_210e_coco_20200824_185720-5fefbf1c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco/cornernet_hourglass104_mstest_10x5_210e_coco_20200824_185720.log.json) | -| HourglassNet-104 | [8 x 6](./cornernet_hourglass104_mstest_8x6_210e_coco.py) | 180/210 | 15.9 | 4.2 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco/cornernet_hourglass104_mstest_8x6_210e_coco_20200825_150618-79b44c30.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco/cornernet_hourglass104_mstest_8x6_210e_coco_20200825_150618.log.json) | -| HourglassNet-104 | [32 x 3](./cornernet_hourglass104_mstest_32x3_210e_coco.py) | 180/210 | 9.5 | 3.9 | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco/cornernet_hourglass104_mstest_32x3_210e_coco_20200819_203110-1efaea91.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco/cornernet_hourglass104_mstest_32x3_210e_coco_20200819_203110.log.json) | - -Note: - -- TTA setting is single-scale and `flip=True`. -- Experiments with `images_per_gpu=6` are conducted on Tesla V100-SXM2-32GB, `images_per_gpu=3` are conducted on GeForce GTX 1080 Ti. -- Here are the descriptions of each experiment setting: - - 10 x 5: 10 GPUs with 5 images per gpu. This is the same setting as that reported in the original paper. - - 8 x 6: 8 GPUs with 6 images per gpu. The total batchsize is similar to paper and only need 1 node to train. - - 32 x 3: 32 GPUs with 3 images per gpu. The default setting for 1080TI and need 4 nodes to train. diff --git a/configs/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco.py b/configs/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco.py index 89f3876..6cb05a7 100644 --- a/configs/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco.py +++ b/configs/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco.py @@ -103,3 +103,8 @@ warmup_ratio=1.0 / 3, step=[180]) runner = dict(type='EpochBasedRunner', max_epochs=210) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (10 GPUs) x (5 samples per GPU) +auto_scale_lr = dict(base_batch_size=50) diff --git a/configs/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco.py b/configs/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco.py index 873d598..f539cdb 100644 --- a/configs/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco.py +++ b/configs/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco.py @@ -103,3 +103,8 @@ warmup_ratio=1.0 / 3, step=[180]) runner = dict(type='EpochBasedRunner', max_epochs=210) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (32 GPUs) x (3 samples per GPU) +auto_scale_lr = dict(base_batch_size=96) diff --git a/configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py b/configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py index ef749cc..9b115d7 100644 --- a/configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py +++ b/configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py @@ -103,3 +103,8 @@ warmup_ratio=1.0 / 3, step=[180]) runner = dict(type='EpochBasedRunner', max_epochs=210) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (6 samples per GPU) +auto_scale_lr = dict(base_batch_size=48) diff --git a/configs/dcn/README.md b/configs/dcn/README.md index d9d23f0..745b01c 100644 --- a/configs/dcn/README.md +++ b/configs/dcn/README.md @@ -1,24 +1,44 @@ -# Deformable Convolutional Networks +# DCN -## Abstract +> [Deformable Convolutional Networks](https://arxiv.org/abs/1703.06211) + + - +## Abstract Convolutional neural networks (CNNs) are inherently limited to model geometric transformations due to the fixed geometric structures in its building modules. In this work, we introduce two new modules to enhance the transformation modeling capacity of CNNs, namely, deformable convolution and deformable RoI pooling. Both are based on the idea of augmenting the spatial sampling locations in the modules with additional offsets and learning the offsets from target tasks, without additional supervision. The new modules can readily replace their plain counterparts in existing CNNs and can be easily trained end-to-end by standard back-propagation, giving rise to deformable convolutional networks. Extensive experiments validate the effectiveness of our approach on sophisticated vision tasks of object detection and semantic segmentation. -
- - +## Results and Models -## Citation +| Backbone | Model | Style | Conv | Pool | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :----------: | :-----: | :----------: | :---: | :-----: | :------: | :------------: | :----: | :-----: | :---------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | Faster | pytorch | dconv(c3-c5) | - | 1x | 4.0 | 17.8 | 41.3 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130-d68aed1e.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130_212941.log.json) | +| R-50-FPN | Faster | pytorch | - | dpool | 1x | 5.0 | 17.2 | 38.9 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dpool_1x_coco/faster_rcnn_r50_fpn_dpool_1x_coco_20200307-90d3c01d.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dpool_1x_coco/faster_rcnn_r50_fpn_dpool_1x_coco_20200307_203250.log.json) | +| R-101-FPN | Faster | pytorch | dconv(c3-c5) | - | 1x | 6.0 | 12.5 | 42.7 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203-1377f13d.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203_230019.log.json) | +| X-101-32x4d-FPN | Faster | pytorch | dconv(c3-c5) | - | 1x | 7.3 | 10.0 | 44.5 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco_20200203-4f85c69c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco_20200203_001325.log.json) | +| R-50-FPN | Mask | pytorch | dconv(c3-c5) | - | 1x | 4.5 | 15.4 | 41.8 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200203-4d9ad43b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200203_061339.log.json) | +| R-101-FPN | Mask | pytorch | dconv(c3-c5) | - | 1x | 6.5 | 11.7 | 43.5 | 38.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200216-a71f5bce.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200216_191601.log.json) | +| R-50-FPN | Cascade | pytorch | dconv(c3-c5) | - | 1x | 4.5 | 14.6 | 43.8 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130-2f1fca44.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130_220843.log.json) | +| R-101-FPN | Cascade | pytorch | dconv(c3-c5) | - | 1x | 6.4 | 11.0 | 45.0 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203-3b2f0594.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203_224829.log.json) | +| R-50-FPN | Cascade Mask | pytorch | dconv(c3-c5) | - | 1x | 6.0 | 10.0 | 44.4 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200202-42e767a2.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200202_010309.log.json) | +| R-101-FPN | Cascade Mask | pytorch | dconv(c3-c5) | - | 1x | 8.0 | 8.6 | 45.8 | 39.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200204-df0c5f10.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200204_134006.log.json) | +| X-101-32x4d-FPN | Cascade Mask | pytorch | dconv(c3-c5) | - | 1x | 9.2 | | 47.3 | 41.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco-e75f90c8.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco-20200606_183737.log.json) | +| R-50-FPN (FP16) | Mask | pytorch | dconv(c3-c5) | - | 1x | 3.0 | | 41.9 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fp16/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco_20210520_180247-c06429d2.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco_20210520_180247.log.json) | - +**Notes:** + +- `dconv` denotes deformable convolution, `c3-c5` means adding dconv in resnet stage 3 to 5. `dpool` denotes deformable roi pooling. +- The dcn ops are modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch, which should be more memory efficient and slightly faster. +- (\*) For R-50-FPN (dg=4), dg is short for deformable_group. This model is trained and tested on Amazon EC2 p3dn.24xlarge instance. +- **Memory, Train/Inf time is outdated.** -```none +## Citation + +```latex @inproceedings{dai2017deformable, title={Deformable Convolutional Networks}, author={Dai, Jifeng and Qi, Haozhi and Xiong, Yuwen and Li, Yi and Zhang, Guodong and Hu, Han and Wei, Yichen}, @@ -26,43 +46,3 @@ Convolutional neural networks (CNNs) are inherently limited to model geometric t year={2017} } ``` - - - -``` -@article{zhu2018deformable, - title={Deformable ConvNets v2: More Deformable, Better Results}, - author={Zhu, Xizhou and Hu, Han and Lin, Stephen and Dai, Jifeng}, - journal={arXiv preprint arXiv:1811.11168}, - year={2018} -} -``` - -## Results and Models - -| Backbone | Model | Style | Conv | Pool | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -|:----------------:|:------------:|:-------:|:-------------:|:------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| -| R-50-FPN | Faster | pytorch | dconv(c3-c5) | - | 1x | 4.0 | 17.8 | 41.3 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130-d68aed1e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130_212941.log.json) | -| R-50-FPN | Faster | pytorch | mdconv(c3-c5) | - | 1x | 4.1 | 17.6 | 41.4 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200130-d099253b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200130_222144.log.json) | -| *R-50-FPN (dg=4) | Faster | pytorch | mdconv(c3-c5) | - | 1x | 4.2 | 17.4 | 41.5 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco_20200130-01262257.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco_20200130_222058.log.json) | -| R-50-FPN | Faster | pytorch | - | dpool | 1x | 5.0 | 17.2 | 38.9 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dpool_1x_coco/faster_rcnn_r50_fpn_dpool_1x_coco_20200307-90d3c01d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dpool_1x_coco/faster_rcnn_r50_fpn_dpool_1x_coco_20200307_203250.log.json) | -| R-50-FPN | Faster | pytorch | - | mdpool | 1x | 5.8 | 16.6 | 38.7 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco/faster_rcnn_r50_fpn_mdpool_1x_coco_20200307-c0df27ff.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco/faster_rcnn_r50_fpn_mdpool_1x_coco_20200307_203304.log.json) | -| R-101-FPN | Faster | pytorch | dconv(c3-c5) | - | 1x | 6.0 | 12.5 | 42.7 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203-1377f13d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203_230019.log.json) | -| X-101-32x4d-FPN | Faster | pytorch | dconv(c3-c5) | - | 1x | 7.3 | 10.0 | 44.5 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco_20200203-4f85c69c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco_20200203_001325.log.json) | -| R-50-FPN | Mask | pytorch | dconv(c3-c5) | - | 1x | 4.5 | 15.4 | 41.8 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200203-4d9ad43b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200203_061339.log.json) | -| R-50-FPN | Mask | pytorch | mdconv(c3-c5) | - | 1x | 4.5 | 15.1 | 41.5 | 37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200203-ad97591f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200203_063443.log.json) | -| R-101-FPN | Mask | pytorch | dconv(c3-c5) | - | 1x | 6.5 | 11.7 | 43.5 | 38.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200216-a71f5bce.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200216_191601.log.json) | -| R-50-FPN | Cascade | pytorch | dconv(c3-c5) | - | 1x | 4.5 | 14.6 | 43.8 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130-2f1fca44.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130_220843.log.json) | -| R-101-FPN | Cascade | pytorch | dconv(c3-c5) | - | 1x | 6.4 | 11.0 | 45.0 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203-3b2f0594.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203_224829.log.json) | -| R-50-FPN | Cascade Mask | pytorch | dconv(c3-c5) | - | 1x | 6.0 | 10.0 | 44.4 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200202-42e767a2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200202_010309.log.json) | -| R-101-FPN | Cascade Mask | pytorch | dconv(c3-c5) | - | 1x | 8.0 | 8.6 | 45.8 | 39.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200204-df0c5f10.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200204_134006.log.json) | -| X-101-32x4d-FPN | Cascade Mask | pytorch | dconv(c3-c5) | - | 1x | 9.2 | | 47.3 | 41.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco-e75f90c8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco-20200606_183737.log.json) | -| R-50-FPN (FP16) | Mask | pytorch | dconv(c3-c5) | - | 1x | 3.0 | | 41.9 | 37.5 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fp16/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco_20210520_180247-c06429d2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco_20210520_180247.log.json) | -| R-50-FPN (FP16) | Mask | pytorch | mdconv(c3-c5)| - | 1x | 3.1 | | 42.0 | 37.6 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fp16/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco_20210520_180434-cf8fefa5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco_20210520_180434.log.json) | - -**Notes:** - -- `dconv` and `mdconv` denote (modulated) deformable convolution, `c3-c5` means adding dconv in resnet stage 3 to 5. `dpool` and `mdpool` denote (modulated) deformable roi pooling. -- The dcn ops are modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch, which should be more memory efficient and slightly faster. -- (*) For R-50-FPN (dg=4), dg is short for deformable_group. This model is trained and tested on Amazon EC2 p3dn.24xlarge instance. -- **Memory, Train/Inf time is outdated.** diff --git a/configs/dcn/metafile.yml b/configs/dcn/metafile.yml index 7919b84..36f3887 100644 --- a/configs/dcn/metafile.yml +++ b/configs/dcn/metafile.yml @@ -9,8 +9,8 @@ Collections: Architecture: - Deformable Convolution Paper: - URL: https://arxiv.org/abs/1811.11168 - Title: "Deformable ConvNets v2: More Deformable, Better Results" + URL: https://arxiv.org/abs/1703.06211 + Title: "Deformable Convolutional Networks" README: configs/dcn/README.md Code: URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/ops/dcn/deform_conv.py#L15 @@ -37,46 +37,6 @@ Models: box AP: 41.3 Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130-d68aed1e.pth - - Name: faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco - In Collection: Deformable Convolutional Networks - Config: configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py - Metadata: - Training Memory (GB): 4.1 - inference time (ms/im): - - value: 56.82 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (800, 1333) - Epochs: 12 - Results: - - Task: Object Detection - Dataset: COCO - Metrics: - box AP: 41.4 - Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200130-d099253b.pth - - - Name: faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco - In Collection: Deformable Convolutional Networks - Config: configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py - Metadata: - Training Memory (GB): 4.2 - inference time (ms/im): - - value: 57.47 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (800, 1333) - Epochs: 12 - Results: - - Task: Object Detection - Dataset: COCO - Metrics: - box AP: 41.5 - Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco_20200130-01262257.pth - - Name: faster_rcnn_r50_fpn_dpool_1x_coco In Collection: Deformable Convolutional Networks Config: configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py @@ -97,26 +57,6 @@ Models: box AP: 38.9 Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dpool_1x_coco/faster_rcnn_r50_fpn_dpool_1x_coco_20200307-90d3c01d.pth - - Name: faster_rcnn_r50_fpn_mdpool_1x_coco - In Collection: Deformable Convolutional Networks - Config: configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py - Metadata: - Training Memory (GB): 5.8 - inference time (ms/im): - - value: 60.24 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (800, 1333) - Epochs: 12 - Results: - - Task: Object Detection - Dataset: COCO - Metrics: - box AP: 38.7 - Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco/faster_rcnn_r50_fpn_mdpool_1x_coco_20200307-c0df27ff.pth - - Name: faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco In Collection: Deformable Convolutional Networks Config: configs/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py @@ -181,30 +121,6 @@ Models: mask AP: 37.4 Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200203-4d9ad43b.pth - - Name: mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco - In Collection: Deformable Convolutional Networks - Config: configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py - Metadata: - Training Memory (GB): 4.5 - inference time (ms/im): - - value: 66.23 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (800, 1333) - Epochs: 12 - Results: - - Task: Object Detection - Dataset: COCO - Metrics: - box AP: 41.5 - - Task: Instance Segmentation - Dataset: COCO - Metrics: - mask AP: 37.1 - Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200203-ad97591f.pth - - Name: mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco In Collection: Deformable Convolutional Networks Config: configs/dcn/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco.py @@ -226,27 +142,6 @@ Models: mask AP: 37.5 Weights: https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco_20210520_180247-c06429d2.pth - - Name: mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco - In Collection: Deformable Convolutional Networks - Config: configs/dcn/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco.py - Metadata: - Training Memory (GB): 3.1 - Training Techniques: - - SGD with Momentum - - Weight Decay - - Mixed Precision Training - Epochs: 12 - Results: - - Task: Object Detection - Dataset: COCO - Metrics: - box AP: 42.0 - - Task: Instance Segmentation - Dataset: COCO - Metrics: - mask AP: 37.6 - Weights: https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco_20210520_180434-cf8fefa5.pth - - Name: mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco In Collection: Deformable Convolutional Networks Config: configs/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py diff --git a/configs/dcnv2/README.md b/configs/dcnv2/README.md new file mode 100644 index 0000000..d230f20 --- /dev/null +++ b/configs/dcnv2/README.md @@ -0,0 +1,37 @@ +# DCNv2 + +> [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168) + + + +## Abstract + +The superior performance of Deformable Convolutional Networks arises from its ability to adapt to the geometric variations of objects. Through an examination of its adaptive behavior, we observe that while the spatial support for its neural features conforms more closely than regular ConvNets to object structure, this support may nevertheless extend well beyond the region of interest, causing features to be influenced by irrelevant image content. To address this problem, we present a reformulation of Deformable ConvNets that improves its ability to focus on pertinent image regions, through increased modeling power and stronger training. The modeling power is enhanced through a more comprehensive integration of deformable convolution within the network, and by introducing a modulation mechanism that expands the scope of deformation modeling. To effectively harness this enriched modeling capability, we guide network training via a proposed feature mimicking scheme that helps the network to learn features that reflect the object focus and classification power of RCNN features. With the proposed contributions, this new version of Deformable ConvNets yields significant performance gains over the original model and produces leading results on the COCO benchmark for object detection and instance segmentation. + +## Results and Models + +| Backbone | Model | Style | Conv | Pool | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :---------------: | :----: | :-----: | :-----------: | :----: | :-----: | :------: | :------------: | :----: | :-----: | :------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | Faster | pytorch | mdconv(c3-c5) | - | 1x | 4.1 | 17.6 | 41.4 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcnv2/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200130-d099253b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200130_222144.log.json) | +| \*R-50-FPN (dg=4) | Faster | pytorch | mdconv(c3-c5) | - | 1x | 4.2 | 17.4 | 41.5 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcnv2/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco_20200130-01262257.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco_20200130_222058.log.json) | +| R-50-FPN | Faster | pytorch | - | mdpool | 1x | 5.8 | 16.6 | 38.7 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcnv2/faster_rcnn_r50_fpn_mdpool_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco/faster_rcnn_r50_fpn_mdpool_1x_coco_20200307-c0df27ff.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco/faster_rcnn_r50_fpn_mdpool_1x_coco_20200307_203304.log.json) | +| R-50-FPN | Mask | pytorch | mdconv(c3-c5) | - | 1x | 4.5 | 15.1 | 41.5 | 37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcnv2/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200203-ad97591f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200203_063443.log.json) | +| R-50-FPN (FP16) | Mask | pytorch | mdconv(c3-c5) | - | 1x | 3.1 | | 42.0 | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fp16/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco_20210520_180434-cf8fefa5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco_20210520_180434.log.json) | + +**Notes:** + +- `mdconv` denotes modulated deformable convolution, `c3-c5` means adding dconv in resnet stage 3 to 5. `mdpool` denotes modulated deformable roi pooling. +- The dcn ops are modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch, which should be more memory efficient and slightly faster. +- (\*) For R-50-FPN (dg=4), dg is short for deformable_group. This model is trained and tested on Amazon EC2 p3dn.24xlarge instance. +- **Memory, Train/Inf time is outdated.** + +## Citation + +```latex +@article{zhu2018deformable, + title={Deformable ConvNets v2: More Deformable, Better Results}, + author={Zhu, Xizhou and Hu, Han and Lin, Stephen and Dai, Jifeng}, + journal={arXiv preprint arXiv:1811.11168}, + year={2018} +} +``` diff --git a/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py b/configs/dcnv2/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py similarity index 100% rename from configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py rename to configs/dcnv2/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py diff --git a/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py b/configs/dcnv2/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py similarity index 100% rename from configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py rename to configs/dcnv2/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py diff --git a/configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py b/configs/dcnv2/faster_rcnn_r50_fpn_mdpool_1x_coco.py similarity index 100% rename from configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py rename to configs/dcnv2/faster_rcnn_r50_fpn_mdpool_1x_coco.py diff --git a/configs/dcn/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco.py b/configs/dcnv2/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco.py similarity index 100% rename from configs/dcn/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco.py rename to configs/dcnv2/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco.py diff --git a/configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py b/configs/dcnv2/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py similarity index 100% rename from configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py rename to configs/dcnv2/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py diff --git a/configs/dcnv2/metafile.yml b/configs/dcnv2/metafile.yml new file mode 100644 index 0000000..9049421 --- /dev/null +++ b/configs/dcnv2/metafile.yml @@ -0,0 +1,123 @@ +Collections: + - Name: Deformable Convolutional Networks v2 + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - Deformable Convolution + Paper: + URL: https://arxiv.org/abs/1811.11168 + Title: "Deformable ConvNets v2: More Deformable, Better Results" + README: configs/dcnv2/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/ops/dcn/deform_conv.py#L15 + Version: v2.0.0 + +Models: + - Name: faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco + In Collection: Deformable Convolutional Networks v2 + Config: configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 4.1 + inference time (ms/im): + - value: 56.82 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200130-d099253b.pth + + - Name: faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco + In Collection: Deformable Convolutional Networks v2 + Config: configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py + Metadata: + Training Memory (GB): 4.2 + inference time (ms/im): + - value: 57.47 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco_20200130-01262257.pth + + - Name: faster_rcnn_r50_fpn_mdpool_1x_coco + In Collection: Deformable Convolutional Networks v2 + Config: configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py + Metadata: + Training Memory (GB): 5.8 + inference time (ms/im): + - value: 60.24 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco/faster_rcnn_r50_fpn_mdpool_1x_coco_20200307-c0df27ff.pth + + - Name: mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco + In Collection: Deformable Convolutional Networks v2 + Config: configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 4.5 + inference time (ms/im): + - value: 66.23 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.5 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200203-ad97591f.pth + + - Name: mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco + In Collection: Deformable Convolutional Networks v2 + Config: configs/dcn/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 3.1 + Training Techniques: + - SGD with Momentum + - Weight Decay + - Mixed Precision Training + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.0 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco_20210520_180434-cf8fefa5.pth diff --git a/configs/ddod/README.md b/configs/ddod/README.md new file mode 100644 index 0000000..9ab1f48 --- /dev/null +++ b/configs/ddod/README.md @@ -0,0 +1,31 @@ +# DDOD + +> [Disentangle Your Dense Object Detector](https://arxiv.org/pdf/2107.02963.pdf) + + + +## Abstract + +Deep learning-based dense object detectors have achieved great success in the past few years and have been applied to numerous multimedia applications such as video understanding. However, the current training pipeline for dense detectors is compromised to lots of conjunctions that may not hold. In this paper, we investigate three such important conjunctions: 1) only samples assigned as positive in classification head are used to train the regression head; 2) classification and regression share the same input feature and computational fields defined by the parallel head architecture; and 3) samples distributed in different feature pyramid layers are treated equally when computing the loss. We first carry out a series of pilot experiments to show disentangling such conjunctions can lead to persistent performance improvement. Then, based on these findings, we propose Disentangled Dense Object Detector(DDOD), in which simple and effective disentanglement mechanisms are designed and integrated into the current state-of-the-art dense object detectors. Extensive experiments on MS COCO benchmark show that our approach can lead to 2.0 mAP, 2.4 mAP and 2.2 mAP absolute improvements on RetinaNet, FCOS, and ATSS baselines with negligible extra overhead. Notably, our best model reaches 55.0 mAP on the COCO test-dev set and 93.5 AP on the hard subset of WIDER FACE, achieving new state-of-the-art performance on these two competitive benchmarks. Code is available at https://github.com/zehuichen123/DDOD. + +
+ +
+ +## Results and Models + +| Model | Backbone | Style | Lr schd | Mem (GB) | box AP | Config | Download | +| :-------: | :------: | :-----: | :-----: | :------: | :----: | :--------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| DDOD-ATSS | R-50 | pytorch | 1x | 3.4 | 41.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ddod/ddod_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ddod/ddod_r50_fpn_1x_coco/ddod_r50_fpn_1x_coco_20220523_223737-29b2fc67.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/ddod/ddod_r50_fpn_1x_coco/ddod_r50_fpn_1x_coco_20220523_223737.log.json) | + +## Citation + +```latex +@inproceedings{chen2021disentangle, +title={Disentangle Your Dense Object Detector}, +author={Chen, Zehui and Yang, Chenhongyi and Li, Qiaofei and Zhao, Feng and Zha, Zheng-Jun and Wu, Feng}, +booktitle={Proceedings of the 29th ACM International Conference on Multimedia}, +pages={4939--4948}, +year={2021} +} +``` diff --git a/configs/ddod/ddod_r50_fpn_1x_coco.py b/configs/ddod/ddod_r50_fpn_1x_coco.py new file mode 100644 index 0000000..02dd2fe --- /dev/null +++ b/configs/ddod/ddod_r50_fpn_1x_coco.py @@ -0,0 +1,67 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict( + type='DDOD', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5), + bbox_head=dict( + type='DDODHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + octave_base_scale=8, + scales_per_octave=1, + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=2.0), + loss_iou=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + train_cfg=dict( + # assigner is mean cls_assigner + assigner=dict(type='ATSSAssigner', topk=9, alpha=0.8), + reg_assigner=dict(type='ATSSAssigner', topk=9, alpha=0.5), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) + +# This `persistent_workers` is only valid when PyTorch>=1.7.0 +data = dict(persistent_workers=True) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/ddod/metafile.yml b/configs/ddod/metafile.yml new file mode 100644 index 0000000..c223950 --- /dev/null +++ b/configs/ddod/metafile.yml @@ -0,0 +1,33 @@ +Collections: + - Name: DDOD + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - DDOD + - FPN + - ResNet + Paper: + URL: https://arxiv.org/pdf/2107.02963.pdf + Title: 'Disentangle Your Dense Object Detector' + README: configs/ddod/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.25.0/mmdet/models/detectors/ddod.py#L6 + Version: v2.25.0 + +Models: + - Name: ddod_r50_fpn_1x_coco + In Collection: DDOD + Config: configs/ddod/ddod_r50_fpn_1x_coco.py + Metadata: + Training Memory (GB): 3.4 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/ddod/ddod_r50_fpn_1x_coco/ddod_r50_fpn_1x_coco_20220523_223737-29b2fc67.pth diff --git a/configs/deepfashion/README.md b/configs/deepfashion/README.md index e2c042f..45daec0 100644 --- a/configs/deepfashion/README.md +++ b/configs/deepfashion/README.md @@ -1,23 +1,19 @@ -# DeepFashion: Powering Robust Clothes Recognition and Retrieval With Rich Annotations +# DeepFashion -## Abstract +> [DeepFashion: Powering Robust Clothes Recognition and Retrieval With Rich Annotations](https://openaccess.thecvf.com/content_cvpr_2016/html/Liu_DeepFashion_Powering_Robust_CVPR_2016_paper.html) + + - +## Abstract Recent advances in clothes recognition have been driven by the construction of clothes datasets. Existing datasets are limited in the amount of annotations and are difficult to cope with the various challenges in real-world applications. In this work, we introduce DeepFashion, a large-scale clothes dataset with comprehensive annotations. It contains over 800,000 images, which are richly annotated with massive attributes, clothing landmarks, and correspondence of images taken under different scenarios including store, street snapshot, and consumer. Such rich annotations enable the development of powerful algorithms in clothes recognition and facilitating future researches. To demonstrate the advantages of DeepFashion, we propose a new deep model, namely FashionNet, which learns clothing features by jointly predicting clothing attributes and landmarks. The estimated landmarks are then employed to pool or gate the learned features. It is optimized in an iterative manner. Extensive experiments demonstrate the effectiveness of FashionNet and the usefulness of DeepFashion. -
- - - ## Introduction - - [MMFashion](https://github.com/open-mmlab/mmfashion) develops "fashion parsing and segmentation" module based on the dataset [DeepFashion-Inshop](https://drive.google.com/drive/folders/0B7EVK8r0v71pVDZFQXRsMDZCX1E?usp=sharing). @@ -55,9 +51,15 @@ mmdetection After that you can train the Mask RCNN r50 on DeepFashion-In-shop dataset by launching training with the `mask_rcnn_r50_fpn_1x.py` config or creating your own config file. +## Results and Models + +| Backbone | Model type | Dataset | bbox detection Average Precision | segmentation Average Precision | Config | Download (Google) | +| :------: | :--------: | :-----------------: | :------------------------------: | :----------------------------: | :----------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| ResNet50 | Mask RCNN | DeepFashion-In-shop | 0.599 | 0.584 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/deepfashion/mask_rcnn_r50_fpn_15e_deepfashion.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/deepfashion/mask_rcnn_r50_fpn_15e_deepfashion/mask_rcnn_r50_fpn_15e_deepfashion_20200329_192752.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/deepfashion/mask_rcnn_r50_fpn_15e_deepfashion/20200329_192752.log.json) | + ## Citation -``` +```latex @inproceedings{liuLQWTcvpr16DeepFashion, author = {Liu, Ziwei and Luo, Ping and Qiu, Shi and Wang, Xiaogang and Tang, Xiaoou}, title = {DeepFashion: Powering Robust Clothes Recognition and Retrieval with Rich Annotations}, @@ -66,9 +68,3 @@ or creating your own config file. year = {2016} } ``` - -## Model Zoo - -| Backbone | Model type | Dataset | bbox detection Average Precision | segmentation Average Precision | Config | Download (Google) | -| :---------: | :----------: | :-----------------: | :--------------------------------: | :----------------------------: | :---------:| :-------------------------: | -| ResNet50 | Mask RCNN | DeepFashion-In-shop | 0.599 | 0.584 |[config](https://github.com/open-mmlab/mmdetection/blob/master/configs/deepfashion/mask_rcnn_r50_fpn_15e_deepfashion.py)| [model](https://download.openmmlab.com/mmdetection/v2.0/deepfashion/mask_rcnn_r50_fpn_15e_deepfashion/mask_rcnn_r50_fpn_15e_deepfashion_20200329_192752.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/deepfashion/mask_rcnn_r50_fpn_15e_deepfashion/20200329_192752.log.json) | diff --git a/configs/deformable_detr/README.md b/configs/deformable_detr/README.md index e3b8e41..378e1f2 100644 --- a/configs/deformable_detr/README.md +++ b/configs/deformable_detr/README.md @@ -1,26 +1,35 @@ -# Deformable DETR: Deformable Transformers for End-to-End Object Detection +# Deformable DETR -## Abstract +> [Deformable DETR: Deformable Transformers for End-to-End Object Detection](https://arxiv.org/abs/2010.04159) + + - +## Abstract DETR has been recently proposed to eliminate the need for many hand-designed components in object detection while demonstrating good performance. However, it suffers from slow convergence and limited feature spatial resolution, due to the limitation of Transformer attention modules in processing image feature maps. To mitigate these issues, we proposed Deformable DETR, whose attention modules only attend to a small set of key sampling points around a reference. Deformable DETR can achieve better performance than DETR (especially on small objects) with 10 times less training epochs. Extensive experiments on the COCO benchmark demonstrate the effectiveness of our approach. -
- - +## Results and Models -## Citation +| Backbone | Model | Lr schd | box AP | Config | Download | +| :------: | :---------------------------------: | :-----: | :----: | :------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | Deformable DETR | 50e | 44.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_r50_16x2_50e_coco/deformable_detr_r50_16x2_50e_coco_20210419_220030-a12b9512.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_r50_16x2_50e_coco/deformable_detr_r50_16x2_50e_coco_20210419_220030-a12b9512.log.json) | +| R-50 | + iterative bounding box refinement | 50e | 46.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/deformable_detr/deformable_detr_refine_r50_16x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_refine_r50_16x2_50e_coco/deformable_detr_refine_r50_16x2_50e_coco_20210419_220503-5f5dff21.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_refine_r50_16x2_50e_coco/deformable_detr_refine_r50_16x2_50e_coco_20210419_220503-5f5dff21.log.json) | +| R-50 | ++ two-stage Deformable DETR | 50e | 46.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/deformable_detr/deformable_detr_twostage_refine_r50_16x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_twostage_refine_r50_16x2_50e_coco/deformable_detr_twostage_refine_r50_16x2_50e_coco_20210419_220613-9d28ab72.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_twostage_refine_r50_16x2_50e_coco/deformable_detr_twostage_refine_r50_16x2_50e_coco_20210419_220613-9d28ab72.log.json) | - +# NOTE + +1. All models are trained with batch size 32. +2. The performance is unstable. `Deformable DETR` and `iterative bounding box refinement` may fluctuate about 0.3 mAP. `two-stage Deformable DETR` may fluctuate about 0.2 mAP. + +## Citation We provide the config files for Deformable DETR: [Deformable DETR: Deformable Transformers for End-to-End Object Detection](https://arxiv.org/abs/2010.04159). -``` +```latex @inproceedings{ zhu2021deformable, title={Deformable DETR: Deformable Transformers for End-to-End Object Detection}, @@ -30,16 +39,3 @@ year={2021}, url={https://openreview.net/forum?id=gZ9hCDWe6ke} } ``` - -## Results and Models - -| Backbone | Model | Lr schd | box AP | Config | Download | -|:------:|:--------:|:--------------:|:------:|:------:|:--------:| -| R-50 | Deformable DETR |50e | 44.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_r50_16x2_50e_coco/deformable_detr_r50_16x2_50e_coco_20210419_220030-a12b9512.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_r50_16x2_50e_coco/deformable_detr_r50_16x2_50e_coco_20210419_220030-a12b9512.log.json) | -| R-50 | + iterative bounding box refinement |50e | 46.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/deformable_detr/deformable_detr_refine_r50_16x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_refine_r50_16x2_50e_coco/deformable_detr_refine_r50_16x2_50e_coco_20210419_220503-5f5dff21.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_refine_r50_16x2_50e_coco/deformable_detr_refine_r50_16x2_50e_coco_20210419_220503-5f5dff21.log.json) | -| R-50 | ++ two-stage Deformable DETR |50e | 46.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/deformable_detr/deformable_detr_twostage_refine_r50_16x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_twostage_refine_r50_16x2_50e_coco/deformable_detr_twostage_refine_r50_16x2_50e_coco_20210419_220613-9d28ab72.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_twostage_refine_r50_16x2_50e_coco/deformable_detr_twostage_refine_r50_16x2_50e_coco_20210419_220613-9d28ab72.log.json) | - -# NOTE - -1. All models are trained with batch size 32. -2. The performance is unstable. `Deformable DETR` and `iterative bounding box refinement` may fluctuate about 0.3 mAP. `two-stage Deformable DETR` may fluctuate about 0.2 mAP. diff --git a/configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py b/configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py index 0a58d9a..c64d09f 100644 --- a/configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py +++ b/configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py @@ -170,3 +170,8 @@ # learning policy lr_config = dict(policy='step', step=[40]) runner = dict(type='EpochBasedRunner', max_epochs=50) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (16 GPUs) x (2 samples per GPU) +auto_scale_lr = dict(base_batch_size=32) diff --git a/configs/detectors/README.md b/configs/detectors/README.md index c90302b..baa245f 100644 --- a/configs/detectors/README.md +++ b/configs/detectors/README.md @@ -1,35 +1,18 @@ -# DetectoRS: Detecting Objects with Recursive Feature Pyramid and Switchable Atrous Convolution +# DetectoRS -## Abstract +> [DetectoRS: Detecting Objects with Recursive Feature Pyramid and Switchable Atrous Convolution](https://arxiv.org/abs/2006.02334) + + - +## Abstract Many modern object detectors demonstrate outstanding performances by using the mechanism of looking and thinking twice. In this paper, we explore this mechanism in the backbone design for object detection. At the macro level, we propose Recursive Feature Pyramid, which incorporates extra feedback connections from Feature Pyramid Networks into the bottom-up backbone layers. At the micro level, we propose Switchable Atrous Convolution, which convolves the features with different atrous rates and gathers the results using switch functions. Combining them results in DetectoRS, which significantly improves the performances of object detection. On COCO test-dev, DetectoRS achieves state-of-the-art 55.7% box AP for object detection, 48.5% mask AP for instance segmentation, and 50.0% PQ for panoptic segmentation. -
- - - -## Citation - - - -We provide the config files for [DetectoRS: Detecting Objects with Recursive Feature Pyramid and Switchable Atrous Convolution](https://arxiv.org/pdf/2006.02334.pdf). - -```BibTeX -@article{qiao2020detectors, - title={DetectoRS: Detecting Objects with Recursive Feature Pyramid and Switchable Atrous Convolution}, - author={Qiao, Siyuan and Chen, Liang-Chieh and Yuille, Alan}, - journal={arXiv preprint arXiv:2006.02334}, - year={2020} -} -``` - -## Dataset +## Introduction DetectoRS requires COCO and [COCO-stuff](http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/stuffthingmaps_trainval2017.zip) dataset for training. You need to download and extract it in the COCO dataset path. The directory should be like this. @@ -59,15 +42,28 @@ They can be used independently. Combining them together results in DetectoRS. The results on COCO 2017 val are shown in the below table. -| Method | Detector | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -|:------:|:--------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| -| RFP | Cascade + ResNet-50 | 1x | 7.5 | - | 44.8 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/cascade_rcnn_r50_rfp_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_rfp_1x_coco/cascade_rcnn_r50_rfp_1x_coco-8cf51bfd.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_rfp_1x_coco/cascade_rcnn_r50_rfp_1x_coco_20200624_104126.log.json) | -| SAC | Cascade + ResNet-50 | 1x | 5.6 | - | 45.0| | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/cascade_rcnn_r50_sac_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_sac_1x_coco/cascade_rcnn_r50_sac_1x_coco-24bfda62.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_sac_1x_coco/cascade_rcnn_r50_sac_1x_coco_20200624_104402.log.json) | -| DetectoRS | Cascade + ResNet-50 | 1x | 9.9 | - | 47.4 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/detectors_cascade_rcnn_r50_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_cascade_rcnn_r50_1x_coco/detectors_cascade_rcnn_r50_1x_coco-32a10ba0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_cascade_rcnn_r50_1x_coco/detectors_cascade_rcnn_r50_1x_coco_20200706_001203.log.json) | -| RFP | HTC + ResNet-50 | 1x | 11.2 | - | 46.6 | 40.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/htc_r50_rfp_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_rfp_1x_coco/htc_r50_rfp_1x_coco-8ff87c51.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_rfp_1x_coco/htc_r50_rfp_1x_coco_20200624_103053.log.json) | -| SAC | HTC + ResNet-50 | 1x | 9.3 | - | 46.4 | 40.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/htc_r50_sac_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_sac_1x_coco/htc_r50_sac_1x_coco-bfa60c54.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_sac_1x_coco/htc_r50_sac_1x_coco_20200624_103111.log.json) | -| DetectoRS | HTC + ResNet-50 | 1x | 13.6 | - | 49.1 | 42.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/detectors_htc_r50_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_htc_r50_1x_coco/detectors_htc_r50_1x_coco-329b1453.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_htc_r50_1x_coco/detectors_htc_r50_1x_coco_20200624_103659.log.json) | -| DetectoRS | HTC + ResNet-101 | 20e | 19.6 | | 50.5 | 43.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/detectors_htc_r101_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_htc_r101_20e_coco/detectors_htc_r101_20e_coco_20210419_203638-348d533b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_htc_r101_20e_coco/detectors_htc_r101_20e_coco_20210419_203638.log.json) | +| Method | Detector | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------: | :-----------------: | :-----: | :------: | :------------: | :----: | :-----: | :---------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| RFP | Cascade + ResNet-50 | 1x | 7.5 | - | 44.8 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/cascade_rcnn_r50_rfp_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_rfp_1x_coco/cascade_rcnn_r50_rfp_1x_coco-8cf51bfd.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_rfp_1x_coco/cascade_rcnn_r50_rfp_1x_coco_20200624_104126.log.json) | +| SAC | Cascade + ResNet-50 | 1x | 5.6 | - | 45.0 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/cascade_rcnn_r50_sac_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_sac_1x_coco/cascade_rcnn_r50_sac_1x_coco-24bfda62.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_sac_1x_coco/cascade_rcnn_r50_sac_1x_coco_20200624_104402.log.json) | +| DetectoRS | Cascade + ResNet-50 | 1x | 9.9 | - | 47.4 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/detectors_cascade_rcnn_r50_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_cascade_rcnn_r50_1x_coco/detectors_cascade_rcnn_r50_1x_coco-32a10ba0.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_cascade_rcnn_r50_1x_coco/detectors_cascade_rcnn_r50_1x_coco_20200706_001203.log.json) | +| RFP | HTC + ResNet-50 | 1x | 11.2 | - | 46.6 | 40.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/htc_r50_rfp_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_rfp_1x_coco/htc_r50_rfp_1x_coco-8ff87c51.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_rfp_1x_coco/htc_r50_rfp_1x_coco_20200624_103053.log.json) | +| SAC | HTC + ResNet-50 | 1x | 9.3 | - | 46.4 | 40.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/htc_r50_sac_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_sac_1x_coco/htc_r50_sac_1x_coco-bfa60c54.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_sac_1x_coco/htc_r50_sac_1x_coco_20200624_103111.log.json) | +| DetectoRS | HTC + ResNet-50 | 1x | 13.6 | - | 49.1 | 42.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/detectors_htc_r50_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_htc_r50_1x_coco/detectors_htc_r50_1x_coco-329b1453.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_htc_r50_1x_coco/detectors_htc_r50_1x_coco_20200624_103659.log.json) | +| DetectoRS | HTC + ResNet-101 | 20e | 19.6 | | 50.5 | 43.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/detectors_htc_r101_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_htc_r101_20e_coco/detectors_htc_r101_20e_coco_20210419_203638-348d533b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_htc_r101_20e_coco/detectors_htc_r101_20e_coco_20210419_203638.log.json) | *Note*: This is a re-implementation based on MMDetection-V2. The original implementation is based on MMDetection-V1. + +## Citation + +We provide the config files for [DetectoRS: Detecting Objects with Recursive Feature Pyramid and Switchable Atrous Convolution](https://arxiv.org/pdf/2006.02334.pdf). + +```latex +@article{qiao2020detectors, + title={DetectoRS: Detecting Objects with Recursive Feature Pyramid and Switchable Atrous Convolution}, + author={Qiao, Siyuan and Chen, Liang-Chieh and Yuille, Alan}, + journal={arXiv preprint arXiv:2006.02334}, + year={2020} +} +``` diff --git a/configs/detr/README.md b/configs/detr/README.md index a04e7ab..9f2485d 100644 --- a/configs/detr/README.md +++ b/configs/detr/README.md @@ -1,26 +1,28 @@ -# End-to-End Object Detection with Transformers +# DETR -## Abstract +> [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) + + - +## Abstract We present a new method that views object detection as a direct set prediction problem. Our approach streamlines the detection pipeline, effectively removing the need for many hand-designed components like a non-maximum suppression procedure or anchor generation that explicitly encode our prior knowledge about the task. The main ingredients of the new framework, called DEtection TRansformer or DETR, are a set-based global loss that forces unique predictions via bipartite matching, and a transformer encoder-decoder architecture. Given a fixed small set of learned object queries, DETR reasons about the relations of the objects and the global image context to directly output the final set of predictions in parallel. The new model is conceptually simple and does not require a specialized library, unlike many other modern detectors. DETR demonstrates accuracy and run-time performance on par with the well-established and highly-optimized Faster RCNN baseline on the challenging COCO object detection dataset. Moreover, DETR can be easily generalized to produce panoptic segmentation in a unified manner. We show that it significantly outperforms competitive baselines. -
- - +## Results and Models -## Citation +| Backbone | Model | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :------: | :---: | :-----: | :------: | :------------: | :----: | :----------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | DETR | 150e | 7.9 | | 40.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detr/detr_r50_8x2_150e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detr/detr_r50_8x2_150e_coco/detr_r50_8x2_150e_coco_20201130_194835-2c4b8974.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/detr/detr_r50_8x2_150e_coco/detr_r50_8x2_150e_coco_20201130_194835.log.json) | - +## Citation We provide the config files for DETR: [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872). -```BibTeX +```latex @inproceedings{detr, author = {Nicolas Carion and Francisco Massa and @@ -33,9 +35,3 @@ We provide the config files for DETR: [End-to-End Object Detection with Transfor year = {2020} } ``` - -## Results and Models - -| Backbone | Model | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -|:------:|:--------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| -| R-50 | DETR |150e |7.9| | 40.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detr/detr_r50_8x2_150e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detr/detr_r50_8x2_150e_coco/detr_r50_8x2_150e_coco_20201130_194835-2c4b8974.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/detr/detr_r50_8x2_150e_coco/detr_r50_8x2_150e_coco_20201130_194835.log.json) | diff --git a/configs/double_heads/README.md b/configs/double_heads/README.md index 8191d7a..4a149b5 100644 --- a/configs/double_heads/README.md +++ b/configs/double_heads/README.md @@ -1,22 +1,24 @@ -# Rethinking Classification and Localization for Object Detection +# Double Heads -## Abstract +> [Rethinking Classification and Localization for Object Detection](https://arxiv.org/abs/1904.06493) - + + +## Abstract Two head structures (i.e. fully connected head and convolution head) have been widely used in R-CNN based detectors for classification and localization tasks. However, there is a lack of understanding of how does these two head structures work for these two tasks. To address this issue, we perform a thorough analysis and find an interesting fact that the two head structures have opposite preferences towards the two tasks. Specifically, the fully connected head (fc-head) is more suitable for the classification task, while the convolution head (conv-head) is more suitable for the localization task. Furthermore, we examine the output feature maps of both heads and find that fc-head has more spatial sensitivity than conv-head. Thus, fc-head has more capability to distinguish a complete object from part of an object, but is not robust to regress the whole object. Based upon these findings, we propose a Double-Head method, which has a fully connected head focusing on classification and a convolution head for bounding box regression. Without bells and whistles, our method gains +3.5 and +2.8 AP on MS COCO dataset from Feature Pyramid Network (FPN) baselines with ResNet-50 and ResNet-101 backbones, respectively. -
- - +## Results and Models -## Citation +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :------: | :-----: | :-----: | :------: | :------------: | :----: | :--------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | pytorch | 1x | 6.8 | 9.5 | 40.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/double_heads/dh_faster_rcnn_r50_fpn_1x_coco/dh_faster_rcnn_r50_fpn_1x_coco_20200130-586b67df.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/double_heads/dh_faster_rcnn_r50_fpn_1x_coco/dh_faster_rcnn_r50_fpn_1x_coco_20200130_220238.log.json) | - +## Citation ```latex @article{wu2019rethinking, @@ -28,9 +30,3 @@ Two head structures (i.e. fully connected head and convolution head) have been w primaryClass={cs.CV} } ``` - -## Results and models - -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | -| R-50-FPN | pytorch | 1x | 6.8 | 9.5 | 40.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/double_heads/dh_faster_rcnn_r50_fpn_1x_coco/dh_faster_rcnn_r50_fpn_1x_coco_20200130-586b67df.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/double_heads/dh_faster_rcnn_r50_fpn_1x_coco/dh_faster_rcnn_r50_fpn_1x_coco_20200130_220238.log.json) | diff --git a/configs/dyhead/README.md b/configs/dyhead/README.md new file mode 100644 index 0000000..8e6aed3 --- /dev/null +++ b/configs/dyhead/README.md @@ -0,0 +1,52 @@ +# DyHead + +> [Dynamic Head: Unifying Object Detection Heads with Attentions](https://arxiv.org/abs/2106.08322) + + + +## Abstract + +The complex nature of combining localization and classification in object detection has resulted in the flourished development of methods. Previous works tried to improve the performance in various object detection heads but failed to present a unified view. In this paper, we present a novel dynamic head framework to unify object detection heads with attentions. By coherently combining multiple self-attention mechanisms between feature levels for scale-awareness, among spatial locations for spatial-awareness, and within output channels for task-awareness, the proposed approach significantly improves the representation ability of object detection heads without any computational overhead. Further experiments demonstrate that the effectiveness and efficiency of the proposed dynamic head on the COCO benchmark. With a standard ResNeXt-101-DCN backbone, we largely improve the performance over popular object detectors and achieve a new state-of-the-art at 54.0 AP. Furthermore, with latest transformer backbone and extra data, we can push current best COCO result to a new record at 60.6 AP. + +
+ +
+ +## Results and Models + +| Method | Backbone | Style | Setting | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :----: | :------: | :-----: | :----------: | :-----: | :------: | :------------: | :----: | :----------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| ATSS | R-50 | caffe | reproduction | 1x | 5.4 | 13.2 | 42.5 | [config](./atss_r50_caffe_fpn_dyhead_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dyhead/atss_r50_fpn_dyhead_for_reproduction_1x_coco/atss_r50_fpn_dyhead_for_reproduction_4x4_1x_coco_20220107_213939-162888e6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/dyhead/atss_r50_fpn_dyhead_for_reproduction_1x_coco/atss_r50_fpn_dyhead_for_reproduction_4x4_1x_coco_20220107_213939.log.json) | +| ATSS | R-50 | pytorch | simple | 1x | 4.9 | 13.7 | 43.3 | [config](./atss_r50_fpn_dyhead_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dyhead/atss_r50_fpn_dyhead_4x4_1x_coco/atss_r50_fpn_dyhead_4x4_1x_coco_20211219_023314-eaa620c6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/dyhead/atss_r50_fpn_dyhead_4x4_1x_coco/atss_r50_fpn_dyhead_4x4_1x_coco_20211219_023314.log.json) | + +- We trained the above models with 4 GPUs and 4 `samples_per_gpu`. +- The `reproduction` setting aims to reproduce the official implementation based on Detectron2. +- The `simple` setting serves as a minimum example to use DyHead in MMDetection. Specifically, + - it adds `DyHead` to `neck` after `FPN` + - it sets `stacked_convs=0` to `bbox_head` +- The `simple` setting achieves higher AP than the original implementation. + We have not conduct ablation study between the two settings. + `dict(type='Pad', size_divisor=128)` may further improve AP by prefer spatial alignment across pyramid levels, although large padding reduces efficiency. + +We also trained the model with Swin-L backbone. Results are as below. + +| Method | Backbone | Style | Setting | Lr schd | mstrain | box AP | Config | Download | +| :----: | :------: | :---: | :----------: | :-----: | :------: | :----: | :----------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| ATSS | Swin-L | caffe | reproduction | 2x | 480~1200 | 56.2 | [config](./atss_swin-l-p4-w12_fpn_dyhead_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dyhead/atss_swin-l-p4-w12_fpn_dyhead_mstrain_2x_coco/atss_swin-l-p4-w12_fpn_dyhead_mstrain_2x_coco_20220509_100315-bc5b6516.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/dyhead/atss_swin-l-p4-w12_fpn_dyhead_mstrain_2x_coco/atss_swin-l-p4-w12_fpn_dyhead_mstrain_2x_coco_20220509_100315.log.json) | + +## Relation to Other Methods + +- DyHead can be regarded as an improved [SEPC](https://arxiv.org/abs/2005.03101) with [DyReLU modules](https://arxiv.org/abs/2003.10027) and simplified [SE blocks](https://arxiv.org/abs/1709.01507). +- Xiyang Dai et al., the author team of DyHead, adopt it for [Dynamic DETR](https://openaccess.thecvf.com/content/ICCV2021/html/Dai_Dynamic_DETR_End-to-End_Object_Detection_With_Dynamic_Attention_ICCV_2021_paper.html). + The description of Dynamic Encoder in Sec. 3.2 will help you understand DyHead. + +## Citation + +```latex +@inproceedings{DyHead_CVPR2021, + author = {Dai, Xiyang and Chen, Yinpeng and Xiao, Bin and Chen, Dongdong and Liu, Mengchen and Yuan, Lu and Zhang, Lei}, + title = {Dynamic Head: Unifying Object Detection Heads With Attentions}, + booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2021} +} +``` diff --git a/configs/dyhead/atss_r50_caffe_fpn_dyhead_1x_coco.py b/configs/dyhead/atss_r50_caffe_fpn_dyhead_1x_coco.py new file mode 100644 index 0000000..223b653 --- /dev/null +++ b/configs/dyhead/atss_r50_caffe_fpn_dyhead_1x_coco.py @@ -0,0 +1,112 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + type='ATSS', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe')), + neck=[ + dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5), + dict( + type='DyHead', + in_channels=256, + out_channels=256, + num_blocks=6, + # disable zero_init_offset to follow official implementation + zero_init_offset=False) + ], + bbox_head=dict( + type='ATSSHead', + num_classes=80, + in_channels=256, + pred_kernel_size=1, # follow DyHead official implementation + stacked_convs=0, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + octave_base_scale=8, + scales_per_octave=1, + strides=[8, 16, 32, 64, 128], + center_offset=0.5), # follow DyHead official implementation + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=2.0), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + # training and testing settings + train_cfg=dict( + assigner=dict(type='ATSSAssigner', topk=9), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) + +# use caffe img_norm, size_divisor=128, pillow resize +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=(1333, 800), + keep_ratio=True, + backend='pillow'), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=128), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True, backend='pillow'), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=128), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/dyhead/atss_r50_fpn_dyhead_1x_coco.py b/configs/dyhead/atss_r50_fpn_dyhead_1x_coco.py new file mode 100644 index 0000000..8c5109d --- /dev/null +++ b/configs/dyhead/atss_r50_fpn_dyhead_1x_coco.py @@ -0,0 +1,65 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + type='ATSS', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=[ + dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5), + dict(type='DyHead', in_channels=256, out_channels=256, num_blocks=6) + ], + bbox_head=dict( + type='ATSSHead', + num_classes=80, + in_channels=256, + stacked_convs=0, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + octave_base_scale=8, + scales_per_octave=1, + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=2.0), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + # training and testing settings + train_cfg=dict( + assigner=dict(type='ATSSAssigner', topk=9), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/dyhead/atss_swin-l-p4-w12_fpn_dyhead_mstrain_2x_coco.py b/configs/dyhead/atss_swin-l-p4-w12_fpn_dyhead_mstrain_2x_coco.py new file mode 100644 index 0000000..dc9c328 --- /dev/null +++ b/configs/dyhead/atss_swin-l-p4-w12_fpn_dyhead_mstrain_2x_coco.py @@ -0,0 +1,164 @@ +_base_ = '../_base_/default_runtime.py' + +pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth' # noqa +model = dict( + type='ATSS', + backbone=dict( + type='SwinTransformer', + pretrain_img_size=384, + embed_dims=192, + depths=[2, 2, 18, 2], + num_heads=[6, 12, 24, 48], + window_size=12, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.2, + patch_norm=True, + out_indices=(1, 2, 3), + # Please only add indices that would be used + # in FPN, otherwise some parameter will not be used + with_cp=False, + convert_weights=True, + init_cfg=dict(type='Pretrained', checkpoint=pretrained)), + neck=[ + dict( + type='FPN', + in_channels=[384, 768, 1536], + out_channels=256, + start_level=0, + add_extra_convs='on_output', + num_outs=5), + dict( + type='DyHead', + in_channels=256, + out_channels=256, + num_blocks=6, + # disable zero_init_offset to follow official implementation + zero_init_offset=False) + ], + bbox_head=dict( + type='ATSSHead', + num_classes=80, + in_channels=256, + pred_kernel_size=1, # follow DyHead official implementation + stacked_convs=0, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + octave_base_scale=8, + scales_per_octave=1, + strides=[8, 16, 32, 64, 128], + center_offset=0.5), # follow DyHead official implementation + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=2.0), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + # training and testing settings + train_cfg=dict( + assigner=dict(type='ATSSAssigner', topk=9), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) + +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(2000, 480), (2000, 1200)], + multiscale_mode='range', + keep_ratio=True, + backend='pillow'), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=128), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2000, 1200), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True, backend='pillow'), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=128), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +# Use RepeatDataset to speed up training +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=2, + dataset=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric='bbox') + +# optimizer +optimizer_config = dict(grad_clip=None) +optimizer = dict( + type='AdamW', + lr=0.00005, + betas=(0.9, 0.999), + weight_decay=0.05, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) + })) + +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[8, 11]) +runner = dict(type='EpochBasedRunner', max_epochs=12) diff --git a/configs/dyhead/metafile.yml b/configs/dyhead/metafile.yml new file mode 100644 index 0000000..3fb7370 --- /dev/null +++ b/configs/dyhead/metafile.yml @@ -0,0 +1,76 @@ +Collections: + - Name: DyHead + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 4x T4 GPUs + Architecture: + - ATSS + - DyHead + - FPN + - ResNet + - Deformable Convolution + - Pyramid Convolution + Paper: + URL: https://arxiv.org/abs/2106.08322 + Title: 'Dynamic Head: Unifying Object Detection Heads with Attentions' + README: configs/dyhead/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.22.0/mmdet/models/necks/dyhead.py#L130 + Version: v2.22.0 + +Models: + - Name: atss_r50_caffe_fpn_dyhead_1x_coco + In Collection: DyHead + Config: configs/dyhead/atss_r50_caffe_fpn_dyhead_1x_coco.py + Metadata: + Training Memory (GB): 5.4 + inference time (ms/im): + - value: 75.7 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dyhead/atss_r50_fpn_dyhead_for_reproduction_1x_coco/atss_r50_fpn_dyhead_for_reproduction_4x4_1x_coco_20220107_213939-162888e6.pth + + - Name: atss_r50_fpn_dyhead_1x_coco + In Collection: DyHead + Config: configs/dyhead/atss_r50_fpn_dyhead_1x_coco.py + Metadata: + Training Memory (GB): 4.9 + inference time (ms/im): + - value: 73.1 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dyhead/atss_r50_fpn_dyhead_4x4_1x_coco/atss_r50_fpn_dyhead_4x4_1x_coco_20211219_023314-eaa620c6.pth + + - Name: atss_swin-l-p4-w12_fpn_dyhead_mstrain_2x_coco + In Collection: DyHead + Config: configs/dyhead/atss_swin-l-p4-w12_fpn_dyhead_mstrain_2x_coco.py + Metadata: + Training Memory (GB): 58.4 + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 56.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dyhead/atss_swin-l-p4-w12_fpn_dyhead_mstrain_2x_coco/atss_swin-l-p4-w12_fpn_dyhead_mstrain_2x_coco_20220509_100315-bc5b6516.pth diff --git a/configs/dynamic_rcnn/README.md b/configs/dynamic_rcnn/README.md index d79b181..0045df7 100644 --- a/configs/dynamic_rcnn/README.md +++ b/configs/dynamic_rcnn/README.md @@ -1,24 +1,26 @@ -# Dynamic R-CNN: Towards High Quality Object Detection via Dynamic Training +# Dynamic R-CNN -## Abstract +> [Dynamic R-CNN: Towards High Quality Object Detection via Dynamic Training](https://arxiv.org/abs/2004.06002) + + - +## Abstract Although two-stage object detectors have continuously advanced the state-of-the-art performance in recent years, the training process itself is far from crystal. In this work, we first point out the inconsistency problem between the fixed network settings and the dynamic training procedure, which greatly affects the performance. For example, the fixed label assignment strategy and regression loss function cannot fit the distribution change of proposals and thus are harmful to training high quality detectors. Consequently, we propose Dynamic R-CNN to adjust the label assignment criteria (IoU threshold) and the shape of regression loss function (parameters of SmoothL1 Loss) automatically based on the statistics of proposals during training. This dynamic design makes better use of the training samples and pushes the detector to fit more high quality samples. Specifically, our method improves upon ResNet-50-FPN baseline with 1.9% AP and 5.5% AP90 on the MS COCO dataset with no extra overhead. -
- - +## Results and Models -## Citation +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :------: | :-----: | :-----: | :------: | :------------: | :----: | :------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | pytorch | 1x | 3.8 | | 38.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x/dynamic_rcnn_r50_fpn_1x-62a3f276.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x/dynamic_rcnn_r50_fpn_1x_20200618_095048.log.json) | - +## Citation -``` +```latex @article{DynamicRCNN, author = {Hongkai Zhang and Hong Chang and Bingpeng Ma and Naiyan Wang and Xilin Chen}, title = {Dynamic {R-CNN}: Towards High Quality Object Detection via Dynamic Training}, @@ -26,9 +28,3 @@ Although two-stage object detectors have continuously advanced the state-of-the- year = {2020} } ``` - -## Results and Models - -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -|:---------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| -| R-50 | pytorch | 1x | 3.8 | | 38.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x/dynamic_rcnn_r50_fpn_1x-62a3f276.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x/dynamic_rcnn_r50_fpn_1x_20200618_095048.log.json) | diff --git a/configs/efficientnet/README.md b/configs/efficientnet/README.md new file mode 100644 index 0000000..99b0572 --- /dev/null +++ b/configs/efficientnet/README.md @@ -0,0 +1,30 @@ +# EfficientNet + +> [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946v5) + + + +## Introduction + +Convolutional Neural Networks (ConvNets) are commonly developed at a fixed resource budget, and then scaled up for better accuracy if more resources are available. In this paper, we systematically study model scaling and identify that carefully balancing network depth, width, and resolution can lead to better performance. Based on this observation, we propose a new scaling method that uniformly scales all dimensions of depth/width/resolution using a simple yet highly effective compound coefficient. We demonstrate the effectiveness of this method on scaling up MobileNets and ResNet. + +To go even further, we use neural architecture search to design a new baseline network and scale it up to obtain a family of models, called EfficientNets, which achieve much better accuracy and efficiency than previous ConvNets. In particular, our EfficientNet-B7 achieves state-of-the-art 84.3% top-1 accuracy on ImageNet, while being 8.4x smaller and 6.1x faster on inference than the best existing ConvNet. Our EfficientNets also transfer well and achieve state-of-the-art accuracy on CIFAR-100 (91.7%), Flowers (98.8%), and 3 other transfer learning datasets, with an order of magnitude fewer parameters. + +## Results and Models + +### RetinaNet + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Efficientnet-b3 | pytorch | 1x | - | - | 40.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/efficientnet/retinanet_effb3_fpn_crop896_8x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/efficientnet/retinanet_effb3_fpn_crop896_8x4_1x_coco/retinanet_effb3_fpn_crop896_8x4_1x_coco_20220322_234806-615a0dda.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/efficientnet/retinanet_effb3_fpn_crop896_8x4_1x_coco/retinanet_effb3_fpn_crop896_8x4_1x_coco_20220322_234806.log.json) | + +## Citation + +```latex +@article{tan2019efficientnet, + title={Efficientnet: Rethinking model scaling for convolutional neural networks}, + author={Tan, Mingxing and Le, Quoc V}, + journal={arXiv preprint arXiv:1905.11946}, + year={2019} +} +``` diff --git a/configs/efficientnet/metafile.yml b/configs/efficientnet/metafile.yml new file mode 100644 index 0000000..de40b95 --- /dev/null +++ b/configs/efficientnet/metafile.yml @@ -0,0 +1,19 @@ +Models: + - Name: retinanet_effb3_fpn_crop896_8x4_1x_coco + In Collection: RetinaNet + Config: configs/efficientnet/retinanet_effb3_fpn_crop896_8x4_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/efficientnet/retinanet_effb3_fpn_crop896_8x4_1x_coco/retinanet_effb3_fpn_crop896_8x4_1x_coco_20220322_234806-615a0dda.pth + Paper: + URL: https://arxiv.org/abs/1905.11946v5 + Title: 'EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks' + README: configs/efficientnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.23.0/mmdet/models/backbones/efficientnet.py#L159 + Version: v2.23.0 diff --git a/configs/efficientnet/retinanet_effb3_fpn_crop896_8x4_1x_coco.py b/configs/efficientnet/retinanet_effb3_fpn_crop896_8x4_1x_coco.py new file mode 100644 index 0000000..c90bc16 --- /dev/null +++ b/configs/efficientnet/retinanet_effb3_fpn_crop896_8x4_1x_coco.py @@ -0,0 +1,94 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py' +] + +cudnn_benchmark = True +norm_cfg = dict(type='BN', requires_grad=True) +checkpoint = 'https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b3_3rdparty_8xb32-aa_in1k_20220119-5b4887a0.pth' # noqa +model = dict( + backbone=dict( + _delete_=True, + type='EfficientNet', + arch='b3', + drop_path_rate=0.2, + out_indices=(3, 4, 5), + frozen_stages=0, + norm_cfg=dict( + type='SyncBN', requires_grad=True, eps=1e-3, momentum=0.01), + norm_eval=False, + init_cfg=dict( + type='Pretrained', prefix='backbone', checkpoint=checkpoint)), + neck=dict( + in_channels=[48, 136, 384], + start_level=0, + out_channels=256, + relu_before_extra_convs=True, + no_norm_on_lateral=True, + norm_cfg=norm_cfg), + bbox_head=dict(type='RetinaSepBNHead', num_ins=5, norm_cfg=norm_cfg), + # training and testing settings + train_cfg=dict(assigner=dict(neg_iou_thr=0.5))) + +# dataset settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_size = (896, 896) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=img_size, + ratio_range=(0.8, 1.2), + keep_ratio=True), + dict(type='RandomCrop', crop_size=img_size), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=img_size), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_size, + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=img_size), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer_config = dict(grad_clip=None) +optimizer = dict( + type='SGD', + lr=0.04, + momentum=0.9, + weight_decay=0.0001, + paramwise_cfg=dict(norm_decay_mult=0, bypass_duplicate=True)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.1, + step=[8, 11]) +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=12) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (4 samples per GPU) +auto_scale_lr = dict(base_batch_size=32) diff --git a/configs/empirical_attention/README.md b/configs/empirical_attention/README.md index 8937be2..fc2620a 100644 --- a/configs/empirical_attention/README.md +++ b/configs/empirical_attention/README.md @@ -1,22 +1,27 @@ -# An Empirical Study of Spatial Attention Mechanisms in Deep Networks +# Empirical Attention -## Abstract +> [An Empirical Study of Spatial Attention Mechanisms in Deep Networks](https://arxiv.org/abs/1904.05873) + + - +## Abstract Attention mechanisms have become a popular component in deep neural networks, yet there has been little examination of how different influencing factors and methods for computing attention from these factors affect performance. Toward a better general understanding of attention mechanisms, we present an empirical study that ablates various spatial attention elements within a generalized attention formulation, encompassing the dominant Transformer attention as well as the prevalent deformable convolution and dynamic convolution modules. Conducted on a variety of applications, the study yields significant findings about spatial attention in deep networks, some of which run counter to conventional understanding. For example, we find that the query and key content comparison in Transformer attention is negligible for self-attention, but vital for encoder-decoder attention. A proper combination of deformable convolution with key content only saliency achieves the best accuracy-efficiency tradeoff in self-attention. Our results suggest that there exists much room for improvement in the design of attention mechanisms. -
- - +## Results and Models -## Citation +| Backbone | Attention Component | DCN | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :------: | :-----------------: | :-: | :-----: | :------: | :------------: | :----: | :-------------------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | 1111 | N | 1x | 8.0 | 13.8 | 40.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco/faster_rcnn_r50_fpn_attention_1111_1x_coco_20200130-403cccba.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco/faster_rcnn_r50_fpn_attention_1111_1x_coco_20200130_210344.log.json) | +| R-50 | 0010 | N | 1x | 4.2 | 18.4 | 39.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco/faster_rcnn_r50_fpn_attention_0010_1x_coco_20200130-7cb0c14d.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco/faster_rcnn_r50_fpn_attention_0010_1x_coco_20200130_210125.log.json) | +| R-50 | 1111 | Y | 1x | 8.0 | 12.7 | 42.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco_20200130-8b2523a6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco_20200130_204442.log.json) | +| R-50 | 0010 | Y | 1x | 4.2 | 17.1 | 42.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco_20200130-1a2e831d.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco_20200130_210410.log.json) | - +## Citation ```latex @article{zhu2019empirical, @@ -26,12 +31,3 @@ Attention mechanisms have become a popular component in deep neural networks, ye year={2019} } ``` - -## Results and Models - -| Backbone | Attention Component | DCN | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -|:---------:|:-------------------:|:----:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| -| R-50 | 1111 | N | 1x | 8.0 | 13.8 | 40.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco/faster_rcnn_r50_fpn_attention_1111_1x_coco_20200130-403cccba.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco/faster_rcnn_r50_fpn_attention_1111_1x_coco_20200130_210344.log.json) | -| R-50 | 0010 | N | 1x | 4.2 | 18.4 | 39.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco/faster_rcnn_r50_fpn_attention_0010_1x_coco_20200130-7cb0c14d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco/faster_rcnn_r50_fpn_attention_0010_1x_coco_20200130_210125.log.json) | -| R-50 | 1111 | Y | 1x | 8.0 | 12.7 | 42.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco_20200130-8b2523a6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco_20200130_204442.log.json) | -| R-50 | 0010 | Y | 1x | 4.2 | 17.1 | 42.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco_20200130-1a2e831d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco_20200130_210410.log.json) | diff --git a/configs/fast_rcnn/README.md b/configs/fast_rcnn/README.md index 0e98c13..767f76c 100644 --- a/configs/fast_rcnn/README.md +++ b/configs/fast_rcnn/README.md @@ -1,22 +1,67 @@ # Fast R-CNN -## Abstract +> [Fast R-CNN](https://arxiv.org/abs/1504.08083) + + - +## Abstract This paper proposes a Fast Region-based Convolutional Network method (Fast R-CNN) for object detection. Fast R-CNN builds on previous work to efficiently classify object proposals using deep convolutional networks. Compared to previous work, Fast R-CNN employs several innovations to improve training and testing speed while also increasing detection accuracy. Fast R-CNN trains the very deep VGG16 network 9x faster than R-CNN, is 213x faster at test-time, and achieves a higher mAP on PASCAL VOC 2012. Compared to SPPnet, Fast R-CNN trains VGG16 3x faster, tests 10x faster, and is more accurate. -
- - +## Introduction -## Citation +Before training the Fast R-CNN, users should first train an [RPN](../rpn/README.md), and use the RPN to extract the region proposals. - +- Firstly, extract the region proposals of the val set by this command as below: + +```bash +./tools/dist_test.sh \ + configs/rpn_r50_fpn_1x_coco.py \ + checkpoints/rpn_r50_fpn_1x_coco_20200218-5525fa2e.pth \ + 8 \ + --out proposals/rpn_r50_fpn_1x_val2017.pkl +``` + +- Then, change the `ann_file` and `img_prefix` of `data.test` in the RPN config to train set as below: + +```python +data = dict( + test=dict( + ann_file='data/coco/annotations/instances_train2017.json', + img_prefix='data/coco/train2017/')) +``` + +- Extract the region proposals of the train set by this command as below: + +```bash +./tools/dist_test.sh \ + configs/rpn_r50_fpn_1x_coco.py \ + checkpoints/rpn_r50_fpn_1x_coco_20200218-5525fa2e.pth \ + 8 \ + --out proposals/rpn_r50_fpn_1x_train2017.pkl +``` + +- Modify the path of `proposal_file` in Fast R-CNN config as below: + +```python +data = dict( + train=dict( + proposal_file='proposals/rpn_r50_fpn_1x_train2017.pkl'), + val=dict( + proposal_file='proposals/rpn_r50_fpn_1x_val2017.pkl'), + test=dict( + proposal_file='proposals/rpn_r50_fpn_1x_val2017.pkl')) +``` + +Finally, users can start training the Fast R-CNN. + +## Results and Models + +## Citation ```latex @inproceedings{girshick2015fast, @@ -26,5 +71,3 @@ This paper proposes a Fast Region-based Convolutional Network method (Fast R-CNN year={2015} } ``` - -## Results and models diff --git a/configs/faster_rcnn/README.md b/configs/faster_rcnn/README.md index f96256e..865d375 100644 --- a/configs/faster_rcnn/README.md +++ b/configs/faster_rcnn/README.md @@ -1,81 +1,88 @@ -# Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks +# Faster R-CNN -## Abstract +> [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](https://arxiv.org/abs/1506.01497) - + + +## Abstract State-of-the-art object detection networks depend on region proposal algorithms to hypothesize object locations. Advances like SPPnet and Fast R-CNN have reduced the running time of these detection networks, exposing region proposal computation as a bottleneck. In this work, we introduce a Region Proposal Network (RPN) that shares full-image convolutional features with the detection network, thus enabling nearly cost-free region proposals. An RPN is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position. The RPN is trained end-to-end to generate high-quality region proposals, which are used by Fast R-CNN for detection. We further merge RPN and Fast R-CNN into a single network by sharing their convolutional features---using the recently popular terminology of neural networks with 'attention' mechanisms, the RPN component tells the unified network where to look. For the very deep VGG-16 model, our detection system has a frame rate of 5fps (including all steps) on a GPU, while achieving state-of-the-art object detection accuracy on PASCAL VOC 2007, 2012, and MS COCO datasets with only 300 proposals per image. In ILSVRC and COCO 2015 competitions, Faster R-CNN and RPN are the foundations of the 1st-place winning entries in several tracks. -
- - - -## Citation - - -```latex -@article{Ren_2017, - title={Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks}, - journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, - publisher={Institute of Electrical and Electronics Engineers (IEEE)}, - author={Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian}, - year={2017}, - month={Jun}, -} -``` - -## Results and models - -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | -| R-50-DC5 | caffe | 1x | - | - | 37.2 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco/faster_rcnn_r50_caffe_dc5_1x_coco_20201030_151909-531f0f43.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco/faster_rcnn_r50_caffe_dc5_1x_coco_20201030_151909.log.json) | -| R-50-FPN | caffe | 1x | 3.8 | | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco/faster_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.378_20200504_180032-c5925ee5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco/faster_rcnn_r50_caffe_fpn_1x_coco_20200504_180032.log.json) | -| R-50-FPN | pytorch | 1x | 4.0 | 21.4 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | -| R-50-FPN (FP16) | pytorch | 1x | 3.4 | 28.8 | 37.5 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fp16/faster_rcnn_r50_fpn_fp16_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fp16/faster_rcnn_r50_fpn_fp16_1x_coco/faster_rcnn_r50_fpn_fp16_1x_coco_20200204-d4dc1471.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fp16/faster_rcnn_r50_fpn_fp16_1x_coco/faster_rcnn_r50_fpn_fp16_1x_coco_20200204_143530.log.json) | -| R-50-FPN | pytorch | 2x | - | - | 38.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_20200504_210434.log.json) | -| R-101-FPN | caffe | 1x | 5.7 | | 39.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco/faster_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.398_20200504_180057-b269e9dd.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco/faster_rcnn_r101_caffe_fpn_1x_coco_20200504_180057.log.json) | -| R-101-FPN | pytorch | 1x | 6.0 | 15.6 | 39.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_1x_coco/faster_rcnn_r101_fpn_1x_coco_20200130-f513f705.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_1x_coco/faster_rcnn_r101_fpn_1x_coco_20200130_204655.log.json) | -| R-101-FPN | pytorch | 2x | - | - | 39.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_2x_coco/faster_rcnn_r101_fpn_2x_coco_bbox_mAP-0.398_20200504_210455-1d2dac9c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_2x_coco/faster_rcnn_r101_fpn_2x_coco_20200504_210455.log.json) | -| X-101-32x4d-FPN | pytorch | 1x | 7.2 | 13.8 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco/faster_rcnn_x101_32x4d_fpn_1x_coco_20200203-cff10310.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco/faster_rcnn_x101_32x4d_fpn_1x_coco_20200203_000520.log.json) | -| X-101-32x4d-FPN | pytorch | 2x | - | - | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco/faster_rcnn_x101_32x4d_fpn_2x_coco_bbox_mAP-0.412_20200506_041400-64a12c0b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco/faster_rcnn_x101_32x4d_fpn_2x_coco_20200506_041400.log.json) | -| X-101-64x4d-FPN | pytorch | 1x | 10.3 | 9.4 | 42.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco/faster_rcnn_x101_64x4d_fpn_1x_coco_20200204-833ee192.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco/faster_rcnn_x101_64x4d_fpn_1x_coco_20200204_134340.log.json) | -| X-101-64x4d-FPN | pytorch | 2x | - | - | 41.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco/faster_rcnn_x101_64x4d_fpn_2x_coco_20200512_161033-5961fa95.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco/faster_rcnn_x101_64x4d_fpn_2x_coco_20200512_161033.log.json) | +## Results and Models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-C4 | caffe | 1x | - | - | 35.6 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco/faster_rcnn_r50_caffe_c4_1x_coco_20220316_150152-3f885b85.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco/faster_rcnn_r50_caffe_c4_1x_coco_20220316_150152.log.json) | +| R-50-DC5 | caffe | 1x | - | - | 37.2 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco/faster_rcnn_r50_caffe_dc5_1x_coco_20201030_151909-531f0f43.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco/faster_rcnn_r50_caffe_dc5_1x_coco_20201030_151909.log.json) | +| R-50-FPN | caffe | 1x | 3.8 | | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco/faster_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.378_20200504_180032-c5925ee5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco/faster_rcnn_r50_caffe_fpn_1x_coco_20200504_180032.log.json) | +| R-50-FPN | pytorch | 1x | 4.0 | 21.4 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | +| R-50-FPN (FP16) | pytorch | 1x | 3.4 | 28.8 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fp16/faster_rcnn_r50_fpn_fp16_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fp16/faster_rcnn_r50_fpn_fp16_1x_coco/faster_rcnn_r50_fpn_fp16_1x_coco_20200204-d4dc1471.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fp16/faster_rcnn_r50_fpn_fp16_1x_coco/faster_rcnn_r50_fpn_fp16_1x_coco_20200204_143530.log.json) | +| R-50-FPN | pytorch | 2x | - | - | 38.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_20200504_210434.log.json) | +| R-101-FPN | caffe | 1x | 5.7 | | 39.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco/faster_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.398_20200504_180057-b269e9dd.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco/faster_rcnn_r101_caffe_fpn_1x_coco_20200504_180057.log.json) | +| R-101-FPN | pytorch | 1x | 6.0 | 15.6 | 39.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_1x_coco/faster_rcnn_r101_fpn_1x_coco_20200130-f513f705.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_1x_coco/faster_rcnn_r101_fpn_1x_coco_20200130_204655.log.json) | +| R-101-FPN | pytorch | 2x | - | - | 39.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_2x_coco/faster_rcnn_r101_fpn_2x_coco_bbox_mAP-0.398_20200504_210455-1d2dac9c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_2x_coco/faster_rcnn_r101_fpn_2x_coco_20200504_210455.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 7.2 | 13.8 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco/faster_rcnn_x101_32x4d_fpn_1x_coco_20200203-cff10310.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco/faster_rcnn_x101_32x4d_fpn_1x_coco_20200203_000520.log.json) | +| X-101-32x4d-FPN | pytorch | 2x | - | - | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco/faster_rcnn_x101_32x4d_fpn_2x_coco_bbox_mAP-0.412_20200506_041400-64a12c0b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco/faster_rcnn_x101_32x4d_fpn_2x_coco_20200506_041400.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 10.3 | 9.4 | 42.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco/faster_rcnn_x101_64x4d_fpn_1x_coco_20200204-833ee192.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco/faster_rcnn_x101_64x4d_fpn_1x_coco_20200204_134340.log.json) | +| X-101-64x4d-FPN | pytorch | 2x | - | - | 41.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco/faster_rcnn_x101_64x4d_fpn_2x_coco_20200512_161033-5961fa95.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco/faster_rcnn_x101_64x4d_fpn_2x_coco_20200512_161033.log.json) | ## Different regression loss We trained with R-50-FPN pytorch style backbone for 1x schedule. -| Backbone | Loss type | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :-------------: | :-------: | :------: | :------------: | :----: | :------: | :--------: | -| R-50-FPN | L1Loss | 4.0 | 21.4 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | -| R-50-FPN | IoULoss | | | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_iou_1x_coco-fdd207f3.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_iou_1x_coco_20200506_095954.log.json) | -| R-50-FPN | GIoULoss | | | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_giou_1x_coco-0eada910.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_giou_1x_coco_20200505_161120.log.json) | -| R-50-FPN | BoundedIoULoss | | | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_bounded_iou_1x_coco-98ad993b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_bounded_iou_1x_coco_20200505_160738.log.json) | +| Backbone | Loss type | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :------: | :------------: | :------: | :------------: | :----: | :----------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | L1Loss | 4.0 | 21.4 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | +| R-50-FPN | IoULoss | | | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_iou_1x_coco-fdd207f3.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_iou_1x_coco_20200506_095954.log.json) | +| R-50-FPN | GIoULoss | | | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_giou_1x_coco-0eada910.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_giou_1x_coco_20200505_161120.log.json) | +| R-50-FPN | BoundedIoULoss | | | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_bounded_iou_1x_coco-98ad993b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_bounded_iou_1x_coco_20200505_160738.log.json) | ## Pre-trained Models We also train some models with longer schedules and multi-scale training. The users could finetune them for downstream tasks. -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | -| [R-50-DC5](./faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py) | caffe | 1x | - | | 37.4 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco_20201028_233851-b33d21b9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco_20201028_233851.log.json) | -| [R-50-DC5](./faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py) | caffe | 3x | - | | 38.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco_20201028_002107-34a53b2c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco_20201028_002107.log.json) | -| [R-50-FPN](./faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py) | caffe | 2x | 3.7 | | 39.7 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco_bbox_mAP-0.397_20200504_231813-10b2de58.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco_20200504_231813.log.json) | -| [R-50-FPN](./faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py) | caffe | 3x | 3.7 | | 39.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054-1f77628b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054.log.json) | -| [R-50-FPN](./faster_rcnn_r50_fpn_mstrain_3x_coco.py) | pytorch | 3x | 3.9 | | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_mstrain_3x_coco/faster_rcnn_r50_fpn_mstrain_3x_coco_20210524_110822-e10bd31c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_mstrain_3x_coco/faster_rcnn_r50_fpn_mstrain_3x_coco_20210524_110822.log.json) | -| [R-101-FPN](./faster_rcnn_r101_caffe_fpn_mstrain_3x_coco.py) | caffe | 3x | 5.6 | | 42.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco_20210526_095742-a7ae426d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco_20210526_095742.log.json) | -| [R-101-FPN](./faster_rcnn_r101_fpn_mstrain_3x_coco.py) | pytorch | 3x | 5.8 | | 41.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r101_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_mstrain_3x_coco/faster_rcnn_r101_fpn_mstrain_3x_coco_20210524_110822-4d4d2ca8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_mstrain_3x_coco/faster_rcnn_r101_fpn_mstrain_3x_coco_20210524_110822.log.json) | -| [X-101-32x4d-FPN](./faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py) | pytorch | 3x | 7.0 | | 42.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco_20210524_124151-16b9b260.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco_20210524_124151.log.json) | -| [X-101-32x8d-FPN](./faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py) | pytorch | 3x | 10.1 | | 42.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco_20210604_182954-002e082a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco_20210604_182954.log.json) | -| [X-101-64x4d-FPN](./faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py) | pytorch | 3x | 10.0 | | 43.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210524_124528-26c63de6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210524_124528.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :----------------------------------------------------------------: | :-----: | :-----: | :------: | :------------: | :----: | :-------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| [R-50-C4](./faster_rcnn_r50_caffe_c4_mstrain_1x_coco.py) | caffe | 1x | - | | 35.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_mstrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_c4_mstrain_1x_coco/faster_rcnn_r50_caffe_c4_mstrain_1x_coco_20220316_150527-db276fed.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_c4_mstrain_1x_coco/faster_rcnn_r50_caffe_c4_mstrain_1x_coco_20220316_150527.log.json) | +| [R-50-DC5](./faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py) | caffe | 1x | - | | 37.4 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco_20201028_233851-b33d21b9.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco_20201028_233851.log.json) | +| [R-50-DC5](./faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py) | caffe | 3x | - | | 38.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco_20201028_002107-34a53b2c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco_20201028_002107.log.json) | +| [R-50-FPN](./faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py) | caffe | 2x | 3.7 | | 39.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco_bbox_mAP-0.397_20200504_231813-10b2de58.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco_20200504_231813.log.json) | +| [R-50-FPN](./faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py) | caffe | 3x | 3.7 | | 39.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054-1f77628b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054.log.json) | +| [R-50-FPN](./faster_rcnn_r50_fpn_mstrain_3x_coco.py) | pytorch | 3x | 3.9 | | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_mstrain_3x_coco/faster_rcnn_r50_fpn_mstrain_3x_coco_20210524_110822-e10bd31c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_mstrain_3x_coco/faster_rcnn_r50_fpn_mstrain_3x_coco_20210524_110822.log.json) | +| [R-101-FPN](./faster_rcnn_r101_caffe_fpn_mstrain_3x_coco.py) | caffe | 3x | 5.6 | | 42.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco_20210526_095742-a7ae426d.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco_20210526_095742.log.json) | +| [R-101-FPN](./faster_rcnn_r101_fpn_mstrain_3x_coco.py) | pytorch | 3x | 5.8 | | 41.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r101_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_mstrain_3x_coco/faster_rcnn_r101_fpn_mstrain_3x_coco_20210524_110822-4d4d2ca8.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_mstrain_3x_coco/faster_rcnn_r101_fpn_mstrain_3x_coco_20210524_110822.log.json) | +| [X-101-32x4d-FPN](./faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py) | pytorch | 3x | 7.0 | | 42.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco_20210524_124151-16b9b260.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco_20210524_124151.log.json) | +| [X-101-32x8d-FPN](./faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py) | pytorch | 3x | 10.1 | | 42.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco_20210604_182954-002e082a.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco_20210604_182954.log.json) | +| [X-101-64x4d-FPN](./faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py) | pytorch | 3x | 10.0 | | 43.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210524_124528-26c63de6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210524_124528.log.json) | We further finetune some pre-trained models on the COCO subsets, which only contain only a few of the 80 categories. -| Backbone | Style | Class name | Pre-traind model | Mem (GB) | box AP | Config | Download | -| ------------------------------------------------------------ | ----- | ------------------ | ------------------------------------------------------------ | -------- | ------ | ------------------------------------------------------------ | ------------------------------------------------------------ | -| [R-50-FPN](./faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person.py) | caffe | person | [R-50-FPN-Caffe-3x](./faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py) | 3.7 | 55.8 | [config](./faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco-person/faster_rcnn_r50_fpn_1x_coco-person_20201216_175929-d022e227.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco-person/faster_rcnn_r50_fpn_1x_coco-person_20201216_175929.log.json) | -| [R-50-FPN](./faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person-bicycle-car.py) | caffe | person-bicycle-car | [R-50-FPN-Caffe-3x](./faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py) | 3.7 | 44.1 | [config](./faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person-bicycle-car.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco-person-bicycle-car/faster_rcnn_r50_fpn_1x_coco-person-bicycle-car_20201216_173117-6eda6d92.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco-person-bicycle-car/faster_rcnn_r50_fpn_1x_coco-person-bicycle-car_20201216_173117.log.json) | +| Backbone | Style | Class name | Pre-traind model | Mem (GB) | box AP | Config | Download | +| ----------------------------------------------------------------------------- | ----- | ------------------ | ------------------------------------------------------------------- | -------- | ------ | --------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [R-50-FPN](./faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person.py) | caffe | person | [R-50-FPN-Caffe-3x](./faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py) | 3.7 | 55.8 | [config](./faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco-person/faster_rcnn_r50_fpn_1x_coco-person_20201216_175929-d022e227.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco-person/faster_rcnn_r50_fpn_1x_coco-person_20201216_175929.log.json) | +| [R-50-FPN](./faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person-bicycle-car.py) | caffe | person-bicycle-car | [R-50-FPN-Caffe-3x](./faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py) | 3.7 | 44.1 | [config](./faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person-bicycle-car.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco-person-bicycle-car/faster_rcnn_r50_fpn_1x_coco-person-bicycle-car_20201216_173117-6eda6d92.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco-person-bicycle-car/faster_rcnn_r50_fpn_1x_coco-person-bicycle-car_20201216_173117.log.json) | + +## Torchvision New Receipe (TNR) + +Torchvision released its high-precision ResNet models. The training details can be found on the [Pytorch website](https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/). Here, we have done grid searches on learning rate and weight decay and found the optimal hyper-parameter on the detection task. + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------------------------------------------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| [R-50-TNR](./faster_rcnn_r50_fpn_tnr-pretrain_1x_coco.py) | pytorch | 1x | - | | 40.2 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/faster_rcnn/faster_rcnn_r50_fpn_tnr-pretrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_tnr-pretrain_1x_coco/faster_rcnn_r50_fpn_tnr-pretrain_1x_coco_20220320_085147-efedfda4.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_tnr-pretrain_1x_coco/faster_rcnn_r50_fpn_tnr-pretrain_1x_coco_20220320_085147.log.json) | + +## Citation + +```latex +@article{Ren_2017, + title={Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks}, + journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, + publisher={Institute of Electrical and Electronics Engineers (IEEE)}, + author={Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian}, + year={2017}, + month={Jun}, +} +``` diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco.py index 92344a1..b071962 100644 --- a/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco.py +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco.py @@ -36,4 +36,4 @@ val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline)) # optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_mstrain_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_mstrain_1x_coco.py new file mode 100644 index 0000000..f4d83e6 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_mstrain_1x_coco.py @@ -0,0 +1,38 @@ +_base_ = './faster_rcnn_r50_caffe_c4_1x_coco.py' +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_tnr-pretrain_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_tnr-pretrain_1x_coco.py new file mode 100644 index 0000000..ecbfb92 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_tnr-pretrain_1x_coco.py @@ -0,0 +1,17 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +checkpoint = 'https://download.pytorch.org/models/resnet50-11ad3fa6.pth' +model = dict( + backbone=dict(init_cfg=dict(type='Pretrained', checkpoint=checkpoint))) + +# `lr` and `weight_decay` have been searched to be optimal. +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.0001, + weight_decay=0.1, + paramwise_cfg=dict(norm_decay_mult=0., bypass_duplicate=True)) diff --git a/configs/faster_rcnn/metafile.yml b/configs/faster_rcnn/metafile.yml index 5178a3b..91d6751 100644 --- a/configs/faster_rcnn/metafile.yml +++ b/configs/faster_rcnn/metafile.yml @@ -20,6 +20,30 @@ Collections: Version: v2.0.0 Models: + - Name: faster_rcnn_r50_caffe_c4_1x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 35.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco/faster_rcnn_r50_caffe_c4_1x_coco_20220316_150152-3f885b85.pth + + - Name: faster_rcnn_r50_caffe_c4_mstrain_1x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_r50_caffe_c4_mstrain_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 35.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_c4_mstrain_1x_coco/faster_rcnn_r50_caffe_c4_mstrain_1x_coco_20220316_150527-db276fed.pth + - Name: faster_rcnn_r50_caffe_dc5_1x_coco In Collection: Faster R-CNN Config: configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco.py @@ -405,3 +429,23 @@ Models: Metrics: box AP: 43.1 Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210524_124528-26c63de6.pth + + - Name: faster_rcnn_r50_fpn_tnr-pretrain_1x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_r50_fpn_tnr-pretrain_1x_coco.py + Metadata: + Training Memory (GB): 4.0 + inference time (ms/im): + - value: 46.73 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_tnr-pretrain_1x_coco/faster_rcnn_r50_fpn_tnr-pretrain_1x_coco_20220320_085147-efedfda4.pth diff --git a/configs/fcos/README.md b/configs/fcos/README.md index fc5ca54..76be365 100644 --- a/configs/fcos/README.md +++ b/configs/fcos/README.md @@ -1,22 +1,39 @@ -# FCOS: Fully Convolutional One-Stage Object Detection +# FCOS -## Abstract +> [FCOS: Fully Convolutional One-Stage Object Detection](https://arxiv.org/abs/1904.01355) + + - +## Abstract We propose a fully convolutional one-stage object detector (FCOS) to solve object detection in a per-pixel prediction fashion, analogue to semantic segmentation. Almost all state-of-the-art object detectors such as RetinaNet, SSD, YOLOv3, and Faster R-CNN rely on pre-defined anchor boxes. In contrast, our proposed detector FCOS is anchor box free, as well as proposal free. By eliminating the predefined set of anchor boxes, FCOS completely avoids the complicated computation related to anchor boxes such as calculating overlapping during training. More importantly, we also avoid all hyper-parameters related to anchor boxes, which are often very sensitive to the final detection performance. With the only post-processing non-maximum suppression (NMS), FCOS with ResNeXt-64x4d-101 achieves 44.7% in AP with single-model and single-scale testing, surpassing previous one-stage detectors with the advantage of being much simpler. For the first time, we demonstrate a much simpler and flexible detection framework achieving improved detection accuracy. We hope that the proposed FCOS framework can serve as a simple and strong alternative for many other instance-level tasks. -
- - +## Results and Models -## Citation +| Backbone | Style | GN | MS train | Tricks | DCN | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :------: | :---: | :-: | :------: | :----: | :-: | :-----: | :------: | :------------: | :----: | :-----------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | caffe | Y | N | N | N | 1x | 3.6 | 22.7 | 36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco/fcos_r50_caffe_fpn_gn-head_1x_coco-821213aa.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco/20201227_180009.log.json) | +| R-50 | caffe | Y | N | Y | N | 1x | 3.7 | - | 38.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/20210105_135818.log.json) | +| R-50 | caffe | Y | N | Y | Y | 1x | 3.8 | - | 42.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco-ae4d8b3d.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco/20210105_224556.log.json) | +| R-101 | caffe | Y | N | N | N | 1x | 5.5 | 17.3 | 39.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco/fcos_r101_caffe_fpn_gn-head_1x_coco-0e37b982.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco/20210103_155046.log.json) | - +| Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :------: | :-----: | :-: | :------: | :-----: | :------: | :------------: | :----: | :---------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | caffe | Y | Y | 2x | 2.6 | 22.9 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco-d92ceeea.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco/20201227_161900.log.json) | +| R-101 | caffe | Y | Y | 2x | 5.5 | 17.3 | 40.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco-511424d6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco/20210103_155046.log.json) | +| X-101 | pytorch | Y | Y | 2x | 10.0 | 9.7 | 42.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco-ede514a8.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco/20210114_133041.log.json) | + +**Notes:** + +- The X-101 backbone is X-101-64x4d. +- Tricks means setting `norm_on_bbox`, `centerness_on_reg`, `center_sampling` as `True`. +- DCN means using `DCNv2` in both backbone and head. + +## Citation ```latex @article{tian2019fcos, @@ -26,24 +43,3 @@ We propose a fully convolutional one-stage object detector (FCOS) to solve objec year={2019} } ``` - -## Results and Models - -| Backbone | Style | GN | MS train | Tricks | DCN | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -|:---------:|:-------:|:-------:|:--------:|:-------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| -| R-50 | caffe | Y | N | N | N | 1x | 3.6 | 22.7 | 36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco/fcos_r50_caffe_fpn_gn-head_1x_coco-821213aa.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco/20201227_180009.log.json) | -| R-50 | caffe | Y | N | Y | N | 1x | 3.7 | - | 38.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/20210105_135818.log.json)| -| R-50 | caffe | Y | N | Y | Y | 1x | 3.8 | - | 42.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco-ae4d8b3d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco/20210105_224556.log.json)| -| R-101 | caffe | Y | N | N | N | 1x | 5.5 | 17.3 | 39.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco/fcos_r101_caffe_fpn_gn-head_1x_coco-0e37b982.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco/20210103_155046.log.json) | - -| Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -|:---------:|:-------:|:-------:|:--------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| -| R-50 | caffe | Y | Y | 2x | 2.6 | 22.9 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco-d92ceeea.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco/20201227_161900.log.json) | -| R-101 | caffe | Y | Y | 2x | 5.5 | 17.3 | 40.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco-511424d6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco/20210103_155046.log.json) | -| X-101 | pytorch | Y | Y | 2x | 10.0 | 9.7 | 42.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco-ede514a8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco/20210114_133041.log.json) | - -**Notes:** - -- The X-101 backbone is X-101-64x4d. -- Tricks means setting `norm_on_bbox`, `centerness_on_reg`, `center_sampling` as `True`. -- DCN means using `DCNv2` in both backbone and head. diff --git a/configs/foveabox/README.md b/configs/foveabox/README.md index 7009e97..7fcd094 100644 --- a/configs/foveabox/README.md +++ b/configs/foveabox/README.md @@ -1,49 +1,45 @@ -# FoveaBox: Beyond Anchor-based Object Detector +# FoveaBox -## Abstract +> [FoveaBox: Beyond Anchor-based Object Detector](https://arxiv.org/abs/1904.03797) + + - +## Abstract We present FoveaBox, an accurate, flexible, and completely anchor-free framework for object detection. While almost all state-of-the-art object detectors utilize predefined anchors to enumerate possible locations, scales and aspect ratios for the search of the objects, their performance and generalization ability are also limited to the design of anchors. Instead, FoveaBox directly learns the object existing possibility and the bounding box coordinates without anchor reference. This is achieved by: (a) predicting category-sensitive semantic maps for the object existing possibility, and (b) producing category-agnostic bounding box for each position that potentially contains an object. The scales of target boxes are naturally associated with feature pyramid representations. In FoveaBox, an instance is assigned to adjacent feature levels to make the model more accurate.We demonstrate its effectiveness on standard benchmarks and report extensive experimental analysis. Without bells and whistles, FoveaBox achieves state-of-the-art single model performance on the standard COCO and Pascal VOC object detection benchmark. More importantly, FoveaBox avoids all computation and hyper-parameters related to anchor boxes, which are often sensitive to the final detection performance. We believe the simple and effective approach will serve as a solid baseline and help ease future research for object detection. -
- - - - - ## Introduction FoveaBox is an accurate, flexible and completely anchor-free object detection system for object detection framework, as presented in our paper [https://arxiv.org/abs/1904.03797](https://arxiv.org/abs/1904.03797): Different from previous anchor-based methods, FoveaBox directly learns the object existing possibility and the bounding box coordinates without anchor reference. This is achieved by: (a) predicting category-sensitive semantic maps for the object existing possibility, and (b) producing category-agnostic bounding box for each position that potentially contains an object. -## Main Results +## Results and Models ### Results on R50/101-FPN -| Backbone | Style | align | ms-train| Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -|:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| -| R-50 | pytorch | N | N | 1x | 5.6 | 24.1 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_1x_coco/fovea_r50_fpn_4x4_1x_coco_20200219-ee4d5303.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_1x_coco/fovea_r50_fpn_4x4_1x_coco_20200219_223025.log.json) | -| R-50 | pytorch | N | N | 2x | 5.6 | - | 37.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_r50_fpn_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_2x_coco/fovea_r50_fpn_4x4_2x_coco_20200203-2df792b1.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_2x_coco/fovea_r50_fpn_4x4_2x_coco_20200203_112043.log.json) | -| R-50 | pytorch | Y | N | 2x | 8.1 | 19.4 | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco/fovea_align_r50_fpn_gn-head_4x4_2x_coco_20200203-8987880d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco/fovea_align_r50_fpn_gn-head_4x4_2x_coco_20200203_134252.log.json) | -| R-50 | pytorch | Y | Y | 2x | 8.1 | 18.3 | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200205-85ce26cb.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200205_112557.log.json) | -| R-101 | pytorch | N | N | 1x | 9.2 | 17.4 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_r101_fpn_4x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_1x_coco/fovea_r101_fpn_4x4_1x_coco_20200219-05e38f1c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_1x_coco/fovea_r101_fpn_4x4_1x_coco_20200219_011740.log.json) | -| R-101 | pytorch | N | N | 2x | 11.7 | - | 40.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_r101_fpn_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_2x_coco/fovea_r101_fpn_4x4_2x_coco_20200208-02320ea4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_2x_coco/fovea_r101_fpn_4x4_2x_coco_20200208_202059.log.json) | -| R-101 | pytorch | Y | N | 2x | 11.7 | 14.7 | 40.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco/fovea_align_r101_fpn_gn-head_4x4_2x_coco_20200208-c39a027a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco/fovea_align_r101_fpn_gn-head_4x4_2x_coco_20200208_203337.log.json) | -| R-101 | pytorch | Y | Y | 2x | 11.7 | 14.7 | 42.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200208-649c5eb6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200208_202124.log.json) | - -[1] *1x and 2x mean the model is trained for 12 and 24 epochs, respectively.* \ -[2] *Align means utilizing deformable convolution to align the cls branch.* \ -[3] *All results are obtained with a single model and without any test time data augmentation.*\ -[4] *We use 4 GPUs for training.* +| Backbone | Style | align | ms-train | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :------: | :-----: | :---: | :------: | :-----: | :------: | :------------: | :----: | :------------------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | pytorch | N | N | 1x | 5.6 | 24.1 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_1x_coco/fovea_r50_fpn_4x4_1x_coco_20200219-ee4d5303.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_1x_coco/fovea_r50_fpn_4x4_1x_coco_20200219_223025.log.json) | +| R-50 | pytorch | N | N | 2x | 5.6 | - | 37.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_r50_fpn_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_2x_coco/fovea_r50_fpn_4x4_2x_coco_20200203-2df792b1.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_2x_coco/fovea_r50_fpn_4x4_2x_coco_20200203_112043.log.json) | +| R-50 | pytorch | Y | N | 2x | 8.1 | 19.4 | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco/fovea_align_r50_fpn_gn-head_4x4_2x_coco_20200203-8987880d.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco/fovea_align_r50_fpn_gn-head_4x4_2x_coco_20200203_134252.log.json) | +| R-50 | pytorch | Y | Y | 2x | 8.1 | 18.3 | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200205-85ce26cb.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200205_112557.log.json) | +| R-101 | pytorch | N | N | 1x | 9.2 | 17.4 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_r101_fpn_4x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_1x_coco/fovea_r101_fpn_4x4_1x_coco_20200219-05e38f1c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_1x_coco/fovea_r101_fpn_4x4_1x_coco_20200219_011740.log.json) | +| R-101 | pytorch | N | N | 2x | 11.7 | - | 40.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_r101_fpn_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_2x_coco/fovea_r101_fpn_4x4_2x_coco_20200208-02320ea4.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_2x_coco/fovea_r101_fpn_4x4_2x_coco_20200208_202059.log.json) | +| R-101 | pytorch | Y | N | 2x | 11.7 | 14.7 | 40.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco/fovea_align_r101_fpn_gn-head_4x4_2x_coco_20200208-c39a027a.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco/fovea_align_r101_fpn_gn-head_4x4_2x_coco_20200208_203337.log.json) | +| R-101 | pytorch | Y | Y | 2x | 11.7 | 14.7 | 42.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200208-649c5eb6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200208_202124.log.json) | + +\[1\] *1x and 2x mean the model is trained for 12 and 24 epochs, respectively.* \ +\[2\] *Align means utilizing deformable convolution to align the cls branch.* \ +\[3\] *All results are obtained with a single model and without any test time data augmentation.*\ +\[4\] *We use 4 GPUs for training.* Any pull requests or issues are welcome. -## Citations +## Citation Please consider citing our paper in your publications if the project helps your research. BibTeX reference is as follows. diff --git a/configs/fpg/README.md b/configs/fpg/README.md index dd69c61..c9bb1fe 100644 --- a/configs/fpg/README.md +++ b/configs/fpg/README.md @@ -1,19 +1,35 @@ -# Feature Pyramid Grids +# FPG + +> [Feature Pyramid Grids](https://arxiv.org/abs/2004.03580) + ## Abstract - - Feature pyramid networks have been widely adopted in the object detection literature to improve feature representations for better handling of variations in scale. In this paper, we present Feature Pyramid Grids (FPG), a deep multi-pathway feature pyramid, that represents the feature scale-space as a regular grid of parallel bottom-up pathways which are fused by multi-directional lateral connections. FPG can improve single-pathway feature pyramid networks by significantly increasing its performance at similar computation cost, highlighting importance of deep pyramid representations. In addition to its general and uniform structure, over complicated structures that have been found with neural architecture search, it also compares favorably against such approaches without relying on search. We hope that FPG with its uniform and effective nature can serve as a strong component for future work in object recognition. -
- - +## Results and Models + +We benchmark the new training schedule (crop training, large batch, unfrozen BN, 50 epochs) introduced in NAS-FPN. +All backbones are Resnet-50 in pytorch style. + +| Method | Neck | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :----------: | :--------: | :-----: | :------: | :------------: | :----: | :-----: | :------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Faster R-CNN | FPG | 50e | 20.0 | - | 42.3 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/faster_rcnn_r50_fpg_crop640_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg_crop640_50e_coco/faster_rcnn_r50_fpg_crop640_50e_coco_20220311_011856-74109f42.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg_crop640_50e_coco/faster_rcnn_r50_fpg_crop640_50e_coco_20220311_011856.log.json) | +| Faster R-CNN | FPG-chn128 | 50e | 11.9 | - | 41.2 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco/faster_rcnn_r50_fpg-chn128_crop640_50e_coco_20220311_011857-9376aa9d.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco/faster_rcnn_r50_fpg-chn128_crop640_50e_coco_20220311_011857.log.json) | +| Faster R-CNN | FPN | 50e | 20.0 | - | 38.9 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/faster_rcnn_r50_fpn_crop640_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpn_crop640_50e_coco/faster_rcnn_r50_fpn_crop640_50e_coco_20220311_011857-be7c9f42.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpn_crop640_50e_coco/faster_rcnn_r50_fpn_crop640_50e_coco_20220311_011857.log.json) | +| Mask R-CNN | FPG | 50e | 23.2 | - | 43.0 | 38.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/mask_rcnn_r50_fpg_crop640_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg_crop640_50e_coco/mask_rcnn_r50_fpg_crop640_50e_coco_20220311_011857-233b8334.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg_crop640_50e_coco/mask_rcnn_r50_fpg_crop640_50e_coco_20220311_011857.log.json) | +| Mask R-CNN | FPG-chn128 | 50e | 15.3 | - | 41.7 | 37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco/mask_rcnn_r50_fpg-chn128_crop640_50e_coco_20220311_011859-043c9b4e.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco/mask_rcnn_r50_fpg-chn128_crop640_50e_coco_20220311_011859.log.json) | +| Mask R-CNN | FPN | 50e | 23.2 | - | 49.6 | 35.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/mask_rcnn_r50_fpn_crop640_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpn_crop640_50e_coco/mask_rcnn_r50_fpn_crop640_50e_coco_20220311_011855-a756664a.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpn_crop640_50e_coco/mask_rcnn_r50_fpn_crop640_50e_coco_20220311_011855.log.json) | +| RetinaNet | FPG | 50e | 20.8 | - | 40.5 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/retinanet_r50_fpg_crop640_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg_crop640_50e_coco/retinanet_r50_fpg_crop640_50e_coco_20220311_110809-b0bcf5f4.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg_crop640_50e_coco/retinanet_r50_fpg_crop640_50e_coco_20220311_110809.log.json) | +| RetinaNet | FPG-chn128 | 50e | 19.9 | - | 39.9 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco/retinanet_r50_fpg-chn128_crop640_50e_coco_20220313_104829-ee99a686.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco/retinanet_r50_fpg-chn128_crop640_50e_coco_20220313_104829.log.json) | + +**Note**: Chn128 means to decrease the number of channels of features and convs from 256 (default) to 128 in +Neck and BBox Head, which can greatly decrease memory consumption without sacrificing much precision. ## Citation @@ -25,20 +41,3 @@ Feature pyramid networks have been widely adopted in the object detection litera year={2020} } ``` - -## Results and Models - -We benchmark the new training schedule (crop training, large batch, unfrozen BN, 50 epochs) introduced in NAS-FPN. -All backbones are Resnet-50 in pytorch style. - -| Method | Neck | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -|:------------:|:-----------:|:-------:|:--------:|:--------------:|:------:|:-------:|:-------:|:--------:| -| Faster R-CNN | FPG | 50e | 20.0 | - | 42.2 | - |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/faster_rcnn_r50_fpg_crop640_50e_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg_crop640_50e_coco/faster_rcnn_r50_fpg_crop640_50e_coco-76220505.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg_crop640_50e_coco/20210218_223520.log.json) | -| Faster R-CNN | FPG-chn128 | 50e | 11.9 | - | 41.2 | - |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco/faster_rcnn_r50_fpg-chn128_crop640_50e_coco-24257de9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco/20210218_221412.log.json) | -| Mask R-CNN | FPG | 50e | 23.2 | - | 42.7 | 37.8 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/mask_rcnn_r50_fpg_crop640_50e_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg_crop640_50e_coco/mask_rcnn_r50_fpg_crop640_50e_coco-c5860453.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg_crop640_50e_coco/20210222_205447.log.json) | -| Mask R-CNN | FPG-chn128 | 50e | 15.3 | - | 41.7 | 36.9 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco/mask_rcnn_r50_fpg-chn128_crop640_50e_coco-5c6ea10d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco/20210223_025039.log.json) | -| RetinaNet | FPG | 50e | 20.8 | - | 40.5 | - |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/retinanet_r50_fpg_crop640_50e_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg_crop640_50e_coco/retinanet_r50_fpg_crop640_50e_coco-46fdd1c6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg_crop640_50e_coco/20210225_143957.log.json) | -| RetinaNet | FPG-chn128 | 50e | 19.9 | - | 40.3 | - |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco/retinanet_r50_fpg-chn128_crop640_50e_coco-5cf33c76.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco/20210225_184328.log.json) | - -**Note**: Chn128 means to decrease the number of channels of features and convs from 256 (default) to 128 in -Neck and BBox Head, which can greatly decrease memory consumption without sacrificing much precision. diff --git a/configs/fpg/faster_rcnn_r50_fpn_crop640_50e_coco.py b/configs/fpg/faster_rcnn_r50_fpn_crop640_50e_coco.py index 95f4e91..e4ec940 100644 --- a/configs/fpg/faster_rcnn_r50_fpn_crop640_50e_coco.py +++ b/configs/fpg/faster_rcnn_r50_fpn_crop640_50e_coco.py @@ -66,3 +66,8 @@ # runtime settings runner = dict(max_epochs=50) evaluation = dict(interval=2) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/fpg/mask_rcnn_r50_fpn_crop640_50e_coco.py b/configs/fpg/mask_rcnn_r50_fpn_crop640_50e_coco.py index 8dfdbb4..c6bcc24 100644 --- a/configs/fpg/mask_rcnn_r50_fpn_crop640_50e_coco.py +++ b/configs/fpg/mask_rcnn_r50_fpn_crop640_50e_coco.py @@ -72,3 +72,8 @@ # runtime settings runner = dict(max_epochs=50) evaluation = dict(interval=2) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/fpg/metafile.yml b/configs/fpg/metafile.yml index 885d857..6b0a6a7 100644 --- a/configs/fpg/metafile.yml +++ b/configs/fpg/metafile.yml @@ -27,8 +27,8 @@ Models: - Task: Object Detection Dataset: COCO Metrics: - box AP: 42.2 - Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg_crop640_50e_coco/faster_rcnn_r50_fpg_crop640_50e_coco-76220505.pth + box AP: 42.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg_crop640_50e_coco/faster_rcnn_r50_fpg_crop640_50e_coco_20220311_011856-74109f42.pth - Name: faster_rcnn_r50_fpg-chn128_crop640_50e_coco In Collection: Feature Pyramid Grids @@ -41,7 +41,7 @@ Models: Dataset: COCO Metrics: box AP: 41.2 - Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco/faster_rcnn_r50_fpg-chn128_crop640_50e_coco-24257de9.pth + Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco/faster_rcnn_r50_fpg-chn128_crop640_50e_coco_20220311_011857-9376aa9d.pth - Name: mask_rcnn_r50_fpg_crop640_50e_coco In Collection: Feature Pyramid Grids @@ -53,12 +53,12 @@ Models: - Task: Object Detection Dataset: COCO Metrics: - box AP: 42.7 + box AP: 43.0 - Task: Instance Segmentation Dataset: COCO Metrics: - mask AP: 37.8 - Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg_crop640_50e_coco/mask_rcnn_r50_fpg_crop640_50e_coco-c5860453.pth + mask AP: 38.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg_crop640_50e_coco/mask_rcnn_r50_fpg_crop640_50e_coco_20220311_011857-233b8334.pth - Name: mask_rcnn_r50_fpg-chn128_crop640_50e_coco In Collection: Feature Pyramid Grids @@ -74,8 +74,8 @@ Models: - Task: Instance Segmentation Dataset: COCO Metrics: - mask AP: 36.9 - Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco/mask_rcnn_r50_fpg-chn128_crop640_50e_coco-5c6ea10d.pth + mask AP: 37.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco/mask_rcnn_r50_fpg-chn128_crop640_50e_coco_20220311_011859-043c9b4e.pth - Name: retinanet_r50_fpg_crop640_50e_coco In Collection: Feature Pyramid Grids @@ -88,7 +88,7 @@ Models: Dataset: COCO Metrics: box AP: 40.5 - Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg_crop640_50e_coco/retinanet_r50_fpg_crop640_50e_coco-46fdd1c6.pth + Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg_crop640_50e_coco/retinanet_r50_fpg_crop640_50e_coco_20220311_110809-b0bcf5f4.pth - Name: retinanet_r50_fpg-chn128_crop640_50e_coco In Collection: Feature Pyramid Grids @@ -100,5 +100,5 @@ Models: - Task: Object Detection Dataset: COCO Metrics: - box AP: 40.3 - Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco/retinanet_r50_fpg-chn128_crop640_50e_coco-5cf33c76.pth + box AP: 39.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco/retinanet_r50_fpg-chn128_crop640_50e_coco_20220313_104829-ee99a686.pth diff --git a/configs/free_anchor/README.md b/configs/free_anchor/README.md index a1dcb17..d24c340 100644 --- a/configs/free_anchor/README.md +++ b/configs/free_anchor/README.md @@ -1,22 +1,31 @@ -# FreeAnchor: Learning to Match Anchors for Visual Object Detection +# FreeAnchor -## Abstract +> [FreeAnchor: Learning to Match Anchors for Visual Object Detection](https://arxiv.org/abs/1909.02466) + + - +## Abstract Modern CNN-based object detectors assign anchors for ground-truth objects under the restriction of object-anchor Intersection-over-Unit (IoU). In this study, we propose a learning-to-match approach to break IoU restriction, allowing objects to match anchors in a flexible manner. Our approach, referred to as FreeAnchor, updates hand-crafted anchor assignment to "free" anchor matching by formulating detector training as a maximum likelihood estimation (MLE) procedure. FreeAnchor targets at learning features which best explain a class of objects in terms of both classification and localization. FreeAnchor is implemented by optimizing detection customized likelihood and can be fused with CNN-based detectors in a plug-and-play manner. Experiments on COCO demonstrate that FreeAnchor consistently outperforms their counterparts with significant margins. -
- - +## Results and Models -## Citation +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :---------: | :-----: | :-----: | :------: | :------------: | :----: | :---------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | pytorch | 1x | 4.9 | 18.4 | 38.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco/retinanet_free_anchor_r50_fpn_1x_coco_20200130-0f67375f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco/retinanet_free_anchor_r50_fpn_1x_coco_20200130_095625.log.json) | +| R-101 | pytorch | 1x | 6.8 | 14.9 | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco/retinanet_free_anchor_r101_fpn_1x_coco_20200130-358324e6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco/retinanet_free_anchor_r101_fpn_1x_coco_20200130_100723.log.json) | +| X-101-32x4d | pytorch | 1x | 8.1 | 11.1 | 41.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco/retinanet_free_anchor_x101_32x4d_fpn_1x_coco_20200130-d4846968.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco/retinanet_free_anchor_x101_32x4d_fpn_1x_coco_20200130_095627.log.json) | - +**Notes:** + +- We use 8 GPUs with 2 images/GPU. +- For more settings and models, please refer to the [official repo](https://github.com/zhangxiaosong18/FreeAnchor). + +## Citation ```latex @inproceedings{zhang2019freeanchor, @@ -26,16 +35,3 @@ Modern CNN-based object detectors assign anchors for ground-truth objects under year = {2019} } ``` - -## Results and Models - -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -|:--------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| -| R-50 | pytorch | 1x | 4.9 | 18.4 | 38.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco/retinanet_free_anchor_r50_fpn_1x_coco_20200130-0f67375f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco/retinanet_free_anchor_r50_fpn_1x_coco_20200130_095625.log.json) | -| R-101 | pytorch | 1x | 6.8 | 14.9 | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco/retinanet_free_anchor_r101_fpn_1x_coco_20200130-358324e6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco/retinanet_free_anchor_r101_fpn_1x_coco_20200130_100723.log.json) | -| X-101-32x4d | pytorch | 1x | 8.1 | 11.1 | 41.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco/retinanet_free_anchor_x101_32x4d_fpn_1x_coco_20200130-d4846968.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco/retinanet_free_anchor_x101_32x4d_fpn_1x_coco_20200130_095627.log.json) | - -**Notes:** - -- We use 8 GPUs with 2 images/GPU. -- For more settings and models, please refer to the [official repo](https://github.com/zhangxiaosong18/FreeAnchor). diff --git a/configs/fsaf/README.md b/configs/fsaf/README.md index ba4d409..4392a6e 100644 --- a/configs/fsaf/README.md +++ b/configs/fsaf/README.md @@ -1,23 +1,19 @@ -# Feature Selective Anchor-Free Module for Single-Shot Object Detection +# FSAF -## Abstract +> [Feature Selective Anchor-Free Module for Single-Shot Object Detection](https://arxiv.org/abs/1903.00621) + + - +## Abstract We motivate and present feature selective anchor-free (FSAF) module, a simple and effective building block for single-shot object detectors. It can be plugged into single-shot detectors with feature pyramid structure. The FSAF module addresses two limitations brought up by the conventional anchor-based detection: 1) heuristic-guided feature selection; 2) overlap-based anchor sampling. The general concept of the FSAF module is online feature selection applied to the training of multi-level anchor-free branches. Specifically, an anchor-free branch is attached to each level of the feature pyramid, allowing box encoding and decoding in the anchor-free manner at an arbitrary level. During training, we dynamically assign each instance to the most suitable feature level. At the time of inference, the FSAF module can work jointly with anchor-based branches by outputting predictions in parallel. We instantiate this concept with simple implementations of anchor-free branches and online feature selection strategy. Experimental results on the COCO detection track show that our FSAF module performs better than anchor-based counterparts while being faster. When working jointly with anchor-based branches, the FSAF module robustly improves the baseline RetinaNet by a large margin under various settings, while introducing nearly free inference overhead. And the resulting best model can achieve a state-of-the-art 44.6% mAP, outperforming all existing single-shot detectors on COCO. -
- - - ## Introduction - - FSAF is an anchor-free method published in CVPR2019 ([https://arxiv.org/pdf/1903.00621.pdf](https://arxiv.org/pdf/1903.00621.pdf)). Actually it is equivalent to the anchor-based method with only one anchor at each feature map position in each FPN level. And this is how we implemented it. @@ -26,16 +22,16 @@ Only the anchor-free branch is released for its better compatibility with the cu In the original paper, feature maps within the central 0.2-0.5 area of a gt box are tagged as ignored. However, it is empirically found that a hard threshold (0.2-0.2) gives a further gain on the performance. (see the table below) -## Main Results +## Results and Models ### Results on R50/R101/X101-FPN -| Backbone | ignore range | ms-train| Lr schd |Train Mem (GB)| Train time (s/iter) | Inf time (fps) | box AP | Config | Download | -|:----------:| :-------: |:-------:|:-------:|:------------:|:---------------:|:--------------:|:-------------:|:------:|:--------:| -| R-50 | 0.2-0.5 | N | 1x | 3.15 | 0.43 | 12.3 | 36.0 (35.9) | | [model](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_pscale0.2_nscale0.5_r50_fpn_1x_coco/fsaf_pscale0.2_nscale0.5_r50_fpn_1x_coco_20200715-b555b0e0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_pscale0.2_nscale0.5_r50_fpn_1x_coco/fsaf_pscale0.2_nscale0.5_r50_fpn_1x_coco_20200715_094657.log.json) | -| R-50 | 0.2-0.2 | N | 1x | 3.15 | 0.43 | 13.0 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf/fsaf_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco-94ccc51f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco_20200428_072327.log.json)| -| R-101 | 0.2-0.2 | N | 1x | 5.08 | 0.58 | 10.8 | 39.3 (37.9) | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf/fsaf_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r101_fpn_1x_coco/fsaf_r101_fpn_1x_coco-9e71098f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r101_fpn_1x_coco/fsaf_r101_fpn_1x_coco_20200428_160348.log.json)| -| X-101 | 0.2-0.2 | N | 1x | 9.38 | 1.23 | 5.6 | 42.4 (41.0) | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf/fsaf_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_x101_64x4d_fpn_1x_coco/fsaf_x101_64x4d_fpn_1x_coco-e3f6e6fd.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_x101_64x4d_fpn_1x_coco/fsaf_x101_64x4d_fpn_1x_coco_20200428_160424.log.json)| +| Backbone | ignore range | ms-train | Lr schd | Train Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Config | Download | +| :------: | :----------: | :------: | :-----: | :------------: | :-----------------: | :------------: | :---------: | :---------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | 0.2-0.5 | N | 1x | 3.15 | 0.43 | 12.3 | 36.0 (35.9) | | [model](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_pscale0.2_nscale0.5_r50_fpn_1x_coco/fsaf_pscale0.2_nscale0.5_r50_fpn_1x_coco_20200715-b555b0e0.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_pscale0.2_nscale0.5_r50_fpn_1x_coco/fsaf_pscale0.2_nscale0.5_r50_fpn_1x_coco_20200715_094657.log.json) | +| R-50 | 0.2-0.2 | N | 1x | 3.15 | 0.43 | 13.0 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf/fsaf_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco-94ccc51f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco_20200428_072327.log.json) | +| R-101 | 0.2-0.2 | N | 1x | 5.08 | 0.58 | 10.8 | 39.3 (37.9) | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf/fsaf_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r101_fpn_1x_coco/fsaf_r101_fpn_1x_coco-9e71098f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r101_fpn_1x_coco/fsaf_r101_fpn_1x_coco_20200428_160348.log.json) | +| X-101 | 0.2-0.2 | N | 1x | 9.38 | 1.23 | 5.6 | 42.4 (41.0) | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf/fsaf_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_x101_64x4d_fpn_1x_coco/fsaf_x101_64x4d_fpn_1x_coco-e3f6e6fd.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_x101_64x4d_fpn_1x_coco/fsaf_x101_64x4d_fpn_1x_coco_20200428_160424.log.json) | **Notes:** @@ -46,7 +42,7 @@ it is empirically found that a hard threshold (0.2-0.2) gives a further gain on - *All pretrained backbones use pytorch style.* - *All models are trained on 8 Titan-XP gpus and tested on a single gpu.* -## Citations +## Citation BibTeX reference is as follows. diff --git a/configs/gcnet/README.md b/configs/gcnet/README.md index c5c436a..403e086 100644 --- a/configs/gcnet/README.md +++ b/configs/gcnet/README.md @@ -1,19 +1,17 @@ -# GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond +# GCNet -## Abstract +> [GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond](https://arxiv.org/abs/1904.11492) + + - +## Abstract The Non-Local Network (NLNet) presents a pioneering approach for capturing long-range dependencies, via aggregating query-specific global context to each query position. However, through a rigorous empirical analysis, we have found that the global contexts modeled by non-local network are almost the same for different query positions within an image. In this paper, we take advantage of this finding to create a simplified network based on a query-independent formulation, which maintains the accuracy of NLNet but with significantly less computation. We further observe that this simplified design shares similar structure with Squeeze-Excitation Network (SENet). Hence we unify them into a three-step general framework for global context modeling. Within the general framework, we design a better instantiation, called the global context (GC) block, which is lightweight and can effectively model the global context. The lightweight property allows us to apply it for multiple layers in a backbone network to construct a global context network (GCNet), which generally outperforms both simplified NLNet and SENet on major benchmarks for various recognition tasks. -
- - - ## Introduction By [Yue Cao](http://yue-cao.me), [Jiarui Xu](http://jerryxu.net), [Stephen Lin](https://scholar.google.com/citations?user=c3PYmxUAAAAJ&hl=en), Fangyun Wei, [Han Hu](https://sites.google.com/site/hanhushomepage/). @@ -21,49 +19,36 @@ By [Yue Cao](http://yue-cao.me), [Jiarui Xu](http://jerryxu.net), [Stephen Lin]( We provide config files to reproduce the results in the paper for ["GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond"](https://arxiv.org/abs/1904.11492) on COCO object detection. - - **GCNet** is initially described in [arxiv](https://arxiv.org/abs/1904.11492). Via absorbing advantages of Non-Local Networks (NLNet) and Squeeze-Excitation Networks (SENet), GCNet provides a simple, fast and effective approach for global context modeling, which generally outperforms both NLNet and SENet on major benchmarks for various recognition tasks. -## Citation - -```latex -@article{cao2019GCNet, - title={GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond}, - author={Cao, Yue and Xu, Jiarui and Lin, Stephen and Wei, Fangyun and Hu, Han}, - journal={arXiv preprint arXiv:1904.11492}, - year={2019} -} -``` - -## Results and models +## Results and Models The results on COCO 2017val are shown in the below table. -| Backbone | Model | Context | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-------: | :--------------: | :------------: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | -| R-50-FPN | Mask | GC(c3-c5, r16) | 1x | 5.0 | | 39.7 | 35.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco_20200515_211915-187da160.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco_20200515_211915.log.json) | -| R-50-FPN | Mask | GC(c3-c5, r4) | 1x | 5.1 | 15.0 | 39.9 | 36.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco_20200204-17235656.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco_20200204_024626.log.json) | -| R-101-FPN | Mask | GC(c3-c5, r16) | 1x | 7.6 | 11.4 | 41.3 | 37.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco_20200205-e58ae947.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco_20200205_192835.log.json) | -| R-101-FPN | Mask | GC(c3-c5, r4) | 1x | 7.8 | 11.6 | 42.2 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco_20200206-af22dc9d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco_20200206_112128.log.json) | - -| Backbone | Model | Context | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-------: | :--------------: | :------------: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :-------: | -| R-50-FPN | Mask | - | 1x | 4.4 | 16.6 | 38.4 | 34.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco_20200202-bb3eb55c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco_20200202_214122.log.json) | -| R-50-FPN | Mask | GC(c3-c5, r16) | 1x | 5.0 | 15.5 | 40.4 | 36.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200202-587b99aa.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200202_174907.log.json) | -| R-50-FPN | Mask | GC(c3-c5, r4) | 1x | 5.1 | 15.1 | 40.7 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202-50b90e5c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202_085547.log.json) | -| R-101-FPN | Mask | - | 1x | 6.4 | 13.3 | 40.5 | 36.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco_20200210-81658c8a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco_20200210_220422.log.json) | -| R-101-FPN | Mask | GC(c3-c5, r16) | 1x | 7.6 | 12.0 | 42.2 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200207-945e77ca.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200207_015330.log.json) | -| R-101-FPN | Mask | GC(c3-c5, r4) | 1x | 7.8 | 11.8 | 42.2 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206-8407a3f0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206_142508.log.json) | -| X-101-FPN | Mask | - | 1x | 7.6 | 11.3 | 42.4 | 37.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200211-7584841c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200211_054326.log.json) | -| X-101-FPN | Mask | GC(c3-c5, r16) | 1x | 8.8 | 9.8 | 43.5 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211-cbed3d2c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211_164715.log.json) | -| X-101-FPN | Mask | GC(c3-c5, r4) | 1x | 9.0 | 9.7 | 43.9 | 39.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200212-68164964.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200212_070942.log.json) | -| X-101-FPN | Cascade Mask | - | 1x | 9.2 | 8.4 | 44.7 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200310-d5ad2a5e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200310_115217.log.json) | -| X-101-FPN | Cascade Mask | GC(c3-c5, r16) | 1x | 10.3 | 7.7 | 46.2 | 39.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211-10bf2463.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211_184154.log.json) | -| X-101-FPN | Cascade Mask | GC(c3-c5, r4) | 1x | 10.6 | | 46.4 | 40.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200703_180653-ed035291.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200703_180653.log.json) | -| X-101-FPN | DCN Cascade Mask | - | 1x | | | 47.5 | 40.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco_20210615_211019-abbc39ea.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco_20210615_211019.log.json)| -| X-101-FPN | DCN Cascade Mask | GC(c3-c5, r16) | 1x | | | 48.0 | 41.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco_20210615_215648-44aa598a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco_20210615_215648.log.json) | -| X-101-FPN | DCN Cascade Mask | GC(c3-c5, r4) | 1x | | | 47.9 | 41.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco_20210615_161851-720338ec.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco_20210615_161851.log.json) | +| Backbone | Model | Context | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------: | :---: | :------------: | :-----: | :------: | :------------: | :----: | :-----: | :-----------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | Mask | GC(c3-c5, r16) | 1x | 5.0 | | 39.7 | 35.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco_20200515_211915-187da160.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco_20200515_211915.log.json) | +| R-50-FPN | Mask | GC(c3-c5, r4) | 1x | 5.1 | 15.0 | 39.9 | 36.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco_20200204-17235656.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco_20200204_024626.log.json) | +| R-101-FPN | Mask | GC(c3-c5, r16) | 1x | 7.6 | 11.4 | 41.3 | 37.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco_20200205-e58ae947.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco_20200205_192835.log.json) | +| R-101-FPN | Mask | GC(c3-c5, r4) | 1x | 7.8 | 11.6 | 42.2 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco_20200206-af22dc9d.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco_20200206_112128.log.json) | + +| Backbone | Model | Context | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------: | :--------------: | :------------: | :-----: | :------: | :------------: | :----: | :-----: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | Mask | - | 1x | 4.4 | 16.6 | 38.4 | 34.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco_20200202-bb3eb55c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco_20200202_214122.log.json) | +| R-50-FPN | Mask | GC(c3-c5, r16) | 1x | 5.0 | 15.5 | 40.4 | 36.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200202-587b99aa.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200202_174907.log.json) | +| R-50-FPN | Mask | GC(c3-c5, r4) | 1x | 5.1 | 15.1 | 40.7 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202-50b90e5c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202_085547.log.json) | +| R-101-FPN | Mask | - | 1x | 6.4 | 13.3 | 40.5 | 36.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco_20200210-81658c8a.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco_20200210_220422.log.json) | +| R-101-FPN | Mask | GC(c3-c5, r16) | 1x | 7.6 | 12.0 | 42.2 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200207-945e77ca.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200207_015330.log.json) | +| R-101-FPN | Mask | GC(c3-c5, r4) | 1x | 7.8 | 11.8 | 42.2 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206-8407a3f0.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206_142508.log.json) | +| X-101-FPN | Mask | - | 1x | 7.6 | 11.3 | 42.4 | 37.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200211-7584841c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200211_054326.log.json) | +| X-101-FPN | Mask | GC(c3-c5, r16) | 1x | 8.8 | 9.8 | 43.5 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211-cbed3d2c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211_164715.log.json) | +| X-101-FPN | Mask | GC(c3-c5, r4) | 1x | 9.0 | 9.7 | 43.9 | 39.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200212-68164964.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200212_070942.log.json) | +| X-101-FPN | Cascade Mask | - | 1x | 9.2 | 8.4 | 44.7 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200310-d5ad2a5e.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200310_115217.log.json) | +| X-101-FPN | Cascade Mask | GC(c3-c5, r16) | 1x | 10.3 | 7.7 | 46.2 | 39.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211-10bf2463.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211_184154.log.json) | +| X-101-FPN | Cascade Mask | GC(c3-c5, r4) | 1x | 10.6 | | 46.4 | 40.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200703_180653-ed035291.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200703_180653.log.json) | +| X-101-FPN | DCN Cascade Mask | - | 1x | | | 47.5 | 40.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco_20210615_211019-abbc39ea.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco_20210615_211019.log.json) | +| X-101-FPN | DCN Cascade Mask | GC(c3-c5, r16) | 1x | | | 48.0 | 41.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco_20210615_215648-44aa598a.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco_20210615_215648.log.json) | +| X-101-FPN | DCN Cascade Mask | GC(c3-c5, r4) | 1x | | | 47.9 | 41.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco_20210615_161851-720338ec.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco_20210615_161851.log.json) | **Notes:** @@ -71,3 +56,14 @@ The results on COCO 2017val are shown in the below table. - `GC` denotes Global Context (GC) block is inserted after 1x1 conv of backbone. - `DCN` denotes replace 3x3 conv with 3x3 Deformable Convolution in `c3-c5` stages of backbone. - `r4` and `r16` denote ratio 4 and ratio 16 in GC block respectively. + +## Citation + +```latex +@article{cao2019GCNet, + title={GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond}, + author={Cao, Yue and Xu, Jiarui and Lin, Stephen and Wei, Fangyun and Hu, Han}, + journal={arXiv preprint arXiv:1904.11492}, + year={2019} +} +``` diff --git a/configs/gfl/README.md b/configs/gfl/README.md index 46a7a08..703936b 100644 --- a/configs/gfl/README.md +++ b/configs/gfl/README.md @@ -1,22 +1,34 @@ -# Generalized Focal Loss: Learning Qualified and Distributed Bounding Boxes for Dense Object Detection +# GFL -## Abstract +> [Generalized Focal Loss: Learning Qualified and Distributed Bounding Boxes for Dense Object Detection](https://arxiv.org/abs/2006.04388) + + - +## Abstract -One-stage detector basically formulates object detection as dense classification and localization. The classification is usually optimized by Focal Loss and the box location is commonly learned under Dirac delta distribution. A recent trend for one-stage detectors is to introduce an individual prediction branch to estimate the quality of localization, where the predicted quality facilitates the classification to improve detection performance. This paper delves into the representations of the above three fundamental elements: quality estimation, classification and localization. Two problems are discovered in existing practices, including (1) the inconsistent usage of the quality estimation and classification between training and inference and (2) the inflexible Dirac delta distribution for localization when there is ambiguity and uncertainty in complex scenes. To address the problems, we design new representations for these elements. Specifically, we merge the quality estimation into the class prediction vector to form a joint representation of localization quality and classification, and use a vector to represent arbitrary distribution of box locations. The improved representations eliminate the inconsistency risk and accurately depict the flexible distribution in real data, but contain continuous labels, which is beyond the scope of Focal Loss. We then propose Generalized Focal Loss (GFL) that generalizes Focal Loss from its discrete form to the continuous version for successful optimization. On COCO test-dev, GFL achieves 45.0\% AP using ResNet-101 backbone, surpassing state-of-the-art SAPD (43.5\%) and ATSS (43.6\%) with higher or comparable inference speed, under the same backbone and training settings. Notably, our best model can achieve a single-model single-scale AP of 48.2\%, at 10 FPS on a single 2080Ti GPU. +One-stage detector basically formulates object detection as dense classification and localization. The classification is usually optimized by Focal Loss and the box location is commonly learned under Dirac delta distribution. A recent trend for one-stage detectors is to introduce an individual prediction branch to estimate the quality of localization, where the predicted quality facilitates the classification to improve detection performance. This paper delves into the representations of the above three fundamental elements: quality estimation, classification and localization. Two problems are discovered in existing practices, including (1) the inconsistent usage of the quality estimation and classification between training and inference and (2) the inflexible Dirac delta distribution for localization when there is ambiguity and uncertainty in complex scenes. To address the problems, we design new representations for these elements. Specifically, we merge the quality estimation into the class prediction vector to form a joint representation of localization quality and classification, and use a vector to represent arbitrary distribution of box locations. The improved representations eliminate the inconsistency risk and accurately depict the flexible distribution in real data, but contain continuous labels, which is beyond the scope of Focal Loss. We then propose Generalized Focal Loss (GFL) that generalizes Focal Loss from its discrete form to the continuous version for successful optimization. On COCO test-dev, GFL achieves 45.0% AP using ResNet-101 backbone, surpassing state-of-the-art SAPD (43.5%) and ATSS (43.6%) with higher or comparable inference speed, under the same backbone and training settings. Notably, our best model can achieve a single-model single-scale AP of 48.2%, at 10 FPS on a single 2080Ti GPU. -
- - +## Results and Models -## Citation +| Backbone | Style | Lr schd | Multi-scale Training | Inf time (fps) | box AP | Config | Download | +| :---------------: | :-----: | :-----: | :------------------: | :------------: | :----: | :---------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | pytorch | 1x | No | 19.5 | 40.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_1x_coco/gfl_r50_fpn_1x_coco_20200629_121244-25944287.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_1x_coco/gfl_r50_fpn_1x_coco_20200629_121244.log.json) | +| R-50 | pytorch | 2x | Yes | 19.5 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r50_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_mstrain_2x_coco/gfl_r50_fpn_mstrain_2x_coco_20200629_213802-37bb1edc.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_mstrain_2x_coco/gfl_r50_fpn_mstrain_2x_coco_20200629_213802.log.json) | +| R-101 | pytorch | 2x | Yes | 14.7 | 44.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r101_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126-dd12f847.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126.log.json) | +| R-101-dcnv2 | pytorch | 2x | Yes | 12.9 | 47.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco_20200630_102002-134b07df.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco_20200630_102002.log.json) | +| X-101-32x4d | pytorch | 2x | Yes | 12.1 | 45.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco/gfl_x101_32x4d_fpn_mstrain_2x_coco_20200630_102002-50c1ffdb.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco/gfl_x101_32x4d_fpn_mstrain_2x_coco_20200630_102002.log.json) | +| X-101-32x4d-dcnv2 | pytorch | 2x | Yes | 10.7 | 48.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco_20200630_102002-14a2bf25.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco_20200630_102002.log.json) | - +\[1\] *1x and 2x mean the model is trained for 90K and 180K iterations, respectively.* \ +\[2\] *All results are obtained with a single model and without any test time data augmentation such as multi-scale, flipping and etc..* \ +\[3\] *`dcnv2` denotes deformable convolutional networks v2.* \ +\[4\] *FPS is tested with a single GeForce RTX 2080Ti GPU, using a batch size of 1.* + +## Citation We provide config files to reproduce the object detection results in the paper [Generalized Focal Loss: Learning Qualified and Distributed Bounding Boxes for Dense Object Detection](https://arxiv.org/abs/2006.04388) @@ -28,19 +40,3 @@ We provide config files to reproduce the object detection results in the paper [ year={2020} } ``` - -## Results and Models - -| Backbone | Style | Lr schd | Multi-scale Training| Inf time (fps) | box AP | Config | Download | -|:-----------------:|:-------:|:-------:|:-------------------:|:--------------:|:------:|:------:|:--------:| -| R-50 | pytorch | 1x | No | 19.5 | 40.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_1x_coco/gfl_r50_fpn_1x_coco_20200629_121244-25944287.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_1x_coco/gfl_r50_fpn_1x_coco_20200629_121244.log.json) | -| R-50 | pytorch | 2x | Yes | 19.5 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r50_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_mstrain_2x_coco/gfl_r50_fpn_mstrain_2x_coco_20200629_213802-37bb1edc.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_mstrain_2x_coco/gfl_r50_fpn_mstrain_2x_coco_20200629_213802.log.json) | -| R-101 | pytorch | 2x | Yes | 14.7 | 44.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r101_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126-dd12f847.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126.log.json) | -| R-101-dcnv2 | pytorch | 2x | Yes | 12.9 | 47.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco_20200630_102002-134b07df.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco_20200630_102002.log.json) | -| X-101-32x4d | pytorch | 2x | Yes | 12.1 | 45.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco/gfl_x101_32x4d_fpn_mstrain_2x_coco_20200630_102002-50c1ffdb.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco/gfl_x101_32x4d_fpn_mstrain_2x_coco_20200630_102002.log.json) | -| X-101-32x4d-dcnv2 | pytorch | 2x | Yes | 10.7 | 48.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco_20200630_102002-14a2bf25.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco_20200630_102002.log.json) | - -[1] *1x and 2x mean the model is trained for 90K and 180K iterations, respectively.* \ -[2] *All results are obtained with a single model and without any test time data augmentation such as multi-scale, flipping and etc..* \ -[3] *`dcnv2` denotes deformable convolutional networks v2.* \ -[4] *FPS is tested with a single GeForce RTX 2080Ti GPU, using a batch size of 1.* diff --git a/configs/ghm/README.md b/configs/ghm/README.md index 97b6090..cf9fb73 100644 --- a/configs/ghm/README.md +++ b/configs/ghm/README.md @@ -1,24 +1,29 @@ -# Gradient Harmonized Single-stage Detector +# GHM -## Abstract +> [Gradient Harmonized Single-stage Detector](https://arxiv.org/abs/1811.05181) + + - +## Abstract Despite the great success of two-stage detectors, single-stage detector is still a more elegant and efficient way, yet suffers from the two well-known disharmonies during training, i.e. the huge difference in quantity between positive and negative examples as well as between easy and hard examples. In this work, we first point out that the essential effect of the two disharmonies can be summarized in term of the gradient. Further, we propose a novel gradient harmonizing mechanism (GHM) to be a hedging for the disharmonies. The philosophy behind GHM can be easily embedded into both classification loss function like cross-entropy (CE) and regression loss function like smooth-L1 (SL1) loss. To this end, two novel loss functions called GHM-C and GHM-R are designed to balancing the gradient flow for anchor classification and bounding box refinement, respectively. Ablation study on MS COCO demonstrates that without laborious hyper-parameter tuning, both GHM-C and GHM-R can bring substantial improvement for single-stage detector. Without any whistles and bells, our model achieves 41.6 mAP on COCO test-dev set which surpasses the state-of-the-art method, Focal Loss (FL) + SL1, by 0.8. -
- - +## Results and Models -## Citation +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | pytorch | 1x | 4.0 | 3.3 | 37.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r50_fpn_1x_coco/retinanet_ghm_r50_fpn_1x_coco_20200130-a437fda3.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r50_fpn_1x_coco/retinanet_ghm_r50_fpn_1x_coco_20200130_004213.log.json) | +| R-101-FPN | pytorch | 1x | 6.0 | 4.4 | 39.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ghm/retinanet_ghm_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r101_fpn_1x_coco/retinanet_ghm_r101_fpn_1x_coco_20200130-c148ee8f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r101_fpn_1x_coco/retinanet_ghm_r101_fpn_1x_coco_20200130_145259.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 7.2 | 5.1 | 40.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco/retinanet_ghm_x101_32x4d_fpn_1x_coco_20200131-e4333bd0.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco/retinanet_ghm_x101_32x4d_fpn_1x_coco_20200131_113653.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 10.3 | 5.2 | 41.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco/retinanet_ghm_x101_64x4d_fpn_1x_coco_20200131-dd381cef.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco/retinanet_ghm_x101_64x4d_fpn_1x_coco_20200131_113723.log.json) | - +## Citation -``` +```latex @inproceedings{li2019gradient, title={Gradient Harmonized Single-stage Detector}, author={Li, Buyu and Liu, Yu and Wang, Xiaogang}, @@ -26,12 +31,3 @@ Despite the great success of two-stage detectors, single-stage detector is still year={2019} } ``` - -## Results and Models - -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | -| R-50-FPN | pytorch | 1x | 4.0 | 3.3 | 37.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r50_fpn_1x_coco/retinanet_ghm_r50_fpn_1x_coco_20200130-a437fda3.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r50_fpn_1x_coco/retinanet_ghm_r50_fpn_1x_coco_20200130_004213.log.json) | -| R-101-FPN | pytorch | 1x | 6.0 | 4.4 | 39.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ghm/retinanet_ghm_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r101_fpn_1x_coco/retinanet_ghm_r101_fpn_1x_coco_20200130-c148ee8f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r101_fpn_1x_coco/retinanet_ghm_r101_fpn_1x_coco_20200130_145259.log.json) | -| X-101-32x4d-FPN | pytorch | 1x | 7.2 | 5.1 | 40.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco/retinanet_ghm_x101_32x4d_fpn_1x_coco_20200131-e4333bd0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco/retinanet_ghm_x101_32x4d_fpn_1x_coco_20200131_113653.log.json) | -| X-101-64x4d-FPN | pytorch | 1x | 10.3 | 5.2 | 41.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco/retinanet_ghm_x101_64x4d_fpn_1x_coco_20200131-dd381cef.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco/retinanet_ghm_x101_64x4d_fpn_1x_coco_20200131_113723.log.json) | diff --git a/configs/gn+ws/README.md b/configs/gn+ws/README.md index 5cde531..184bed3 100644 --- a/configs/gn+ws/README.md +++ b/configs/gn+ws/README.md @@ -1,58 +1,54 @@ -# Weight Standardization +# GN + WS -## Abstract +> [Weight Standardization](https://arxiv.org/abs/1903.10520) + + - +## Abstract Batch Normalization (BN) has become an out-of-box technique to improve deep network training. However, its effectiveness is limited for micro-batch training, i.e., each GPU typically has only 1-2 images for training, which is inevitable for many computer vision tasks, e.g., object detection and semantic segmentation, constrained by memory consumption. To address this issue, we propose Weight Standardization (WS) and Batch-Channel Normalization (BCN) to bring two success factors of BN into micro-batch training: 1) the smoothing effects on the loss landscape and 2) the ability to avoid harmful elimination singularities along the training trajectory. WS standardizes the weights in convolutional layers to smooth the loss landscape by reducing the Lipschitz constants of the loss and the gradients; BCN combines batch and channel normalizations and leverages estimated statistics of the activations in convolutional layers to keep networks away from elimination singularities. We validate WS and BCN on comprehensive computer vision tasks, including image classification, object detection, instance segmentation, video recognition and semantic segmentation. All experimental results consistently show that WS and BCN improve micro-batch training significantly. Moreover, using WS and BCN with micro-batch training is even able to match or outperform the performances of BN with large-batch training. -
- - - -## Citation - - - -``` -@article{weightstandardization, - author = {Siyuan Qiao and Huiyu Wang and Chenxi Liu and Wei Shen and Alan Yuille}, - title = {Weight Standardization}, - journal = {arXiv preprint arXiv:1903.10520}, - year = {2019}, -} -``` - ## Results and Models Faster R-CNN -| Backbone | Style | Normalization | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -|:---------:|:-------:|:-------------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| -| R-50-FPN | pytorch | GN+WS | 1x | 5.9 | 11.7 | 39.7 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco/faster_rcnn_r50_fpn_gn_ws-all_1x_coco_20200130-613d9fe2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco/faster_rcnn_r50_fpn_gn_ws-all_1x_coco_20200130_210936.log.json) | -| R-101-FPN | pytorch | GN+WS | 1x | 8.9 | 9.0 | 41.7 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco/faster_rcnn_r101_fpn_gn_ws-all_1x_coco_20200205-a93b0d75.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco/faster_rcnn_r101_fpn_gn_ws-all_1x_coco_20200205_232146.log.json) | -| X-50-32x4d-FPN | pytorch | GN+WS | 1x | 7.0 | 10.3 | 40.7 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco_20200203-839c5d9d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco_20200203_220113.log.json) | -| X-101-32x4d-FPN | pytorch | GN+WS | 1x | 10.8 | 7.6 | 42.1 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco_20200212-27da1bc2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco_20200212_195302.log.json) | +| Backbone | Style | Normalization | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----------: | :-----: | :------: | :------------: | :----: | :-----: | :-----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | pytorch | GN+WS | 1x | 5.9 | 11.7 | 39.7 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco/faster_rcnn_r50_fpn_gn_ws-all_1x_coco_20200130-613d9fe2.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco/faster_rcnn_r50_fpn_gn_ws-all_1x_coco_20200130_210936.log.json) | +| R-101-FPN | pytorch | GN+WS | 1x | 8.9 | 9.0 | 41.7 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco/faster_rcnn_r101_fpn_gn_ws-all_1x_coco_20200205-a93b0d75.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco/faster_rcnn_r101_fpn_gn_ws-all_1x_coco_20200205_232146.log.json) | +| X-50-32x4d-FPN | pytorch | GN+WS | 1x | 7.0 | 10.3 | 40.7 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco_20200203-839c5d9d.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco_20200203_220113.log.json) | +| X-101-32x4d-FPN | pytorch | GN+WS | 1x | 10.8 | 7.6 | 42.1 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco_20200212-27da1bc2.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco_20200212_195302.log.json) | Mask R-CNN -| Backbone | Style | Normalization | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -|:---------:|:-------:|:-------------:|:---------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| -| R-50-FPN | pytorch | GN+WS | 2x | 7.3 | 10.5 | 40.6 | 36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco/mask_rcnn_r50_fpn_gn_ws-all_2x_coco_20200226-16acb762.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco/mask_rcnn_r50_fpn_gn_ws-all_2x_coco_20200226_062128.log.json) | -| R-101-FPN | pytorch | GN+WS | 2x | 10.3 | 8.6 | 42.0 | 37.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco/mask_rcnn_r101_fpn_gn_ws-all_2x_coco_20200212-ea357cd9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco/mask_rcnn_r101_fpn_gn_ws-all_2x_coco_20200212_213627.log.json) | -| X-50-32x4d-FPN | pytorch | GN+WS | 2x | 8.4 | 9.3 | 41.1 | 37.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco_20200216-649fdb6f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco_20200216_201500.log.json) | -| X-101-32x4d-FPN | pytorch | GN+WS | 2x | 12.2 | 7.1 | 42.1 | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco_20200319-33fb95b5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco_20200319_104101.log.json) | -| R-50-FPN | pytorch | GN+WS | 20-23-24e | 7.3 | - | 41.1 | 37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco_20200213-487d1283.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco_20200213_035123.log.json) | -| R-101-FPN | pytorch | GN+WS | 20-23-24e | 10.3 | - | 43.1 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco_20200213-57b5a50f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco_20200213_130142.log.json) | -| X-50-32x4d-FPN | pytorch | GN+WS | 20-23-24e | 8.4 | - | 42.1 | 38.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200226-969bcb2c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200226_093732.log.json) | -| X-101-32x4d-FPN | pytorch | GN+WS | 20-23-24e | 12.2 | - | 42.7 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200316-e6cd35ef.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200316_013741.log.json) | +| Backbone | Style | Normalization | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----------: | :-------: | :------: | :------------: | :----: | :-----: | :----------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | pytorch | GN+WS | 2x | 7.3 | 10.5 | 40.6 | 36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco/mask_rcnn_r50_fpn_gn_ws-all_2x_coco_20200226-16acb762.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco/mask_rcnn_r50_fpn_gn_ws-all_2x_coco_20200226_062128.log.json) | +| R-101-FPN | pytorch | GN+WS | 2x | 10.3 | 8.6 | 42.0 | 37.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco/mask_rcnn_r101_fpn_gn_ws-all_2x_coco_20200212-ea357cd9.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco/mask_rcnn_r101_fpn_gn_ws-all_2x_coco_20200212_213627.log.json) | +| X-50-32x4d-FPN | pytorch | GN+WS | 2x | 8.4 | 9.3 | 41.1 | 37.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco_20200216-649fdb6f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco_20200216_201500.log.json) | +| X-101-32x4d-FPN | pytorch | GN+WS | 2x | 12.2 | 7.1 | 42.1 | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco_20200319-33fb95b5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco_20200319_104101.log.json) | +| R-50-FPN | pytorch | GN+WS | 20-23-24e | 7.3 | - | 41.1 | 37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco_20200213-487d1283.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco_20200213_035123.log.json) | +| R-101-FPN | pytorch | GN+WS | 20-23-24e | 10.3 | - | 43.1 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco_20200213-57b5a50f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco_20200213_130142.log.json) | +| X-50-32x4d-FPN | pytorch | GN+WS | 20-23-24e | 8.4 | - | 42.1 | 38.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200226-969bcb2c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200226_093732.log.json) | +| X-101-32x4d-FPN | pytorch | GN+WS | 20-23-24e | 12.2 | - | 42.7 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200316-e6cd35ef.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200316_013741.log.json) | Note: - GN+WS requires about 5% more memory than GN, and it is only 5% slower than GN. - In the paper, a 20-23-24e lr schedule is used instead of 2x. - The X-50-GN and X-101-GN pretrained models are also shared by the authors. + +## Citation + +```latex +@article{weightstandardization, + author = {Siyuan Qiao and Huiyu Wang and Chenxi Liu and Wei Shen and Alan Yuille}, + title = {Weight Standardization}, + journal = {arXiv preprint arXiv:1903.10520}, + year = {2019}, +} +``` diff --git a/configs/gn/README.md b/configs/gn/README.md index 4a9d9b5..9bb2888 100644 --- a/configs/gn/README.md +++ b/configs/gn/README.md @@ -1,22 +1,35 @@ -# Group Normalization +# GN -## Abstract +> [Group Normalization](https://arxiv.org/abs/1803.08494) + + - +## Abstract Batch Normalization (BN) is a milestone technique in the development of deep learning, enabling various networks to train. However, normalizing along the batch dimension introduces problems --- BN's error increases rapidly when the batch size becomes smaller, caused by inaccurate batch statistics estimation. This limits BN's usage for training larger models and transferring features to computer vision tasks including detection, segmentation, and video, which require small batches constrained by memory consumption. In this paper, we present Group Normalization (GN) as a simple alternative to BN. GN divides the channels into groups and computes within each group the mean and variance for normalization. GN's computation is independent of batch sizes, and its accuracy is stable in a wide range of batch sizes. On ResNet-50 trained in ImageNet, GN has 10.6% lower error than its BN counterpart when using a batch size of 2; when using typical batch sizes, GN is comparably good with BN and outperforms other normalization variants. Moreover, GN can be naturally transferred from pre-training to fine-tuning. GN can outperform its BN-based counterparts for object detection and segmentation in COCO, and for video classification in Kinetics, showing that GN can effectively replace the powerful BN in a variety of tasks. GN can be easily implemented by a few lines of code in modern libraries. -
- - +## Results and Models -## Citation +| Backbone | model | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-----------: | :--------: | :-----: | :------: | :------------: | :----: | :-----: | :--------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN (d) | Mask R-CNN | 2x | 7.1 | 11.0 | 40.2 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_2x_coco/mask_rcnn_r50_fpn_gn-all_2x_coco_20200206-8eee02a6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_2x_coco/mask_rcnn_r50_fpn_gn-all_2x_coco_20200206_050355.log.json) | +| R-50-FPN (d) | Mask R-CNN | 3x | 7.1 | - | 40.5 | 36.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_3x_coco/mask_rcnn_r50_fpn_gn-all_3x_coco_20200214-8b23b1e5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_3x_coco/mask_rcnn_r50_fpn_gn-all_3x_coco_20200214_063512.log.json) | +| R-101-FPN (d) | Mask R-CNN | 2x | 9.9 | 9.0 | 41.9 | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r101_fpn_gn-all_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_2x_coco/mask_rcnn_r101_fpn_gn-all_2x_coco_20200205-d96b1b50.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_2x_coco/mask_rcnn_r101_fpn_gn-all_2x_coco_20200205_234402.log.json) | +| R-101-FPN (d) | Mask R-CNN | 3x | 9.9 | | 42.1 | 38.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_3x_coco/mask_rcnn_r101_fpn_gn-all_3x_coco_20200513_181609-0df864f4.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_3x_coco/mask_rcnn_r101_fpn_gn-all_3x_coco_20200513_181609.log.json) | +| R-50-FPN (c) | Mask R-CNN | 2x | 7.1 | 10.9 | 40.0 | 36.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco_20200207-20d3e849.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco_20200207_225832.log.json) | +| R-50-FPN (c) | Mask R-CNN | 3x | 7.1 | - | 40.1 | 36.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco_20200225-542aefbc.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco_20200225_235135.log.json) | - +**Notes:** + +- (d) means pretrained model converted from Detectron, and (c) means the contributed model pretrained by [@thangvubk](https://github.com/thangvubk). +- The `3x` schedule is epoch \[28, 34, 36\]. +- **Memory, Train/Inf time is outdated.** + +## Citation ```latex @inproceedings{wu2018group, @@ -26,20 +39,3 @@ Batch Normalization (BN) is a milestone technique in the development of deep lea year={2018} } ``` - -## Results and Models - -| Backbone | model | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -|:-------------:|:----------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| -| R-50-FPN (d) | Mask R-CNN | 2x | 7.1 | 11.0 | 40.2 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_2x_coco/mask_rcnn_r50_fpn_gn-all_2x_coco_20200206-8eee02a6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_2x_coco/mask_rcnn_r50_fpn_gn-all_2x_coco_20200206_050355.log.json) | -| R-50-FPN (d) | Mask R-CNN | 3x | 7.1 | - | 40.5 | 36.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_3x_coco/mask_rcnn_r50_fpn_gn-all_3x_coco_20200214-8b23b1e5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_3x_coco/mask_rcnn_r50_fpn_gn-all_3x_coco_20200214_063512.log.json) | -| R-101-FPN (d) | Mask R-CNN | 2x | 9.9 | 9.0 | 41.9 | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r101_fpn_gn-all_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_2x_coco/mask_rcnn_r101_fpn_gn-all_2x_coco_20200205-d96b1b50.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_2x_coco/mask_rcnn_r101_fpn_gn-all_2x_coco_20200205_234402.log.json) | -| R-101-FPN (d) | Mask R-CNN | 3x | 9.9 | | 42.1 | 38.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_3x_coco/mask_rcnn_r101_fpn_gn-all_3x_coco_20200513_181609-0df864f4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_3x_coco/mask_rcnn_r101_fpn_gn-all_3x_coco_20200513_181609.log.json) | -| R-50-FPN (c) | Mask R-CNN | 2x | 7.1 | 10.9 | 40.0 | 36.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco_20200207-20d3e849.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco_20200207_225832.log.json) | -| R-50-FPN (c) | Mask R-CNN | 3x | 7.1 | - | 40.1 | 36.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco_20200225-542aefbc.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco_20200225_235135.log.json) | - -**Notes:** - -- (d) means pretrained model converted from Detectron, and (c) means the contributed model pretrained by [@thangvubk](https://github.com/thangvubk). -- The `3x` schedule is epoch [28, 34, 36]. -- **Memory, Train/Inf time is outdated.** diff --git a/configs/grid_rcnn/README.md b/configs/grid_rcnn/README.md index b53cc9e..e844021 100644 --- a/configs/grid_rcnn/README.md +++ b/configs/grid_rcnn/README.md @@ -1,24 +1,34 @@ # Grid R-CNN -## Abstract +> [Grid R-CNN](https://arxiv.org/abs/1811.12030) + + - +## Abstract This paper proposes a novel object detection framework named Grid R-CNN, which adopts a grid guided localization mechanism for accurate object detection. Different from the traditional regression based methods, the Grid R-CNN captures the spatial information explicitly and enjoys the position sensitive property of fully convolutional architecture. Instead of using only two independent points, we design a multi-point supervision formulation to encode more clues in order to reduce the impact of inaccurate prediction of specific points. To take the full advantage of the correlation of points in a grid, we propose a two-stage information fusion strategy to fuse feature maps of neighbor grid points. The grid guided localization approach is easy to be extended to different state-of-the-art detection frameworks. Grid R-CNN leads to high quality object localization, and experiments demonstrate that it achieves a 4.1% AP gain at IoU=0.8 and a 10.0% AP gain at IoU=0.9 on COCO benchmark compared to Faster R-CNN with Res50 backbone and FPN architecture. Grid R-CNN is a well-performed objection detection framework. It transforms the traditional box offset regression problem into a grid point estimation problem. With the guidance of the grid points, it can obtain high-quality localization results. However, the speed of Grid R-CNN is not so satisfactory. In this technical report we present Grid R-CNN Plus, a better and faster version of Grid R-CNN. We have made several updates that significantly speed up the framework and simultaneously improve the accuracy. On COCO dataset, the Res50-FPN based Grid R-CNN Plus detector achieves an mAP of 40.4%, outperforming the baseline on the same model by 3.0 points with similar inference time. -
- - +## Results and Models -## Citation +| Backbone | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :---------: | :-----: | :------: | :------------: | :----: | :---------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | 2x | 5.1 | 15.0 | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco/grid_rcnn_r50_fpn_gn-head_2x_coco_20200130-6cca8223.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco/grid_rcnn_r50_fpn_gn-head_2x_coco_20200130_221140.log.json) | +| R-101 | 2x | 7.0 | 12.6 | 41.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco/grid_rcnn_r101_fpn_gn-head_2x_coco_20200309-d6eca030.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco/grid_rcnn_r101_fpn_gn-head_2x_coco_20200309_164224.log.json) | +| X-101-32x4d | 2x | 8.3 | 10.8 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco_20200130-d8f0e3ff.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco_20200130_215413.log.json) | +| X-101-64x4d | 2x | 11.3 | 7.7 | 43.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco_20200204-ec76a754.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco_20200204_080641.log.json) | - +**Notes:** + +- All models are trained with 8 GPUs instead of 32 GPUs in the original paper. +- The warming up lasts for 1 epoch and `2x` here indicates 25 epochs. + +## Citation ```latex @inproceedings{lu2019grid, @@ -35,17 +45,3 @@ Grid R-CNN is a well-performed objection detection framework. It transforms the year={2019} } ``` - -## Results and Models - -| Backbone | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -|:-----------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| -| R-50 | 2x | 5.1 | 15.0 | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco/grid_rcnn_r50_fpn_gn-head_2x_coco_20200130-6cca8223.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco/grid_rcnn_r50_fpn_gn-head_2x_coco_20200130_221140.log.json) | -| R-101 | 2x | 7.0 | 12.6 | 41.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco/grid_rcnn_r101_fpn_gn-head_2x_coco_20200309-d6eca030.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco/grid_rcnn_r101_fpn_gn-head_2x_coco_20200309_164224.log.json) | -| X-101-32x4d | 2x | 8.3 | 10.8 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco_20200130-d8f0e3ff.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco_20200130_215413.log.json) | -| X-101-64x4d | 2x | 11.3 | 7.7 | 43.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco_20200204-ec76a754.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco_20200204_080641.log.json) | - -**Notes:** - -- All models are trained with 8 GPUs instead of 32 GPUs in the original paper. -- The warming up lasts for 1 epoch and `2x` here indicates 25 epochs. diff --git a/configs/groie/README.md b/configs/groie/README.md index 5a45be7..4a1eba0 100644 --- a/configs/groie/README.md +++ b/configs/groie/README.md @@ -1,21 +1,19 @@ -# A novel Region of Interest Extraction Layer for Instance Segmentation +# GRoIE -## Abstract +> [A novel Region of Interest Extraction Layer for Instance Segmentation](https://arxiv.org/abs/2004.13665) + + - +## Abstract Given the wide diffusion of deep neural network architectures for computer vision tasks, several new applications are nowadays more and more feasible. Among them, a particular attention has been recently given to instance segmentation, by exploiting the results achievable by two-stage networks (such as Mask R-CNN or Faster R-CNN), derived from R-CNN. In these complex architectures, a crucial role is played by the Region of Interest (RoI) extraction layer, devoted to extracting a coherent subset of features from a single Feature Pyramid Network (FPN) layer attached on top of a backbone. This paper is motivated by the need to overcome the limitations of existing RoI extractors which select only one (the best) layer from FPN. Our intuition is that all the layers of FPN retain useful information. Therefore, the proposed layer (called Generic RoI Extractor - GRoIE) introduces non-local building blocks and attention mechanisms to boost the performance. A comprehensive ablation study at component level is conducted to find the best set of algorithms and parameters for the GRoIE layer. Moreover, GRoIE can be integrated seamlessly with every two-stage architecture for both object detection and instance segmentation tasks. Therefore, the improvements brought about by the use of GRoIE in different state-of-the-art architectures are also evaluated. The proposed layer leads up to gain a 1.1% AP improvement on bounding box detection and 1.7% AP improvement on instance segmentation. -
- - - ## Introduction By Leonardo Rossi, Akbar Karimi and Andrea Prati from @@ -25,9 +23,6 @@ We provide configs to reproduce the results in the paper for "*A novel Region of Interest Extraction Layer for Instance Segmentation*" on COCO object detection. - - - This paper is motivated by the need to overcome to the limitations of existing RoI extractors which select only one (the best) layer from FPN. @@ -37,24 +32,24 @@ Therefore, the proposed layer (called Generic RoI Extractor - **GRoIE**) introduces non-local building blocks and attention mechanisms to boost the performance. -## Results and models +## Results and Models The results on COCO 2017 minival (5k images) are shown in the below table. ### Application of GRoIE to different architectures -| Backbone | Method | Lr schd | box AP | mask AP | Config | Download| -| :-------: | :--------------: | :-----: | :----: | :-----: | :-------:| :--------:| -| R-50-FPN | Faster Original | 1x | 37.4 | | [config](../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | -| R-50-FPN | + GRoIE | 1x | 38.3 | | [config](./faster_rcnn_r50_fpn_groie_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/groie/faster_rcnn_r50_fpn_groie_1x_coco/faster_rcnn_r50_fpn_groie_1x_coco_20200604_211715-66ee9516.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/groie/faster_rcnn_r50_fpn_groie_1x_coco/faster_rcnn_r50_fpn_groie_1x_coco_20200604_211715.log.json) | -| R-50-FPN | Grid R-CNN | 1x | 39.1 | | [config](./grid_rcnn_r50_fpn_gn-head_1x_coco.py)| [model](https://download.openmmlab.com/mmdetection/v2.0/groie/grid_rcnn_r50_fpn_gn-head_1x_coco/grid_rcnn_r50_fpn_gn-head_1x_coco_20200605_202059-64f00ee8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/groie/grid_rcnn_r50_fpn_gn-head_1x_coco/grid_rcnn_r50_fpn_gn-head_1x_coco_20200605_202059.log.json) | -| R-50-FPN | + GRoIE | 1x | | | [config](./grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py)|| -| R-50-FPN | Mask R-CNN | 1x | 38.2 | 34.7 | [config](../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py)| [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205_050542.log.json) | -| R-50-FPN | + GRoIE | 1x | 39.0 | 36.0 | [config](./mask_rcnn_r50_fpn_groie_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_groie_1x_coco/mask_rcnn_r50_fpn_groie_1x_coco_20200604_211715-50d90c74.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_groie_1x_coco/mask_rcnn_r50_fpn_groie_1x_coco_20200604_211715.log.json) | -| R-50-FPN | GC-Net | 1x | 40.7 | 36.5 | [config](../gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202-50b90e5c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202_085547.log.json) | -| R-50-FPN | + GRoIE | 1x | 41.0 | 37.8 | [config](./mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200604_211715-42eb79e1.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200604_211715-42eb79e1.pth) | -| R-101-FPN | GC-Net | 1x | 42.2 | 37.8 | [config](../gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206-8407a3f0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206_142508.log.json) | -| R-101-FPN | + GRoIE | 1x | 42.6 | 38.7 | [config](./mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py)| [model](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200607_224507-8daae01c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200607_224507.log.json) | +| Backbone | Method | Lr schd | box AP | mask AP | Config | Download | +| :-------: | :-------------: | :-----: | :----: | :-----: | :---------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | Faster Original | 1x | 37.4 | | [config](../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | +| R-50-FPN | + GRoIE | 1x | 38.3 | | [config](./faster_rcnn_r50_fpn_groie_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/groie/faster_rcnn_r50_fpn_groie_1x_coco/faster_rcnn_r50_fpn_groie_1x_coco_20200604_211715-66ee9516.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/groie/faster_rcnn_r50_fpn_groie_1x_coco/faster_rcnn_r50_fpn_groie_1x_coco_20200604_211715.log.json) | +| R-50-FPN | Grid R-CNN | 1x | 39.1 | | [config](./grid_rcnn_r50_fpn_gn-head_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/groie/grid_rcnn_r50_fpn_gn-head_1x_coco/grid_rcnn_r50_fpn_gn-head_1x_coco_20200605_202059-64f00ee8.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/groie/grid_rcnn_r50_fpn_gn-head_1x_coco/grid_rcnn_r50_fpn_gn-head_1x_coco_20200605_202059.log.json) | +| R-50-FPN | + GRoIE | 1x | | | [config](./grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py) | | +| R-50-FPN | Mask R-CNN | 1x | 38.2 | 34.7 | [config](../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205_050542.log.json) | +| R-50-FPN | + GRoIE | 1x | 39.0 | 36.0 | [config](./mask_rcnn_r50_fpn_groie_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_groie_1x_coco/mask_rcnn_r50_fpn_groie_1x_coco_20200604_211715-50d90c74.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_groie_1x_coco/mask_rcnn_r50_fpn_groie_1x_coco_20200604_211715.log.json) | +| R-50-FPN | GC-Net | 1x | 40.7 | 36.5 | [config](../gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202-50b90e5c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202_085547.log.json) | +| R-50-FPN | + GRoIE | 1x | 41.0 | 37.8 | [config](./mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200604_211715-42eb79e1.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200604_211715-42eb79e1.pth) | +| R-101-FPN | GC-Net | 1x | 42.2 | 37.8 | [config](../gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206-8407a3f0.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206_142508.log.json) | +| R-101-FPN | + GRoIE | 1x | 42.6 | 38.7 | [config](./mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200607_224507-8daae01c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200607_224507.log.json) | ## Citation diff --git a/configs/guided_anchoring/README.md b/configs/guided_anchoring/README.md index d99ced9..563e43f 100644 --- a/configs/guided_anchoring/README.md +++ b/configs/guided_anchoring/README.md @@ -1,22 +1,51 @@ -# Region Proposal by Guided Anchoring +# Guided Anchoring -## Abstract +> [Region Proposal by Guided Anchoring](https://arxiv.org/abs/1901.03278) + + - +## Abstract Region anchors are the cornerstone of modern object detection techniques. State-of-the-art detectors mostly rely on a dense anchoring scheme, where anchors are sampled uniformly over the spatial domain with a predefined set of scales and aspect ratios. In this paper, we revisit this foundational stage. Our study shows that it can be done much more effectively and efficiently. Specifically, we present an alternative scheme, named Guided Anchoring, which leverages semantic features to guide the anchoring. The proposed method jointly predicts the locations where the center of objects of interest are likely to exist as well as the scales and aspect ratios at different locations. On top of predicted anchor shapes, we mitigate the feature inconsistency with a feature adaption module. We also study the use of high-quality proposals to improve detection performance. The anchoring scheme can be seamlessly integrated into proposal methods and detectors. With Guided Anchoring, we achieve 9.1% higher recall on MS COCO with 90% fewer anchors than the RPN baseline. We also adopt Guided Anchoring in Fast R-CNN, Faster R-CNN and RetinaNet, respectively improving the detection mAP by 2.2%, 2.7% and 1.2%. -
- - +## Results and Models -## Citation +The results on COCO 2017 val is shown in the below table. (results on test-dev are usually slightly higher than val). - +| Method | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | AR 1000 | Config | Download | +| :----: | :-------------: | :-----: | :-----: | :------: | :------------: | :-----: | :-----------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| GA-RPN | R-50-FPN | caffe | 1x | 5.3 | 15.8 | 68.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco/ga_rpn_r50_caffe_fpn_1x_coco_20200531-899008a6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco/ga_rpn_r50_caffe_fpn_1x_coco_20200531_011819.log.json) | +| GA-RPN | R-101-FPN | caffe | 1x | 7.3 | 13.0 | 69.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco/ga_rpn_r101_caffe_fpn_1x_coco_20200531-ca9ba8fb.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco/ga_rpn_r101_caffe_fpn_1x_coco_20200531_011812.log.json) | +| GA-RPN | X-101-32x4d-FPN | pytorch | 1x | 8.5 | 10.0 | 70.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco/ga_rpn_x101_32x4d_fpn_1x_coco_20200220-c28d1b18.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco/ga_rpn_x101_32x4d_fpn_1x_coco_20200220_221326.log.json) | +| GA-RPN | X-101-64x4d-FPN | pytorch | 1x | 7.1 | 7.5 | 71.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco/ga_rpn_x101_64x4d_fpn_1x_coco_20200225-3c6e1aa2.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco/ga_rpn_x101_64x4d_fpn_1x_coco_20200225_152704.log.json) | + +| Method | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :------------: | :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| GA-Faster RCNN | R-50-FPN | caffe | 1x | 5.5 | | 39.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco/ga_faster_r50_caffe_fpn_1x_coco_20200702_000718-a11ccfe6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco/ga_faster_r50_caffe_fpn_1x_coco_20200702_000718.log.json) | +| GA-Faster RCNN | R-101-FPN | caffe | 1x | 7.5 | | 41.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco/ga_faster_r101_caffe_fpn_1x_coco_bbox_mAP-0.415_20200505_115528-fb82e499.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco/ga_faster_r101_caffe_fpn_1x_coco_20200505_115528.log.json) | +| GA-Faster RCNN | X-101-32x4d-FPN | pytorch | 1x | 8.7 | 9.7 | 43.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco/ga_faster_x101_32x4d_fpn_1x_coco_20200215-1ded9da3.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco/ga_faster_x101_32x4d_fpn_1x_coco_20200215_184547.log.json) | +| GA-Faster RCNN | X-101-64x4d-FPN | pytorch | 1x | 11.8 | 7.3 | 43.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco/ga_faster_x101_64x4d_fpn_1x_coco_20200215-0fa7bde7.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco/ga_faster_x101_64x4d_fpn_1x_coco_20200215_104455.log.json) | +| GA-RetinaNet | R-50-FPN | caffe | 1x | 3.5 | 16.8 | 36.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco/ga_retinanet_r50_caffe_fpn_1x_coco_20201020-39581c6f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco/ga_retinanet_r50_caffe_fpn_1x_coco_20201020_225450.log.json) | +| GA-RetinaNet | R-101-FPN | caffe | 1x | 5.5 | 12.9 | 39.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco/ga_retinanet_r101_caffe_fpn_1x_coco_20200531-6266453c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco/ga_retinanet_r101_caffe_fpn_1x_coco_20200531_012847.log.json) | +| GA-RetinaNet | X-101-32x4d-FPN | pytorch | 1x | 6.9 | 10.6 | 40.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco/ga_retinanet_x101_32x4d_fpn_1x_coco_20200219-40c56caa.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco/ga_retinanet_x101_32x4d_fpn_1x_coco_20200219_223025.log.json) | +| GA-RetinaNet | X-101-64x4d-FPN | pytorch | 1x | 9.9 | 7.7 | 41.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco/ga_retinanet_x101_64x4d_fpn_1x_coco_20200226-ef9f7f1f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco/ga_retinanet_x101_64x4d_fpn_1x_coco_20200226_221123.log.json) | + +- In the Guided Anchoring paper, `score_thr` is set to 0.001 in Fast/Faster RCNN and 0.05 in RetinaNet for both baselines and Guided Anchoring. + +- Performance on COCO test-dev benchmark are shown as follows. + +| Method | Backbone | Style | Lr schd | Aug Train | Score thr | AP | AP_50 | AP_75 | AP_small | AP_medium | AP_large | Download | +| :------------: | :-------: | :---: | :-----: | :-------: | :-------: | :-: | :---: | :---: | :------: | :-------: | :------: | :------: | +| GA-Faster RCNN | R-101-FPN | caffe | 1x | F | 0.05 | | | | | | | | +| GA-Faster RCNN | R-101-FPN | caffe | 1x | F | 0.001 | | | | | | | | +| GA-RetinaNet | R-101-FPN | caffe | 1x | F | 0.05 | | | | | | | | +| GA-RetinaNet | R-101-FPN | caffe | 2x | T | 0.05 | | | | | | | | + +## Citation We provide config files to reproduce the results in the CVPR 2019 paper for [Region Proposal by Guided Anchoring](https://arxiv.org/abs/1901.03278). @@ -28,36 +57,3 @@ We provide config files to reproduce the results in the CVPR 2019 paper for [Reg year={2019} } ``` - -## Results and Models - -The results on COCO 2017 val is shown in the below table. (results on test-dev are usually slightly higher than val). - -| Method | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | AR 1000 | Config | Download | -| :----: | :-------------: | :-----: | :-----: | :------: | :------------: | :-----: | :------: | :--------: | -| GA-RPN | R-50-FPN | caffe | 1x | 5.3 | 15.8 | 68.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco/ga_rpn_r50_caffe_fpn_1x_coco_20200531-899008a6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco/ga_rpn_r50_caffe_fpn_1x_coco_20200531_011819.log.json) | -| GA-RPN | R-101-FPN | caffe | 1x | 7.3 | 13.0 | 69.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco/ga_rpn_r101_caffe_fpn_1x_coco_20200531-ca9ba8fb.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco/ga_rpn_r101_caffe_fpn_1x_coco_20200531_011812.log.json) | -| GA-RPN | X-101-32x4d-FPN | pytorch | 1x | 8.5 | 10.0 | 70.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco/ga_rpn_x101_32x4d_fpn_1x_coco_20200220-c28d1b18.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco/ga_rpn_x101_32x4d_fpn_1x_coco_20200220_221326.log.json) | -| GA-RPN | X-101-64x4d-FPN | pytorch | 1x | 7.1 | 7.5 | 71.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco/ga_rpn_x101_64x4d_fpn_1x_coco_20200225-3c6e1aa2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco/ga_rpn_x101_64x4d_fpn_1x_coco_20200225_152704.log.json) | - -| Method | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :------------: | :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | -| GA-Faster RCNN | R-50-FPN | caffe | 1x | 5.5 | | 39.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco/ga_faster_r50_caffe_fpn_1x_coco_20200702_000718-a11ccfe6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco/ga_faster_r50_caffe_fpn_1x_coco_20200702_000718.log.json) | -| GA-Faster RCNN | R-101-FPN | caffe | 1x | 7.5 | | 41.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco/ga_faster_r101_caffe_fpn_1x_coco_bbox_mAP-0.415_20200505_115528-fb82e499.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco/ga_faster_r101_caffe_fpn_1x_coco_20200505_115528.log.json) | -| GA-Faster RCNN | X-101-32x4d-FPN | pytorch | 1x | 8.7 | 9.7 | 43.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco/ga_faster_x101_32x4d_fpn_1x_coco_20200215-1ded9da3.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco/ga_faster_x101_32x4d_fpn_1x_coco_20200215_184547.log.json) | -| GA-Faster RCNN | X-101-64x4d-FPN | pytorch | 1x | 11.8 | 7.3 | 43.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco/ga_faster_x101_64x4d_fpn_1x_coco_20200215-0fa7bde7.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco/ga_faster_x101_64x4d_fpn_1x_coco_20200215_104455.log.json) | -| GA-RetinaNet | R-50-FPN | caffe | 1x | 3.5 | 16.8 | 36.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco/ga_retinanet_r50_caffe_fpn_1x_coco_20201020-39581c6f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco/ga_retinanet_r50_caffe_fpn_1x_coco_20201020_225450.log.json) | -| GA-RetinaNet | R-101-FPN | caffe | 1x | 5.5 | 12.9 | 39.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco/ga_retinanet_r101_caffe_fpn_1x_coco_20200531-6266453c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco/ga_retinanet_r101_caffe_fpn_1x_coco_20200531_012847.log.json) | -| GA-RetinaNet | X-101-32x4d-FPN | pytorch | 1x | 6.9 | 10.6 | 40.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco/ga_retinanet_x101_32x4d_fpn_1x_coco_20200219-40c56caa.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco/ga_retinanet_x101_32x4d_fpn_1x_coco_20200219_223025.log.json) | -| GA-RetinaNet | X-101-64x4d-FPN | pytorch | 1x | 9.9 | 7.7 | 41.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco/ga_retinanet_x101_64x4d_fpn_1x_coco_20200226-ef9f7f1f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco/ga_retinanet_x101_64x4d_fpn_1x_coco_20200226_221123.log.json) | - -- In the Guided Anchoring paper, `score_thr` is set to 0.001 in Fast/Faster RCNN and 0.05 in RetinaNet for both baselines and Guided Anchoring. - -- Performance on COCO test-dev benchmark are shown as follows. - -| Method | Backbone | Style | Lr schd | Aug Train | Score thr | AP | AP_50 | AP_75 | AP_small | AP_medium | AP_large | Download | -| :------------: | :-------: | :---: | :-----: | :-------: | :-------: | :---: | :---: | :---: | :------: | :-------: | :------: | :------: | -| GA-Faster RCNN | R-101-FPN | caffe | 1x | F | 0.05 | | | | | | | | -| GA-Faster RCNN | R-101-FPN | caffe | 1x | F | 0.001 | | | | | | | | -| GA-RetinaNet | R-101-FPN | caffe | 1x | F | 0.05 | | | | | | | | -| GA-RetinaNet | R-101-FPN | caffe | 2x | T | 0.05 | | | | | | | | diff --git a/configs/hrnet/README.md b/configs/hrnet/README.md index faeb673..e340c78 100644 --- a/configs/hrnet/README.md +++ b/configs/hrnet/README.md @@ -1,105 +1,101 @@ -# High-resolution networks (HRNets) for object detection +# HRNet -## Abstract +> [Deep High-Resolution Representation Learning for Human Pose Estimation](https://arxiv.org/abs/1902.09212) + + - +## Abstract This is an official pytorch implementation of Deep High-Resolution Representation Learning for Human Pose Estimation. In this work, we are interested in the human pose estimation problem with a focus on learning reliable high-resolution representations. Most existing methods recover high-resolution representations from low-resolution representations produced by a high-to-low resolution network. Instead, our proposed network maintains high-resolution representations through the whole process. We start from a high-resolution subnetwork as the first stage, gradually add high-to-low resolution subnetworks one by one to form more stages, and connect the mutli-resolution subnetworks in parallel. We conduct repeated multi-scale fusions such that each of the high-to-low resolution representations receives information from other parallel representations over and over, leading to rich high-resolution representations. As a result, the predicted keypoint heatmap is potentially more accurate and spatially more precise. We empirically demonstrate the effectiveness of our network through the superior pose estimation results over two benchmark datasets: the COCO keypoint detection dataset and the MPII Human Pose dataset. High-resolution representation learning plays an essential role in many vision problems, e.g., pose estimation and semantic segmentation. The high-resolution network (HRNet), recently developed for human pose estimation, maintains high-resolution representations through the whole process by connecting high-to-low resolution convolutions in parallel and produces strong high-resolution representations by repeatedly conducting fusions across parallel convolutions. In this paper, we conduct a further study on high-resolution representations by introducing a simple yet effective modification and apply it to a wide range of vision tasks. We augment the high-resolution representation by aggregating the (upsampled) representations from all the parallel convolutions rather than only the representation from the high-resolution convolution as done in HRNet. This simple modification leads to stronger representations, evidenced by superior results. We show top results in semantic segmentation on Cityscapes, LIP, and PASCAL Context, and facial landmark detection on AFLW, COFW, 300W, and WFLW. In addition, we build a multi-level representation from the high-resolution representation and apply it to the Faster R-CNN object detection framework and the extended frameworks. The proposed approach achieves superior results to existing single-model networks on COCO object detection. -
- - - -## Citation - - - -```latex -@inproceedings{SunXLW19, - title={Deep High-Resolution Representation Learning for Human Pose Estimation}, - author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang}, - booktitle={CVPR}, - year={2019} -} - -@article{SunZJCXLMWLW19, - title={High-Resolution Representations for Labeling Pixels and Regions}, - author={Ke Sun and Yang Zhao and Borui Jiang and Tianheng Cheng and Bin Xiao - and Dong Liu and Yadong Mu and Xinggang Wang and Wenyu Liu and Jingdong Wang}, - journal = {CoRR}, - volume = {abs/1904.04514}, - year={2019} -} -``` - ## Results and Models ### Faster R-CNN -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :-------------:|:------:| :------:| :--------:| -| HRNetV2p-W18 | pytorch | 1x | 6.6 | 13.4 | 36.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco/faster_rcnn_hrnetv2p_w18_1x_coco_20200130-56651a6d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco/faster_rcnn_hrnetv2p_w18_1x_coco_20200130_211246.log.json) | -| HRNetV2p-W18 | pytorch | 2x | 6.6 | - | 38.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco/faster_rcnn_hrnetv2p_w18_2x_coco_20200702_085731-a4ec0611.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco/faster_rcnn_hrnetv2p_w18_2x_coco_20200702_085731.log.json) | -| HRNetV2p-W32 | pytorch | 1x | 9.0 | 12.4 | 40.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco/faster_rcnn_hrnetv2p_w32_1x_coco_20200130-6e286425.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco/faster_rcnn_hrnetv2p_w32_1x_coco_20200130_204442.log.json) | -| HRNetV2p-W32 | pytorch | 2x | 9.0 | - | 41.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco/faster_rcnn_hrnetv2p_w32_2x_coco_20200529_015927-976a9c15.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco/faster_rcnn_hrnetv2p_w32_2x_coco_20200529_015927.log.json) | -| HRNetV2p-W40 | pytorch | 1x | 10.4 | 10.5 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco/faster_rcnn_hrnetv2p_w40_1x_coco_20200210-95c1f5ce.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco/faster_rcnn_hrnetv2p_w40_1x_coco_20200210_125315.log.json) | -| HRNetV2p-W40 | pytorch | 2x | 10.4 | - | 42.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco/faster_rcnn_hrnetv2p_w40_2x_coco_20200512_161033-0f236ef4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco/faster_rcnn_hrnetv2p_w40_2x_coco_20200512_161033.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :----------: | :-----: | :-----: | :------: | :------------: | :----: | :---------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| HRNetV2p-W18 | pytorch | 1x | 6.6 | 13.4 | 36.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco/faster_rcnn_hrnetv2p_w18_1x_coco_20200130-56651a6d.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco/faster_rcnn_hrnetv2p_w18_1x_coco_20200130_211246.log.json) | +| HRNetV2p-W18 | pytorch | 2x | 6.6 | - | 38.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco/faster_rcnn_hrnetv2p_w18_2x_coco_20200702_085731-a4ec0611.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco/faster_rcnn_hrnetv2p_w18_2x_coco_20200702_085731.log.json) | +| HRNetV2p-W32 | pytorch | 1x | 9.0 | 12.4 | 40.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco/faster_rcnn_hrnetv2p_w32_1x_coco_20200130-6e286425.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco/faster_rcnn_hrnetv2p_w32_1x_coco_20200130_204442.log.json) | +| HRNetV2p-W32 | pytorch | 2x | 9.0 | - | 41.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco/faster_rcnn_hrnetv2p_w32_2x_coco_20200529_015927-976a9c15.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco/faster_rcnn_hrnetv2p_w32_2x_coco_20200529_015927.log.json) | +| HRNetV2p-W40 | pytorch | 1x | 10.4 | 10.5 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco/faster_rcnn_hrnetv2p_w40_1x_coco_20200210-95c1f5ce.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco/faster_rcnn_hrnetv2p_w40_1x_coco_20200210_125315.log.json) | +| HRNetV2p-W40 | pytorch | 2x | 10.4 | - | 42.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco/faster_rcnn_hrnetv2p_w40_2x_coco_20200512_161033-0f236ef4.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco/faster_rcnn_hrnetv2p_w40_2x_coco_20200512_161033.log.json) | ### Mask R-CNN -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :-------------:|:------:| :------:|:------:|:--------:| -| HRNetV2p-W18 | pytorch | 1x | 7.0 | 11.7 | 37.7 | 34.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco/mask_rcnn_hrnetv2p_w18_1x_coco_20200205-1c3d78ed.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco/mask_rcnn_hrnetv2p_w18_1x_coco_20200205_232523.log.json) | -| HRNetV2p-W18 | pytorch | 2x | 7.0 | - | 39.8 | 36.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco/mask_rcnn_hrnetv2p_w18_2x_coco_20200212-b3c825b1.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco/mask_rcnn_hrnetv2p_w18_2x_coco_20200212_134222.log.json) | -| HRNetV2p-W32 | pytorch | 1x | 9.4 | 11.3 | 41.2 | 37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco/mask_rcnn_hrnetv2p_w32_1x_coco_20200207-b29f616e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco/mask_rcnn_hrnetv2p_w32_1x_coco_20200207_055017.log.json) | -| HRNetV2p-W32 | pytorch | 2x | 9.4 | - | 42.5 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco/mask_rcnn_hrnetv2p_w32_2x_coco_20200213-45b75b4d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco/mask_rcnn_hrnetv2p_w32_2x_coco_20200213_150518.log.json) | -| HRNetV2p-W40 | pytorch | 1x | 10.9 | | 42.1 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco/mask_rcnn_hrnetv2p_w40_1x_coco_20200511_015646-66738b35.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco/mask_rcnn_hrnetv2p_w40_1x_coco_20200511_015646.log.json) | -| HRNetV2p-W40 | pytorch | 2x | 10.9 | | 42.8 | 38.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco/mask_rcnn_hrnetv2p_w40_2x_coco_20200512_163732-aed5e4ab.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco/mask_rcnn_hrnetv2p_w40_2x_coco_20200512_163732.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :----------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :-------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| HRNetV2p-W18 | pytorch | 1x | 7.0 | 11.7 | 37.7 | 34.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco/mask_rcnn_hrnetv2p_w18_1x_coco_20200205-1c3d78ed.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco/mask_rcnn_hrnetv2p_w18_1x_coco_20200205_232523.log.json) | +| HRNetV2p-W18 | pytorch | 2x | 7.0 | - | 39.8 | 36.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco/mask_rcnn_hrnetv2p_w18_2x_coco_20200212-b3c825b1.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco/mask_rcnn_hrnetv2p_w18_2x_coco_20200212_134222.log.json) | +| HRNetV2p-W32 | pytorch | 1x | 9.4 | 11.3 | 41.2 | 37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco/mask_rcnn_hrnetv2p_w32_1x_coco_20200207-b29f616e.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco/mask_rcnn_hrnetv2p_w32_1x_coco_20200207_055017.log.json) | +| HRNetV2p-W32 | pytorch | 2x | 9.4 | - | 42.5 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco/mask_rcnn_hrnetv2p_w32_2x_coco_20200213-45b75b4d.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco/mask_rcnn_hrnetv2p_w32_2x_coco_20200213_150518.log.json) | +| HRNetV2p-W40 | pytorch | 1x | 10.9 | | 42.1 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco/mask_rcnn_hrnetv2p_w40_1x_coco_20200511_015646-66738b35.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco/mask_rcnn_hrnetv2p_w40_1x_coco_20200511_015646.log.json) | +| HRNetV2p-W40 | pytorch | 2x | 10.9 | | 42.8 | 38.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco/mask_rcnn_hrnetv2p_w40_2x_coco_20200512_163732-aed5e4ab.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco/mask_rcnn_hrnetv2p_w40_2x_coco_20200512_163732.log.json) | ### Cascade R-CNN -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :-------------:|:------:| :------: | :--------: | -| HRNetV2p-W18 | pytorch | 20e | 7.0 | 11.0 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco/cascade_rcnn_hrnetv2p_w18_20e_coco_20200210-434be9d7.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco/cascade_rcnn_hrnetv2p_w18_20e_coco_20200210_105632.log.json) | -| HRNetV2p-W32 | pytorch | 20e | 9.4 | 11.0 | 43.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco/cascade_rcnn_hrnetv2p_w32_20e_coco_20200208-928455a4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco/cascade_rcnn_hrnetv2p_w32_20e_coco_20200208_160511.log.json) | -| HRNetV2p-W40 | pytorch | 20e | 10.8 | | 43.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco/cascade_rcnn_hrnetv2p_w40_20e_coco_20200512_161112-75e47b04.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco/cascade_rcnn_hrnetv2p_w40_20e_coco_20200512_161112.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :----------: | :-----: | :-----: | :------: | :------------: | :----: | :-----------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| HRNetV2p-W18 | pytorch | 20e | 7.0 | 11.0 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco/cascade_rcnn_hrnetv2p_w18_20e_coco_20200210-434be9d7.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco/cascade_rcnn_hrnetv2p_w18_20e_coco_20200210_105632.log.json) | +| HRNetV2p-W32 | pytorch | 20e | 9.4 | 11.0 | 43.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco/cascade_rcnn_hrnetv2p_w32_20e_coco_20200208-928455a4.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco/cascade_rcnn_hrnetv2p_w32_20e_coco_20200208_160511.log.json) | +| HRNetV2p-W40 | pytorch | 20e | 10.8 | | 43.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco/cascade_rcnn_hrnetv2p_w40_20e_coco_20200512_161112-75e47b04.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco/cascade_rcnn_hrnetv2p_w40_20e_coco_20200512_161112.log.json) | ### Cascade Mask R-CNN -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :-------------:|:------:| :------:|:------:|:--------:| -| HRNetV2p-W18 | pytorch | 20e | 8.5 | 8.5 |41.6 |36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco/cascade_mask_rcnn_hrnetv2p_w18_20e_coco_20200210-b543cd2b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco/cascade_mask_rcnn_hrnetv2p_w18_20e_coco_20200210_093149.log.json) | -| HRNetV2p-W32 | pytorch | 20e | | 8.3 |44.3 |38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco/cascade_mask_rcnn_hrnetv2p_w32_20e_coco_20200512_154043-39d9cf7b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco/cascade_mask_rcnn_hrnetv2p_w32_20e_coco_20200512_154043.log.json) | -| HRNetV2p-W40 | pytorch | 20e | 12.5 | |45.1 |39.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco/cascade_mask_rcnn_hrnetv2p_w40_20e_coco_20200527_204922-969c4610.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco/cascade_mask_rcnn_hrnetv2p_w40_20e_coco_20200527_204922.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :----------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :----------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| HRNetV2p-W18 | pytorch | 20e | 8.5 | 8.5 | 41.6 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco/cascade_mask_rcnn_hrnetv2p_w18_20e_coco_20200210-b543cd2b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco/cascade_mask_rcnn_hrnetv2p_w18_20e_coco_20200210_093149.log.json) | +| HRNetV2p-W32 | pytorch | 20e | | 8.3 | 44.3 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco/cascade_mask_rcnn_hrnetv2p_w32_20e_coco_20200512_154043-39d9cf7b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco/cascade_mask_rcnn_hrnetv2p_w32_20e_coco_20200512_154043.log.json) | +| HRNetV2p-W40 | pytorch | 20e | 12.5 | | 45.1 | 39.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco/cascade_mask_rcnn_hrnetv2p_w40_20e_coco_20200527_204922-969c4610.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco/cascade_mask_rcnn_hrnetv2p_w40_20e_coco_20200527_204922.log.json) | ### Hybrid Task Cascade (HTC) -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :-------------:|:------:| :------:|:------:|:--------:| -| HRNetV2p-W18 | pytorch | 20e | 10.8 | 4.7 | 42.8 | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/htc_hrnetv2p_w18_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w18_20e_coco/htc_hrnetv2p_w18_20e_coco_20200210-b266988c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w18_20e_coco/htc_hrnetv2p_w18_20e_coco_20200210_182735.log.json) | -| HRNetV2p-W32 | pytorch | 20e | 13.1 | 4.9 | 45.4 | 39.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/htc_hrnetv2p_w32_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w32_20e_coco/htc_hrnetv2p_w32_20e_coco_20200207-7639fa12.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w32_20e_coco/htc_hrnetv2p_w32_20e_coco_20200207_193153.log.json) | -| HRNetV2p-W40 | pytorch | 20e | 14.6 | | 46.4 | 40.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/htc_hrnetv2p_w40_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w40_20e_coco/htc_hrnetv2p_w40_20e_coco_20200529_183411-417c4d5b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w40_20e_coco/htc_hrnetv2p_w40_20e_coco_20200529_183411.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :----------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :--------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| HRNetV2p-W18 | pytorch | 20e | 10.8 | 4.7 | 42.8 | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/htc_hrnetv2p_w18_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w18_20e_coco/htc_hrnetv2p_w18_20e_coco_20200210-b266988c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w18_20e_coco/htc_hrnetv2p_w18_20e_coco_20200210_182735.log.json) | +| HRNetV2p-W32 | pytorch | 20e | 13.1 | 4.9 | 45.4 | 39.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/htc_hrnetv2p_w32_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w32_20e_coco/htc_hrnetv2p_w32_20e_coco_20200207-7639fa12.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w32_20e_coco/htc_hrnetv2p_w32_20e_coco_20200207_193153.log.json) | +| HRNetV2p-W40 | pytorch | 20e | 14.6 | | 46.4 | 40.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/htc_hrnetv2p_w40_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w40_20e_coco/htc_hrnetv2p_w40_20e_coco_20200529_183411-417c4d5b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w40_20e_coco/htc_hrnetv2p_w40_20e_coco_20200529_183411.log.json) | ### FCOS -| Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -|:---------:|:-------:|:-------:|:--------:|:-------:|:------:|:------:|:------:|:------:|:--------:| -|HRNetV2p-W18| pytorch | Y | N | 1x | 13.0 | 12.9 | 35.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco_20201212_100710-4ad151de.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco_20201212_100710.log.json) | -|HRNetV2p-W18| pytorch | Y | N | 2x | 13.0 | - | 38.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco_20201212_101110-5c575fa5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco_20201212_101110.log.json) | -|HRNetV2p-W32| pytorch | Y | N | 1x | 17.5 | 12.9 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco_20201211_134730-cb8055c0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco_20201211_134730.log.json) | -|HRNetV2p-W32| pytorch | Y | N | 2x | 17.5 | - | 40.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco_20201212_112133-77b6b9bb.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco_20201212_112133.log.json) | -|HRNetV2p-W18| pytorch | Y | Y | 2x | 13.0 | 12.9 | 38.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco_20201212_111651-441e9d9f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco_20201212_111651.log.json) | -|HRNetV2p-W32| pytorch | Y | Y | 2x | 17.5 | 12.4 | 41.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco_20201212_090846-b6f2b49f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco_20201212_090846.log.json) | -|HRNetV2p-W48| pytorch | Y | Y | 2x | 20.3 | 10.8 | 42.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco_20201212_124752-f22d2ce5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco_20201212_124752.log.json) | +| Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :----------: | :-----: | :-: | :------: | :-----: | :------: | :------------: | :----: | :------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| HRNetV2p-W18 | pytorch | Y | N | 1x | 13.0 | 12.9 | 35.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco_20201212_100710-4ad151de.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco_20201212_100710.log.json) | +| HRNetV2p-W18 | pytorch | Y | N | 2x | 13.0 | - | 38.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco_20201212_101110-5c575fa5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco_20201212_101110.log.json) | +| HRNetV2p-W32 | pytorch | Y | N | 1x | 17.5 | 12.9 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco_20201211_134730-cb8055c0.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco_20201211_134730.log.json) | +| HRNetV2p-W32 | pytorch | Y | N | 2x | 17.5 | - | 40.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco_20201212_112133-77b6b9bb.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco_20201212_112133.log.json) | +| HRNetV2p-W18 | pytorch | Y | Y | 2x | 13.0 | 12.9 | 38.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco_20201212_111651-441e9d9f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco_20201212_111651.log.json) | +| HRNetV2p-W32 | pytorch | Y | Y | 2x | 17.5 | 12.4 | 41.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco_20201212_090846-b6f2b49f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco_20201212_090846.log.json) | +| HRNetV2p-W48 | pytorch | Y | Y | 2x | 20.3 | 10.8 | 42.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco_20201212_124752-f22d2ce5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco_20201212_124752.log.json) | **Note:** - The `28e` schedule in HTC indicates decreasing the lr at 24 and 27 epochs, with a total of 28 epochs. - HRNetV2 ImageNet pretrained models are in [HRNets for Image Classification](https://github.com/HRNet/HRNet-Image-Classification). + +## Citation + +```latex +@inproceedings{SunXLW19, + title={Deep High-Resolution Representation Learning for Human Pose Estimation}, + author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang}, + booktitle={CVPR}, + year={2019} +} + +@article{SunZJCXLMWLW19, + title={High-Resolution Representations for Labeling Pixels and Regions}, + author={Ke Sun and Yang Zhao and Borui Jiang and Tianheng Cheng and Bin Xiao + and Dong Liu and Yadong Mu and Xinggang Wang and Wenyu Liu and Jingdong Wang}, + journal = {CoRR}, + volume = {abs/1904.04514}, + year={2019} +} +``` diff --git a/configs/hrnet/metafile.yml b/configs/hrnet/metafile.yml index 37703aa..ac36efa 100644 --- a/configs/hrnet/metafile.yml +++ b/configs/hrnet/metafile.yml @@ -1,24 +1,6 @@ -Collections: - - Name: HRNet - Metadata: - Training Data: COCO - Training Techniques: - - SGD with Momentum - - Weight Decay - Training Resources: 8x V100 GPUs - Architecture: - - HRNet - Paper: - URL: https://arxiv.org/abs/1904.04514 - Title: 'Deep High-Resolution Representation Learning for Visual Recognition' - README: configs/hrnet/README.md - Code: - URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 - Version: v2.0.0 - Models: - Name: faster_rcnn_hrnetv2p_w18_1x_coco - In Collection: HRNet + In Collection: Faster R-CNN Config: configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py Metadata: Training Memory (GB): 6.6 @@ -30,15 +12,29 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 36.9 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco/faster_rcnn_hrnetv2p_w18_1x_coco_20200130-56651a6d.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: faster_rcnn_hrnetv2p_w18_2x_coco - In Collection: HRNet + In Collection: Faster R-CNN Config: configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.py Metadata: Training Memory (GB): 6.6 @@ -50,15 +46,29 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 24 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 38.9 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco/faster_rcnn_hrnetv2p_w18_2x_coco_20200702_085731-a4ec0611.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: faster_rcnn_hrnetv2p_w32_1x_coco - In Collection: HRNet + In Collection: Faster R-CNN Config: configs/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py Metadata: Training Memory (GB): 9.0 @@ -70,15 +80,29 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 40.2 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco/faster_rcnn_hrnetv2p_w32_1x_coco_20200130-6e286425.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: faster_rcnn_hrnetv2p_w32_2x_coco - In Collection: HRNet + In Collection: Faster R-CNN Config: configs/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco.py Metadata: Training Memory (GB): 9.0 @@ -90,15 +114,29 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 24 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 41.4 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco/faster_rcnn_hrnetv2p_w32_2x_coco_20200529_015927-976a9c15.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: faster_rcnn_hrnetv2p_w40_1x_coco - In Collection: HRNet + In Collection: Faster R-CNN Config: configs/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py Metadata: Training Memory (GB): 10.4 @@ -110,15 +148,29 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 41.2 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco/faster_rcnn_hrnetv2p_w40_1x_coco_20200210-95c1f5ce.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: faster_rcnn_hrnetv2p_w40_2x_coco - In Collection: HRNet + In Collection: Faster R-CNN Config: configs/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco.py Metadata: Training Memory (GB): 10.4 @@ -130,15 +182,29 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 24 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 42.1 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco/faster_rcnn_hrnetv2p_w40_2x_coco_20200512_161033-0f236ef4.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: mask_rcnn_hrnetv2p_w18_1x_coco - In Collection: HRNet + In Collection: Mask R-CNN Config: configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py Metadata: Training Memory (GB): 7.0 @@ -150,6 +216,13 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO @@ -160,9 +233,16 @@ Models: Metrics: mask AP: 34.2 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco/mask_rcnn_hrnetv2p_w18_1x_coco_20200205-1c3d78ed.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: mask_rcnn_hrnetv2p_w18_2x_coco - In Collection: HRNet + In Collection: Mask R-CNN Config: configs/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco.py Metadata: Training Memory (GB): 7.0 @@ -174,6 +254,13 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 24 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO @@ -184,9 +271,16 @@ Models: Metrics: mask AP: 36.0 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco/mask_rcnn_hrnetv2p_w18_2x_coco_20200212-b3c825b1.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: mask_rcnn_hrnetv2p_w32_1x_coco - In Collection: HRNet + In Collection: Mask R-CNN Config: configs/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py Metadata: Training Memory (GB): 9.4 @@ -198,6 +292,13 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO @@ -208,9 +309,16 @@ Models: Metrics: mask AP: 37.1 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco/mask_rcnn_hrnetv2p_w32_1x_coco_20200207-b29f616e.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: mask_rcnn_hrnetv2p_w32_2x_coco - In Collection: HRNet + In Collection: Mask R-CNN Config: configs/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco.py Metadata: Training Memory (GB): 9.4 @@ -222,6 +330,13 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 24 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO @@ -232,13 +347,27 @@ Models: Metrics: mask AP: 37.8 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco/mask_rcnn_hrnetv2p_w32_2x_coco_20200213-45b75b4d.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: mask_rcnn_hrnetv2p_w40_1x_coco - In Collection: HRNet + In Collection: Mask R-CNN Config: configs/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco.py Metadata: Training Memory (GB): 10.9 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO @@ -249,13 +378,27 @@ Models: Metrics: mask AP: 37.5 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco/mask_rcnn_hrnetv2p_w40_1x_coco_20200511_015646-66738b35.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: mask_rcnn_hrnetv2p_w40_2x_coco - In Collection: HRNet + In Collection: Mask R-CNN Config: configs/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco.py Metadata: Training Memory (GB): 10.9 Epochs: 24 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO @@ -266,9 +409,16 @@ Models: Metrics: mask AP: 38.2 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco/mask_rcnn_hrnetv2p_w40_2x_coco_20200512_163732-aed5e4ab.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: cascade_rcnn_hrnetv2p_w18_20e_coco - In Collection: HRNet + In Collection: Cascade R-CNN Config: configs/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco.py Metadata: Training Memory (GB): 7.0 @@ -280,15 +430,29 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 20 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 41.2 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco/cascade_rcnn_hrnetv2p_w18_20e_coco_20200210-434be9d7.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: cascade_rcnn_hrnetv2p_w32_20e_coco - In Collection: HRNet + In Collection: Cascade R-CNN Config: configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco.py Metadata: Training Memory (GB): 9.4 @@ -300,28 +464,56 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 20 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 43.3 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco/cascade_rcnn_hrnetv2p_w32_20e_coco_20200208-928455a4.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: cascade_rcnn_hrnetv2p_w40_20e_coco - In Collection: HRNet + In Collection: Cascade R-CNN Config: configs/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco.py Metadata: Training Memory (GB): 10.8 Epochs: 20 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 43.8 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco/cascade_rcnn_hrnetv2p_w40_20e_coco_20200512_161112-75e47b04.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: cascade_mask_rcnn_hrnetv2p_w18_20e_coco - In Collection: HRNet + In Collection: Cascade R-CNN Config: configs/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco.py Metadata: Training Memory (GB): 8.5 @@ -333,6 +525,13 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 20 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO @@ -343,9 +542,16 @@ Models: Metrics: mask AP: 36.4 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco/cascade_mask_rcnn_hrnetv2p_w18_20e_coco_20200210-b543cd2b.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: cascade_mask_rcnn_hrnetv2p_w32_20e_coco - In Collection: HRNet + In Collection: Cascade R-CNN Config: configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py Metadata: inference time (ms/im): @@ -356,6 +562,13 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 20 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO @@ -366,13 +579,27 @@ Models: Metrics: mask AP: 38.6 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco/cascade_mask_rcnn_hrnetv2p_w32_20e_coco_20200512_154043-39d9cf7b.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: cascade_mask_rcnn_hrnetv2p_w40_20e_coco - In Collection: HRNet + In Collection: Cascade R-CNN Config: configs/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco.py Metadata: Training Memory (GB): 12.5 Epochs: 20 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO @@ -383,9 +610,16 @@ Models: Metrics: mask AP: 39.3 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco/cascade_mask_rcnn_hrnetv2p_w40_20e_coco_20200527_204922-969c4610.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: htc_hrnetv2p_w18_20e_coco - In Collection: HRNet + In Collection: HTC Config: configs/hrnet/htc_hrnetv2p_w18_20e_coco.py Metadata: Training Memory (GB): 10.8 @@ -397,6 +631,13 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 20 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO @@ -407,9 +648,16 @@ Models: Metrics: mask AP: 37.9 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w18_20e_coco/htc_hrnetv2p_w18_20e_coco_20200210-b266988c.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: htc_hrnetv2p_w32_20e_coco - In Collection: HRNet + In Collection: HTC Config: configs/hrnet/htc_hrnetv2p_w32_20e_coco.py Metadata: Training Memory (GB): 13.1 @@ -421,6 +669,13 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 20 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO @@ -431,13 +686,27 @@ Models: Metrics: mask AP: 39.9 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w32_20e_coco/htc_hrnetv2p_w32_20e_coco_20200207-7639fa12.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: htc_hrnetv2p_w40_20e_coco - In Collection: HRNet + In Collection: HTC Config: configs/hrnet/htc_hrnetv2p_w40_20e_coco.py Metadata: Training Memory (GB): 14.6 Epochs: 20 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO @@ -448,9 +717,16 @@ Models: Metrics: mask AP: 40.8 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w40_20e_coco/htc_hrnetv2p_w40_20e_coco_20200529_183411-417c4d5b.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: fcos_hrnetv2p_w18_gn-head_4x4_1x_coco - In Collection: HRNet + In Collection: FCOS Config: configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco.py Metadata: Training Resources: 4x V100 GPUs @@ -464,15 +740,28 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 35.3 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco_20201212_100710-4ad151de.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: fcos_hrnetv2p_w18_gn-head_4x4_2x_coco - In Collection: HRNet + In Collection: FCOS Config: configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco.py Metadata: Training Resources: 4x V100 GPUs @@ -486,15 +775,28 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 24 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 38.2 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco_20201212_101110-5c575fa5.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: fcos_hrnetv2p_w32_gn-head_4x4_1x_coco - In Collection: HRNet + In Collection: FCOS Config: configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py Metadata: Training Resources: 4x V100 GPUs @@ -508,15 +810,28 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 39.5 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco_20201211_134730-cb8055c0.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: fcos_hrnetv2p_w32_gn-head_4x4_2x_coco - In Collection: HRNet + In Collection: FCOS Config: configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco.py Metadata: Training Resources: 4x V100 GPUs @@ -530,15 +845,28 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 24 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 40.8 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco_20201212_112133-77b6b9bb.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco - In Collection: HRNet + In Collection: FCOS Config: configs/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco.py Metadata: Training Resources: 4x V100 GPUs @@ -552,15 +880,28 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 24 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 38.3 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco_20201212_111651-441e9d9f.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco - In Collection: HRNet + In Collection: FCOS Config: configs/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py Metadata: Training Resources: 4x V100 GPUs @@ -574,15 +915,28 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 24 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 41.9 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco_20201212_090846-b6f2b49f.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 - Name: fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco - In Collection: HRNet + In Collection: FCOS Config: configs/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco.py Metadata: Training Resources: 4x V100 GPUs @@ -596,9 +950,22 @@ Models: mode: FP32 resolution: (800, 1333) Epochs: 24 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Architecture: + - HRNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 42.7 Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco_20201212_124752-f22d2ce5.pth + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 diff --git a/configs/htc/README.md b/configs/htc/README.md index d722a80..03a89aa 100644 --- a/configs/htc/README.md +++ b/configs/htc/README.md @@ -1,35 +1,18 @@ -# Hybrid Task Cascade for Instance Segmentation +# HTC -## Abstract +> [Hybrid Task Cascade for Instance Segmentation](ttps://arxiv.org/abs/1901.07518) - + + +## Abstract Cascade is a classic yet powerful architecture that has boosted performance on various tasks. However, how to introduce cascade to instance segmentation remains an open question. A simple combination of Cascade R-CNN and Mask R-CNN only brings limited gain. In exploring a more effective approach, we find that the key to a successful instance segmentation cascade is to fully leverage the reciprocal relationship between detection and segmentation. In this work, we propose a new framework, Hybrid Task Cascade (HTC), which differs in two important aspects: (1) instead of performing cascaded refinement on these two tasks separately, it interweaves them for a joint multi-stage processing; (2) it adopts a fully convolutional branch to provide spatial context, which can help distinguishing hard foreground from cluttered background. Overall, this framework can learn more discriminative features progressively while integrating complementary features together in each stage. Without bells and whistles, a single HTC obtains 38.4 and 1.5 improvement over a strong Cascade Mask R-CNN baseline on MSCOCO dataset. Moreover, our overall system achieves 48.6 mask AP on the test-challenge split, ranking 1st in the COCO 2018 Challenge Object Detection Task. -
- - - -## Citation - - - -We provide config files to reproduce the results in the CVPR 2019 paper for [Hybrid Task Cascade](https://arxiv.org/abs/1901.07518). - -```latex -@inproceedings{chen2019hybrid, - title={Hybrid task cascade for instance segmentation}, - author={Chen, Kai and Pang, Jiangmiao and Wang, Jiaqi and Xiong, Yu and Li, Xiaoxiao and Sun, Shuyang and Feng, Wansen and Liu, Ziwei and Shi, Jianping and Ouyang, Wanli and Chen Change Loy and Dahua Lin}, - booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, - year={2019} -} -``` - -## Dataset +## Introduction HTC requires COCO and [COCO-stuff](http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/stuffthingmaps_trainval2017.zip) dataset for training. You need to download and extract it in the COCO dataset path. The directory should be like this. @@ -52,13 +35,13 @@ mmdetection The results on COCO 2017val are shown in the below table. (results on test-dev are usually slightly higher than val) -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -|:---------:|:-------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| -| R-50-FPN | pytorch | 1x | 8.2 | 5.8 | 42.3 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_1x_coco/htc_r50_fpn_1x_coco_20200317-7332cf16.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_1x_coco/htc_r50_fpn_1x_coco_20200317_070435.log.json) | -| R-50-FPN | pytorch | 20e | 8.2 | - | 43.3 | 38.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_r50_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_20e_coco/htc_r50_fpn_20e_coco_20200319-fe28c577.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_20e_coco/htc_r50_fpn_20e_coco_20200319_070313.log.json) | -| R-101-FPN | pytorch | 20e | 10.2 | 5.5 | 44.8 | 39.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_r101_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r101_fpn_20e_coco/htc_r101_fpn_20e_coco_20200317-9b41b48f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r101_fpn_20e_coco/htc_r101_fpn_20e_coco_20200317_153107.log.json) | -| X-101-32x4d-FPN | pytorch |20e| 11.4 | 5.0 | 46.1 | 40.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_x101_32x4d_fpn_16x1_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_32x4d_fpn_16x1_20e_coco/htc_x101_32x4d_fpn_16x1_20e_coco_20200318-de97ae01.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_32x4d_fpn_16x1_20e_coco/htc_x101_32x4d_fpn_16x1_20e_coco_20200318_034519.log.json) | -| X-101-64x4d-FPN | pytorch |20e| 14.5 | 4.4 | 47.0 | 41.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_16x1_20e_coco/htc_x101_64x4d_fpn_16x1_20e_coco_20200318-b181fd7a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_16x1_20e_coco/htc_x101_64x4d_fpn_16x1_20e_coco_20200318_081711.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :-------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | pytorch | 1x | 8.2 | 5.8 | 42.3 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_1x_coco/htc_r50_fpn_1x_coco_20200317-7332cf16.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_1x_coco/htc_r50_fpn_1x_coco_20200317_070435.log.json) | +| R-50-FPN | pytorch | 20e | 8.2 | - | 43.3 | 38.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_r50_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_20e_coco/htc_r50_fpn_20e_coco_20200319-fe28c577.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_20e_coco/htc_r50_fpn_20e_coco_20200319_070313.log.json) | +| R-101-FPN | pytorch | 20e | 10.2 | 5.5 | 44.8 | 39.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_r101_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r101_fpn_20e_coco/htc_r101_fpn_20e_coco_20200317-9b41b48f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r101_fpn_20e_coco/htc_r101_fpn_20e_coco_20200317_153107.log.json) | +| X-101-32x4d-FPN | pytorch | 20e | 11.4 | 5.0 | 46.1 | 40.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_x101_32x4d_fpn_16x1_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_32x4d_fpn_16x1_20e_coco/htc_x101_32x4d_fpn_16x1_20e_coco_20200318-de97ae01.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_32x4d_fpn_16x1_20e_coco/htc_x101_32x4d_fpn_16x1_20e_coco_20200318_034519.log.json) | +| X-101-64x4d-FPN | pytorch | 20e | 14.5 | 4.4 | 47.0 | 41.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_16x1_20e_coco/htc_x101_64x4d_fpn_16x1_20e_coco_20200318-b181fd7a.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_16x1_20e_coco/htc_x101_64x4d_fpn_16x1_20e_coco_20200318_081711.log.json) | - In the HTC paper and COCO 2018 Challenge, `score_thr` is set to 0.001 for both baselines and HTC. - We use 8 GPUs with 2 images/GPU for R-50 and R-101 models, and 16 GPUs with 1 image/GPU for X-101 models. @@ -66,6 +49,19 @@ The results on COCO 2017val are shown in the below table. (results on test-dev a We also provide a powerful HTC with DCN and multi-scale training model. No testing augmentation is used. -| Backbone | Style | DCN | training scales | Lr schd | box AP | mask AP | Config | Download | -|:----------------:|:-------:|:-----:|:---------------:|:-------:|:------:|:-------:|:------:|:--------:| -| X-101-64x4d-FPN | pytorch | c3-c5 | 400~1400 | 20e | 50.4 | 43.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco_20200312-946fd751.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco_20200312_203410.log.json) | +| Backbone | Style | DCN | training scales | Lr schd | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :---: | :-------------: | :-----: | :----: | :-----: | :------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| X-101-64x4d-FPN | pytorch | c3-c5 | 400~1400 | 20e | 50.4 | 43.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco_20200312-946fd751.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco_20200312_203410.log.json) | + +## Citation + +We provide config files to reproduce the results in the CVPR 2019 paper for [Hybrid Task Cascade](https://arxiv.org/abs/1901.07518). + +```latex +@inproceedings{chen2019hybrid, + title={Hybrid task cascade for instance segmentation}, + author={Chen, Kai and Pang, Jiangmiao and Wang, Jiaqi and Xiong, Yu and Li, Xiaoxiao and Sun, Shuyang and Feng, Wansen and Liu, Ziwei and Shi, Jianping and Ouyang, Wanli and Chen Change Loy and Dahua Lin}, + booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, + year={2019} +} +``` diff --git a/configs/instaboost/README.md b/configs/instaboost/README.md index e948ccb..82ed334 100644 --- a/configs/instaboost/README.md +++ b/configs/instaboost/README.md @@ -1,38 +1,21 @@ -# Instaboost: Boosting instance segmentation via probability map guided copy-pasting +# Instaboost -## Abstract +> [Instaboost: Boosting instance segmentation via probability map guided copy-pasting](https://arxiv.org/abs/1908.07801) + + - +## Abstract Instance segmentation requires a large number of training samples to achieve satisfactory performance and benefits from proper data augmentation. To enlarge the training set and increase the diversity, previous methods have investigated using data annotation from other domain (e.g. bbox, point) in a weakly supervised mechanism. In this paper, we present a simple, efficient and effective method to augment the training set using the existing instance mask annotations. Exploiting the pixel redundancy of the background, we are able to improve the performance of Mask R-CNN for 1.7 mAP on COCO dataset and 3.3 mAP on Pascal VOC dataset by simply introducing random jittering to objects. Furthermore, we propose a location probability map based approach to explore the feasible locations that objects can be placed based on local appearance similarity. With the guidance of such map, we boost the performance of R101-Mask R-CNN on instance segmentation from 35.7 mAP to 37.9 mAP without modifying the backbone or network structure. Our method is simple to implement and does not increase the computational complexity. It can be integrated into the training pipeline of any instance segmentation model without affecting the training and inference efficiency. -
- - - ## Introduction - - Configs in this directory is the implementation for ICCV2019 paper "InstaBoost: Boosting Instance Segmentation Via Probability Map Guided Copy-Pasting" and provided by the authors of the paper. InstaBoost is a data augmentation method for object detection and instance segmentation. The paper has been released on [`arXiv`](https://arxiv.org/abs/1908.07801). -## Citation - -```latex -@inproceedings{fang2019instaboost, - title={Instaboost: Boosting instance segmentation via probability map guided copy-pasting}, - author={Fang, Hao-Shu and Sun, Jianhua and Wang, Runzhong and Gou, Minghao and Li, Yong-Lu and Lu, Cewu}, - booktitle={Proceedings of the IEEE International Conference on Computer Vision}, - pages={682--691}, - year={2019} -} -``` - ## Usage ### Requirements @@ -55,9 +38,21 @@ InstaBoost have been already integrated in the data pipeline, thus all you need - To balance accuracy and training time when using InstaBoost, models released in this page are all trained for 48 Epochs. Other training and testing configs strictly follow the original framework. - For results and models in MMDetection V1.x, please refer to [Instaboost](https://github.com/GothicAi/Instaboost). -| Network | Backbone | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-------------: | :--------: | :-----: | :------: | :------------: | :------:| :-----: | :------: | :-----------------: | -| Mask R-CNN | R-50-FPN | 4x | 4.4 | 17.5 | 40.6 | 36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco/mask_rcnn_r50_fpn_instaboost_4x_coco_20200307-d025f83a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco/mask_rcnn_r50_fpn_instaboost_4x_coco_20200307_223635.log.json) | -| Mask R-CNN | R-101-FPN | 4x | 6.4 | | 42.5 | 38.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco/mask_rcnn_r101_fpn_instaboost_4x_coco_20200703_235738-f23f3a5f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco/mask_rcnn_r101_fpn_instaboost_4x_coco_20200703_235738.log.json) | -| Mask R-CNN | X-101-64x4d-FPN | 4x | 10.7 | | 44.7 | 39.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco_20200515_080947-8ed58c1b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco_20200515_080947.log.json) | -| Cascade R-CNN | R-101-FPN | 4x | 6.0 | 12.0 | 43.7 | 38.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco_20200307-c19d98d9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco_20200307_223646.log.json) | +| Network | Backbone | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-----------: | :-------------: | :-----: | :------: | :------------: | :----: | :-----: | :--------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Mask R-CNN | R-50-FPN | 4x | 4.4 | 17.5 | 40.6 | 36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco/mask_rcnn_r50_fpn_instaboost_4x_coco_20200307-d025f83a.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco/mask_rcnn_r50_fpn_instaboost_4x_coco_20200307_223635.log.json) | +| Mask R-CNN | R-101-FPN | 4x | 6.4 | | 42.5 | 38.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco/mask_rcnn_r101_fpn_instaboost_4x_coco_20200703_235738-f23f3a5f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco/mask_rcnn_r101_fpn_instaboost_4x_coco_20200703_235738.log.json) | +| Mask R-CNN | X-101-64x4d-FPN | 4x | 10.7 | | 44.7 | 39.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco_20200515_080947-8ed58c1b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco_20200515_080947.log.json) | +| Cascade R-CNN | R-101-FPN | 4x | 6.0 | 12.0 | 43.7 | 38.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco_20200307-c19d98d9.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco_20200307_223646.log.json) | + +## Citation + +```latex +@inproceedings{fang2019instaboost, + title={Instaboost: Boosting instance segmentation via probability map guided copy-pasting}, + author={Fang, Hao-Shu and Sun, Jianhua and Wang, Runzhong and Gou, Minghao and Li, Yong-Lu and Lu, Cewu}, + booktitle={Proceedings of the IEEE International Conference on Computer Vision}, + pages={682--691}, + year={2019} +} +``` diff --git a/configs/lad/README.md b/configs/lad/README.md index b48b063..6cad7d9 100644 --- a/configs/lad/README.md +++ b/configs/lad/README.md @@ -1,34 +1,17 @@ -# Improving Object Detection by Label Assignment Distillation +# LAD +> [Improving Object Detection by Label Assignment Distillation](https://arxiv.org/abs/2108.10520) -## Abstract + - +## Abstract Label assignment in object detection aims to assign targets, foreground or background, to sampled regions in an image. Unlike labeling for image classification, this problem is not well defined due to the object's bounding box. In this paper, we investigate the problem from a perspective of distillation, hence we call Label Assignment Distillation (LAD). Our initial motivation is very simple, we use a teacher network to generate labels for the student. This can be achieved in two ways: either using the teacher's prediction as the direct targets (soft label), or through the hard labels dynamically assigned by the teacher (LAD). Our experiments reveal that: (i) LAD is more effective than soft-label, but they are complementary. (ii) Using LAD, a smaller teacher can also improve a larger student significantly, while soft-label can't. We then introduce Co-learning LAD, in which two networks simultaneously learn from scratch and the role of teacher and student are dynamically interchanged. Using PAA-ResNet50 as a teacher, our LAD techniques can improve detectors PAA-ResNet101 and PAA-ResNeXt101 to 46AP and 47.5AP on the COCO test-dev set. With a stronger teacher PAA-SwinB, we improve the students PAA-ResNet50 to 43.7AP by only 1x schedule training and standard setting, and PAA-ResNet101 to 47.9AP, significantly surpassing the current methods. -
- - - -## Citation - - - - -```latex -@inproceedings{nguyen2021improving, - title={Improving Object Detection by Label Assignment Distillation}, - author={Chuong H. Nguyen and Thuy C. Nguyen and Tuan N. Tang and Nam L. H. Phan}, - booktitle = {WACV}, - year={2022} -} -``` - ## Results and Models We provide config files to reproduce the object detection results in the @@ -37,14 +20,25 @@ Distillation. ### PAA with LAD -| Teacher | Student | Training schedule | AP (val) | Config | -| :-------: | :-----: | :---------------: | :------: | :----------------------------------------------------: | -| -- | R-50 | 1x | 40.4 | | -| -- | R-101 | 1x | 42.6 | | -| R-101 | R-50 | 1x | 41.6 | [config](configs/lad/lad_r50_paa_r101_fpn_coco_1x.py) | -| R-50 | R-101 | 1x | 43.2 | [config](configs/lad/lad_r101_paa_r50_fpn_coco_1x.py) | +| Teacher | Student | Training schedule | AP (val) | Config | +| :-----: | :-----: | :---------------: | :------: | :---------------------------------------------------: | +| -- | R-50 | 1x | 40.4 | | +| -- | R-101 | 1x | 42.6 | | +| R-101 | R-50 | 1x | 41.6 | [config](configs/lad/lad_r50_paa_r101_fpn_coco_1x.py) | +| R-50 | R-101 | 1x | 43.2 | [config](configs/lad/lad_r101_paa_r50_fpn_coco_1x.py) | ## Note -- Meaning of Config name: lad_r50(student model)_paa(based on paa)_r101(teacher model)_fpn(neck)_coco(dataset)_1x(12 epoch).py +- Meaning of Config name: lad_r50(student model)\_paa(based on paa)\_r101(teacher model)\_fpn(neck)\_coco(dataset)\_1x(12 epoch).py - Results may fluctuate by about 0.2 mAP. + +## Citation + +```latex +@inproceedings{nguyen2021improving, + title={Improving Object Detection by Label Assignment Distillation}, + author={Chuong H. Nguyen and Thuy C. Nguyen and Tuan N. Tang and Nam L. H. Phan}, + booktitle = {WACV}, + year={2022} +} +``` diff --git a/configs/lad/lad_r101_paa_r50_fpn_coco_1x.py b/configs/lad/lad_r101_paa_r50_fpn_coco_1x.py index c7e3fcf..4877d95 100644 --- a/configs/lad/lad_r101_paa_r50_fpn_coco_1x.py +++ b/configs/lad/lad_r101_paa_r50_fpn_coco_1x.py @@ -119,3 +119,8 @@ data = dict(samples_per_gpu=8, workers_per_gpu=4) optimizer = dict(lr=0.01) fp16 = dict(loss_scale=512.) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/lad/lad_r50_paa_r101_fpn_coco_1x.py b/configs/lad/lad_r50_paa_r101_fpn_coco_1x.py index 3c46f7d..29bbe69 100644 --- a/configs/lad/lad_r50_paa_r101_fpn_coco_1x.py +++ b/configs/lad/lad_r50_paa_r101_fpn_coco_1x.py @@ -118,3 +118,8 @@ data = dict(samples_per_gpu=8, workers_per_gpu=4) optimizer = dict(lr=0.01) fp16 = dict(loss_scale=512.) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/lad/metafile.yml b/configs/lad/metafile.yml new file mode 100644 index 0000000..5076f28 --- /dev/null +++ b/configs/lad/metafile.yml @@ -0,0 +1,42 @@ +Collections: + - Name: Label Assignment Distillation + Metadata: + Training Data: COCO + Training Techniques: + - Label Assignment Distillation + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - FPN + - ResNet + Paper: + URL: https://arxiv.org/abs/2108.10520 + Title: 'Improving Object Detection by Label Assignment Distillation' + README: configs/lad/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.19.0/mmdet/models/detectors/lad.py#L10 + Version: v2.19.0 + +Models: + - Name: lad_r50_paa_r101_fpn_coco_1x + In Collection: Label Assignment Distillation + Config: configs/lad/lad_r50_paa_r101_fpn_coco_1x.py + Metadata: + Teacher: R-101 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.6 + + - Name: lad_r101_paa_r50_fpn_coco_1x + In Collection: Label Assignment Distillation + Config: configs/lad/lad_r101_paa_r50_fpn_coco_1x.py + Metadata: + Teacher: R-50 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.2 diff --git a/configs/ld/README.md b/configs/ld/README.md index baa8247..6556542 100644 --- a/configs/ld/README.md +++ b/configs/ld/README.md @@ -1,45 +1,43 @@ -# Localization Distillation for Object Detection +# LD -## Abstract +> [Localization Distillation for Dense Object Detection](https://arxiv.org/abs/2102.12252) - + + +## Abstract Knowledge distillation (KD) has witnessed its powerful capability in learning compact models in object detection. Previous KD methods for object detection mostly focus on imitating deep features within the imitation regions instead of mimicking classification logits due to its inefficiency in distilling localization information. In this paper, by reformulating the knowledge distillation process on localization, we present a novel localization distillation (LD) method which can efficiently transfer the localization knowledge from the teacher to the student. Moreover, we also heuristically introduce the concept of valuable localization region that can aid to selectively distill the semantic and localization knowledge for a certain region. Combining these two new components, for the first time, we show that logit mimicking can outperform feature imitation and localization knowledge distillation is more important and efficient than semantic knowledge for distilling object detectors. Our distillation scheme is simple as well as effective and can be easily applied to different dense object detectors. Experiments show that our LD can boost the AP score of GFocal-ResNet-50 with a single-scale 1× training schedule from 40.1 to 42.1 on the COCO benchmark without any sacrifice on the inference speed. -
- - - -## Citation - - - -```latex -@Article{zheng2021LD, - title={Localization Distillation for Object Detection}, - author= {Zhaohui Zheng, Rongguang Ye, Ping Wang, Jun Wang, Dongwei Ren, Wangmeng Zuo}, - journal={arXiv:2102.12252}, - year={2021} -} -``` +## Results and Models ### GFocalV1 with LD -| Teacher | Student | Training schedule | Mini-batch size | AP (val) | AP50 (val) | AP75 (val) | Config | -| :-------: | :-----: | :---------------: | :-------------: | :------: | :--------: | :--------: | :--------------: | -| -- | R-18 | 1x | 6 | 35.8 | 53.1 | 38.2 | | -| R-101 | R-18 | 1x | 6 | 36.5 | 52.9 | 39.3 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/ld/ld_r18_gflv1_r101_fpn_coco_1x.py) | -| -- | R-34 | 1x | 6 | 38.9 | 56.6 | 42.2 | | -| R-101 | R-34 | 1x | 6 | 39.8 | 56.6 | 43.1 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/ld/ld_r34_gflv1_r101_fpn_coco_1x.py) | -| -- | R-50 | 1x | 6 | 40.1 | 58.2 | 43.1 | | -| R-101 | R-50 | 1x | 6 | 41.1 | 58.7 | 44.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/ld/ld_r50_gflv1_r101_fpn_coco_1x.py) | -| -- | R-101 | 2x | 6 | 44.6 | 62.9 | 48.4 | | -| R-101-DCN | R-101 | 2x | 6 | 45.4 | 63.1 | 49.5 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/ld/ld_r101_gflv1_r101dcn_fpn_coco_1x.py) | +| Teacher | Student | Training schedule | Mini-batch size | AP (val) | AP50 (val) | AP75 (val) | Config | +| :-------: | :-----: | :---------------: | :-------------: | :------: | :--------: | :--------: | :-------------------------------------------------------------------------------------------------------------: | +| -- | R-18 | 1x | 6 | 35.8 | 53.1 | 38.2 | | +| R-101 | R-18 | 1x | 6 | 36.5 | 52.9 | 39.3 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/ld/ld_r18_gflv1_r101_fpn_coco_1x.py) | +| -- | R-34 | 1x | 6 | 38.9 | 56.6 | 42.2 | | +| R-101 | R-34 | 1x | 6 | 39.8 | 56.6 | 43.1 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/ld/ld_r34_gflv1_r101_fpn_coco_1x.py) | +| -- | R-50 | 1x | 6 | 40.1 | 58.2 | 43.1 | | +| R-101 | R-50 | 1x | 6 | 41.1 | 58.7 | 44.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/ld/ld_r50_gflv1_r101_fpn_coco_1x.py) | +| -- | R-101 | 2x | 6 | 44.6 | 62.9 | 48.4 | | +| R-101-DCN | R-101 | 2x | 6 | 45.4 | 63.1 | 49.5 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/ld/ld_r101_gflv1_r101dcn_fpn_coco_1x.py) | ## Note -- Meaning of Config name: ld_r18(student model)_gflv1(based on gflv1)_r101(teacher model)_fpn(neck)_coco(dataset)_1x(12 epoch).py +- Meaning of Config name: ld_r18(student model)\_gflv1(based on gflv1)\_r101(teacher model)\_fpn(neck)\_coco(dataset)\_1x(12 epoch).py + +## Citation + +```latex +@Inproceedings{zheng2022LD, + title={Localization Distillation for Dense Object Detection}, + author= {Zheng, Zhaohui and Ye, Rongguang and Wang, Ping and Ren, Dongwei and Zuo, Wangmeng and Hou, Qibin and Cheng, Mingming}, + booktitle={CVPR}, + year={2022} +} +``` diff --git a/configs/ld/metafile.yml b/configs/ld/metafile.yml index cd833bf..d555a6d 100644 --- a/configs/ld/metafile.yml +++ b/configs/ld/metafile.yml @@ -12,7 +12,7 @@ Collections: - ResNet Paper: URL: https://arxiv.org/abs/2102.12252 - Title: 'Localization Distillation for Object Detection' + Title: 'Localization Distillation for Dense Object Detection' README: configs/ld/README.md Code: URL: https://github.com/open-mmlab/mmdetection/blob/v2.11.0/mmdet/models/dense_heads/ld_head.py#L11 diff --git a/configs/legacy_1.x/README.md b/configs/legacy_1.x/README.md index 38a2a0e..c48477f 100644 --- a/configs/legacy_1.x/README.md +++ b/configs/legacy_1.x/README.md @@ -44,10 +44,11 @@ We provide configs of some common structures in this directory. ## Performance The performance change after converting the models in this directory are listed as the following. -| Method | Style | Lr schd | V1.x box AP | V1.x mask AP | V2.0 box AP | V2.0 mask AP | Config | Download | -| :-------------: | :-----: | :-----: | :------:| :-----: |:------:| :-----: | :-------: |:------------------------------------------------------------------------------------------------------------------------------: | -| Mask R-CNN R-50-FPN | pytorch | 1x | 37.3 | 34.2 | 36.8 | 33.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/mask_rcnn_r50_fpn_1x_coco_v1.py) | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth)| -| RetinaNet R-50-FPN | caffe | 1x | 35.8 | - | 35.4 | - | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/retinanet_r50_caffe_1x_coco_v1.py) | -| RetinaNet R-50-FPN | pytorch | 1x | 35.6 |-|35.2| -| [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/retinanet_r50_fpn_1x_coco_v1.py) | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r50_fpn_1x_20181125-7b0c2548.pth) | -| Cascade Mask R-CNN R-50-FPN | pytorch | 1x | 41.2 | 35.7 |40.8| 35.6| [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/cascade_mask_rcnn_r50_fpn_1x_coco_v1.py) | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r50_fpn_1x_20181123-88b170c9.pth) | -| SSD300-VGG16 | caffe | 120e | 25.7 |-|25.4|-| [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/ssd300_coco_v1.py) | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd300_coco_vgg16_caffe_120e_20181221-84d7110b.pth) | + +| Method | Style | Lr schd | V1.x box AP | V1.x mask AP | V2.0 box AP | V2.0 mask AP | Config | Download | +| :-------------------------: | :-----: | :-----: | :---------: | :----------: | :---------: | :----------: | :------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------: | +| Mask R-CNN R-50-FPN | pytorch | 1x | 37.3 | 34.2 | 36.8 | 33.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/mask_rcnn_r50_fpn_1x_coco_v1.py) | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth) | +| RetinaNet R-50-FPN | caffe | 1x | 35.8 | - | 35.4 | - | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/retinanet_r50_caffe_1x_coco_v1.py) | | +| RetinaNet R-50-FPN | pytorch | 1x | 35.6 | - | 35.2 | - | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/retinanet_r50_fpn_1x_coco_v1.py) | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r50_fpn_1x_20181125-7b0c2548.pth) | +| Cascade Mask R-CNN R-50-FPN | pytorch | 1x | 41.2 | 35.7 | 40.8 | 35.6 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/cascade_mask_rcnn_r50_fpn_1x_coco_v1.py) | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r50_fpn_1x_20181123-88b170c9.pth) | +| SSD300-VGG16 | caffe | 120e | 25.7 | - | 25.4 | - | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/ssd300_coco_v1.py) | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd300_coco_vgg16_caffe_120e_20181221-84d7110b.pth) | diff --git a/configs/legacy_1.x/ssd300_coco_v1.py b/configs/legacy_1.x/ssd300_coco_v1.py index b194e76..65ccc1e 100644 --- a/configs/legacy_1.x/ssd300_coco_v1.py +++ b/configs/legacy_1.x/ssd300_coco_v1.py @@ -77,3 +77,8 @@ optimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4) optimizer_config = dict(_delete_=True) dist_params = dict(backend='nccl', port=29555) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/libra_rcnn/README.md b/configs/libra_rcnn/README.md index dac1049..87a128a 100644 --- a/configs/libra_rcnn/README.md +++ b/configs/libra_rcnn/README.md @@ -1,29 +1,38 @@ -# Libra R-CNN: Towards Balanced Learning for Object Detection +# Libra R-CNN -## Abstract +> [Libra R-CNN: Towards Balanced Learning for Object Detection](https://arxiv.org/abs/1904.02701) + + - +## Abstract Compared with model architectures, the training process, which is also crucial to the success of detectors, has received relatively less attention in object detection. In this work, we carefully revisit the standard training practice of detectors, and find that the detection performance is often limited by the imbalance during the training process, which generally consists in three levels - sample level, feature level, and objective level. To mitigate the adverse effects caused thereby, we propose Libra R-CNN, a simple but effective framework towards balanced learning for object detection. It integrates three novel components: IoU-balanced sampling, balanced feature pyramid, and balanced L1 loss, respectively for reducing the imbalance at sample, feature, and objective level. Benefitted from the overall balanced design, Libra R-CNN significantly improves the detection performance. Without bells and whistles, it achieves 2.5 points and 2.0 points higher Average Precision (AP) than FPN Faster R-CNN and RetinaNet respectively on MSCOCO. Instance recognition is rapidly advanced along with the developments of various deep convolutional neural networks. Compared to the architectures of networks, the training process, which is also crucial to the success of detectors, has received relatively less attention. In this work, we carefully revisit the standard training practice of detectors, and find that the detection performance is often limited by the imbalance during the training process, which generally consists in three levels - sample level, feature level, and objective level. To mitigate the adverse effects caused thereby, we propose Libra R-CNN, a simple yet effective framework towards balanced learning for instance recognition. It integrates IoU-balanced sampling, balanced feature pyramid, and objective re-weighting, respectively for reducing the imbalance at sample, feature, and objective level. Extensive experiments conducted on MS COCO, LVIS and Pascal VOC datasets prove the effectiveness of the overall balanced design. - +
- - +## Results and Models -## Citation +The results on COCO 2017val are shown in the below table. (results on test-dev are usually slightly higher than val) - +| Architecture | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :----------: | :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :----------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Faster R-CNN | R-50-FPN | pytorch | 1x | 4.6 | 19.0 | 38.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco/libra_faster_rcnn_r50_fpn_1x_coco_20200130-3afee3a9.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco/libra_faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | +| Fast R-CNN | R-50-FPN | pytorch | 1x | | | | | | +| Faster R-CNN | R-101-FPN | pytorch | 1x | 6.5 | 14.4 | 40.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco/libra_faster_rcnn_r101_fpn_1x_coco_20200203-8dba6a5a.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco/libra_faster_rcnn_r101_fpn_1x_coco_20200203_001405.log.json) | +| Faster R-CNN | X-101-64x4d-FPN | pytorch | 1x | 10.8 | 8.5 | 42.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco/libra_faster_rcnn_x101_64x4d_fpn_1x_coco_20200315-3a7d0488.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco/libra_faster_rcnn_x101_64x4d_fpn_1x_coco_20200315_231625.log.json) | +| RetinaNet | R-50-FPN | pytorch | 1x | 4.2 | 17.7 | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/libra_rcnn/libra_retinanet_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_retinanet_r50_fpn_1x_coco/libra_retinanet_r50_fpn_1x_coco_20200205-804d94ce.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_retinanet_r50_fpn_1x_coco/libra_retinanet_r50_fpn_1x_coco_20200205_112757.log.json) | + +## Citation We provide config files to reproduce the results in the CVPR 2019 paper [Libra R-CNN](https://arxiv.org/pdf/1904.02701.pdf). The extended version of [Libra R-CNN](https://arxiv.org/pdf/2108.10175.pdf) is accpeted by IJCV. -``` +```latex @inproceedings{pang2019libra, title={Libra R-CNN: Towards Balanced Learning for Object Detection}, author={Pang, Jiangmiao and Chen, Kai and Shi, Jianping and Feng, Huajun and Ouyang, Wanli and Dahua Lin}, @@ -42,15 +51,3 @@ The extended version of [Libra R-CNN](https://arxiv.org/pdf/2108.10175.pdf) is a publisher={Springer} } ``` - -## Results and models - -The results on COCO 2017val are shown in the below table. (results on test-dev are usually slightly higher than val) - -| Architecture | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -|:------------:|:---------------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| -| Faster R-CNN | R-50-FPN | pytorch | 1x | 4.6 | 19.0 | 38.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco/libra_faster_rcnn_r50_fpn_1x_coco_20200130-3afee3a9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco/libra_faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | -| Fast R-CNN | R-50-FPN | pytorch | 1x | | | | | -| Faster R-CNN | R-101-FPN | pytorch | 1x | 6.5 | 14.4 | 40.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco/libra_faster_rcnn_r101_fpn_1x_coco_20200203-8dba6a5a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco/libra_faster_rcnn_r101_fpn_1x_coco_20200203_001405.log.json) | -| Faster R-CNN | X-101-64x4d-FPN | pytorch | 1x | 10.8 | 8.5 | 42.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco/libra_faster_rcnn_x101_64x4d_fpn_1x_coco_20200315-3a7d0488.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco/libra_faster_rcnn_x101_64x4d_fpn_1x_coco_20200315_231625.log.json) | -| RetinaNet | R-50-FPN | pytorch | 1x | 4.2 | 17.7 | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/libra_rcnn/libra_retinanet_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_retinanet_r50_fpn_1x_coco/libra_retinanet_r50_fpn_1x_coco_20200205-804d94ce.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_retinanet_r50_fpn_1x_coco/libra_retinanet_r50_fpn_1x_coco_20200205_112757.log.json) | diff --git a/configs/lvis/README.md b/configs/lvis/README.md index daf27e8..0c2760e 100644 --- a/configs/lvis/README.md +++ b/configs/lvis/README.md @@ -1,58 +1,56 @@ -# LVIS: A Dataset for Large Vocabulary Instance Segmentation +# LVIS -## Abstract +> [LVIS: A Dataset for Large Vocabulary Instance Segmentation](https://arxiv.org/abs/1908.03195) + + - +## Abstract -Progress on object detection is enabled by datasets that focus the research community's attention on open challenges. This process led us from simple images to complex scenes and from bounding boxes to segmentation masks. In this work, we introduce LVIS (pronounced `el-vis'): a new dataset for Large Vocabulary Instance Segmentation. We plan to collect ~2 million high-quality instance segmentation masks for over 1000 entry-level object categories in 164k images. Due to the Zipfian distribution of categories in natural images, LVIS naturally has a long tail of categories with few training samples. Given that state-of-the-art deep learning methods for object detection perform poorly in the low-sample regime, we believe that our dataset poses an important and exciting new scientific challenge. +Progress on object detection is enabled by datasets that focus the research community's attention on open challenges. This process led us from simple images to complex scenes and from bounding boxes to segmentation masks. In this work, we introduce LVIS (pronounced \`el-vis'): a new dataset for Large Vocabulary Instance Segmentation. We plan to collect ~2 million high-quality instance segmentation masks for over 1000 entry-level object categories in 164k images. Due to the Zipfian distribution of categories in natural images, LVIS naturally has a long tail of categories with few training samples. Given that state-of-the-art deep learning methods for object detection perform poorly in the low-sample regime, we believe that our dataset poses an important and exciting new scientific challenge. -
- - +## Common Setting -## Citation +- Please follow [install guide](../../docs/get_started.md#install-mmdetection) to install open-mmlab forked cocoapi first. - +- Run following scripts to install our forked lvis-api. -```latex -@inproceedings{gupta2019lvis, - title={{LVIS}: A Dataset for Large Vocabulary Instance Segmentation}, - author={Gupta, Agrim and Dollar, Piotr and Girshick, Ross}, - booktitle={Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition}, - year={2019} -} -``` + ```shell + pip install git+https://github.com/lvis-dataset/lvis-api.git + ``` -## Common Setting - -* Please follow [install guide](../../docs/get_started.md#install-mmdetection) to install open-mmlab forked cocoapi first. -* Run following scripts to install our forked lvis-api. +- All experiments use oversample strategy [here](../../docs/tutorials/customize_dataset.md#class-balanced-dataset) with oversample threshold `1e-3`. - ```shell - pip install git+https://github.com/lvis-dataset/lvis-api.git - ``` - -* All experiments use oversample strategy [here](../../docs/tutorials/customize_dataset.md#class-balanced-dataset) with oversample threshold `1e-3`. -* The size of LVIS v0.5 is half of COCO, so schedule `2x` in LVIS is roughly the same iterations as `1x` in COCO. +- The size of LVIS v0.5 is half of COCO, so schedule `2x` in LVIS is roughly the same iterations as `1x` in COCO. ## Results and models of LVIS v0.5 -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: |:--------: | -| R-50-FPN | pytorch | 2x | - | - | 26.1 | 25.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis-dbd06831.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_20200531_160435.log.json) | -| R-101-FPN | pytorch | 2x | - | - | 27.1 | 27.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis-54582ee2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis_20200601_134748.log.json) | -| X-101-32x4d-FPN | pytorch | 2x | - | - | 26.7 | 26.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis-3cf55ea2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis_20200531_221749.log.json) | -| X-101-64x4d-FPN | pytorch | 2x | - | - | 26.4 | 26.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis-1c99a5ad.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis_20200601_194651.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :--------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | pytorch | 2x | - | - | 26.1 | 25.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis-dbd06831.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_20200531_160435.log.json) | +| R-101-FPN | pytorch | 2x | - | - | 27.1 | 27.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis-54582ee2.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis_20200601_134748.log.json) | +| X-101-32x4d-FPN | pytorch | 2x | - | - | 26.7 | 26.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis-3cf55ea2.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis_20200531_221749.log.json) | +| X-101-64x4d-FPN | pytorch | 2x | - | - | 26.4 | 26.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis-1c99a5ad.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis_20200601_194651.log.json) | ## Results and models of LVIS v1 -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | -| R-50-FPN | pytorch | 1x | 9.1 | - | 22.5 | 21.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1-aa78ac3d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1-20200829_061305.log.json) | -| R-101-FPN | pytorch | 1x | 10.8 | - | 24.6 | 23.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1-ec55ce32.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1-20200829_070959.log.json) | -| X-101-32x4d-FPN | pytorch | 1x | 11.8 | - | 26.7 | 25.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1-ebbc5c81.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1-20200829_071317.log.json) | -| X-101-64x4d-FPN | pytorch | 1x | 14.6 | - | 27.2 | 25.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1-43d9edfe.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1-20200830_060206.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | pytorch | 1x | 9.1 | - | 22.5 | 21.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1-aa78ac3d.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1-20200829_061305.log.json) | +| R-101-FPN | pytorch | 1x | 10.8 | - | 24.6 | 23.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1-ec55ce32.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1-20200829_070959.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 11.8 | - | 26.7 | 25.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1-ebbc5c81.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1-20200829_071317.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 14.6 | - | 27.2 | 25.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1-43d9edfe.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1-20200830_060206.log.json) | + +## Citation + +```latex +@inproceedings{gupta2019lvis, + title={{LVIS}: A Dataset for Large Vocabulary Instance Segmentation}, + author={Gupta, Agrim and Dollar, Piotr and Girshick, Ross}, + booktitle={Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition}, + year={2019} +} +``` diff --git a/configs/mask2former/README.md b/configs/mask2former/README.md new file mode 100644 index 0000000..ebce50d --- /dev/null +++ b/configs/mask2former/README.md @@ -0,0 +1,73 @@ +# Mask2Former + +> [Masked-attention Mask Transformer for Universal Image Segmentation](http://arxiv.org/abs/2112.01527) + + + +## Abstract + +Image segmentation is about grouping pixels with different semantics, e.g., category or instance membership, where each choice of semantics defines a task. While only the semantics of each task differ, current research focuses on designing specialized architectures for each task. We present Masked-attention Mask Transformer (Mask2Former), a new architecture capable of addressing any image segmentation task (panoptic, instance or semantic). Its key components include masked attention, which extracts localized features by constraining cross-attention within predicted mask regions. In addition to reducing the research effort by at least three times, it outperforms the best specialized architectures by a significant margin on four popular datasets. Most notably, Mask2Former sets a new state-of-the-art for panoptic segmentation (57.8 PQ on COCO), instance segmentation (50.1 AP on COCO) and semantic segmentation (57.7 mIoU on ADE20K). + +
+ +
+ +## Introduction + +Mask2Former requires COCO and [COCO-panoptic](http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip) dataset for training and evaluation. You need to download and extract it in the COCO dataset path. +The directory should be like this. + +```none +mmdetection +├── mmdet +├── tools +├── configs +├── data +│ ├── coco +│ │ ├── annotations +| | | ├── instances_train2017.json +| | | ├── instances_val2017.json +│ │ │ ├── panoptic_train2017.json +│ │ │ ├── panoptic_train2017 +│ │ │ ├── panoptic_val2017.json +│ │ │ ├── panoptic_val2017 +│ │ ├── train2017 +│ │ ├── val2017 +│ │ ├── test2017 +``` + +## Results and Models + +### Panoptic segmentation + +| Backbone | style | Pretrain | Lr schd | Mem (GB) | Inf time (fps) | PQ | box mAP | mask mAP | Config | Download | +| :------: | :-----: | :----------: | :-----: | :------: | :------------: | :--: | :-----: | :------: | :----------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | pytorch | ImageNet-1K | 50e | 13.9 | - | 51.9 | 44.8 | 41.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic/mask2former_r50_lsj_8x2_50e_coco-panoptic_20220326_224516-11a44721.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic/mask2former_r50_lsj_8x2_50e_coco-panoptic_20220326_224516.log.json) | +| R-101 | pytorch | ImageNet-1K | 50e | 16.1 | - | 52.4 | 45.3 | 42.4 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic/mask2former_r101_lsj_8x2_50e_coco-panoptic_20220329_225104-c54e64c9.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic/mask2former_r101_lsj_8x2_50e_coco-panoptic_20220329_225104.log.json) | +| Swin-T | - | ImageNet-1K | 50e | 15.9 | - | 53.4 | 46.3 | 43.4 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220326_224553-fc567107.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220326_224553.log.json) | +| Swin-S | - | ImageNet-1K | 50e | 19.1 | - | 54.5 | 47.8 | 44.5 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220329_225200-c7b94355.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220329_225200.log.json) | +| Swin-B | - | ImageNet-1K | 50e | 26.0 | - | 55.1 | 48.2 | 44.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic_20220331_002244-c149a9e9.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic_20220331_002244.log.json) | +| Swin-B | - | ImageNet-21K | 50e | 25.8 | - | 56.3 | 50.0 | 46.3 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic_20220329_230021-3bb8b482.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic_20220329_230021.log.json) | +| Swin-L | - | ImageNet-21K | 100e | 21.1 | - | 57.6 | 52.2 | 48.5 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic_20220407_104949-d4919c44.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic_20220407_104949.log.json) | + +### Instance segmentation + +| Backbone | style | Pretrain | Lr schd | Mem (GB) | Inf time (fps) | box mAP | mask mAP | Config | Download | +| -------- | ------- | ----------- | ------- | -------- | -------------- | ------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| R-50 | pytorch | ImageNet-1K | 50e | 13.7 | - | 45.7 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220506_191028-8e96e88b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220506_191028.log.json) | +| R-101 | pytorch | ImageNet-1K | 50e | 15.5 | - | 46.7 | 44.0 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250-c50b6fa6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250.log.json) | +| Swin-T | - | ImageNet-1K | 50e | 15.3 | - | 47.7 | 44.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649-4a943037.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649.log.json) | +| Swin-S | - | ImageNet-1K | 50e | 18.8 | - | 49.3 | 46.1 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756.log.json) | + +Note: We have trained the instance segmentation models many times (see more details in [PR 7571](https://github.com/open-mmlab/mmdetection/pull/7571)). The results of the trained models are relatively stable (+- 0.2), and have a certain gap (about 0.2 AP) in comparison with the results in the [paper](http://arxiv.org/abs/2112.01527). However, the performance of the model trained with the official code is unstable and may also be slightly lower than the reported results as mentioned in the [issue](https://github.com/facebookresearch/Mask2Former/issues/46). + +## Citation + +```latex +@article{cheng2021mask2former, + title={Masked-attention Mask Transformer for Universal Image Segmentation}, + author={Bowen Cheng and Ishan Misra and Alexander G. Schwing and Alexander Kirillov and Rohit Girdhar}, + journal={arXiv}, + year={2021} +} +``` diff --git a/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic.py b/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic.py new file mode 100644 index 0000000..33fdde6 --- /dev/null +++ b/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic.py @@ -0,0 +1,7 @@ +_base_ = './mask2former_r50_lsj_8x2_50e_coco-panoptic.py' + +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py b/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py new file mode 100644 index 0000000..5543fb0 --- /dev/null +++ b/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py @@ -0,0 +1,7 @@ +_base_ = ['./mask2former_r50_lsj_8x2_50e_coco.py'] + +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic.py b/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic.py new file mode 100644 index 0000000..2c23625 --- /dev/null +++ b/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic.py @@ -0,0 +1,253 @@ +_base_ = [ + '../_base_/datasets/coco_panoptic.py', '../_base_/default_runtime.py' +] +num_things_classes = 80 +num_stuff_classes = 53 +num_classes = num_things_classes + num_stuff_classes +model = dict( + type='Mask2Former', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + panoptic_head=dict( + type='Mask2FormerHead', + in_channels=[256, 512, 1024, 2048], # pass to pixel_decoder inside + strides=[4, 8, 16, 32], + feat_channels=256, + out_channels=256, + num_things_classes=num_things_classes, + num_stuff_classes=num_stuff_classes, + num_queries=100, + num_transformer_feat_level=3, + pixel_decoder=dict( + type='MSDeformAttnPixelDecoder', + num_outs=3, + norm_cfg=dict(type='GN', num_groups=32), + act_cfg=dict(type='ReLU'), + encoder=dict( + type='DetrTransformerEncoder', + num_layers=6, + transformerlayers=dict( + type='BaseTransformerLayer', + attn_cfgs=dict( + type='MultiScaleDeformableAttention', + embed_dims=256, + num_heads=8, + num_levels=3, + num_points=4, + im2col_step=64, + dropout=0.0, + batch_first=False, + norm_cfg=None, + init_cfg=None), + ffn_cfgs=dict( + type='FFN', + embed_dims=256, + feedforward_channels=1024, + num_fcs=2, + ffn_drop=0.0, + act_cfg=dict(type='ReLU', inplace=True)), + operation_order=('self_attn', 'norm', 'ffn', 'norm')), + init_cfg=None), + positional_encoding=dict( + type='SinePositionalEncoding', num_feats=128, normalize=True), + init_cfg=None), + enforce_decoder_input_project=False, + positional_encoding=dict( + type='SinePositionalEncoding', num_feats=128, normalize=True), + transformer_decoder=dict( + type='DetrTransformerDecoder', + return_intermediate=True, + num_layers=9, + transformerlayers=dict( + type='DetrTransformerDecoderLayer', + attn_cfgs=dict( + type='MultiheadAttention', + embed_dims=256, + num_heads=8, + attn_drop=0.0, + proj_drop=0.0, + dropout_layer=None, + batch_first=False), + ffn_cfgs=dict( + embed_dims=256, + feedforward_channels=2048, + num_fcs=2, + act_cfg=dict(type='ReLU', inplace=True), + ffn_drop=0.0, + dropout_layer=None, + add_identity=True), + feedforward_channels=2048, + operation_order=('cross_attn', 'norm', 'self_attn', 'norm', + 'ffn', 'norm')), + init_cfg=None), + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=2.0, + reduction='mean', + class_weight=[1.0] * num_classes + [0.1]), + loss_mask=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + reduction='mean', + loss_weight=5.0), + loss_dice=dict( + type='DiceLoss', + use_sigmoid=True, + activate=True, + reduction='mean', + naive_dice=True, + eps=1.0, + loss_weight=5.0)), + panoptic_fusion_head=dict( + type='MaskFormerFusionHead', + num_things_classes=num_things_classes, + num_stuff_classes=num_stuff_classes, + loss_panoptic=None, + init_cfg=None), + train_cfg=dict( + num_points=12544, + oversample_ratio=3.0, + importance_sample_ratio=0.75, + assigner=dict( + type='MaskHungarianAssigner', + cls_cost=dict(type='ClassificationCost', weight=2.0), + mask_cost=dict( + type='CrossEntropyLossCost', weight=5.0, use_sigmoid=True), + dice_cost=dict( + type='DiceCost', weight=5.0, pred_act=True, eps=1.0)), + sampler=dict(type='MaskPseudoSampler')), + test_cfg=dict( + panoptic_on=True, + # For now, the dataset does not support + # evaluating semantic segmentation metric. + semantic_on=False, + instance_on=True, + # max_per_image is for instance segmentation. + max_per_image=100, + iou_thr=0.8, + # In Mask2Former's panoptic postprocessing, + # it will filter mask area where score is less than 0.5 . + filter_low_score=True), + init_cfg=None) + +# dataset settings +image_size = (1024, 1024) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict( + type='LoadPanopticAnnotations', + with_bbox=True, + with_mask=True, + with_seg=True), + dict(type='RandomFlip', flip_ratio=0.5), + # large scale jittering + dict( + type='Resize', + img_scale=image_size, + ratio_range=(0.1, 2.0), + multiscale_mode='range', + keep_ratio=True), + dict( + type='RandomCrop', + crop_size=image_size, + crop_type='absolute', + recompute_bbox=True, + allow_negative_crop=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=image_size), + dict(type='DefaultFormatBundle', img_to_float=True), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data_root = 'data/coco/' +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict( + pipeline=test_pipeline, + ins_ann_file=data_root + 'annotations/instances_val2017.json', + ), + test=dict( + pipeline=test_pipeline, + ins_ann_file=data_root + 'annotations/instances_val2017.json', + )) + +embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +# optimizer +optimizer = dict( + type='AdamW', + lr=0.0001, + weight_decay=0.05, + eps=1e-8, + betas=(0.9, 0.999), + paramwise_cfg=dict( + custom_keys={ + 'backbone': dict(lr_mult=0.1, decay_mult=1.0), + 'query_embed': embed_multi, + 'query_feat': embed_multi, + 'level_embed': embed_multi, + }, + norm_decay_mult=0.0)) +optimizer_config = dict(grad_clip=dict(max_norm=0.01, norm_type=2)) + +# learning policy +lr_config = dict( + policy='step', + gamma=0.1, + by_epoch=False, + step=[327778, 355092], + warmup='linear', + warmup_by_epoch=False, + warmup_ratio=1.0, # no warmup + warmup_iters=10) + +max_iters = 368750 +runner = dict(type='IterBasedRunner', max_iters=max_iters) + +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook', by_epoch=False), + dict(type='TensorboardLoggerHook', by_epoch=False) + ]) +interval = 5000 +workflow = [('train', interval)] +checkpoint_config = dict( + by_epoch=False, interval=interval, save_last=True, max_keep_ckpts=3) + +# Before 365001th iteration, we do evaluation every 5000 iterations. +# After 365000th iteration, we do evaluation every 368750 iterations, +# which means that we do evaluation at the end of training. +dynamic_intervals = [(max_iters // interval * interval + 1, max_iters)] +evaluation = dict( + interval=interval, + dynamic_intervals=dynamic_intervals, + metric=['PQ', 'bbox', 'segm']) diff --git a/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py b/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py new file mode 100644 index 0000000..eca6135 --- /dev/null +++ b/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py @@ -0,0 +1,79 @@ +_base_ = ['./mask2former_r50_lsj_8x2_50e_coco-panoptic.py'] +num_things_classes = 80 +num_stuff_classes = 0 +num_classes = num_things_classes + num_stuff_classes +model = dict( + panoptic_head=dict( + num_things_classes=num_things_classes, + num_stuff_classes=num_stuff_classes, + loss_cls=dict(class_weight=[1.0] * num_classes + [0.1])), + panoptic_fusion_head=dict( + num_things_classes=num_things_classes, + num_stuff_classes=num_stuff_classes), + test_cfg=dict(panoptic_on=False)) + +# dataset settings +image_size = (1024, 1024) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +pad_cfg = dict(img=(128, 128, 128), masks=0, seg=255) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='RandomFlip', flip_ratio=0.5), + # large scale jittering + dict( + type='Resize', + img_scale=image_size, + ratio_range=(0.1, 2.0), + multiscale_mode='range', + keep_ratio=True), + dict( + type='RandomCrop', + crop_size=image_size, + crop_type='absolute', + recompute_bbox=True, + allow_negative_crop=True), + dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-5, 1e-5), by_mask=True), + dict(type='Pad', size=image_size, pad_val=pad_cfg), + dict(type='Normalize', **img_norm_cfg), + dict(type='DefaultFormatBundle', img_to_float=True), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Pad', size_divisor=32, pad_val=pad_cfg), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +data = dict( + _delete_=True, + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +evaluation = dict(metric=['bbox', 'segm']) diff --git a/configs/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic.py b/configs/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic.py new file mode 100644 index 0000000..f13f5e1 --- /dev/null +++ b/configs/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic.py @@ -0,0 +1,5 @@ +_base_ = ['./mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic.py'] +pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth' # noqa + +model = dict( + backbone=dict(init_cfg=dict(type='Pretrained', checkpoint=pretrained))) diff --git a/configs/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic.py b/configs/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic.py new file mode 100644 index 0000000..33a805c --- /dev/null +++ b/configs/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic.py @@ -0,0 +1,42 @@ +_base_ = ['./mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic.py'] +pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384.pth' # noqa + +depths = [2, 2, 18, 2] +model = dict( + backbone=dict( + pretrain_img_size=384, + embed_dims=128, + depths=depths, + num_heads=[4, 8, 16, 32], + window_size=12, + init_cfg=dict(type='Pretrained', checkpoint=pretrained)), + panoptic_head=dict(in_channels=[128, 256, 512, 1024])) + +# set all layers in backbone to lr_mult=0.1 +# set all norm layers, position_embeding, +# query_embeding, level_embeding to decay_multi=0.0 +backbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0) +backbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0) +embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +custom_keys = { + 'backbone': dict(lr_mult=0.1, decay_mult=1.0), + 'backbone.patch_embed.norm': backbone_norm_multi, + 'backbone.norm': backbone_norm_multi, + 'absolute_pos_embed': backbone_embed_multi, + 'relative_position_bias_table': backbone_embed_multi, + 'query_embed': embed_multi, + 'query_feat': embed_multi, + 'level_embed': embed_multi +} +custom_keys.update({ + f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi + for stage_id, num_blocks in enumerate(depths) + for block_id in range(num_blocks) +}) +custom_keys.update({ + f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi + for stage_id in range(len(depths) - 1) +}) +# optimizer +optimizer = dict( + paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) diff --git a/configs/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic.py b/configs/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic.py new file mode 100644 index 0000000..91a180d --- /dev/null +++ b/configs/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic.py @@ -0,0 +1,26 @@ +_base_ = ['./mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic.py'] +pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth' # noqa + +model = dict( + backbone=dict( + embed_dims=192, + num_heads=[6, 12, 24, 48], + init_cfg=dict(type='Pretrained', checkpoint=pretrained)), + panoptic_head=dict(num_queries=200, in_channels=[192, 384, 768, 1536])) + +data = dict(samples_per_gpu=1, workers_per_gpu=1) + +lr_config = dict(step=[655556, 710184]) + +max_iters = 737500 +runner = dict(type='IterBasedRunner', max_iters=max_iters) + +# Before 735001th iteration, we do evaluation every 5000 iterations. +# After 735000th iteration, we do evaluation every 737500 iterations, +# which means that we do evaluation at the end of training.' +interval = 5000 +dynamic_intervals = [(max_iters // interval * interval + 1, max_iters)] +evaluation = dict( + interval=interval, + dynamic_intervals=dynamic_intervals, + metric=['PQ', 'bbox', 'segm']) diff --git a/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py b/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py new file mode 100644 index 0000000..b2b621c --- /dev/null +++ b/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py @@ -0,0 +1,37 @@ +_base_ = ['./mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic.py'] +pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth' # noqa + +depths = [2, 2, 18, 2] +model = dict( + backbone=dict( + depths=depths, init_cfg=dict(type='Pretrained', + checkpoint=pretrained))) + +# set all layers in backbone to lr_mult=0.1 +# set all norm layers, position_embeding, +# query_embeding, level_embeding to decay_multi=0.0 +backbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0) +backbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0) +embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +custom_keys = { + 'backbone': dict(lr_mult=0.1, decay_mult=1.0), + 'backbone.patch_embed.norm': backbone_norm_multi, + 'backbone.norm': backbone_norm_multi, + 'absolute_pos_embed': backbone_embed_multi, + 'relative_position_bias_table': backbone_embed_multi, + 'query_embed': embed_multi, + 'query_feat': embed_multi, + 'level_embed': embed_multi +} +custom_keys.update({ + f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi + for stage_id, num_blocks in enumerate(depths) + for block_id in range(num_blocks) +}) +custom_keys.update({ + f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi + for stage_id in range(len(depths) - 1) +}) +# optimizer +optimizer = dict( + paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) diff --git a/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py b/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py new file mode 100644 index 0000000..7b1b05a --- /dev/null +++ b/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py @@ -0,0 +1,37 @@ +_base_ = ['./mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py'] +pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth' # noqa + +depths = [2, 2, 18, 2] +model = dict( + backbone=dict( + depths=depths, init_cfg=dict(type='Pretrained', + checkpoint=pretrained))) + +# set all layers in backbone to lr_mult=0.1 +# set all norm layers, position_embeding, +# query_embeding, level_embeding to decay_multi=0.0 +backbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0) +backbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0) +embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +custom_keys = { + 'backbone': dict(lr_mult=0.1, decay_mult=1.0), + 'backbone.patch_embed.norm': backbone_norm_multi, + 'backbone.norm': backbone_norm_multi, + 'absolute_pos_embed': backbone_embed_multi, + 'relative_position_bias_table': backbone_embed_multi, + 'query_embed': embed_multi, + 'query_feat': embed_multi, + 'level_embed': embed_multi +} +custom_keys.update({ + f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi + for stage_id, num_blocks in enumerate(depths) + for block_id in range(num_blocks) +}) +custom_keys.update({ + f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi + for stage_id in range(len(depths) - 1) +}) +# optimizer +optimizer = dict( + paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) diff --git a/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic.py b/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic.py new file mode 100644 index 0000000..04b2f10 --- /dev/null +++ b/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic.py @@ -0,0 +1,62 @@ +_base_ = ['./mask2former_r50_lsj_8x2_50e_coco-panoptic.py'] +pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth' # noqa + +depths = [2, 2, 6, 2] +model = dict( + type='Mask2Former', + backbone=dict( + _delete_=True, + type='SwinTransformer', + embed_dims=96, + depths=depths, + num_heads=[3, 6, 12, 24], + window_size=7, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.3, + patch_norm=True, + out_indices=(0, 1, 2, 3), + with_cp=False, + convert_weights=True, + frozen_stages=-1, + init_cfg=dict(type='Pretrained', checkpoint=pretrained)), + panoptic_head=dict( + type='Mask2FormerHead', in_channels=[96, 192, 384, 768]), + init_cfg=None) + +# set all layers in backbone to lr_mult=0.1 +# set all norm layers, position_embeding, +# query_embeding, level_embeding to decay_multi=0.0 +backbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0) +backbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0) +embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +custom_keys = { + 'backbone': dict(lr_mult=0.1, decay_mult=1.0), + 'backbone.patch_embed.norm': backbone_norm_multi, + 'backbone.norm': backbone_norm_multi, + 'absolute_pos_embed': backbone_embed_multi, + 'relative_position_bias_table': backbone_embed_multi, + 'query_embed': embed_multi, + 'query_feat': embed_multi, + 'level_embed': embed_multi +} +custom_keys.update({ + f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi + for stage_id, num_blocks in enumerate(depths) + for block_id in range(num_blocks) +}) +custom_keys.update({ + f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi + for stage_id in range(len(depths) - 1) +}) +# optimizer +optimizer = dict( + type='AdamW', + lr=0.0001, + weight_decay=0.05, + eps=1e-8, + betas=(0.9, 0.999), + paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) diff --git a/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py b/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py new file mode 100644 index 0000000..0ccbe91 --- /dev/null +++ b/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py @@ -0,0 +1,61 @@ +_base_ = ['./mask2former_r50_lsj_8x2_50e_coco.py'] +pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth' # noqa +depths = [2, 2, 6, 2] +model = dict( + type='Mask2Former', + backbone=dict( + _delete_=True, + type='SwinTransformer', + embed_dims=96, + depths=depths, + num_heads=[3, 6, 12, 24], + window_size=7, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.3, + patch_norm=True, + out_indices=(0, 1, 2, 3), + with_cp=False, + convert_weights=True, + frozen_stages=-1, + init_cfg=dict(type='Pretrained', checkpoint=pretrained)), + panoptic_head=dict( + type='Mask2FormerHead', in_channels=[96, 192, 384, 768]), + init_cfg=None) + +# set all layers in backbone to lr_mult=0.1 +# set all norm layers, position_embeding, +# query_embeding, level_embeding to decay_multi=0.0 +backbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0) +backbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0) +embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +custom_keys = { + 'backbone': dict(lr_mult=0.1, decay_mult=1.0), + 'backbone.patch_embed.norm': backbone_norm_multi, + 'backbone.norm': backbone_norm_multi, + 'absolute_pos_embed': backbone_embed_multi, + 'relative_position_bias_table': backbone_embed_multi, + 'query_embed': embed_multi, + 'query_feat': embed_multi, + 'level_embed': embed_multi +} +custom_keys.update({ + f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi + for stage_id, num_blocks in enumerate(depths) + for block_id in range(num_blocks) +}) +custom_keys.update({ + f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi + for stage_id in range(len(depths) - 1) +}) +# optimizer +optimizer = dict( + type='AdamW', + lr=0.0001, + weight_decay=0.05, + eps=1e-8, + betas=(0.9, 0.999), + paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) diff --git a/configs/mask2former/metafile.yml b/configs/mask2former/metafile.yml new file mode 100644 index 0000000..d9f4692 --- /dev/null +++ b/configs/mask2former/metafile.yml @@ -0,0 +1,223 @@ +Collections: + - Name: Mask2Former + Metadata: + Training Data: COCO + Training Techniques: + - AdamW + - Weight Decay + Training Resources: 8x A100 GPUs + Architecture: + - Mask2Former + Paper: + URL: https://arxiv.org/pdf/2112.01527 + Title: 'Masked-attention Mask Transformer for Universal Image Segmentation' + README: configs/mask2former/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.23.0/mmdet/models/detectors/mask2former.py#L7 + Version: v2.23.0 + +Models: +- Name: mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic + In Collection: Mask2Former + Config: configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py + Metadata: + Training Memory (GB): 19.1 + Iterations: 368750 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 47.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 44.5 + - Task: Panoptic Segmentation + Dataset: COCO + Metrics: + PQ: 54.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220329_225200-c7b94355.pth +- Name: mask2former_r101_lsj_8x2_50e_coco + In Collection: Mask2Former + Config: configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py + Metadata: + Training Memory (GB): 15.5 + Iterations: 368750 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 46.7 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 44.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250-c50b6fa6.pth +- Name: mask2former_r101_lsj_8x2_50e_coco-panoptic + In Collection: Mask2Former + Config: configs/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic.py + Metadata: + Training Memory (GB): 16.1 + Iterations: 368750 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.3 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 42.4 + - Task: Panoptic Segmentation + Dataset: COCO + Metrics: + PQ: 52.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic/mask2former_r101_lsj_8x2_50e_coco-panoptic_20220329_225104-c54e64c9.pth +- Name: mask2former_r50_lsj_8x2_50e_coco-panoptic + In Collection: Mask2Former + Config: configs/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic.py + Metadata: + Training Memory (GB): 13.9 + Iterations: 368750 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 41.9 + - Task: Panoptic Segmentation + Dataset: COCO + Metrics: + PQ: 51.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic/mask2former_r50_lsj_8x2_50e_coco-panoptic_20220326_224516-11a44721.pth +- Name: mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic + In Collection: Mask2Former + Config: configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic.py + Metadata: + Training Memory (GB): 15.9 + Iterations: 368750 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 46.3 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 43.4 + - Task: Panoptic Segmentation + Dataset: COCO + Metrics: + PQ: 53.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220326_224553-fc567107.pth +- Name: mask2former_r50_lsj_8x2_50e_coco + In Collection: Mask2Former + Config: configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py + Metadata: + Training Memory (GB): 13.7 + Iterations: 368750 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.7 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 42.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220506_191028-8e96e88b.pth +- Name: mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic + In Collection: Mask2Former + Config: configs/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic.py + Metadata: + Training Memory (GB): 21.1 + Iterations: 737500 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 52.2 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 48.5 + - Task: Panoptic Segmentation + Dataset: COCO + Metrics: + PQ: 57.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic_20220407_104949-d4919c44.pth +- Name: mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic + In Collection: Mask2Former + Config: configs/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic.py + Metadata: + Training Memory (GB): 25.8 + Iterations: 368750 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 50.0 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 46.3 + - Task: Panoptic Segmentation + Dataset: COCO + Metrics: + PQ: 56.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic_20220329_230021-3bb8b482.pth +- Name: mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic + In Collection: Mask2Former + Config: configs/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic.py + Metadata: + Training Memory (GB): 26.0 + Iterations: 368750 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 48.2 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 44.9 + - Task: Panoptic Segmentation + Dataset: COCO + Metrics: + PQ: 55.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic_20220331_002244-c149a9e9.pth +- Name: mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco + In Collection: Mask2Former + Config: configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py + Metadata: + Training Memory (GB): 15.3 + Iterations: 368750 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 47.7 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 44.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649-4a943037.pth +- Name: mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco + In Collection: Mask2Former + Config: configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py + Metadata: + Training Memory (GB): 18.8 + Iterations: 368750 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 49.3 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 46.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth diff --git a/configs/mask_rcnn/README.md b/configs/mask_rcnn/README.md index 5080a04..6aad9a2 100644 --- a/configs/mask_rcnn/README.md +++ b/configs/mask_rcnn/README.md @@ -1,22 +1,51 @@ # Mask R-CNN -## Abstract +> [Mask R-CNN](https://arxiv.org/abs/1703.06870) - + + +## Abstract We present a conceptually simple, flexible, and general framework for object instance segmentation. Our approach efficiently detects objects in an image while simultaneously generating a high-quality segmentation mask for each instance. The method, called Mask R-CNN, extends Faster R-CNN by adding a branch for predicting an object mask in parallel with the existing branch for bounding box recognition. Mask R-CNN is simple to train and adds only a small overhead to Faster R-CNN, running at 5 fps. Moreover, Mask R-CNN is easy to generalize to other tasks, e.g., allowing us to estimate human poses in the same framework. We show top results in all three tracks of the COCO suite of challenges, including instance segmentation, bounding-box object detection, and person keypoint detection. Without bells and whistles, Mask R-CNN outperforms all existing, single-model entries on every task, including the COCO 2016 challenge winners. We hope our simple and effective approach will serve as a solid baseline and help ease future research in instance-level recognition. -
- - +## Results and Models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :-------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | caffe | 1x | 4.3 | | 38.0 | 34.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco/mask_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.38__segm_mAP-0.344_20200504_231812-0ebd1859.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco/mask_rcnn_r50_caffe_fpn_1x_coco_20200504_231812.log.json) | +| R-50-FPN | pytorch | 1x | 4.4 | 16.1 | 38.2 | 34.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205_050542.log.json) | +| R-50-FPN (FP16) | pytorch | 1x | 3.6 | 24.1 | 38.1 | 34.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_fpn_fp16_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_1x_coco/mask_rcnn_r50_fpn_fp16_1x_coco_20200205-59faf7e4.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_1x_coco/mask_rcnn_r50_fpn_fp16_1x_coco_20200205_130539.log.json) | +| R-50-FPN | pytorch | 2x | - | - | 39.2 | 35.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_2x_coco/mask_rcnn_r50_fpn_2x_coco_bbox_mAP-0.392__segm_mAP-0.354_20200505_003907-3e542a40.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_2x_coco/mask_rcnn_r50_fpn_2x_coco_20200505_003907.log.json) | +| R-101-FPN | caffe | 1x | | | 40.4 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco/mask_rcnn_r101_caffe_fpn_1x_coco_20200601_095758-805e06c1.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco/mask_rcnn_r101_caffe_fpn_1x_coco_20200601_095758.log.json) | +| R-101-FPN | pytorch | 1x | 6.4 | 13.5 | 40.0 | 36.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_1x_coco/mask_rcnn_r101_fpn_1x_coco_20200204-1efe0ed5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_1x_coco/mask_rcnn_r101_fpn_1x_coco_20200204_144809.log.json) | +| R-101-FPN | pytorch | 2x | - | - | 40.8 | 36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_2x_coco/mask_rcnn_r101_fpn_2x_coco_bbox_mAP-0.408__segm_mAP-0.366_20200505_071027-14b391c7.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_2x_coco/mask_rcnn_r101_fpn_2x_coco_20200505_071027.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 7.6 | 11.3 | 41.9 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205-478d0b67.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205_034906.log.json) | +| X-101-32x4d-FPN | pytorch | 2x | - | - | 42.2 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco/mask_rcnn_x101_32x4d_fpn_2x_coco_bbox_mAP-0.422__segm_mAP-0.378_20200506_004702-faef898c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco/mask_rcnn_x101_32x4d_fpn_2x_coco_20200506_004702.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 10.7 | 8.0 | 42.8 | 38.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco/mask_rcnn_x101_64x4d_fpn_1x_coco_20200201-9352eb0d.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco/mask_rcnn_x101_64x4d_fpn_1x_coco_20200201_124310.log.json) | +| X-101-64x4d-FPN | pytorch | 2x | - | - | 42.7 | 38.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco/mask_rcnn_x101_64x4d_fpn_2x_coco_20200509_224208-39d6f70c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco/mask_rcnn_x101_64x4d_fpn_2x_coco_20200509_224208.log.json) | +| X-101-32x8d-FPN | pytorch | 1x | - | - | 42.8 | 38.3 | | | -## Citation +## Pre-trained Models - +We also train some models with longer schedules and multi-scale training. The users could finetune them for downstream tasks. + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------------------------------------------------------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :--------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| [R-50-FPN](./mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py) | caffe | 2x | 4.3 | | 40.3 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco_bbox_mAP-0.403__segm_mAP-0.365_20200504_231822-a75c98ce.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco_20200504_231822.log.json) | +| [R-50-FPN](./mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py) | caffe | 3x | 4.3 | | 40.8 | 37.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_20200504_163245.log.json) | +| [R-50-FPN](./mask_rcnn_r50_fpn_mstrain-poly_3x_coco.py) | pytorch | 3x | 4.1 | | 40.9 | 37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_fpn_mstrain-poly_3x_coco_20210524_201154-21b550bb.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_fpn_mstrain-poly_3x_coco_20210524_201154.log.json) | +| [R-101-FPN](./mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco.py) | caffe | 3x | 5.9 | | 42.9 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco_20210526_132339-3c33ce02.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco_20210526_132339.log.json) | +| [R-101-FPN](./mask_rcnn_r101_fpn_mstrain-poly_3x_coco.py) | pytorch | 3x | 6.1 | | 42.7 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco/mask_rcnn_r101_fpn_mstrain-poly_3x_coco_20210524_200244-5675c317.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco/mask_rcnn_r101_fpn_mstrain-poly_3x_coco_20210524_200244.log.json) | +| [x101-32x4d-FPN](./mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco.py) | pytorch | 3x | 7.3 | | 43.6 | 39.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco_20210524_201410-abcd7859.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco_20210524_201410.log.json) | +| [X-101-32x8d-FPN](./mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py) | pytorch | 1x | - | | 43.6 | 39.0 | | | +| [X-101-32x8d-FPN](./mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py) | pytorch | 3x | 10.3 | | 44.3 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco_20210607_161042-8bd2c639.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco_20210607_161042.log.json) | +| [X-101-64x4d-FPN](./mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco.py) | pytorch | 3x | 10.4 | | 44.5 | 39.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco_20210526_120447-c376f129.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco_20210526_120447.log.json) | + +## Citation ```latex @article{He_2017, @@ -28,36 +57,3 @@ We present a conceptually simple, flexible, and general framework for object ins month={Oct} } ``` - -## Results and models - -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | -| R-50-FPN | caffe | 1x | 4.3 | | 38.0 | 34.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco/mask_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.38__segm_mAP-0.344_20200504_231812-0ebd1859.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco/mask_rcnn_r50_caffe_fpn_1x_coco_20200504_231812.log.json) | -| R-50-FPN | pytorch | 1x | 4.4 | 16.1 | 38.2 | 34.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205_050542.log.json) | -| R-50-FPN (FP16) | pytorch | 1x | 3.6 | 24.1 | 38.1 | 34.7 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_fpn_fp16_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_1x_coco/mask_rcnn_r50_fpn_fp16_1x_coco_20200205-59faf7e4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_1x_coco/mask_rcnn_r50_fpn_fp16_1x_coco_20200205_130539.log.json) | -| R-50-FPN | pytorch | 2x | - | - | 39.2 | 35.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_2x_coco/mask_rcnn_r50_fpn_2x_coco_bbox_mAP-0.392__segm_mAP-0.354_20200505_003907-3e542a40.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_2x_coco/mask_rcnn_r50_fpn_2x_coco_20200505_003907.log.json) | -| R-101-FPN | caffe | 1x | | | 40.4 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco/mask_rcnn_r101_caffe_fpn_1x_coco_20200601_095758-805e06c1.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco/mask_rcnn_r101_caffe_fpn_1x_coco_20200601_095758.log.json)| -| R-101-FPN | pytorch | 1x | 6.4 | 13.5 | 40.0 | 36.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_1x_coco/mask_rcnn_r101_fpn_1x_coco_20200204-1efe0ed5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_1x_coco/mask_rcnn_r101_fpn_1x_coco_20200204_144809.log.json) | -| R-101-FPN | pytorch | 2x | - | - | 40.8 | 36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_2x_coco/mask_rcnn_r101_fpn_2x_coco_bbox_mAP-0.408__segm_mAP-0.366_20200505_071027-14b391c7.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_2x_coco/mask_rcnn_r101_fpn_2x_coco_20200505_071027.log.json) | -| X-101-32x4d-FPN | pytorch | 1x | 7.6 | 11.3 | 41.9 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205-478d0b67.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205_034906.log.json) | -| X-101-32x4d-FPN | pytorch | 2x | - | - | 42.2 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco/mask_rcnn_x101_32x4d_fpn_2x_coco_bbox_mAP-0.422__segm_mAP-0.378_20200506_004702-faef898c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco/mask_rcnn_x101_32x4d_fpn_2x_coco_20200506_004702.log.json) | -| X-101-64x4d-FPN | pytorch | 1x | 10.7 | 8.0 | 42.8 | 38.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco/mask_rcnn_x101_64x4d_fpn_1x_coco_20200201-9352eb0d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco/mask_rcnn_x101_64x4d_fpn_1x_coco_20200201_124310.log.json) | -| X-101-64x4d-FPN | pytorch | 2x | - | - | 42.7 | 38.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco/mask_rcnn_x101_64x4d_fpn_2x_coco_20200509_224208-39d6f70c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco/mask_rcnn_x101_64x4d_fpn_2x_coco_20200509_224208.log.json)| -| X-101-32x8d-FPN | pytorch | 1x | - | - | 42.8 | 38.3 | | - -## Pre-trained Models - -We also train some models with longer schedules and multi-scale training. The users could finetune them for downstream tasks. - -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | -| [R-50-FPN](./mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py) | caffe | 2x | 4.3 | | 40.3 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco_bbox_mAP-0.403__segm_mAP-0.365_20200504_231822-a75c98ce.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco_20200504_231822.log.json) -| [R-50-FPN](./mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py) | caffe | 3x | 4.3 | | 40.8 | 37.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_20200504_163245.log.json) -| [R-50-FPN](./mask_rcnn_r50_fpn_mstrain-poly_3x_coco.py) | pytorch| 3x | 4.1 | | 40.9 | 37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_fpn_mstrain-poly_3x_coco_20210524_201154-21b550bb.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_fpn_mstrain-poly_3x_coco_20210524_201154.log.json) -| [R-101-FPN](./mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco.py) | caffe | 3x | 5.9 | | 42.9 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco_20210526_132339-3c33ce02.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco_20210526_132339.log.json) -| [R-101-FPN](./mask_rcnn_r101_fpn_mstrain-poly_3x_coco.py) | pytorch| 3x | 6.1 | | 42.7 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco/mask_rcnn_r101_fpn_mstrain-poly_3x_coco_20210524_200244-5675c317.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco/mask_rcnn_r101_fpn_mstrain-poly_3x_coco_20210524_200244.log.json) -| [x101-32x4d-FPN](./mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco.py) | pytorch| 3x | 7.3 | | 43.6 | 39.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco_20210524_201410-abcd7859.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco_20210524_201410.log.json) -| [X-101-32x8d-FPN](./mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py) | pytorch | 1x | - | | 43.6 | 39.0 | -| [X-101-32x8d-FPN](./mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py) | pytorch | 3x | 10.3 | | 44.3 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco_20210607_161042-8bd2c639.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco_20210607_161042.log.json) -| [X-101-64x4d-FPN](./mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco.py) | pytorch | 3x | 10.4 | | 44.5 | 39.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco_20210526_120447-c376f129.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco_20210526_120447.log.json) diff --git a/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_wandb_coco.py b/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_wandb_coco.py new file mode 100644 index 0000000..88c8576 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_wandb_coco.py @@ -0,0 +1,26 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +# Set evaluation interval +evaluation = dict(interval=2) +# Set checkpoint interval +checkpoint_config = dict(interval=4) + +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='MMDetWandbHook', + init_kwargs={ + 'project': 'mmdetection', + 'group': 'maskrcnn-r50-fpn-1x-coco' + }, + interval=50, + log_checkpoint=True, + log_checkpoint_metadata=True, + num_eval_images=100) + ]) diff --git a/configs/maskformer/README.md b/configs/maskformer/README.md new file mode 100644 index 0000000..5d8daa2 --- /dev/null +++ b/configs/maskformer/README.md @@ -0,0 +1,53 @@ +# MaskFormer + +> [Per-Pixel Classification is Not All You Need for Semantic Segmentation](https://arxiv.org/abs/2107.06278) + + + +## Abstract + +Modern approaches typically formulate semantic segmentation as a per-pixel classification task, while instance-level segmentation is handled with an alternative mask classification. Our key insight: mask classification is sufficiently general to solve both semantic- and instance-level segmentation tasks in a unified manner using the exact same model, loss, and training procedure. Following this observation, we propose MaskFormer, a simple mask classification model which predicts a set of binary masks, each associated with a single global class label prediction. Overall, the proposed mask classification-based method simplifies the landscape of effective approaches to semantic and panoptic segmentation tasks and shows excellent empirical results. In particular, we observe that MaskFormer outperforms per-pixel classification baselines when the number of classes is large. Our mask classification-based method outperforms both current state-of-the-art semantic (55.6 mIoU on ADE20K) and panoptic segmentation (52.7 PQ on COCO) models. + +
+ +
+ +## Introduction + +MaskFormer requires COCO and [COCO-panoptic](http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip) dataset for training and evaluation. You need to download and extract it in the COCO dataset path. +The directory should be like this. + +```none +mmdetection +├── mmdet +├── tools +├── configs +├── data +│ ├── coco +│ │ ├── annotations +│ │ │ ├── panoptic_train2017.json +│ │ │ ├── panoptic_train2017 +│ │ │ ├── panoptic_val2017.json +│ │ │ ├── panoptic_val2017 +│ │ ├── train2017 +│ │ ├── val2017 +│ │ ├── test2017 +``` + +## Results and Models + +| Backbone | style | Lr schd | Mem (GB) | Inf time (fps) | PQ | SQ | RQ | PQ_th | SQ_th | RQ_th | PQ_st | SQ_st | RQ_st | Config | Download | detail | +| :------: | :-----: | :-----: | :------: | :------------: | :----: | :----: | :----: | :----: | :----: | :----: | :----: | :----: | :----: | :-----------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | pytorch | 75e | 16.2 | - | 46.854 | 80.617 | 57.085 | 51.089 | 81.511 | 61.853 | 40.463 | 79.269 | 49.888 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/maskformer/maskformer_r50_mstrain_16x1_75e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco_20220221_141956-bc2699cb.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco_20220221_141956.log.json) | This version was mentioned in Table XI, in paper [Masked-attention Mask Transformer for Universal Image Segmentation](https://arxiv.org/abs/2112.01527) | +| Swin-L | pytorch | 300e | 27.2 | - | 53.249 | 81.704 | 64.231 | 58.798 | 82.923 | 70.282 | 44.874 | 79.863 | 55.097 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/maskformer/maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco/maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco_20220326_221612-061b4eb8.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco/maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco_20220326_221612.log.json) | - | + +## Citation + +```latex +@inproceedings{cheng2021maskformer, + title={Per-Pixel Classification is Not All You Need for Semantic Segmentation}, + author={Bowen Cheng and Alexander G. Schwing and Alexander Kirillov}, + journal={NeurIPS}, + year={2021} +} +``` diff --git a/configs/maskformer/maskformer_r50_mstrain_16x1_75e_coco.py b/configs/maskformer/maskformer_r50_mstrain_16x1_75e_coco.py new file mode 100644 index 0000000..46b3c13 --- /dev/null +++ b/configs/maskformer/maskformer_r50_mstrain_16x1_75e_coco.py @@ -0,0 +1,238 @@ +_base_ = [ + '../_base_/datasets/coco_panoptic.py', '../_base_/default_runtime.py' +] +num_things_classes = 80 +num_stuff_classes = 53 +num_classes = num_things_classes + num_stuff_classes +model = dict( + type='MaskFormer', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + panoptic_head=dict( + type='MaskFormerHead', + in_channels=[256, 512, 1024, 2048], # pass to pixel_decoder inside + feat_channels=256, + out_channels=256, + num_things_classes=num_things_classes, + num_stuff_classes=num_stuff_classes, + num_queries=100, + pixel_decoder=dict( + type='TransformerEncoderPixelDecoder', + norm_cfg=dict(type='GN', num_groups=32), + act_cfg=dict(type='ReLU'), + encoder=dict( + type='DetrTransformerEncoder', + num_layers=6, + transformerlayers=dict( + type='BaseTransformerLayer', + attn_cfgs=dict( + type='MultiheadAttention', + embed_dims=256, + num_heads=8, + attn_drop=0.1, + proj_drop=0.1, + dropout_layer=None, + batch_first=False), + ffn_cfgs=dict( + embed_dims=256, + feedforward_channels=2048, + num_fcs=2, + act_cfg=dict(type='ReLU', inplace=True), + ffn_drop=0.1, + dropout_layer=None, + add_identity=True), + operation_order=('self_attn', 'norm', 'ffn', 'norm'), + norm_cfg=dict(type='LN'), + init_cfg=None, + batch_first=False), + init_cfg=None), + positional_encoding=dict( + type='SinePositionalEncoding', num_feats=128, normalize=True)), + enforce_decoder_input_project=False, + positional_encoding=dict( + type='SinePositionalEncoding', num_feats=128, normalize=True), + transformer_decoder=dict( + type='DetrTransformerDecoder', + return_intermediate=True, + num_layers=6, + transformerlayers=dict( + type='DetrTransformerDecoderLayer', + attn_cfgs=dict( + type='MultiheadAttention', + embed_dims=256, + num_heads=8, + attn_drop=0.1, + proj_drop=0.1, + dropout_layer=None, + batch_first=False), + ffn_cfgs=dict( + embed_dims=256, + feedforward_channels=2048, + num_fcs=2, + act_cfg=dict(type='ReLU', inplace=True), + ffn_drop=0.1, + dropout_layer=None, + add_identity=True), + # the following parameter was not used, + # just make current api happy + feedforward_channels=2048, + operation_order=('self_attn', 'norm', 'cross_attn', 'norm', + 'ffn', 'norm')), + init_cfg=None), + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0, + reduction='mean', + class_weight=[1.0] * num_classes + [0.1]), + loss_mask=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + reduction='mean', + loss_weight=20.0), + loss_dice=dict( + type='DiceLoss', + use_sigmoid=True, + activate=True, + reduction='mean', + naive_dice=True, + eps=1.0, + loss_weight=1.0)), + panoptic_fusion_head=dict( + type='MaskFormerFusionHead', + num_things_classes=num_things_classes, + num_stuff_classes=num_stuff_classes, + loss_panoptic=None, + init_cfg=None), + train_cfg=dict( + assigner=dict( + type='MaskHungarianAssigner', + cls_cost=dict(type='ClassificationCost', weight=1.0), + mask_cost=dict( + type='FocalLossCost', weight=20.0, binary_input=True), + dice_cost=dict( + type='DiceCost', weight=1.0, pred_act=True, eps=1.0)), + sampler=dict(type='MaskPseudoSampler')), + test_cfg=dict( + panoptic_on=True, + # For now, the dataset does not support + # evaluating semantic segmentation metric. + semantic_on=False, + instance_on=False, + # max_per_image is for instance segmentation. + max_per_image=100, + object_mask_thr=0.8, + iou_thr=0.8, + # In MaskFormer's panoptic postprocessing, + # it will not filter masks whose score is smaller than 0.5 . + filter_low_score=False), + init_cfg=None) + +# dataset settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadPanopticAnnotations', + with_bbox=True, + with_mask=True, + with_seg=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict( + type='AutoAugment', + policies=[[ + dict( + type='Resize', + img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), + (608, 1333), (640, 1333), (672, 1333), (704, 1333), + (736, 1333), (768, 1333), (800, 1333)], + multiscale_mode='value', + keep_ratio=True) + ], + [ + dict( + type='Resize', + img_scale=[(400, 1333), (500, 1333), (600, 1333)], + multiscale_mode='value', + keep_ratio=True), + dict( + type='RandomCrop', + crop_type='absolute_range', + crop_size=(384, 600), + allow_negative_crop=True), + dict( + type='Resize', + img_scale=[(480, 1333), (512, 1333), (544, 1333), + (576, 1333), (608, 1333), (640, 1333), + (672, 1333), (704, 1333), (736, 1333), + (768, 1333), (800, 1333)], + multiscale_mode='value', + override=True, + keep_ratio=True) + ]]), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=1), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=1), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=1, + workers_per_gpu=1, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) + +# optimizer +optimizer = dict( + type='AdamW', + lr=0.0001, + weight_decay=0.0001, + eps=1e-8, + betas=(0.9, 0.999), + paramwise_cfg=dict( + custom_keys={ + 'backbone': dict(lr_mult=0.1, decay_mult=1.0), + 'query_embed': dict(lr_mult=1.0, decay_mult=0.0) + }, + norm_decay_mult=0.0)) +optimizer_config = dict(grad_clip=dict(max_norm=0.01, norm_type=2)) + +# learning policy +lr_config = dict( + policy='step', + gamma=0.1, + by_epoch=True, + step=[50], + warmup='linear', + warmup_by_epoch=False, + warmup_ratio=1.0, # no warmup + warmup_iters=10) +runner = dict(type='EpochBasedRunner', max_epochs=75) diff --git a/configs/maskformer/maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco.py b/configs/maskformer/maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco.py new file mode 100644 index 0000000..bc23c54 --- /dev/null +++ b/configs/maskformer/maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco.py @@ -0,0 +1,67 @@ +_base_ = './maskformer_r50_mstrain_16x1_75e_coco.py' + +pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth' # noqa +depths = [2, 2, 18, 2] +model = dict( + backbone=dict( + _delete_=True, + type='SwinTransformer', + pretrain_img_size=384, + embed_dims=192, + patch_size=4, + window_size=12, + mlp_ratio=4, + depths=depths, + num_heads=[6, 12, 24, 48], + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.3, + patch_norm=True, + out_indices=(0, 1, 2, 3), + with_cp=False, + convert_weights=True, + init_cfg=dict(type='Pretrained', checkpoint=pretrained)), + panoptic_head=dict( + in_channels=[192, 384, 768, 1536], # pass to pixel_decoder inside + pixel_decoder=dict( + _delete_=True, + type='PixelDecoder', + norm_cfg=dict(type='GN', num_groups=32), + act_cfg=dict(type='ReLU')), + enforce_decoder_input_project=True)) + +# weight_decay = 0.01 +# norm_weight_decay = 0.0 +# embed_weight_decay = 0.0 +embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +norm_multi = dict(lr_mult=1.0, decay_mult=0.0) +custom_keys = { + 'norm': norm_multi, + 'absolute_pos_embed': embed_multi, + 'relative_position_bias_table': embed_multi, + 'query_embed': embed_multi +} + +# optimizer +optimizer = dict( + type='AdamW', + lr=6e-5, + weight_decay=0.01, + eps=1e-8, + betas=(0.9, 0.999), + paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) +optimizer_config = dict(grad_clip=dict(max_norm=0.01, norm_type=2)) + +# learning policy +lr_config = dict( + policy='step', + gamma=0.1, + by_epoch=True, + step=[250], + warmup='linear', + warmup_by_epoch=False, + warmup_ratio=1e-6, + warmup_iters=1500) +runner = dict(type='EpochBasedRunner', max_epochs=300) diff --git a/configs/maskformer/metafile.yml b/configs/maskformer/metafile.yml new file mode 100644 index 0000000..6530fa1 --- /dev/null +++ b/configs/maskformer/metafile.yml @@ -0,0 +1,43 @@ +Collections: + - Name: MaskFormer + Metadata: + Training Data: COCO + Training Techniques: + - AdamW + - Weight Decay + Training Resources: 16x V100 GPUs + Architecture: + - MaskFormer + Paper: + URL: https://arxiv.org/pdf/2107.06278 + Title: 'Per-Pixel Classification is Not All You Need for Semantic Segmentation' + README: configs/maskformer/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.22.0/mmdet/models/detectors/maskformer.py#L7 + Version: v2.22.0 + +Models: + - Name: maskformer_r50_mstrain_16x1_75e_coco + In Collection: MaskFormer + Config: configs/maskformer/maskformer_r50_mstrain_16x1_75e_coco.py + Metadata: + Training Memory (GB): 16.2 + Epochs: 75 + Results: + - Task: Panoptic Segmentation + Dataset: COCO + Metrics: + PQ: 46.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco_20220221_141956-bc2699cb.pth + - Name: maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco + In Collection: MaskFormer + Config: configs/maskformer/maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco.py + Metadata: + Training Memory (GB): 27.2 + Epochs: 300 + Results: + - Task: Panoptic Segmentation + Dataset: COCO + Metrics: + PQ: 53.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco/maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco_20220326_221612-061b4eb8.pth diff --git a/configs/ms_rcnn/README.md b/configs/ms_rcnn/README.md index 5a80c0d..97bca05 100644 --- a/configs/ms_rcnn/README.md +++ b/configs/ms_rcnn/README.md @@ -1,24 +1,32 @@ -# Mask Scoring R-CNN +# MS R-CNN -## Abstract +> [Mask Scoring R-CNN](https://arxiv.org/abs/1903.00241) + + - +## Abstract Letting a deep network be aware of the quality of its own predictions is an interesting yet important problem. In the task of instance segmentation, the confidence of instance classification is used as mask quality score in most instance segmentation frameworks. However, the mask quality, quantified as the IoU between the instance mask and its ground truth, is usually not well correlated with classification score. In this paper, we study this problem and propose Mask Scoring R-CNN which contains a network block to learn the quality of the predicted instance masks. The proposed network block takes the instance feature and the corresponding predicted mask together to regress the mask IoU. The mask scoring strategy calibrates the misalignment between mask quality and mask score, and improves instance segmentation performance by prioritizing more accurate mask predictions during COCO AP evaluation. By extensive evaluations on the COCO dataset, Mask Scoring R-CNN brings consistent and noticeable gain with different models, and outperforms the state-of-the-art Mask R-CNN. We hope our simple and effective approach will provide a new direction for improving instance segmentation. -
- - +## Results and Models -## Citation +| Backbone | style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :----------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :---------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | caffe | 1x | 4.5 | | 38.2 | 36.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco/ms_rcnn_r50_caffe_fpn_1x_coco_20200702_180848-61c9355e.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco/ms_rcnn_r50_caffe_fpn_1x_coco_20200702_180848.log.json) | +| R-50-FPN | caffe | 2x | - | - | 38.8 | 36.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco/ms_rcnn_r50_caffe_fpn_2x_coco_bbox_mAP-0.388__segm_mAP-0.363_20200506_004738-ee87b137.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco/ms_rcnn_r50_caffe_fpn_2x_coco_20200506_004738.log.json) | +| R-101-FPN | caffe | 1x | 6.5 | | 40.4 | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco/ms_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.404__segm_mAP-0.376_20200506_004755-b9b12a37.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco/ms_rcnn_r101_caffe_fpn_1x_coco_20200506_004755.log.json) | +| R-101-FPN | caffe | 2x | - | - | 41.1 | 38.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco/ms_rcnn_r101_caffe_fpn_2x_coco_bbox_mAP-0.411__segm_mAP-0.381_20200506_011134-5f3cc74f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco/ms_rcnn_r101_caffe_fpn_2x_coco_20200506_011134.log.json) | +| R-X101-32x4d | pytorch | 2x | 7.9 | 11.0 | 41.8 | 38.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco/ms_rcnn_x101_32x4d_fpn_1x_coco_20200206-81fd1740.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco/ms_rcnn_x101_32x4d_fpn_1x_coco_20200206_100113.log.json) | +| R-X101-64x4d | pytorch | 1x | 11.0 | 8.0 | 43.0 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco/ms_rcnn_x101_64x4d_fpn_1x_coco_20200206-86ba88d2.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco/ms_rcnn_x101_64x4d_fpn_1x_coco_20200206_091744.log.json) | +| R-X101-64x4d | pytorch | 2x | 11.0 | 8.0 | 42.6 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco/ms_rcnn_x101_64x4d_fpn_2x_coco_20200308-02a445e2.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco/ms_rcnn_x101_64x4d_fpn_2x_coco_20200308_012247.log.json) | - +## Citation -``` +```latex @inproceedings{huang2019msrcnn, title={Mask Scoring R-CNN}, author={Zhaojin Huang and Lichao Huang and Yongchao Gong and Chang Huang and Xinggang Wang}, @@ -26,15 +34,3 @@ Letting a deep network be aware of the quality of its own predictions is an inte year={2019}, } ``` - -## Results and Models - -| Backbone | style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -|:-------------:|:----------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| -| R-50-FPN | caffe | 1x | 4.5 | | 38.2 | 36.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco/ms_rcnn_r50_caffe_fpn_1x_coco_20200702_180848-61c9355e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco/ms_rcnn_r50_caffe_fpn_1x_coco_20200702_180848.log.json) | -| R-50-FPN | caffe | 2x | - | - | 38.8 | 36.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco/ms_rcnn_r50_caffe_fpn_2x_coco_bbox_mAP-0.388__segm_mAP-0.363_20200506_004738-ee87b137.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco/ms_rcnn_r50_caffe_fpn_2x_coco_20200506_004738.log.json) | -| R-101-FPN | caffe | 1x | 6.5 | | 40.4 | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco/ms_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.404__segm_mAP-0.376_20200506_004755-b9b12a37.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco/ms_rcnn_r101_caffe_fpn_1x_coco_20200506_004755.log.json) | -| R-101-FPN | caffe | 2x | - | - | 41.1 | 38.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco/ms_rcnn_r101_caffe_fpn_2x_coco_bbox_mAP-0.411__segm_mAP-0.381_20200506_011134-5f3cc74f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco/ms_rcnn_r101_caffe_fpn_2x_coco_20200506_011134.log.json) | -| R-X101-32x4d | pytorch | 2x | 7.9 | 11.0 | 41.8 | 38.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco/ms_rcnn_x101_32x4d_fpn_1x_coco_20200206-81fd1740.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco/ms_rcnn_x101_32x4d_fpn_1x_coco_20200206_100113.log.json) | -| R-X101-64x4d | pytorch | 1x | 11.0 | 8.0 | 43.0 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco/ms_rcnn_x101_64x4d_fpn_1x_coco_20200206-86ba88d2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco/ms_rcnn_x101_64x4d_fpn_1x_coco_20200206_091744.log.json) | -| R-X101-64x4d | pytorch | 2x | 11.0 | 8.0 | 42.6 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco/ms_rcnn_x101_64x4d_fpn_2x_coco_20200308-02a445e2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco/ms_rcnn_x101_64x4d_fpn_2x_coco_20200308_012247.log.json) | diff --git a/configs/nas_fcos/README.md b/configs/nas_fcos/README.md index 2e3b56e..def8831 100644 --- a/configs/nas_fcos/README.md +++ b/configs/nas_fcos/README.md @@ -1,22 +1,29 @@ -# NAS-FCOS: Fast Neural Architecture Search for Object Detection +# NAS-FCOS -## Abstract +> [NAS-FCOS: Fast Neural Architecture Search for Object Detection](https://arxiv.org/abs/1906.04423) - + + +## Abstract The success of deep neural networks relies on significant architecture engineering. Recently neural architecture search (NAS) has emerged as a promise to greatly reduce manual effort in network design by automatically searching for optimal architectures, although typically such algorithms need an excessive amount of computational resources, e.g., a few thousand GPU-days. To date, on challenging vision tasks such as object detection, NAS, especially fast versions of NAS, is less studied. Here we propose to search for the decoder structure of object detectors with search efficiency being taken into consideration. To be more specific, we aim to efficiently search for the feature pyramid network (FPN) as well as the prediction head of a simple anchor-free object detector, namely FCOS, using a tailored reinforcement learning paradigm. With carefully designed search space, search algorithms and strategies for evaluating network quality, we are able to efficiently search a top-performing detection architecture within 4 days using 8 V100 GPUs. The discovered architecture surpasses state-of-the-art object detection models (such as Faster R-CNN, RetinaNet and FCOS) by 1.5 to 3.5 points in AP on the COCO dataset, with comparable computation complexity and memory footprint, demonstrating the efficacy of the proposed NAS for object detection. -
- - +## Results and Models + +| Head | Backbone | Style | GN-head | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :----------: | :------: | :---: | :-----: | :-----: | :------: | :------------: | :----: | :-------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| NAS-FCOSHead | R-50 | caffe | Y | 1x | | | 39.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200520-1bdba3ce.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200520.log.json) | +| FCOSHead | R-50 | caffe | Y | 1x | | | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200521-7fdcbce0.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200521.log.json) | -## Citation +**Notes:** - +- To be consistent with the author's implementation, we use 4 GPUs with 4 images/GPU. + +## Citation ```latex @article{wang2019fcos, @@ -26,14 +33,3 @@ The success of deep neural networks relies on significant architecture engineeri year={2019} } ``` - -## Results and Models - -| Head | Backbone | Style | GN-head | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -|:---------:|:---------:|:-------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| -| NAS-FCOSHead | R-50 | caffe | Y | 1x | | | 39.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200520-1bdba3ce.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200520.log.json) | -| FCOSHead | R-50 | caffe | Y | 1x | | | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200521-7fdcbce0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200521.log.json) | - -**Notes:** - -- To be consistent with the author's implementation, we use 4 GPUs with 4 images/GPU. diff --git a/configs/nas_fpn/README.md b/configs/nas_fpn/README.md index b2836d7..c5acf40 100644 --- a/configs/nas_fpn/README.md +++ b/configs/nas_fpn/README.md @@ -1,22 +1,29 @@ -# NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection +# NAS-FPN -## Abstract +> [NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection](https://arxiv.org/abs/1904.07392) - + -Current state-of-the-art convolutional architectures for object detection are manually designed. Here we aim to learn a better architecture of feature pyramid network for object detection. We adopt Neural Architecture Search and discover a new feature pyramid architecture in a novel scalable search space covering all cross-scale connections. The discovered architecture, named NAS-FPN, consists of a combination of top-down and bottom-up connections to fuse features across scales. NAS-FPN, combined with various backbone models in the RetinaNet framework, achieves better accuracy and latency tradeoff compared to state-of-the-art object detection models. NAS-FPN improves mobile detection accuracy by 2 AP compared to state-of-the-art SSDLite with MobileNetV2 model in [32] and achieves 48.3 AP which surpasses Mask R-CNN [10] detection accuracy with less computation time. +## Abstract + +Current state-of-the-art convolutional architectures for object detection are manually designed. Here we aim to learn a better architecture of feature pyramid network for object detection. We adopt Neural Architecture Search and discover a new feature pyramid architecture in a novel scalable search space covering all cross-scale connections. The discovered architecture, named NAS-FPN, consists of a combination of top-down and bottom-up connections to fuse features across scales. NAS-FPN, combined with various backbone models in the RetinaNet framework, achieves better accuracy and latency tradeoff compared to state-of-the-art object detection models. NAS-FPN improves mobile detection accuracy by 2 AP compared to state-of-the-art SSDLite with MobileNetV2 model in \[32\] and achieves 48.3 AP which surpasses Mask R-CNN \[10\] detection accuracy with less computation time. -
- - +## Results and Models + +We benchmark the new training schedule (crop training, large batch, unfrozen BN, 50 epochs) introduced in NAS-FPN. RetinaNet is used in the paper. -## Citation +| Backbone | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :---------: | :-----: | :------: | :------------: | :----: | :----------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | 50e | 12.9 | 22.9 | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_fpn_crop640_50e_coco/retinanet_r50_fpn_crop640_50e_coco-9b953d76.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_fpn_crop640_50e_coco/retinanet_r50_fpn_crop640_50e_coco_20200529_095329.log.json) | +| R-50-NASFPN | 50e | 13.2 | 23.0 | 40.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco/retinanet_r50_nasfpn_crop640_50e_coco-0ad1f644.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco/retinanet_r50_nasfpn_crop640_50e_coco_20200528_230008.log.json) | - +**Note**: We find that it is unstable to train NAS-FPN and there is a small chance that results can be 3% mAP lower. + +## Citation ```latex @inproceedings{ghiasi2019fpn, @@ -27,14 +34,3 @@ Current state-of-the-art convolutional architectures for object detection are ma year={2019} } ``` - -## Results and Models - -We benchmark the new training schedule (crop training, large batch, unfrozen BN, 50 epochs) introduced in NAS-FPN. RetinaNet is used in the paper. - -| Backbone | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -|:-----------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| -| R-50-FPN | 50e | 12.9 | 22.9 | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_fpn_crop640_50e_coco/retinanet_r50_fpn_crop640_50e_coco-9b953d76.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_fpn_crop640_50e_coco/retinanet_r50_fpn_crop640_50e_coco_20200529_095329.log.json) | -| R-50-NASFPN | 50e | 13.2 | 23.0 | 40.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco/retinanet_r50_nasfpn_crop640_50e_coco-0ad1f644.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco/retinanet_r50_nasfpn_crop640_50e_coco_20200528_230008.log.json) | - -**Note**: We find that it is unstable to train NAS-FPN and there is a small chance that results can be 3% mAP lower. diff --git a/configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py b/configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py index 6ea44a0..e4408fe 100644 --- a/configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py +++ b/configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py @@ -78,3 +78,8 @@ step=[30, 40]) # runtime settings runner = dict(type='EpochBasedRunner', max_epochs=50) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py b/configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py index 3e03919..1387a10 100644 --- a/configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py +++ b/configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py @@ -77,3 +77,8 @@ step=[30, 40]) # runtime settings runner = dict(type='EpochBasedRunner', max_epochs=50) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/openimages/README.md b/configs/openimages/README.md new file mode 100644 index 0000000..e5c1c27 --- /dev/null +++ b/configs/openimages/README.md @@ -0,0 +1,148 @@ +# Open Images Dataset + +> [Open Images Dataset](https://arxiv.org/abs/1811.00982) + + + +## Abstract + + + +#### Open Images v6 + +[Open Images](https://storage.googleapis.com/openimages/web/index.html) is a dataset of ~9M images annotated with image-level labels, +object bounding boxes, object segmentation masks, visual relationships, +and localized narratives: + +- It contains a total of 16M bounding boxes for 600 object classes on + 1.9M images, making it the largest existing dataset with object location + annotations. The boxes have been largely manually drawn by professional + annotators to ensure accuracy and consistency. The images are very diverse + and often contain complex scenes with several objects (8.3 per image on + average). + +- Open Images also offers visual relationship annotations, indicating pairs + of objects in particular relations (e.g. "woman playing guitar", "beer on + table"), object properties (e.g. "table is wooden"), and human actions (e.g. + "woman is jumping"). In total it has 3.3M annotations from 1,466 distinct + relationship triplets. + +- In V5 we added segmentation masks for 2.8M object instances in 350 classes. + Segmentation masks mark the outline of objects, which characterizes their + spatial extent to a much higher level of detail. + +- In V6 we added 675k localized narratives: multimodal descriptions of images + consisting of synchronized voice, text, and mouse traces over the objects being + described. (Note we originally launched localized narratives only on train in V6, + but since July 2020 we also have validation and test covered.) + +- Finally, the dataset is annotated with 59.9M image-level labels spanning 19,957 + classes. + +We believe that having a single dataset with unified annotations for image +classification, object detection, visual relationship detection, instance +segmentation, and multimodal image descriptions will enable to study these +tasks jointly and stimulate progress towards genuine scene understanding. + + + +
+ +
+ +#### Open Images Challenge 2019 + +[Open Images Challenges 2019](https://storage.googleapis.com/openimages/web/challenge2019.html) is based on the V5 release of the Open +Images dataset. The images of the dataset are very varied and +often contain complex scenes with several objects (explore the dataset). + +## Citation + +``` +@article{OpenImages, + author = {Alina Kuznetsova and Hassan Rom and Neil Alldrin and Jasper Uijlings and Ivan Krasin and Jordi Pont-Tuset and Shahab Kamali and Stefan Popov and Matteo Malloci and Alexander Kolesnikov and Tom Duerig and Vittorio Ferrari}, + title = {The Open Images Dataset V4: Unified image classification, object detection, and visual relationship detection at scale}, + year = {2020}, + journal = {IJCV} +} +``` + +## Prepare Dataset + +1. You need to download and extract Open Images dataset. + +2. The Open Images dataset does not have image metas (width and height of the image), + which will be used during evaluation. We suggest to get test image metas before + training/testing by using `tools/misc/get_image_metas.py`. + + **Usage** + + ```shell + python tools/misc/get_image_metas.py ${CONFIG} \ + --out ${OUTPUT FILE NAME} + ``` + +3. The directory should be like this: + + ```none + mmdetection + ├── mmdet + ├── tools + ├── configs + ├── data + │ ├── OpenImages + │ │ ├── annotations + │ │ │ ├── bbox_labels_600_hierarchy.json + │ │ │ ├── class-descriptions-boxable.csv + │ │ │ ├── oidv6-train-annotations-bbox.scv + │ │ │ ├── validation-annotations-bbox.csv + │ │ │ ├── validation-annotations-human-imagelabels-boxable.csv + │ │ │ ├── validation-image-metas.pkl # get from script + │ │ ├── challenge2019 + │ │ │ ├── challenge-2019-train-detection-bbox.txt + │ │ │ ├── challenge-2019-validation-detection-bbox.txt + │ │ │ ├── class_label_tree.np + │ │ │ ├── class_sample_train.pkl + │ │ │ ├── challenge-2019-validation-detection-human-imagelabels.csv # download from official website + │ │ │ ├── challenge-2019-validation-metas.pkl # get from script + │ │ ├── OpenImages + │ │ │ ├── train # training images + │ │ │ ├── test # testing images + │ │ │ ├── validation # validation images + ``` + +**Note**: + +1. The training and validation images of Open Images Challenge dataset are based on + Open Images v6, but the test images are different. +2. The Open Images Challenges annotations are obtained from [TSD](https://github.com/Sense-X/TSD). + You can also download the annotations from [official website](https://storage.googleapis.com/openimages/web/challenge2019_downloads.html), + and set data.train.type=OpenImagesDataset, data.val.type=OpenImagesDataset, and data.test.type=OpenImagesDataset in the config +3. If users do not want to use `validation-annotations-human-imagelabels-boxable.csv` and `challenge-2019-validation-detection-human-imagelabels.csv` + users can set `data.val.load_image_level_labels=False` and `data.test.load_image_level_labels=False` in the config. + Please note that loading image-levels label is the default of Open Images evaluation metric. + More details please refer to the [official website](https://storage.googleapis.com/openimages/web/evaluation.html) + +## Results and Models + +| Architecture | Backbone | Style | Lr schd | Sampler | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :---------------------------: | :------: | :-----: | :-----: | :-----------------: | :------: | :------------: | :----: | :----------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Faster R-CNN | R-50 | pytorch | 1x | Group Sampler | 7.7 | - | 51.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages/faster_rcnn_r50_fpn_32x2_1x_openimages_20211130_231159-e87ab7ce.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages/faster_rcnn_r50_fpn_32x2_1x_openimages_20211130_231159.log.json) | +| Faster R-CNN | R-50 | pytorch | 1x | Class Aware Sampler | 7.7 | - | 60.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages_20220306_202424-98c630e5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages_20220306_202424.log.json) | +| Faster R-CNN (Challenge 2019) | R-50 | pytorch | 1x | Group Sampler | 7.7 | - | 54.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages_challenge.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages_challenge/faster_rcnn_r50_fpn_32x2_1x_openimages_challenge_20220114_045100-0e79e5df.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages_challenge/faster_rcnn_r50_fpn_32x2_1x_openimages_challenge_20220114_045100.log.json) | +| Faster R-CNN (Challenge 2019) | R-50 | pytorch | 1x | Class Aware Sampler | 7.1 | - | 65.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages_challenge.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages_challenge/faster_rcnn_r50_fpn_32x2_cas_1x_openimages_challenge_20220221_192021-34c402d9.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages_challenge/faster_rcnn_r50_fpn_32x2_cas_1x_openimages_challenge_20220221_192021.log.json) | +| Retinanet | R-50 | pytorch | 1x | Group Sampler | 6.6 | - | 61.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/openimages/retinanet_r50_fpn_32x2_1x_openimages.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/openimages/retinanet_r50_fpn_32x2_1x_openimages/retinanet_r50_fpn_32x2_1x_openimages_20211223_071954-d2ae5462.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/openimages/retinanet_r50_fpn_32x2_1x_openimages/retinanet_r50_fpn_32x2_1x_openimages_20211223_071954.log.json) | +| SSD | VGG16 | pytorch | 36e | Group Sampler | 10.8 | - | 35.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/openimages/ssd300_32x8_36e_openimages.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/openimages/ssd300_32x8_36e_openimages/ssd300_32x8_36e_openimages_20211224_000232-dce93846.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/openimages/ssd300_32x8_36e_openimages/ssd300_32x8_36e_openimages_20211224_000232.log.json) | + +**Notes:** + +- 'cas' is short for 'Class Aware Sampler' + +### Results of consider image level labels + +| Architecture | Sampler | Consider Image Level Labels | box AP | +| :-------------------------------: | :-----------------: | :-------------------------: | :----: | +| Faster R-CNN r50 (Challenge 2019) | Group Sampler | w/o | 62.19 | +| Faster R-CNN r50 (Challenge 2019) | Group Sampler | w/ | 54.87 | +| Faster R-CNN r50 (Challenge 2019) | Class Aware Sampler | w/o | 71.77 | +| Faster R-CNN r50 (Challenge 2019) | Class Aware Sampler | w/ | 64.98 | diff --git a/configs/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages.py b/configs/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages.py new file mode 100644 index 0000000..3dfc341 --- /dev/null +++ b/configs/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages.py @@ -0,0 +1,23 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/openimages_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict(roi_head=dict(bbox_head=dict(num_classes=601))) + +# Using 32 GPUS while training +optimizer = dict(type='SGD', lr=0.08, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=26000, + warmup_ratio=1.0 / 64, + step=[8, 11]) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (32 GPUs) x (2 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages_challenge.py b/configs/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages_challenge.py new file mode 100644 index 0000000..c8900ad --- /dev/null +++ b/configs/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages_challenge.py @@ -0,0 +1,47 @@ +_base_ = ['faster_rcnn_r50_fpn_32x2_1x_openimages.py'] + +model = dict( + roi_head=dict(bbox_head=dict(num_classes=500)), + test_cfg=dict(rcnn=dict(score_thr=0.01))) + +# dataset settings +dataset_type = 'OpenImagesChallengeDataset' +data_root = 'data/OpenImages/' +data = dict( + train=dict( + type=dataset_type, + ann_file=data_root + + 'challenge2019/challenge-2019-train-detection-bbox.txt', + img_prefix=data_root + 'OpenImages/', + label_file=data_root + 'challenge2019/cls-label-description.csv', + hierarchy_file=data_root + 'challenge2019/class_label_tree.np'), + val=dict( + type=dataset_type, + ann_file=data_root + + 'challenge2019/challenge-2019-validation-detection-bbox.txt', + img_prefix=data_root + 'OpenImages/', + label_file=data_root + 'challenge2019/cls-label-description.csv', + hierarchy_file=data_root + 'challenge2019/class_label_tree.np', + meta_file=data_root + + 'challenge2019/challenge-2019-validation-metas.pkl', + image_level_ann_file=data_root + + 'challenge2019/challenge-2019-validation-detection-' + 'human-imagelabels.csv'), + test=dict( + type=dataset_type, + ann_file=data_root + + 'challenge2019/challenge-2019-validation-detection-bbox.txt', + img_prefix=data_root + 'OpenImages/', + label_file=data_root + 'challenge2019/cls-label-description.csv', + hierarchy_file=data_root + 'challenge2019/class_label_tree.np', + meta_file=data_root + + 'challenge2019/challenge-2019-validation-metas.pkl', + image_level_ann_file=data_root + + 'challenge2019/challenge-2019-validation-detection-' + 'human-imagelabels.csv')) +evaluation = dict(interval=1, metric='mAP') + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (32 GPUs) x (2 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages.py b/configs/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages.py new file mode 100644 index 0000000..88d029d --- /dev/null +++ b/configs/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages.py @@ -0,0 +1,5 @@ +_base_ = ['faster_rcnn_r50_fpn_32x2_1x_openimages.py'] + +# Use ClassAwareSampler +data = dict( + train_dataloader=dict(class_aware_sampler=dict(num_sample_class=1))) diff --git a/configs/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages_challenge.py b/configs/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages_challenge.py new file mode 100644 index 0000000..26bd64e --- /dev/null +++ b/configs/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages_challenge.py @@ -0,0 +1,5 @@ +_base_ = ['faster_rcnn_r50_fpn_32x2_1x_openimages_challenge.py'] + +# Use ClassAwareSampler +data = dict( + train_dataloader=dict(class_aware_sampler=dict(num_sample_class=1))) diff --git a/configs/openimages/metafile.yml b/configs/openimages/metafile.yml new file mode 100644 index 0000000..9be1726 --- /dev/null +++ b/configs/openimages/metafile.yml @@ -0,0 +1,102 @@ +Models: + - Name: faster_rcnn_r50_fpn_32x2_1x_openimages + In Collection: Faster R-CNN + Config: configs/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages.py + Metadata: + Training Memory (GB): 7.7 + Epochs: 12 + Training Data: Open Images v6 + Training Techniques: + - SGD with Momentum + - Weight Decay + Results: + - Task: Object Detection + Dataset: Open Images v6 + Metrics: + box AP: 51.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages/faster_rcnn_r50_fpn_32x2_1x_openimages_20211130_231159-e87ab7ce.pth + + - Name: retinanet_r50_fpn_32x2_1x_openimages + In Collection: RetinaNet + Config: configs/openimages/retinanet_r50_fpn_32x2_1x_openimages.py + Metadata: + Training Memory (GB): 6.6 + Epochs: 12 + Training Data: Open Images v6 + Training Techniques: + - SGD with Momentum + - Weight Decay + Results: + - Task: Object Detection + Dataset: Open Images v6 + Metrics: + box AP: 61.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/openimages/retinanet_r50_fpn_32x2_1x_openimages/retinanet_r50_fpn_32x2_1x_openimages_20211223_071954-d2ae5462.pth + + - Name: ssd300_32x8_36e_openimages + In Collection: SSD + Config: configs/openimages/ssd300_32x8_36e_openimages + Metadata: + Training Memory (GB): 10.8 + Epochs: 36 + Training Data: Open Images v6 + Training Techniques: + - SGD with Momentum + - Weight Decay + Results: + - Task: Object Detection + Dataset: Open Images v6 + Metrics: + box AP: 35.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/openimages/ssd300_32x8_36e_openimages/ssd300_32x8_36e_openimages_20211224_000232-dce93846.pth + + - Name: faster_rcnn_r50_fpn_32x2_1x_openimages_challenge + In Collection: Faster R-CNN + Config: configs/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages_challenge.py + Metadata: + Training Memory (GB): 7.7 + Epochs: 12 + Training Data: Open Images Challenge 2019 + Training Techniques: + - SGD with Momentum + - Weight Decay + Results: + - Task: Object Detection + Dataset: Open Images Challenge 2019 + Metrics: + box AP: 54.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages_challenge/faster_rcnn_r50_fpn_32x2_1x_openimages_challenge_20220114_045100-0e79e5df.pth + + - Name: faster_rcnn_r50_fpn_32x2_cas_1x_openimages + In Collection: Faster R-CNN + Config: configs/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages.py + Metadata: + Training Memory (GB): 7.7 + Epochs: 12 + Training Data: Open Images Challenge 2019 + Training Techniques: + - SGD with Momentum + - Weight Decay + Results: + - Task: Object Detection + Dataset: Open Images Challenge 2019 + Metrics: + box AP: 60.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages_20220306_202424-98c630e5.pth + + - Name: faster_rcnn_r50_fpn_32x2_cas_1x_openimages_challenge + In Collection: Faster R-CNN + Config: configs/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages_challenge.py + Metadata: + Training Memory (GB): 7.1 + Epochs: 12 + Training Data: Open Images Challenge 2019 + Training Techniques: + - SGD with Momentum + - Weight Decay + Results: + - Task: Object Detection + Dataset: Open Images Challenge 2019 + Metrics: + box AP: 65.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages_challenge/faster_rcnn_r50_fpn_32x2_cas_1x_openimages_challenge_20220221_192021-34c402d9.pth diff --git a/configs/openimages/retinanet_r50_fpn_32x2_1x_openimages.py b/configs/openimages/retinanet_r50_fpn_32x2_1x_openimages.py new file mode 100644 index 0000000..0191aa1 --- /dev/null +++ b/configs/openimages/retinanet_r50_fpn_32x2_1x_openimages.py @@ -0,0 +1,22 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/openimages_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict(bbox_head=dict(num_classes=601)) + +optimizer = dict(type='SGD', lr=0.08, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=26000, + warmup_ratio=1.0 / 64, + step=[8, 11]) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (32 GPUs) x (2 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/openimages/ssd300_32x8_36e_openimages.py b/configs/openimages/ssd300_32x8_36e_openimages.py new file mode 100644 index 0000000..e2565b9 --- /dev/null +++ b/configs/openimages/ssd300_32x8_36e_openimages.py @@ -0,0 +1,83 @@ +_base_ = [ + '../_base_/models/ssd300.py', '../_base_/datasets/openimages_detection.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_1x.py' +] +model = dict( + bbox_head=dict( + num_classes=601, + anchor_generator=dict(basesize_ratio_range=(0.2, 0.9)))) +# dataset settings +dataset_type = 'OpenImagesDataset' +data_root = 'data/OpenImages/' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True, normed_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(300, 300), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(300, 300), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, # using 32 GPUS while training. + workers_per_gpu=0, # workers_per_gpu > 0 may occur out of memory + train=dict( + _delete_=True, + type='RepeatDataset', + times=3, + dataset=dict( + type=dataset_type, + ann_file=data_root + + 'annotations/oidv6-train-annotations-bbox.csv', + img_prefix=data_root + 'OpenImages/train/', + label_file=data_root + + 'annotations/class-descriptions-boxable.csv', + hierarchy_file=data_root + + 'annotations/bbox_labels_600_hierarchy.json', + pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=0.04, momentum=0.9, weight_decay=5e-4) +optimizer_config = dict() +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=20000, + warmup_ratio=0.001, + step=[8, 11]) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (32 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/paa/README.md b/configs/paa/README.md index 05e65d2..c8861ec 100644 --- a/configs/paa/README.md +++ b/configs/paa/README.md @@ -1,22 +1,41 @@ -# Probabilistic Anchor Assignment with IoU Prediction for Object Detection +# PAA -## Abstract +> [Probabilistic Anchor Assignment with IoU Prediction for Object Detection](https://arxiv.org/abs/2007.08103) + + - +## Abstract In object detection, determining which anchors to assign as positive or negative samples, known as anchor assignment, has been revealed as a core procedure that can significantly affect a model's performance. In this paper we propose a novel anchor assignment strategy that adaptively separates anchors into positive and negative samples for a ground truth bounding box according to the model's learning status such that it is able to reason about the separation in a probabilistic manner. To do so we first calculate the scores of anchors conditioned on the model and fit a probability distribution to these scores. The model is then trained with anchors separated into positive and negative samples according to their probabilities. Moreover, we investigate the gap between the training and testing objectives and propose to predict the Intersection-over-Unions of detected boxes as a measure of localization quality to reduce the discrepancy. The combined score of classification and localization qualities serving as a box selection metric in non-maximum suppression well aligns with the proposed anchor assignment strategy and leads significant performance improvements. The proposed methods only add a single convolutional layer to RetinaNet baseline and does not require multiple anchors per location, so are efficient. Experimental results verify the effectiveness of the proposed methods. Especially, our models set new records for single-stage detectors on MS COCO test-dev dataset with various backbones. -
- - +## Results and Models -## Citation +We provide config files to reproduce the object detection results in the +ECCV 2020 paper for Probabilistic Anchor Assignment with IoU +Prediction for Object Detection. - +| Backbone | Lr schd | Mem (GB) | Score voting | box AP | Config | Download | +| :-------: | :-----: | :------: | :----------: | :----: | :---------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | 12e | 3.7 | True | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1x_coco/paa_r50_fpn_1x_coco_20200821-936edec3.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1x_coco/paa_r50_fpn_1x_coco_20200821-936edec3.log.json) | +| R-50-FPN | 12e | 3.7 | False | 40.2 | - | | +| R-50-FPN | 18e | 3.7 | True | 41.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r50_fpn_1.5x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1.5x_coco/paa_r50_fpn_1.5x_coco_20200823-805d6078.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1.5x_coco/paa_r50_fpn_1.5x_coco_20200823-805d6078.log.json) | +| R-50-FPN | 18e | 3.7 | False | 41.2 | - | | +| R-50-FPN | 24e | 3.7 | True | 41.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r50_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_2x_coco/paa_r50_fpn_2x_coco_20200821-c98bfc4e.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_2x_coco/paa_r50_fpn_2x_coco_20200821-c98bfc4e.log.json) | +| R-50-FPN | 36e | 3.7 | True | 43.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r50_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_mstrain_3x_coco/paa_r50_fpn_mstrain_3x_coco_20210121_145722-06a6880b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_mstrain_3x_coco/paa_r50_fpn_mstrain_3x_coco_20210121_145722.log.json) | +| R-101-FPN | 12e | 6.2 | True | 42.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_1x_coco/paa_r101_fpn_1x_coco_20200821-0a1825a4.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_1x_coco/paa_r101_fpn_1x_coco_20200821-0a1825a4.log.json) | +| R-101-FPN | 12e | 6.2 | False | 42.4 | - | | +| R-101-FPN | 24e | 6.2 | True | 43.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r101_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_2x_coco/paa_r101_fpn_2x_coco_20200821-6829f96b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_2x_coco/paa_r101_fpn_2x_coco_20200821-6829f96b.log.json) | +| R-101-FPN | 36e | 6.2 | True | 45.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r101_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_mstrain_3x_coco/paa_r101_fpn_mstrain_3x_coco_20210122_084202-83250d22.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_mstrain_3x_coco/paa_r101_fpn_mstrain_3x_coco_20210122_084202.log.json) | + +**Note**: + +1. We find that the performance is unstable with 1x setting and may fluctuate by about 0.2 mAP. We report the best results. + +## Citation ```latex @inproceedings{paa-eccv2020, @@ -26,26 +45,3 @@ In object detection, determining which anchors to assign as positive or negative year={2020} } ``` - -## Results and Models - -We provide config files to reproduce the object detection results in the -ECCV 2020 paper for Probabilistic Anchor Assignment with IoU -Prediction for Object Detection. - -| Backbone | Lr schd | Mem (GB) | Score voting | box AP | Config | Download | -|:-----------:|:-------:|:--------:|:------------:|:------:|:------:|:--------:| -| R-50-FPN | 12e | 3.7 | True | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1x_coco/paa_r50_fpn_1x_coco_20200821-936edec3.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1x_coco/paa_r50_fpn_1x_coco_20200821-936edec3.log.json) | -| R-50-FPN | 12e | 3.7 | False | 40.2 | - | -| R-50-FPN | 18e | 3.7 | True | 41.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r50_fpn_1.5x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1.5x_coco/paa_r50_fpn_1.5x_coco_20200823-805d6078.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1.5x_coco/paa_r50_fpn_1.5x_coco_20200823-805d6078.log.json) | -| R-50-FPN | 18e | 3.7 | False | 41.2 | - | -| R-50-FPN | 24e | 3.7 | True | 41.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r50_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_2x_coco/paa_r50_fpn_2x_coco_20200821-c98bfc4e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_2x_coco/paa_r50_fpn_2x_coco_20200821-c98bfc4e.log.json) | -| R-50-FPN | 36e | 3.7 | True | 43.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r50_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_mstrain_3x_coco/paa_r50_fpn_mstrain_3x_coco_20210121_145722-06a6880b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_mstrain_3x_coco/paa_r50_fpn_mstrain_3x_coco_20210121_145722.log.json) | -| R-101-FPN | 12e | 6.2 | True | 42.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_1x_coco/paa_r101_fpn_1x_coco_20200821-0a1825a4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_1x_coco/paa_r101_fpn_1x_coco_20200821-0a1825a4.log.json) | -| R-101-FPN | 12e | 6.2 | False | 42.4 | - | -| R-101-FPN | 24e | 6.2 | True | 43.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r101_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_2x_coco/paa_r101_fpn_2x_coco_20200821-6829f96b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_2x_coco/paa_r101_fpn_2x_coco_20200821-6829f96b.log.json) | -| R-101-FPN | 36e | 6.2 | True | 45.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r101_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_mstrain_3x_coco/paa_r101_fpn_mstrain_3x_coco_20210122_084202-83250d22.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_mstrain_3x_coco/paa_r101_fpn_mstrain_3x_coco_20210122_084202.log.json) | - -**Note**: - -1. We find that the performance is unstable with 1x setting and may fluctuate by about 0.2 mAP. We report the best results. diff --git a/configs/pafpn/README.md b/configs/pafpn/README.md index 8516d34..ae1e3a3 100644 --- a/configs/pafpn/README.md +++ b/configs/pafpn/README.md @@ -1,24 +1,26 @@ -# Path Aggregation Network for Instance Segmentation +# PAFPN -## Abstract +> [Path Aggregation Network for Instance Segmentation](https://arxiv.org/abs/1803.01534) + + - +## Abstract The way that information propagates in neural networks is of great importance. In this paper, we propose Path Aggregation Network (PANet) aiming at boosting information flow in proposal-based instance segmentation framework. Specifically, we enhance the entire feature hierarchy with accurate localization signals in lower layers by bottom-up path augmentation, which shortens the information path between lower layers and topmost feature. We present adaptive feature pooling, which links feature grid and all feature levels to make useful information in each feature level propagate directly to following proposal subnetworks. A complementary branch capturing different views for each proposal is created to further improve mask prediction. These improvements are simple to implement, with subtle extra computational overhead. Our PANet reaches the 1st place in the COCO 2017 Challenge Instance Segmentation task and the 2nd place in Object Detection task without large-batch training. It is also state-of-the-art on MVD and Cityscapes. -
- - +## Results and Models -## Citation +| Backbone | style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | pytorch | 1x | 4.0 | 17.2 | 37.5 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pafpn/faster_rcnn_r50_pafpn_1x_coco/faster_rcnn_r50_pafpn_1x_coco_bbox_mAP-0.375_20200503_105836-b7b4b9bd.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pafpn/faster_rcnn_r50_pafpn_1x_coco/faster_rcnn_r50_pafpn_1x_coco_20200503_105836.log.json) | - +## Citation -``` +```latex @inproceedings{liu2018path, author = {Shu Liu and Lu Qi and @@ -30,11 +32,3 @@ The way that information propagates in neural networks is of great importance. I year = {2018} } ``` - -## Results and Models - -## Results and Models - -| Backbone | style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -|:-------------:|:----------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| -| R-50-FPN | pytorch | 1x | 4.0 | 17.2 | 37.5 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pafpn/faster_rcnn_r50_pafpn_1x_coco/faster_rcnn_r50_pafpn_1x_coco_bbox_mAP-0.375_20200503_105836-b7b4b9bd.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pafpn/faster_rcnn_r50_pafpn_1x_coco/faster_rcnn_r50_pafpn_1x_coco_20200503_105836.log.json) | diff --git a/configs/panoptic_fpn/README.md b/configs/panoptic_fpn/README.md index 083fb6e..12980ce 100644 --- a/configs/panoptic_fpn/README.md +++ b/configs/panoptic_fpn/README.md @@ -1,37 +1,17 @@ -# Panoptic feature pyramid networks -## Abstract +# Panoptic FPN + +> [Panoptic feature pyramid networks](https://arxiv.org/abs/1901.02446) + + - +## Abstract The recently introduced panoptic segmentation task has renewed our community's interest in unifying the tasks of instance segmentation (for thing classes) and semantic segmentation (for stuff classes). However, current state-of-the-art methods for this joint task use separate and dissimilar networks for instance and semantic segmentation, without performing any shared computation. In this work, we aim to unify these methods at the architectural level, designing a single network for both tasks. Our approach is to endow Mask R-CNN, a popular instance segmentation method, with a semantic segmentation branch using a shared Feature Pyramid Network (FPN) backbone. Surprisingly, this simple baseline not only remains effective for instance segmentation, but also yields a lightweight, top-performing method for semantic segmentation. In this work, we perform a detailed study of this minimally extended version of Mask R-CNN with FPN, which we refer to as Panoptic FPN, and show it is a robust and accurate baseline for both tasks. Given its effectiveness and conceptual simplicity, we hope our method can serve as a strong baseline and aid future research in panoptic segmentation. -
- - - -## Citation - - -The base method for panoptic segmentation task. - -``` -@inproceedings{kirillov2018panopticfpn, - author = { - Alexander Kirillov, - Ross Girshick, - Kaiming He, - Piotr Dollar, - }, - title = {Panoptic Feature Pyramid Networks}, - booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, - year = {2019} -} -``` - ## Dataset PanopticFPN requires COCO and [COCO-panoptic](http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip) dataset for training and evaluation. You need to download and extract it in the COCO dataset path. @@ -56,9 +36,27 @@ mmdetection ## Results and Models -| Backbone | style | Lr schd | Mem (GB) | Inf time (fps) | PQ | SQ | RQ | PQ_th | SQ_th | RQ_th | PQ_st | SQ_st | RQ_st | Config | Download | -|:-------------:|:----------:|:-------:|:--------:|:--------------:|:----:|:----:|:----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:------:|:--------:| -| R-50-FPN | pytorch | 1x | 4.7 | | 40.2 | 77.8 | 49.3 | 47.8 | 80.9 | 57.5 | 28.9 | 73.1 | 37.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco/panoptic_fpn_r50_fpn_1x_coco_20210821_101153-9668fd13.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco/panoptic_fpn_r50_fpn_1x_coco_20210821_101153.log.json) | -| R-50-FPN | pytorch | 3x | - | - | 42.5 | 78.1 | 51.7 | 50.3 | 81.5 | 60.3 | 30.7 | 73.0 | 38.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/panoptic_fpn/panoptic_fpn_r50_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r50_fpn_mstrain_3x_coco/panoptic_fpn_r50_fpn_mstrain_3x_coco_20210824_171155-5650f98b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r50_fpn_mstrain_3x_coco/panoptic_fpn_r50_fpn_mstrain_3x_coco_20210824_171155.log.json) | -| R-101-FPN | pytorch | 1x | 6.7 | | 42.2 | 78.3 | 51.4 | 50.1 | 81.4 | 59.9 | 30.3 | 73.6 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/panoptic_fpn/panoptic_fpn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r101_fpn_1x_coco/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r101_fpn_1x_coco/panoptic_fpn_r101_fpn_1x_coco_20210820_193950.log.json) | -| R-101-FPN | pytorch | 3x | - | - | 44.1 | 78.9 | 53.6 | 52.1 | 81.7 | 62.3 | 32.0 | 74.6 | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/panoptic_fpn/panoptic_fpn_r101_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r101_fpn_mstrain_3x_coco/panoptic_fpn_r101_fpn_mstrain_3x_coco_20210823_114712-9c99acc4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r101_fpn_mstrain_3x_coco/panoptic_fpn_r101_fpn_mstrain_3x_coco_20210823_114712.log.json) | +| Backbone | style | Lr schd | Mem (GB) | Inf time (fps) | PQ | SQ | RQ | PQ_th | SQ_th | RQ_th | PQ_st | SQ_st | RQ_st | Config | Download | +| :-------: | :-----: | :-----: | :------: | :------------: | :--: | :--: | :--: | :---: | :---: | :---: | :---: | :---: | :---: | :---------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | pytorch | 1x | 4.7 | | 40.2 | 77.8 | 49.3 | 47.8 | 80.9 | 57.5 | 28.9 | 73.1 | 37.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco/panoptic_fpn_r50_fpn_1x_coco_20210821_101153-9668fd13.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco/panoptic_fpn_r50_fpn_1x_coco_20210821_101153.log.json) | +| R-50-FPN | pytorch | 3x | - | - | 42.5 | 78.1 | 51.7 | 50.3 | 81.5 | 60.3 | 30.7 | 73.0 | 38.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/panoptic_fpn/panoptic_fpn_r50_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r50_fpn_mstrain_3x_coco/panoptic_fpn_r50_fpn_mstrain_3x_coco_20210824_171155-5650f98b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r50_fpn_mstrain_3x_coco/panoptic_fpn_r50_fpn_mstrain_3x_coco_20210824_171155.log.json) | +| R-101-FPN | pytorch | 1x | 6.7 | | 42.2 | 78.3 | 51.4 | 50.1 | 81.4 | 59.9 | 30.3 | 73.6 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/panoptic_fpn/panoptic_fpn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r101_fpn_1x_coco/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r101_fpn_1x_coco/panoptic_fpn_r101_fpn_1x_coco_20210820_193950.log.json) | +| R-101-FPN | pytorch | 3x | - | - | 44.1 | 78.9 | 53.6 | 52.1 | 81.7 | 62.3 | 32.0 | 74.6 | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/panoptic_fpn/panoptic_fpn_r101_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r101_fpn_mstrain_3x_coco/panoptic_fpn_r101_fpn_mstrain_3x_coco_20210823_114712-9c99acc4.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r101_fpn_mstrain_3x_coco/panoptic_fpn_r101_fpn_mstrain_3x_coco_20210823_114712.log.json) | + +## Citation + +The base method for panoptic segmentation task. + +```latex +@inproceedings{kirillov2018panopticfpn, + author = { + Alexander Kirillov, + Ross Girshick, + Kaiming He, + Piotr Dollar, + }, + title = {Panoptic Feature Pyramid Networks}, + booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2019} +} +``` diff --git a/configs/pascal_voc/README.md b/configs/pascal_voc/README.md index af33edb..3c09813 100644 --- a/configs/pascal_voc/README.md +++ b/configs/pascal_voc/README.md @@ -1,26 +1,32 @@ -# The Pascal Visual Object Classes (VOC) Challenge +# Pascal VOC -## Abstract +> [The Pascal Visual Object Classes (VOC) Challenge](https://link.springer.com/article/10.1007/s11263-009-0275-4) + + - +## Abstract The Pascal Visual Object Classes (VOC) challenge is a benchmark in visual object category recognition and detection, providing the vision and machine learning communities with a standard dataset of images and annotation, and standard evaluation procedures. Organised annually from 2005 to present, the challenge and its associated dataset has become accepted as the benchmark for object detection. This paper describes the dataset and evaluation procedure. We review the state-of-the-art in evaluated methods for both classification and detection, analyse whether the methods are statistically different, what they are learning from the images (e.g. the object or its context), and what the methods find easy or confuse. The paper concludes with lessons learnt in the three year history of the challenge, and proposes directions for future improvement and extension. -
- - +## Results and Models -## Citation +| Architecture | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :------: | :-----: | :-----: | :------: | :------------: | :----: | :--------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Faster R-CNN C4 | R-50 | caffe | 18k | | - | 80.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pascal_voc/faster_rcnn_r50_caffe_c4_mstrain_18k_voc0712.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pascal_voc/faster_rcnn_r50_caffe_c4_mstrain_18k_voc0712//home/dong/code_sensetime/2022Q1/mmdetection/work_dirs/prepare_voc/gather/pascal_voc/faster_rcnn_r50_caffe_c4_mstrain_18k_voc0712/faster_rcnn_r50_caffe_c4_mstrain_18k_voc0712_20220314_234327-847a14d2.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pascal_voc/faster_rcnn_r50_caffe_c4_mstrain_18k_voc0712/faster_rcnn_r50_caffe_c4_mstrain_18k_voc0712_20220314_234327.log.json) | +| Faster R-CNN | R-50 | pytorch | 1x | 2.6 | - | 80.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712/faster_rcnn_r50_fpn_1x_voc0712_20220320_192712-54bef0f3.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712/faster_rcnn_r50_fpn_1x_voc0712_20220320_192712.log.json) | +| Retinanet | R-50 | pytorch | 1x | 2.1 | - | 77.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pascal_voc/retinanet_r50_fpn_1x_voc0712.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pascal_voc/retinanet_r50_fpn_1x_voc0712/retinanet_r50_fpn_1x_voc0712_20200617-47cbdd0e.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pascal_voc/retinanet_r50_fpn_1x_voc0712/retinanet_r50_fpn_1x_voc0712_20200616_014642.log.json) | +| SSD300 | VGG16 | - | 120e | - | - | 76.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pascal_voc/ssd300_voc0712.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pascal_voc/ssd300_voc0712/ssd300_voc0712_20220320_194658-17edda1b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pascal_voc/ssd300_voc0712/ssd300_voc0712_20220320_194658.log.json) | +| SSD512 | VGG16 | - | 120e | - | - | 79.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pascal_voc/ssd512_voc0712.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pascal_voc/ssd512_voc0712/ssd512_voc0712_20220320_194717-03cefefe.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pascal_voc/ssd512_voc0712/ssd512_voc0712_20220320_194717.log.json) | - +## Citation -``` +```latex @Article{Everingham10, author = "Everingham, M. and Van~Gool, L. and Williams, C. K. I. and Winn, J. and Zisserman, A.", title = "The Pascal Visual Object Classes (VOC) Challenge", @@ -32,10 +38,3 @@ This paper describes the dataset and evaluation procedure. We review the state-o pages = "303--338", } ``` - -## Results and Models - -| Architecture | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -|:------------:|:---------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| -| Faster R-CNN | R-50 | pytorch | 1x | 2.6 | - | 79.5 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712/faster_rcnn_r50_fpn_1x_voc0712_20200624-c9895d40.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712/20200623_015208.log.json) | -| Retinanet | R-50 | pytorch | 1x | 2.1 | - | 77.3 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pascal_voc/retinanet_r50_fpn_1x_voc0712.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pascal_voc/retinanet_r50_fpn_1x_voc0712/retinanet_r50_fpn_1x_voc0712_20200617-47cbdd0e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pascal_voc/retinanet_r50_fpn_1x_voc0712/retinanet_r50_fpn_1x_voc0712_20200616_014642.log.json) | diff --git a/configs/pascal_voc/faster_rcnn_r50_caffe_c4_mstrain_18k_voc0712.py b/configs/pascal_voc/faster_rcnn_r50_caffe_c4_mstrain_18k_voc0712.py new file mode 100644 index 0000000..7bb1d73 --- /dev/null +++ b/configs/pascal_voc/faster_rcnn_r50_caffe_c4_mstrain_18k_voc0712.py @@ -0,0 +1,81 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_caffe_c4.py', + '../_base_/default_runtime.py' +] +model = dict(roi_head=dict(bbox_head=dict(num_classes=20))) + +# dataset settings +dataset_type = 'VOCDataset' +data_root = 'data/VOCdevkit/' +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 480), (1333, 512), (1333, 544), (1333, 576), + (1333, 608), (1333, 640), (1333, 672), (1333, 704), + (1333, 736), (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=[ + data_root + 'VOC2007/ImageSets/Main/trainval.txt', + data_root + 'VOC2012/ImageSets/Main/trainval.txt' + ], + img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'], + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', + img_prefix=data_root + 'VOC2007/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', + img_prefix=data_root + 'VOC2007/', + pipeline=test_pipeline)) + +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) + +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=100, + warmup_ratio=0.001, + step=[12000, 16000]) + +# Runner type +runner = dict(type='IterBasedRunner', max_iters=18000) + +checkpoint_config = dict(interval=3000) +evaluation = dict(interval=3000, metric='mAP') diff --git a/configs/pascal_voc/ssd300_voc0712.py b/configs/pascal_voc/ssd300_voc0712.py index 4d6bbd9..e7008ae 100644 --- a/configs/pascal_voc/ssd300_voc0712.py +++ b/configs/pascal_voc/ssd300_voc0712.py @@ -67,3 +67,8 @@ checkpoint_config = dict(interval=1) # runtime settings runner = dict(type='EpochBasedRunner', max_epochs=24) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/pisa/README.md b/configs/pisa/README.md index d1ea3a4..c847c85 100644 --- a/configs/pisa/README.md +++ b/configs/pisa/README.md @@ -1,22 +1,44 @@ -# Prime Sample Attention in Object Detection +# PISA -## Abstract +> [Prime Sample Attention in Object Detection](https://arxiv.org/abs/1904.04821) - + + +## Abstract It is a common paradigm in object detection frameworks to treat all samples equally and target at maximizing the performance on average. In this work, we revisit this paradigm through a careful study on how different samples contribute to the overall performance measured in terms of mAP. Our study suggests that the samples in each mini-batch are neither independent nor equally important, and therefore a better classifier on average does not necessarily mean higher mAP. Motivated by this study, we propose the notion of Prime Samples, those that play a key role in driving the detection performance. We further develop a simple yet effective sampling and learning strategy called PrIme Sample Attention (PISA) that directs the focus of the training process towards such samples. Our experiments demonstrate that it is often more effective to focus on prime samples than hard samples when training a detector. Particularly, On the MSCOCO dataset, PISA outperforms the random sampling baseline and hard mining schemes, e.g., OHEM and Focal Loss, consistently by around 2% on both single-stage and two-stage detectors, even with a strong backbone ResNeXt-101. -
- - +## Results and Models + +| PISA | Network | Backbone | Lr schd | box AP | mask AP | Config | Download | +| :--: | :----------: | :------------: | :-----: | :----: | :-----: | :---------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| × | Faster R-CNN | R-50-FPN | 1x | 36.4 | | - | | +| √ | Faster R-CNN | R-50-FPN | 1x | 38.4 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_r50_fpn_1x_coco/pisa_faster_rcnn_r50_fpn_1x_coco-dea93523.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_r50_fpn_1x_coco/pisa_faster_rcnn_r50_fpn_1x_coco_20200506_185619.log.json) | +| × | Faster R-CNN | X101-32x4d-FPN | 1x | 40.1 | | - | | +| √ | Faster R-CNN | X101-32x4d-FPN | 1x | 41.9 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco-e4accec4.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco_20200505_181503.log.json) | +| × | Mask R-CNN | R-50-FPN | 1x | 37.3 | 34.2 | - | | +| √ | Mask R-CNN | R-50-FPN | 1x | 39.1 | 35.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_mask_rcnn_r50_fpn_1x_coco/pisa_mask_rcnn_r50_fpn_1x_coco-dfcedba6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_mask_rcnn_r50_fpn_1x_coco/pisa_mask_rcnn_r50_fpn_1x_coco_20200508_150500.log.json) | +| × | Mask R-CNN | X101-32x4d-FPN | 1x | 41.1 | 37.1 | - | | +| √ | Mask R-CNN | X101-32x4d-FPN | 1x | | | | | +| × | RetinaNet | R-50-FPN | 1x | 35.6 | | - | | +| √ | RetinaNet | R-50-FPN | 1x | 36.9 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_retinanet_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_r50_fpn_1x_coco/pisa_retinanet_r50_fpn_1x_coco-76409952.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_r50_fpn_1x_coco/pisa_retinanet_r50_fpn_1x_coco_20200504_014311.log.json) | +| × | RetinaNet | X101-32x4d-FPN | 1x | 39.0 | | - | | +| √ | RetinaNet | X101-32x4d-FPN | 1x | 40.7 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco/pisa_retinanet_x101_32x4d_fpn_1x_coco-a0c13c73.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco/pisa_retinanet_x101_32x4d_fpn_1x_coco_20200505_001404.log.json) | +| × | SSD300 | VGG16 | 1x | 25.6 | | - | | +| √ | SSD300 | VGG16 | 1x | 27.6 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_ssd300_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd300_coco/pisa_ssd300_coco-710e3ac9.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd300_coco/pisa_ssd300_coco_20200504_144325.log.json) | +| × | SSD512 | VGG16 | 1x | 29.3 | | - | | +| √ | SSD512 | VGG16 | 1x | 31.8 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_ssd512_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd512_coco/pisa_ssd512_coco-247addee.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd512_coco/pisa_ssd512_coco_20200508_131030.log.json) | -## Citation +**Notes:** - +- In the original paper, all models are trained and tested on mmdet v1.x, thus results may not be exactly the same with this release on v2.0. +- It is noted PISA only modifies the training pipeline so the inference time remains the same with the baseline. + +## Citation ```latex @inproceedings{cao2019prime, @@ -26,29 +48,3 @@ It is a common paradigm in object detection frameworks to treat all samples equa year={2020} } ``` - -## Results and models - -| PISA | Network | Backbone | Lr schd | box AP | mask AP | Config | Download | -|:----:|:-------:|:-------------------:|:-------:|:------:|:-------:|:------:|:--------:| -| × | Faster R-CNN | R-50-FPN | 1x | 36.4 | | - | -| √ | Faster R-CNN | R-50-FPN | 1x | 38.4 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_r50_fpn_1x_coco/pisa_faster_rcnn_r50_fpn_1x_coco-dea93523.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_r50_fpn_1x_coco/pisa_faster_rcnn_r50_fpn_1x_coco_20200506_185619.log.json) | -| × | Faster R-CNN | X101-32x4d-FPN | 1x | 40.1 | | - | -| √ | Faster R-CNN | X101-32x4d-FPN | 1x | 41.9 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco-e4accec4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco_20200505_181503.log.json) | -| × | Mask R-CNN | R-50-FPN | 1x | 37.3 | 34.2 | - | -| √ | Mask R-CNN | R-50-FPN | 1x | 39.1 | 35.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_mask_rcnn_r50_fpn_1x_coco/pisa_mask_rcnn_r50_fpn_1x_coco-dfcedba6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_mask_rcnn_r50_fpn_1x_coco/pisa_mask_rcnn_r50_fpn_1x_coco_20200508_150500.log.json) | -| × | Mask R-CNN | X101-32x4d-FPN | 1x | 41.1 | 37.1 | - | -| √ | Mask R-CNN | X101-32x4d-FPN | 1x | | | | -| × | RetinaNet | R-50-FPN | 1x | 35.6 | | - | -| √ | RetinaNet | R-50-FPN | 1x | 36.9 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_retinanet_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_r50_fpn_1x_coco/pisa_retinanet_r50_fpn_1x_coco-76409952.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_r50_fpn_1x_coco/pisa_retinanet_r50_fpn_1x_coco_20200504_014311.log.json) | -| × | RetinaNet | X101-32x4d-FPN | 1x | 39.0 | | - | -| √ | RetinaNet | X101-32x4d-FPN | 1x | 40.7 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco/pisa_retinanet_x101_32x4d_fpn_1x_coco-a0c13c73.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco/pisa_retinanet_x101_32x4d_fpn_1x_coco_20200505_001404.log.json) | -| × | SSD300 | VGG16 | 1x | 25.6 | | - | -| √ | SSD300 | VGG16 | 1x | 27.6 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_ssd300_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd300_coco/pisa_ssd300_coco-710e3ac9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd300_coco/pisa_ssd300_coco_20200504_144325.log.json) | -| × | SSD300 | VGG16 | 1x | 29.3 | | - | -| √ | SSD300 | VGG16 | 1x | 31.8 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_ssd512_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd512_coco/pisa_ssd512_coco-247addee.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd512_coco/pisa_ssd512_coco_20200508_131030.log.json) | - -**Notes:** - -- In the original paper, all models are trained and tested on mmdet v1.x, thus results may not be exactly the same with this release on v2.0. -- It is noted PISA only modifies the training pipeline so the inference time remains the same with the baseline. diff --git a/configs/point_rend/README.md b/configs/point_rend/README.md index 4a9b6f2..183e83d 100644 --- a/configs/point_rend/README.md +++ b/configs/point_rend/README.md @@ -1,22 +1,27 @@ -# PointRend: Image Segmentation as Rendering +# PointRend -## Abstract +> [PointRend: Image Segmentation as Rendering](https://arxiv.org/abs/1912.08193) + + - +## Abstract We present a new method for efficient high-quality image segmentation of objects and scenes. By analogizing classical computer graphics methods for efficient rendering with over- and undersampling challenges faced in pixel labeling tasks, we develop a unique perspective of image segmentation as a rendering problem. From this vantage, we present the PointRend (Point-based Rendering) neural network module: a module that performs point-based segmentation predictions at adaptively selected locations based on an iterative subdivision algorithm. PointRend can be flexibly applied to both instance and semantic segmentation tasks by building on top of existing state-of-the-art models. While many concrete implementations of the general idea are possible, we show that a simple design already achieves excellent results. Qualitatively, PointRend outputs crisp object boundaries in regions that are over-smoothed by previous methods. Quantitatively, PointRend yields significant gains on COCO and Cityscapes, for both instance and semantic segmentation. PointRend's efficiency enables output resolutions that are otherwise impractical in terms of memory or computation compared to existing approaches. -
- - +## Results and Models -## Citation +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :------: | :---: | :-----: | :------: | :------------: | :----: | :-----: | :----------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | caffe | 1x | 4.6 | | 38.4 | 36.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco/point_rend_r50_caffe_fpn_mstrain_1x_coco-1bcb5fb4.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco/point_rend_r50_caffe_fpn_mstrain_1x_coco_20200612_161407.log.json) | +| R-50-FPN | caffe | 3x | 4.6 | | 41.0 | 38.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco/point_rend_r50_caffe_fpn_mstrain_3x_coco-e0ebb6b7.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco/point_rend_r50_caffe_fpn_mstrain_3x_coco_20200614_002632.log.json) | - +Note: All models are trained with multi-scale, the input image shorter side is randomly scaled to one of (640, 672, 704, 736, 768, 800). + +## Citation ```latex @InProceedings{kirillov2019pointrend, @@ -26,12 +31,3 @@ We present a new method for efficient high-quality image segmentation of objects year={2019} } ``` - -## Results and models - -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | -| R-50-FPN | caffe | 1x | 4.6 | | 38.4 | 36.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco/point_rend_r50_caffe_fpn_mstrain_1x_coco-1bcb5fb4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco/point_rend_r50_caffe_fpn_mstrain_1x_coco_20200612_161407.log.json) | -| R-50-FPN | caffe | 3x | 4.6 | | 41.0 | 38.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco/point_rend_r50_caffe_fpn_mstrain_3x_coco-e0ebb6b7.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco/point_rend_r50_caffe_fpn_mstrain_3x_coco_20200614_002632.log.json) | - -Note: All models are trained with multi-scale, the input image shorter side is randomly scaled to one of (640, 672, 704, 736, 768, 800). diff --git a/configs/pvt/README.md b/configs/pvt/README.md index 368edb8..1fd090b 100644 --- a/configs/pvt/README.md +++ b/configs/pvt/README.md @@ -1,25 +1,42 @@ -# Pyramid vision transformer: A versatile backbone for dense prediction without convolutions +# PVT -## Abstract +> [Pyramid vision transformer: A versatile backbone for dense prediction without convolutions](https://arxiv.org/abs/2102.12122) + + - +## Abstract Although using convolutional neural networks (CNNs) as backbones achieves great successes in computer vision, this work investigates a simple backbone network useful for many dense prediction tasks without convolutions. Unlike the recently-proposed Transformer model (e.g., ViT) that is specially designed for image classification, we propose Pyramid Vision Transformer~(PVT), which overcomes the difficulties of porting Transformer to various dense prediction tasks. PVT has several merits compared to prior arts. (1) Different from ViT that typically has low-resolution outputs and high computational and memory cost, PVT can be not only trained on dense partitions of the image to achieve high output resolution, which is important for dense predictions but also using a progressive shrinking pyramid to reduce computations of large feature maps. (2) PVT inherits the advantages from both CNN and Transformer, making it a unified backbone in various vision tasks without convolutions by simply replacing CNN backbones. (3) We validate PVT by conducting extensive experiments, showing that it boosts the performance of many downstream tasks, e.g., object detection, semantic, and instance segmentation. For example, with a comparable number of parameters, RetinaNet+PVT achieves 40.4 AP on the COCO dataset, surpassing RetinNet+ResNet50 (36.3 AP) by 4.1 absolute AP. We hope PVT could serve as an alternative and useful backbone for pixel-level predictions and facilitate future researches. Transformer recently has shown encouraging progresses in computer vision. In this work, we present new baselines by improving the original Pyramid Vision Transformer (abbreviated as PVTv1) by adding three designs, including (1) overlapping patch embedding, (2) convolutional feed-forward networks, and (3) linear complexity attention layers. With these modifications, our PVTv2 significantly improves PVTv1 on three tasks e.g., classification, detection, and segmentation. Moreover, PVTv2 achieves comparable or better performances than recent works such as Swin Transformer. We hope this work will facilitate state-of-the-art Transformer researches in computer vision. -
- - +## Results and Models -## Citation +### RetinaNet (PVTv1) + +| Backbone | Lr schd | Mem (GB) | box AP | Config | Download | +| :--------: | :-----: | :------: | :----: | :--------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| PVT-Tiny | 12e | 8.5 | 36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt_t_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-t_fpn_1x_coco/retinanet_pvt-t_fpn_1x_coco_20210831_103110-17b566bd.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-t_fpn_1x_coco/retinanet_pvt-t_fpn_1x_coco_20210831_103110.log.json) | +| PVT-Small | 12e | 14.5 | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt_s_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-s_fpn_1x_coco/retinanet_pvt-s_fpn_1x_coco_20210906_142921-b6c94a5b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-s_fpn_1x_coco/retinanet_pvt-s_fpn_1x_coco_20210906_142921.log.json) | +| PVT-Medium | 12e | 20.9 | 41.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt_m_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-m_fpn_1x_coco/retinanet_pvt-m_fpn_1x_coco_20210831_103243-55effa1b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-m_fpn_1x_coco/retinanet_pvt-m_fpn_1x_coco_20210831_103243.log.json) | + +### RetinaNet (PVTv2) - +| Backbone | Lr schd | Mem (GB) | box AP | Config | Download | +| :------: | :-----: | :------: | :----: | :------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| PVTv2-B0 | 12e | 7.4 | 37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt_v2_b0_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b0_fpn_1x_coco/retinanet_pvtv2-b0_fpn_1x_coco_20210831_103157-13e9aabe.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b0_fpn_1x_coco/retinanet_pvtv2-b0_fpn_1x_coco_20210831_103157.log.json) | +| PVTv2-B1 | 12e | 9.5 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt_v2_b1_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b1_fpn_1x_coco/retinanet_pvtv2-b1_fpn_1x_coco_20210831_103318-7e169a7d.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b1_fpn_1x_coco/retinanet_pvtv2-b1_fpn_1x_coco_20210831_103318.log.json) | +| PVTv2-B2 | 12e | 16.2 | 44.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt_v2_b2_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b2_fpn_1x_coco/retinanet_pvtv2-b2_fpn_1x_coco_20210901_174843-529f0b9a.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b2_fpn_1x_coco/retinanet_pvtv2-b2_fpn_1x_coco_20210901_174843.log.json) | +| PVTv2-B3 | 12e | 23.0 | 46.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt_v2_b3_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b3_fpn_1x_coco/retinanet_pvtv2-b3_fpn_1x_coco_20210903_151512-8357deff.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b3_fpn_1x_coco/retinanet_pvtv2-b3_fpn_1x_coco_20210903_151512.log.json) | +| PVTv2-B4 | 12e | 17.0 | 46.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt_v2_b4_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b4_fpn_1x_coco/retinanet_pvtv2-b4_fpn_1x_coco_20210901_170151-83795c86.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b4_fpn_1x_coco/retinanet_pvtv2-b4_fpn_1x_coco_20210901_170151.log.json) | +| PVTv2-B5 | 12e | 18.7 | 46.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt_v2_b5_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b5_fpn_1x_coco/retinanet_pvtv2-b5_fpn_1x_coco_20210902_201800-3420eb57.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b5_fpn_1x_coco/retinanet_pvtv2-b5_fpn_1x_coco_20210902_201800.log.json) | + +## Citation ```latex @article{wang2021pyramid, @@ -38,23 +55,3 @@ With these modifications, our PVTv2 significantly improves PVTv1 on three tasks year={2021} } ``` -## Results and Models - -### RetinaNet (PVTv1) - -| Backbone | Lr schd | Mem (GB) | box AP | Config | Download | -|:-----------:|:-------:|:--------:|:------:|:------:|:--------:| -| PVT-Tiny | 12e |8.5 |36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt_t_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-t_fpn_1x_coco/retinanet_pvt-t_fpn_1x_coco_20210831_103110-17b566bd.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-t_fpn_1x_coco/retinanet_pvt-t_fpn_1x_coco_20210831_103110.log.json) | -| PVT-Small | 12e |14.5 |40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt_s_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-s_fpn_1x_coco/retinanet_pvt-s_fpn_1x_coco_20210906_142921-b6c94a5b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-s_fpn_1x_coco/retinanet_pvt-s_fpn_1x_coco_20210906_142921.log.json) | -| PVT-Medium | 12e |20.9 |41.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt_m_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-m_fpn_1x_coco/retinanet_pvt-m_fpn_1x_coco_20210831_103243-55effa1b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-m_fpn_1x_coco/retinanet_pvt-m_fpn_1x_coco_20210831_103243.log.json) | - -### RetinaNet (PVTv2) - -| Backbone | Lr schd | Mem (GB) | box AP | Config | Download | -|:-----------:|:-------:|:--------:|:------:|:------:|:--------:| -| PVTv2-B0 | 12e |7.4 |37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt_v2_b0_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b0_fpn_1x_coco/retinanet_pvtv2-b0_fpn_1x_coco_20210831_103157-13e9aabe.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b0_fpn_1x_coco/retinanet_pvtv2-b0_fpn_1x_coco_20210831_103157.log.json) | -| PVTv2-B1 | 12e |9.5 |41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt_v2_b1_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b1_fpn_1x_coco/retinanet_pvtv2-b1_fpn_1x_coco_20210831_103318-7e169a7d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b1_fpn_1x_coco/retinanet_pvtv2-b1_fpn_1x_coco_20210831_103318.log.json) | -| PVTv2-B2 | 12e |16.2 |44.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt_v2_b2_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b2_fpn_1x_coco/retinanet_pvtv2-b2_fpn_1x_coco_20210901_174843-529f0b9a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b2_fpn_1x_coco/retinanet_pvtv2-b2_fpn_1x_coco_20210901_174843.log.json) | -| PVTv2-B3 | 12e |23.0 |46.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt_v2_b3_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b3_fpn_1x_coco/retinanet_pvtv2-b3_fpn_1x_coco_20210903_151512-8357deff.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b3_fpn_1x_coco/retinanet_pvtv2-b3_fpn_1x_coco_20210903_151512.log.json) | -| PVTv2-B4 | 12e |17.0 |46.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt_v2_b4_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b4_fpn_1x_coco/retinanet_pvtv2-b4_fpn_1x_coco_20210901_170151-83795c86.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b4_fpn_1x_coco/retinanet_pvtv2-b4_fpn_1x_coco_20210901_170151.log.json) | -| PVTv2-B5 | 12e |18.7 |46.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt_v2_b5_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b5_fpn_1x_coco/retinanet_pvtv2-b5_fpn_1x_coco_20210902_201800-3420eb57.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b5_fpn_1x_coco/retinanet_pvtv2-b5_fpn_1x_coco_20210902_201800.log.json) | diff --git a/configs/pvt/metafile.yml b/configs/pvt/metafile.yml index 48a0e2c..5884378 100644 --- a/configs/pvt/metafile.yml +++ b/configs/pvt/metafile.yml @@ -1,136 +1,243 @@ -Collections: - - Name: PVT +Models: + - Name: retinanet_pvt-t_fpn_1x_coco + In Collection: RetinaNet + Config: configs/pvt/retinanet_pvt-t_fpn_1x_coco.py Metadata: + Training Memory (GB): 8.5 + Epochs: 12 Training Data: COCO Training Techniques: - SGD with Momentum - Weight Decay Training Resources: 8x NVIDIA V100 GPUs Architecture: - - RetinaNet - PyramidVisionTransformer - - FPN - Paper: https://arxiv.org/abs/2102.12122 - README: configs/pvt/README.md - - Name: PVT-v2 - Metadata: - Training Data: COCO - Training Techniques: - - SGD with Momentum - - Weight Decay - Training Resources: 8x NVIDIA V100 GPUs - Architecture: - - RetinaNet - - PyramidVisionTransformerV2 - - FPN - Paper: https://arxiv.org/abs/2106.13797 - README: configs/pvt/README.md -Models: - - Name: retinanet_pvt-t_fpn_1x_coco - In Collection: PVT - Config: configs/pvt/retinanet_pvt-t_fpn_1x_coco.py - Metadata: - Training Memory (GB): 8.5 - Epochs: 12 Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 36.6 Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-t_fpn_1x_coco/retinanet_pvt-t_fpn_1x_coco_20210831_103110-17b566bd.pth + Paper: + URL: https://arxiv.org/abs/2102.12122 + Title: "Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions" + README: configs/pvt/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.17.0/mmdet/models/backbones/pvt.py#L315 + Version: 2.17.0 + - Name: retinanet_pvt-s_fpn_1x_coco - In Collection: PVT + In Collection: RetinaNet Config: configs/pvt/retinanet_pvt-s_fpn_1x_coco.py Metadata: Training Memory (GB): 14.5 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x NVIDIA V100 GPUs + Architecture: + - PyramidVisionTransformer Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 40.4 Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-s_fpn_1x_coco/retinanet_pvt-s_fpn_1x_coco_20210906_142921-b6c94a5b.pth + Paper: + URL: https://arxiv.org/abs/2102.12122 + Title: "Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions" + README: configs/pvt/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.17.0/mmdet/models/backbones/pvt.py#L315 + Version: 2.17.0 + - Name: retinanet_pvt-m_fpn_1x_coco - In Collection: PVT + In Collection: RetinaNet Config: configs/pvt/retinanet_pvt-m_fpn_1x_coco.py Metadata: Training Memory (GB): 20.9 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x NVIDIA V100 GPUs + Architecture: + - PyramidVisionTransformer Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 41.7 Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-m_fpn_1x_coco/retinanet_pvt-m_fpn_1x_coco_20210831_103243-55effa1b.pth + Paper: + URL: https://arxiv.org/abs/2102.12122 + Title: "Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions" + README: configs/pvt/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.17.0/mmdet/models/backbones/pvt.py#L315 + Version: 2.17.0 + - Name: retinanet_pvtv2-b0_fpn_1x_coco - In Collection: PVT-v2 + In Collection: RetinaNet Config: configs/pvt/retinanet_pvtv2-b0_fpn_1x_coco.py Metadata: Training Memory (GB): 7.4 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x NVIDIA V100 GPUs + Architecture: + - PyramidVisionTransformerV2 Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 37.1 Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b0_fpn_1x_coco/retinanet_pvtv2-b0_fpn_1x_coco_20210831_103157-13e9aabe.pth + Paper: + URL: https://arxiv.org/abs/2106.13797 + Title: "PVTv2: Improved Baselines with Pyramid Vision Transformer" + README: configs/pvt/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.17.0/mmdet/models/backbones/pvt.py#L543 + Version: 2.17.0 + - Name: retinanet_pvtv2-b1_fpn_1x_coco - In Collection: PVT-v2 + In Collection: RetinaNet Config: configs/pvt/retinanet_pvtv2-b1_fpn_1x_coco.py Metadata: Training Memory (GB): 9.5 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x NVIDIA V100 GPUs + Architecture: + - PyramidVisionTransformerV2 Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 41.2 Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b1_fpn_1x_coco/retinanet_pvtv2-b1_fpn_1x_coco_20210831_103318-7e169a7d.pth + Paper: + URL: https://arxiv.org/abs/2106.13797 + Title: "PVTv2: Improved Baselines with Pyramid Vision Transformer" + README: configs/pvt/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.17.0/mmdet/models/backbones/pvt.py#L543 + Version: 2.17.0 + - Name: retinanet_pvtv2-b2_fpn_1x_coco - In Collection: PVT-v2 + In Collection: RetinaNet Config: configs/pvt/retinanet_pvtv2-b2_fpn_1x_coco.py Metadata: Training Memory (GB): 16.2 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x NVIDIA V100 GPUs + Architecture: + - PyramidVisionTransformerV2 Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 44.6 Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b2_fpn_1x_coco/retinanet_pvtv2-b2_fpn_1x_coco_20210901_174843-529f0b9a.pth + Paper: + URL: https://arxiv.org/abs/2106.13797 + Title: "PVTv2: Improved Baselines with Pyramid Vision Transformer" + README: configs/pvt/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.17.0/mmdet/models/backbones/pvt.py#L543 + Version: 2.17.0 + - Name: retinanet_pvtv2-b3_fpn_1x_coco - In Collection: PVT-v2 + In Collection: RetinaNet Config: configs/pvt/retinanet_pvtv2-b3_fpn_1x_coco.py Metadata: Training Memory (GB): 23.0 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x NVIDIA V100 GPUs + Architecture: + - PyramidVisionTransformerV2 Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 46.0 Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b3_fpn_1x_coco/retinanet_pvtv2-b3_fpn_1x_coco_20210903_151512-8357deff.pth + Paper: + URL: https://arxiv.org/abs/2106.13797 + Title: "PVTv2: Improved Baselines with Pyramid Vision Transformer" + README: configs/pvt/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.17.0/mmdet/models/backbones/pvt.py#L543 + Version: 2.17.0 + - Name: retinanet_pvtv2-b4_fpn_1x_coco - In Collection: PVT-v2 + In Collection: RetinaNet Config: configs/pvt/retinanet_pvtv2-b4_fpn_1x_coco.py Metadata: Training Memory (GB): 17.0 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x NVIDIA V100 GPUs + Architecture: + - PyramidVisionTransformerV2 Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 46.3 Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b4_fpn_1x_coco/retinanet_pvtv2-b4_fpn_1x_coco_20210901_170151-83795c86.pth + Paper: + URL: https://arxiv.org/abs/2106.13797 + Title: "PVTv2: Improved Baselines with Pyramid Vision Transformer" + README: configs/pvt/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.17.0/mmdet/models/backbones/pvt.py#L543 + Version: 2.17.0 + - Name: retinanet_pvtv2-b5_fpn_1x_coco - In Collection: PVT-v2 + In Collection: RetinaNet Config: configs/pvt/retinanet_pvtv2-b5_fpn_1x_coco.py Metadata: Training Memory (GB): 18.7 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x NVIDIA V100 GPUs + Architecture: + - PyramidVisionTransformerV2 Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 46.1 Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b5_fpn_1x_coco/retinanet_pvtv2-b5_fpn_1x_coco_20210902_201800-3420eb57.pth + Paper: + URL: https://arxiv.org/abs/2106.13797 + Title: "PVTv2: Improved Baselines with Pyramid Vision Transformer" + README: configs/pvt/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.17.0/mmdet/models/backbones/pvt.py#L543 + Version: 2.17.0 diff --git a/configs/pvt/retinanet_pvtv2-b4_fpn_1x_coco.py b/configs/pvt/retinanet_pvtv2-b4_fpn_1x_coco.py index ec9103b..9891d7b 100644 --- a/configs/pvt/retinanet_pvtv2-b4_fpn_1x_coco.py +++ b/configs/pvt/retinanet_pvtv2-b4_fpn_1x_coco.py @@ -11,3 +11,8 @@ _delete_=True, type='AdamW', lr=0.0001 / 1.4, weight_decay=0.0001) # dataset settings data = dict(samples_per_gpu=1, workers_per_gpu=1) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (1 samples per GPU) +auto_scale_lr = dict(base_batch_size=8) diff --git a/configs/pvt/retinanet_pvtv2-b5_fpn_1x_coco.py b/configs/pvt/retinanet_pvtv2-b5_fpn_1x_coco.py index d8e6d23..a9fea2e 100644 --- a/configs/pvt/retinanet_pvtv2-b5_fpn_1x_coco.py +++ b/configs/pvt/retinanet_pvtv2-b5_fpn_1x_coco.py @@ -12,3 +12,8 @@ _delete_=True, type='AdamW', lr=0.0001 / 1.4, weight_decay=0.0001) # dataset settings data = dict(samples_per_gpu=1, workers_per_gpu=1) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (1 samples per GPU) +auto_scale_lr = dict(base_batch_size=8) diff --git a/configs/queryinst/README.md b/configs/queryinst/README.md index 7129dd4..ad6e0b3 100644 --- a/configs/queryinst/README.md +++ b/configs/queryinst/README.md @@ -1,24 +1,30 @@ -# Instances as Queries +# QueryInst -## Abstract +> [Instances as Queries](https://openaccess.thecvf.com/content/ICCV2021/html/Fang_Instances_As_Queries_ICCV_2021_paper.html) + + - +## Abstract We present QueryInst, a new perspective for instance segmentation. QueryInst is a multi-stage end-to-end system that treats instances of interest as learnable queries, enabling query based object detectors, e.g., Sparse R-CNN, to have strong instance segmentation performance. The attributes of instances such as categories, bounding boxes, instance masks, and instance association embeddings are represented by queries in a unified manner. In QueryInst, a query is shared by both detection and segmentation via dynamic convolutions and driven by parallelly-supervised multi-stage learning. We conduct extensive experiments on three challenging benchmarks, i.e., COCO, CityScapes, and YouTube-VIS to evaluate the effectiveness of QueryInst in object detection, instance segmentation, and video instance segmentation tasks. For the first time, we demonstrate that a simple end-to-end query based framework can achieve the state-of-the-art performance in various instance-level recognition tasks. -
- - +## Results and Models -## Citation +| Model | Backbone | Style | Lr schd | Number of Proposals | Multi-Scale | RandomCrop | box AP | mask AP | Config | Download | +| :-------: | :-------: | :-----: | :-----: | :-----------------: | :---------: | :--------: | :----: | :-----: | :------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| QueryInst | R-50-FPN | pytorch | 1x | 100 | False | False | 42.0 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/queryinst/queryinst_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_1x_coco/queryinst_r50_fpn_1x_coco_20210907_084916-5a8f1998.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_1x_coco/queryinst_r50_fpn_1x_coco_20210907_084916.log.json) | +| QueryInst | R-50-FPN | pytorch | 3x | 100 | True | False | 44.8 | 39.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco/queryinst_r50_fpn_mstrain_480-800_3x_coco_20210901_103643-7837af86.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco/queryinst_r50_fpn_mstrain_480-800_3x_coco_20210901_103643.log.json) | +| QueryInst | R-50-FPN | pytorch | 3x | 300 | True | True | 47.5 | 41.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_101802-85cffbd8.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_101802.log.json) | +| QueryInst | R-101-FPN | pytorch | 3x | 100 | True | False | 46.4 | 41.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco/queryinst_r101_fpn_mstrain_480-800_3x_coco_20210904_104048-91f9995b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco/queryinst_r101_fpn_mstrain_480-800_3x_coco_20210904_104048.log.json) | +| QueryInst | R-101-FPN | pytorch | 3x | 300 | True | True | 49.0 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_153621-76cce59f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_153621.log.json) | - +## Citation -``` +```latex @InProceedings{Fang_2021_ICCV, author = {Fang, Yuxin and Yang, Shusheng and Wang, Xinggang and Li, Yu and Fang, Chen and Shan, Ying and Feng, Bin and Liu, Wenyu}, title = {Instances As Queries}, @@ -28,13 +34,3 @@ We present QueryInst, a new perspective for instance segmentation. QueryInst is pages = {6910-6919} } ``` - -## Results and Models - -| Model | Backbone | Style | Lr schd | Number of Proposals |Multi-Scale| RandomCrop | box AP | mask AP | Config | Download | -|:------------:|:---------:|:-------:|:-------:|:-------: |:-------: |:---------:|:------:|:------:|:------:|:--------:| -| QueryInst | R-50-FPN | pytorch | 1x | 100 | False | False | 42.0 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/queryinst/queryinst_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_1x_coco/queryinst_r50_fpn_1x_coco_20210907_084916-5a8f1998.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_1x_coco/queryinst_r50_fpn_1x_coco_20210907_084916.log.json) | -| QueryInst | R-50-FPN | pytorch | 3x | 100 | True | False | 44.8 | 39.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco/queryinst_r50_fpn_mstrain_480-800_3x_coco_20210901_103643-7837af86.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco/queryinst_r50_fpn_mstrain_480-800_3x_coco_20210901_103643.log.json) | -| QueryInst | R-50-FPN | pytorch | 3x | 300 | True | True | 47.5 | 41.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_101802-85cffbd8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_101802.log.json) | -| QueryInst | R-101-FPN | pytorch | 3x | 100 | True | False | 46.4 | 41.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco/queryinst_r101_fpn_mstrain_480-800_3x_coco_20210904_104048-91f9995b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco/queryinst_r101_fpn_mstrain_480-800_3x_coco_20210904_104048.log.json) | -| QueryInst | R-101-FPN | pytorch | 3x | 300 | True | True | 49.0 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_153621-76cce59f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_153621.log.json) | diff --git a/configs/regnet/README.md b/configs/regnet/README.md index 51f44df..711ed63 100644 --- a/configs/regnet/README.md +++ b/configs/regnet/README.md @@ -1,39 +1,22 @@ -# Designing Network Design Spaces +# RegNet -## Abstract +> [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678) - + - In this work, we present a new network design paradigm. Our goal is to help advance the understanding of network design and discover design principles that generalize across settings. Instead of focusing on designing individual network instances, we design network design spaces that parametrize populations of networks. The overall process is analogous to classic manual design of networks, but elevated to the design space level. Using our methodology we explore the structure aspect of network design and arrive at a low-dimensional design space consisting of simple, regular networks that we call RegNet. The core insight of the RegNet parametrization is surprisingly simple: widths and depths of good networks can be explained by a quantized linear function. We analyze the RegNet design space and arrive at interesting findings that do not match the current practice of network design. The RegNet design space provides simple and fast networks that work well across a wide range of flop regimes. Under comparable training settings and flops, the RegNet models outperform the popular EfficientNet models while being up to 5x faster on GPUs. +## Abstract + +In this work, we present a new network design paradigm. Our goal is to help advance the understanding of network design and discover design principles that generalize across settings. Instead of focusing on designing individual network instances, we design network design spaces that parametrize populations of networks. The overall process is analogous to classic manual design of networks, but elevated to the design space level. Using our methodology we explore the structure aspect of network design and arrive at a low-dimensional design space consisting of simple, regular networks that we call RegNet. The core insight of the RegNet parametrization is surprisingly simple: widths and depths of good networks can be explained by a quantized linear function. We analyze the RegNet design space and arrive at interesting findings that do not match the current practice of network design. The RegNet design space provides simple and fast networks that work well across a wide range of flop regimes. Under comparable training settings and flops, the RegNet models outperform the popular EfficientNet models while being up to 5x faster on GPUs. -
- - - ## Introduction - - We implement RegNetX and RegNetY models in detection systems and provide their first results on Mask R-CNN, Faster R-CNN and RetinaNet. -The pre-trained modles are converted from [model zoo of pycls](https://github.com/facebookresearch/pycls/blob/master/MODEL_ZOO.md). - -## Citation - -```latex -@article{radosavovic2020designing, - title={Designing Network Design Spaces}, - author={Ilija Radosavovic and Raj Prateek Kosaraju and Ross Girshick and Kaiming He and Piotr Dollár}, - year={2020}, - eprint={2003.13678}, - archivePrefix={arXiv}, - primaryClass={cs.CV} -} -``` +The pre-trained models are converted from [model zoo of pycls](https://github.com/facebookresearch/pycls/blob/master/MODEL_ZOO.md). ## Usage @@ -64,63 +47,76 @@ For other pre-trained models or self-implemented regnet models, the users are re **Note**: Although Fig. 15 & 16 also provide `w0`, `wa`, `wm`, `group_w`, and `bot_mul` for `arch`, they are quantized thus inaccurate, using them sometimes produces different backbone that does not match the key in the pre-trained model. -## Results +## Results and Models ### Mask R-CNN -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :---------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | -| [R-50-FPN](../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py)| pytorch | 1x | 4.4 | 12.0 | 38.2 | 34.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205_050542.log.json) | -|[RegNetX-3.2GF-FPN](./mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py)| pytorch | 1x |5.0 ||40.3|36.6|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_1x_coco_20200520_163141-2a9d1814.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_1x_coco_20200520_163141.log.json) | -|[RegNetX-4.0GF-FPN](./mask_rcnn_regnetx-4GF_fpn_1x_coco.py)| pytorch | 1x |5.5||41.5|37.4|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco/mask_rcnn_regnetx-4GF_fpn_1x_coco_20200517_180217-32e9c92d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco/mask_rcnn_regnetx-4GF_fpn_1x_coco_20200517_180217.log.json) | -| [R-101-FPN](../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py)| pytorch | 1x | 6.4 | 10.3 | 40.0 | 36.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_1x_coco/mask_rcnn_r101_fpn_1x_coco_20200204-1efe0ed5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_1x_coco/mask_rcnn_r101_fpn_1x_coco_20200204_144809.log.json) | -|[RegNetX-6.4GF-FPN](./mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py)| pytorch | 1x |6.1 ||41.0|37.1|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco/mask_rcnn_regnetx-6.4GF_fpn_1x_coco_20200517_180439-3a7aae83.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco/mask_rcnn_regnetx-6.4GF_fpn_1x_coco_20200517_180439.log.json) | -| [X-101-32x4d-FPN](../mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py) | pytorch | 1x | 7.6 | 9.4 | 41.9 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205-478d0b67.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205_034906.log.json) | -|[RegNetX-8.0GF-FPN](./mask_rcnn_regnetx-8GF_fpn_1x_coco.py)| pytorch | 1x |6.4 ||41.7|37.5|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco/mask_rcnn_regnetx-8GF_fpn_1x_coco_20200517_180515-09daa87e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco/mask_rcnn_regnetx-8GF_fpn_1x_coco_20200517_180515.log.json) | -|[RegNetX-12GF-FPN](./mask_rcnn_regnetx-12GF_fpn_1x_coco.py)| pytorch | 1x |7.4 ||42.2|38|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco/mask_rcnn_regnetx-12GF_fpn_1x_coco_20200517_180552-b538bd8b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco/mask_rcnn_regnetx-12GF_fpn_1x_coco_20200517_180552.log.json) | -|[RegNetX-3.2GF-FPN-DCN-C3-C5](./mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py)| pytorch | 1x |5.0 ||40.3|36.6|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco_20200520_172726-75f40794.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco_20200520_172726.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :----------------------------------------------------------------------------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :--------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| [R-50-FPN](../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py) | pytorch | 1x | 4.4 | 12.0 | 38.2 | 34.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205_050542.log.json) | +| [RegNetX-3.2GF-FPN](./mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py) | pytorch | 1x | 5.0 | | 40.3 | 36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_1x_coco_20200520_163141-2a9d1814.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_1x_coco_20200520_163141.log.json) | +| [RegNetX-4.0GF-FPN](./mask_rcnn_regnetx-4GF_fpn_1x_coco.py) | pytorch | 1x | 5.5 | | 41.5 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco/mask_rcnn_regnetx-4GF_fpn_1x_coco_20200517_180217-32e9c92d.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco/mask_rcnn_regnetx-4GF_fpn_1x_coco_20200517_180217.log.json) | +| [R-101-FPN](../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py) | pytorch | 1x | 6.4 | 10.3 | 40.0 | 36.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_1x_coco/mask_rcnn_r101_fpn_1x_coco_20200204-1efe0ed5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_1x_coco/mask_rcnn_r101_fpn_1x_coco_20200204_144809.log.json) | +| [RegNetX-6.4GF-FPN](./mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py) | pytorch | 1x | 6.1 | | 41.0 | 37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco/mask_rcnn_regnetx-6.4GF_fpn_1x_coco_20200517_180439-3a7aae83.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco/mask_rcnn_regnetx-6.4GF_fpn_1x_coco_20200517_180439.log.json) | +| [X-101-32x4d-FPN](../mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py) | pytorch | 1x | 7.6 | 9.4 | 41.9 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205-478d0b67.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205_034906.log.json) | +| [RegNetX-8.0GF-FPN](./mask_rcnn_regnetx-8GF_fpn_1x_coco.py) | pytorch | 1x | 6.4 | | 41.7 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco/mask_rcnn_regnetx-8GF_fpn_1x_coco_20200517_180515-09daa87e.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco/mask_rcnn_regnetx-8GF_fpn_1x_coco_20200517_180515.log.json) | +| [RegNetX-12GF-FPN](./mask_rcnn_regnetx-12GF_fpn_1x_coco.py) | pytorch | 1x | 7.4 | | 42.2 | 38 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco/mask_rcnn_regnetx-12GF_fpn_1x_coco_20200517_180552-b538bd8b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco/mask_rcnn_regnetx-12GF_fpn_1x_coco_20200517_180552.log.json) | +| [RegNetX-3.2GF-FPN-DCN-C3-C5](./mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py) | pytorch | 1x | 5.0 | | 40.3 | 36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco_20200520_172726-75f40794.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco_20200520_172726.log.json) | ### Faster R-CNN -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :---------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | -| [R-50-FPN](../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py)| pytorch | 1x | 4.0 | 18.2 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | -|[RegNetX-3.2GF-FPN](./faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py)| pytorch | 1x | 4.5||39.9|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco/faster_rcnn_regnetx-3.2GF_fpn_1x_coco_20200517_175927-126fd9bf.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco/faster_rcnn_regnetx-3.2GF_fpn_1x_coco_20200517_175927.log.json) | -|[RegNetX-3.2GF-FPN](./faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py)| pytorch | 2x | 4.5||41.1|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco/faster_rcnn_regnetx-3.2GF_fpn_2x_coco_20200520_223955-e2081918.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco/faster_rcnn_regnetx-3.2GF_fpn_2x_coco_20200520_223955.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------------------------------------------------------: | :-----: | :-----: | :------: | :------------: | :----: | :---------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| [R-50-FPN](../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | pytorch | 1x | 4.0 | 18.2 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | +| [RegNetX-3.2GF-FPN](./faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py) | pytorch | 1x | 4.5 | | 39.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco/faster_rcnn_regnetx-3.2GF_fpn_1x_coco_20200517_175927-126fd9bf.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco/faster_rcnn_regnetx-3.2GF_fpn_1x_coco_20200517_175927.log.json) | +| [RegNetX-3.2GF-FPN](./faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py) | pytorch | 2x | 4.5 | | 41.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco/faster_rcnn_regnetx-3.2GF_fpn_2x_coco_20200520_223955-e2081918.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco/faster_rcnn_regnetx-3.2GF_fpn_2x_coco_20200520_223955.log.json) | ### RetinaNet -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :---------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | -| [R-50-FPN](../retinanet/retinanet_r50_fpn_1x_coco.py) | pytorch | 1x | 3.8 | 16.6 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_1x_coco/retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_1x_coco/retinanet_r50_fpn_1x_coco_20200130_002941.log.json) | -|[RegNetX-800MF-FPN](./retinanet_regnetx-800MF_fpn_1x_coco.py)| pytorch | 1x |2.5||35.6|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/retinanet_regnetx-800MF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-800MF_fpn_1x_coco/retinanet_regnetx-800MF_fpn_1x_coco_20200517_191403-f6f91d10.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-800MF_fpn_1x_coco/retinanet_regnetx-800MF_fpn_1x_coco_20200517_191403.log.json) | -|[RegNetX-1.6GF-FPN](./retinanet_regnetx-1.6GF_fpn_1x_coco.py)| pytorch | 1x |3.3||37.3|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco/retinanet_regnetx-1.6GF_fpn_1x_coco_20200517_191403-37009a9d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco/retinanet_regnetx-1.6GF_fpn_1x_coco_20200517_191403.log.json) | -|[RegNetX-3.2GF-FPN](./retinanet_regnetx-3.2GF_fpn_1x_coco.py)| pytorch | 1x |4.2 ||39.1|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco/retinanet_regnetx-3.2GF_fpn_1x_coco_20200520_163141-cb1509e8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco/retinanet_regnetx-3.2GF_fpn_1x_coco_20200520_163141.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-----------------------------------------------------------: | :-----: | :-----: | :------: | :------------: | :----: | :-------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| [R-50-FPN](../retinanet/retinanet_r50_fpn_1x_coco.py) | pytorch | 1x | 3.8 | 16.6 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_1x_coco/retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_1x_coco/retinanet_r50_fpn_1x_coco_20200130_002941.log.json) | +| [RegNetX-800MF-FPN](./retinanet_regnetx-800MF_fpn_1x_coco.py) | pytorch | 1x | 2.5 | | 35.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/retinanet_regnetx-800MF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-800MF_fpn_1x_coco/retinanet_regnetx-800MF_fpn_1x_coco_20200517_191403-f6f91d10.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-800MF_fpn_1x_coco/retinanet_regnetx-800MF_fpn_1x_coco_20200517_191403.log.json) | +| [RegNetX-1.6GF-FPN](./retinanet_regnetx-1.6GF_fpn_1x_coco.py) | pytorch | 1x | 3.3 | | 37.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco/retinanet_regnetx-1.6GF_fpn_1x_coco_20200517_191403-37009a9d.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco/retinanet_regnetx-1.6GF_fpn_1x_coco_20200517_191403.log.json) | +| [RegNetX-3.2GF-FPN](./retinanet_regnetx-3.2GF_fpn_1x_coco.py) | pytorch | 1x | 4.2 | | 39.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco/retinanet_regnetx-3.2GF_fpn_1x_coco_20200520_163141-cb1509e8.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco/retinanet_regnetx-3.2GF_fpn_1x_coco_20200520_163141.log.json) | ### Pre-trained models We also train some models with longer schedules and multi-scale training. The users could finetune them for downstream tasks. -| Method | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-----: | :-----: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | -|Faster RCNN |[RegNetX-400MF-FPN](./faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py)| pytorch | 3x |2.3 ||37.1|-|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco_20210526_095112-e1967c37.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco_20210526_095112.log.json) | -|Faster RCNN |[RegNetX-800MF-FPN](./faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py)| pytorch | 3x |2.8 ||38.8|-|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco_20210526_095118-a2c70b20.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco_20210526_095118.log.json) | -|Faster RCNN |[RegNetX-1.6GF-FPN](./faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py)| pytorch | 3x |3.4 ||40.5|-|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-1_20210526_095325-94aa46cc.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-1_20210526_095325.log.json) | -|Faster RCNN |[RegNetX-3.2GF-FPN](./faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py)| pytorch | 3x |4.4 ||42.3|-|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-3_20210526_095152-e16a5227.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-3_20210526_095152.log.json) | -|Faster RCNN |[RegNetX-4GF-FPN](./faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py)| pytorch | 3x |4.9 ||42.8|-|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco_20210526_095201-65eaf841.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco_20210526_095201.log.json) | -|Mask RCNN |[RegNetX-3.2GF-FPN](./mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py)| pytorch | 3x |5.0 ||43.1|38.7|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200521_202221-99879813.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200521_202221.log.json) | -|Mask RCNN |[RegNetX-400MF-FPN](./mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco.py)| pytorch | 3x |2.5 ||37.6|34.4|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco_20210601_235443-8aac57a4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco_20210601_235443.log.json) | -|Mask RCNN |[RegNetX-800MF-FPN](./mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco.py)| pytorch | 3x |2.9 ||39.5|36.1|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco_20210602_210641-715d51f5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco_20210602_210641.log.json) | -|Mask RCNN |[RegNetX-1.6GF-FPN](./mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco.py)| pytorch | 3x |3.6 ||40.9|37.5|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-1_20210602_210641-6764cff5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-1_20210602_210641.log.json) | -|Mask RCNN |[RegNetX-3.2GF-FPN](./mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | pytorch | 3x |5.0 ||43.1|38.7|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200521_202221-99879813.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200521_202221.log.json) | -|Mask RCNN |[RegNetX-4GF-FPN](./mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco.py) | pytorch | 3x |5.1 ||43.4|39.2|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco_20210602_032621-00f0331c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco_20210602_032621.log.json) | -|Cascade Mask RCNN |[RegNetX-400MF-FPN](./cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py)| pytorch | 3x |4.3||41.6|36.4|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco_20210715_211619-5142f449.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco_20210715_211619.log.json) | -|Cascade Mask RCNN |[RegNetX-800MF-FPN](./cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py)| pytorch | 3x |4.8||42.8|37.6|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco_20210715_211616-dcbd13f4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco_20210715_211616.log.json) | -|Cascade Mask RCNN |[RegNetX-1.6GF-FPN](./cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py)| pytorch | 3x |5.4||44.5|39.0|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-1_20210715_211616-75f29a61.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-1_20210715_211616.log.json) | -|Cascade Mask RCNN |[RegNetX-3.2GF-FPN](./cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py)| pytorch | 3x |6.4||45.8|40.0|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-3_20210715_211616-b9c2c58b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-3_20210715_211616.log.json) | -|Cascade Mask RCNN |[RegNetX-4GF-FPN](./cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py) | pytorch | 3x |6.9||45.8|40.0|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco_20210715_212034-cbb1be4c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco_20210715_212034.log.json) | +| Method | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :---------------: | :---------------------------------------------------------------------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :-----------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Faster RCNN | [RegNetX-400MF-FPN](./faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py) | pytorch | 3x | 2.3 | | 37.1 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco_20210526_095112-e1967c37.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco_20210526_095112.log.json) | +| Faster RCNN | [RegNetX-800MF-FPN](./faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py) | pytorch | 3x | 2.8 | | 38.8 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco_20210526_095118-a2c70b20.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco_20210526_095118.log.json) | +| Faster RCNN | [RegNetX-1.6GF-FPN](./faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py) | pytorch | 3x | 3.4 | | 40.5 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-1_20210526_095325-94aa46cc.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-1_20210526_095325.log.json) | +| Faster RCNN | [RegNetX-3.2GF-FPN](./faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | pytorch | 3x | 4.4 | | 42.3 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-3_20210526_095152-e16a5227.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-3_20210526_095152.log.json) | +| Faster RCNN | [RegNetX-4GF-FPN](./faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py) | pytorch | 3x | 4.9 | | 42.8 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco_20210526_095201-65eaf841.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco_20210526_095201.log.json) | +| Mask RCNN | [RegNetX-3.2GF-FPN](./mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | pytorch | 3x | 5.0 | | 43.1 | 38.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200521_202221-99879813.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200521_202221.log.json) | +| Mask RCNN | [RegNetX-400MF-FPN](./mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco.py) | pytorch | 3x | 2.5 | | 37.6 | 34.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco_20210601_235443-8aac57a4.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco_20210601_235443.log.json) | +| Mask RCNN | [RegNetX-800MF-FPN](./mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco.py) | pytorch | 3x | 2.9 | | 39.5 | 36.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco_20210602_210641-715d51f5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco_20210602_210641.log.json) | +| Mask RCNN | [RegNetX-1.6GF-FPN](./mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco.py) | pytorch | 3x | 3.6 | | 40.9 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-1_20210602_210641-6764cff5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-1_20210602_210641.log.json) | +| Mask RCNN | [RegNetX-3.2GF-FPN](./mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | pytorch | 3x | 5.0 | | 43.1 | 38.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200521_202221-99879813.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200521_202221.log.json) | +| Mask RCNN | [RegNetX-4GF-FPN](./mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco.py) | pytorch | 3x | 5.1 | | 43.4 | 39.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco_20210602_032621-00f0331c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco_20210602_032621.log.json) | +| Cascade Mask RCNN | [RegNetX-400MF-FPN](./cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py) | pytorch | 3x | 4.3 | | 41.6 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco_20210715_211619-5142f449.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco_20210715_211619.log.json) | +| Cascade Mask RCNN | [RegNetX-800MF-FPN](./cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py) | pytorch | 3x | 4.8 | | 42.8 | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco_20210715_211616-dcbd13f4.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco_20210715_211616.log.json) | +| Cascade Mask RCNN | [RegNetX-1.6GF-FPN](./cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py) | pytorch | 3x | 5.4 | | 44.5 | 39.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-1_20210715_211616-75f29a61.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-1_20210715_211616.log.json) | +| Cascade Mask RCNN | [RegNetX-3.2GF-FPN](./cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | pytorch | 3x | 6.4 | | 45.8 | 40.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-3_20210715_211616-b9c2c58b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-3_20210715_211616.log.json) | +| Cascade Mask RCNN | [RegNetX-4GF-FPN](./cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py) | pytorch | 3x | 6.9 | | 45.8 | 40.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco_20210715_212034-cbb1be4c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco_20210715_212034.log.json) | ### Notice 1. The models are trained using a different weight decay, i.e., `weight_decay=5e-5` according to the setting in ImageNet training. This brings improvement of at least 0.7 AP absolute but does not improve the model using ResNet-50. 2. RetinaNets using RegNets are trained with learning rate 0.02 with gradient clip. We find that using learning rate 0.02 could improve the results by at least 0.7 AP absolute and gradient clip is necessary to stabilize the training. However, this does not improve the performance of ResNet-50-FPN RetinaNet. + +## Citation + +```latex +@article{radosavovic2020designing, + title={Designing Network Design Spaces}, + author={Ilija Radosavovic and Raj Prateek Kosaraju and Ross Girshick and Kaiming He and Piotr Dollár}, + year={2020}, + eprint={2003.13678}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` diff --git a/configs/regnet/metafile.yml b/configs/regnet/metafile.yml index 5390a35..ecd3953 100644 --- a/configs/regnet/metafile.yml +++ b/configs/regnet/metafile.yml @@ -1,6 +1,10 @@ -Collections: - - Name: RegNet +Models: + - Name: mask_rcnn_regnetx-3.2GF_fpn_1x_coco + In Collection: Mask R-CNN + Config: configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py Metadata: + Training Memory (GB): 5.0 + Epochs: 12 Training Data: COCO Training Techniques: - SGD with Momentum @@ -8,21 +12,6 @@ Collections: Training Resources: 8x V100 GPUs Architecture: - RegNet - Paper: - URL: https://arxiv.org/abs/2003.13678 - Title: 'Designing Network Design Spaces' - README: configs/regnet/README.md - Code: - URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 - Version: v2.1.0 - -Models: - - Name: mask_rcnn_regnetx-3.2GF_fpn_1x_coco - In Collection: RegNet - Config: configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py - Metadata: - Training Memory (GB): 5.0 - Epochs: 12 Results: - Task: Object Detection Dataset: COCO @@ -33,13 +22,27 @@ Models: Metrics: mask AP: 36.6 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_1x_coco_20200520_163141-2a9d1814.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: mask_rcnn_regnetx-4GF_fpn_1x_coco - In Collection: RegNet + In Collection: Mask R-CNN Config: configs/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco.py Metadata: Training Memory (GB): 5.5 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO @@ -50,13 +53,27 @@ Models: Metrics: mask AP: 37.4 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco/mask_rcnn_regnetx-4GF_fpn_1x_coco_20200517_180217-32e9c92d.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: mask_rcnn_regnetx-6.4GF_fpn_1x_coco - In Collection: RegNet + In Collection: Mask R-CNN Config: configs/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py Metadata: Training Memory (GB): 6.1 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO @@ -67,13 +84,27 @@ Models: Metrics: mask AP: 37.1 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco/mask_rcnn_regnetx-6.4GF_fpn_1x_coco_20200517_180439-3a7aae83.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: mask_rcnn_regnetx-8GF_fpn_1x_coco - In Collection: RegNet + In Collection: Mask R-CNN Config: configs/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco.py Metadata: Training Memory (GB): 6.4 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO @@ -84,13 +115,27 @@ Models: Metrics: mask AP: 37.5 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco/mask_rcnn_regnetx-8GF_fpn_1x_coco_20200517_180515-09daa87e.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: mask_rcnn_regnetx-12GF_fpn_1x_coco - In Collection: RegNet + In Collection: Mask R-CNN Config: configs/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco.py Metadata: Training Memory (GB): 7.4 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO @@ -101,13 +146,27 @@ Models: Metrics: mask AP: 38 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco/mask_rcnn_regnetx-12GF_fpn_1x_coco_20200517_180552-b538bd8b.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco - In Collection: RegNet + In Collection: Mask R-CNN Config: configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py Metadata: Training Memory (GB): 5.0 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO @@ -118,143 +177,297 @@ Models: Metrics: mask AP: 36.6 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco_20200520_172726-75f40794.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: faster_rcnn_regnetx-3.2GF_fpn_1x_coco - In Collection: RegNet + In Collection: Faster R-CNN Config: configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py Metadata: Training Memory (GB): 4.5 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 39.9 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco/faster_rcnn_regnetx-3.2GF_fpn_1x_coco_20200517_175927-126fd9bf.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: faster_rcnn_regnetx-3.2GF_fpn_2x_coco - In Collection: RegNet + In Collection: Faster R-CNN Config: configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py Metadata: Training Memory (GB): 4.5 Epochs: 24 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 41.1 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco/faster_rcnn_regnetx-3.2GF_fpn_2x_coco_20200520_223955-e2081918.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: retinanet_regnetx-800MF_fpn_1x_coco - In Collection: RegNet + In Collection: RetinaNet Config: configs/regnet/retinanet_regnetx-800MF_fpn_1x_coco.py Metadata: Training Memory (GB): 2.5 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 35.6 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-800MF_fpn_1x_coco/retinanet_regnetx-800MF_fpn_1x_coco_20200517_191403-f6f91d10.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: retinanet_regnetx-1.6GF_fpn_1x_coco - In Collection: RegNet + In Collection: RetinaNet Config: configs/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco.py Metadata: Training Memory (GB): 3.3 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 37.3 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco/retinanet_regnetx-1.6GF_fpn_1x_coco_20200517_191403-37009a9d.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: retinanet_regnetx-3.2GF_fpn_1x_coco - In Collection: RegNet + In Collection: RetinaNet Config: configs/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco.py Metadata: Training Memory (GB): 4.2 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 39.1 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco/retinanet_regnetx-3.2GF_fpn_1x_coco_20200520_163141-cb1509e8.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco - In Collection: RegNet + In Collection: Faster R-CNN Config: configs/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py Metadata: Training Memory (GB): 2.3 Epochs: 36 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 37.1 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco_20210526_095112-e1967c37.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco - In Collection: RegNet + In Collection: Faster R-CNN Config: configs/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py Metadata: Training Memory (GB): 2.8 Epochs: 36 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 38.8 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco_20210526_095118-a2c70b20.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco - In Collection: RegNet + In Collection: Faster R-CNN Config: configs/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py Metadata: Training Memory (GB): 3.4 Epochs: 36 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 40.5 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-1_20210526_095325-94aa46cc.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco - In Collection: RegNet + In Collection: Faster R-CNN Config: configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py Metadata: Training Memory (GB): 4.4 Epochs: 36 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 42.3 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-3_20210526_095152-e16a5227.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco - In Collection: RegNet + In Collection: Faster R-CNN Config: configs/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py Metadata: Training Memory (GB): 4.9 Epochs: 36 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 42.8 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco_20210526_095201-65eaf841.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco - In Collection: RegNet + In Collection: Mask R-CNN Config: configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py Metadata: Training Memory (GB): 5.0 Epochs: 36 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO @@ -265,13 +478,27 @@ Models: Metrics: mask AP: 38.7 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200521_202221-99879813.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco - In Collection: RegNet + In Collection: Mask R-CNN Config: configs/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco.py Metadata: Training Memory (GB): 2.5 Epochs: 36 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO @@ -282,13 +509,27 @@ Models: Metrics: mask AP: 34.4 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco_20210601_235443-8aac57a4.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco - In Collection: RegNet + In Collection: Mask R-CNN Config: configs/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco.py Metadata: Training Memory (GB): 2.9 Epochs: 36 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO @@ -299,13 +540,27 @@ Models: Metrics: mask AP: 36.1 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco_20210602_210641-715d51f5.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco - In Collection: RegNet + In Collection: Mask R-CNN Config: configs/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py Metadata: Training Memory (GB): 3.6 Epochs: 36 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO @@ -316,13 +571,27 @@ Models: Metrics: mask AP: 37.5 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-1_20210602_210641-6764cff5.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco - In Collection: RegNet + In Collection: Mask R-CNN Config: configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py Metadata: Training Memory (GB): 5.0 Epochs: 36 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO @@ -333,13 +602,27 @@ Models: Metrics: mask AP: 38.7 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-1_20210602_210641-6e63e19c.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco - In Collection: RegNet + In Collection: Mask R-CNN Config: configs/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py Metadata: Training Memory (GB): 5.1 Epochs: 36 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO @@ -350,13 +633,27 @@ Models: Metrics: mask AP: 39.2 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco_20210602_032621-00f0331c.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco - In Collection: RegNet + In Collection: Cascade R-CNN Config: configs/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py Metadata: Training Memory (GB): 4.3 Epochs: 36 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO @@ -367,13 +664,27 @@ Models: Metrics: mask AP: 36.4 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco_20210715_211619-5142f449.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco - In Collection: RegNet + In Collection: Cascade R-CNN Config: configs/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py Metadata: Training Memory (GB): 4.8 Epochs: 36 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO @@ -384,13 +695,27 @@ Models: Metrics: mask AP: 37.6 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco_20210715_211616-dcbd13f4.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco - In Collection: RegNet + In Collection: Cascade R-CNN Config: configs/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py Metadata: Training Memory (GB): 5.4 Epochs: 36 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO @@ -401,13 +726,27 @@ Models: Metrics: mask AP: 39.0 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-1_20210715_211616-75f29a61.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco - In Collection: RegNet + In Collection: Cascade R-CNN Config: configs/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py Metadata: Training Memory (GB): 6.4 Epochs: 36 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO @@ -418,13 +757,27 @@ Models: Metrics: mask AP: 40.0 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-3_20210715_211616-b9c2c58b.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 - Name: cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco - In Collection: RegNet + In Collection: Cascade R-CNN Config: configs/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py Metadata: Training Memory (GB): 6.9 Epochs: 36 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet Results: - Task: Object Detection Dataset: COCO @@ -435,3 +788,10 @@ Models: Metrics: mask AP: 40.0 Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco_20210715_212034-cbb1be4c.pth + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 diff --git a/configs/reppoints/README.md b/configs/reppoints/README.md index 8e53bd3..acf8e47 100644 --- a/configs/reppoints/README.md +++ b/configs/reppoints/README.md @@ -1,19 +1,17 @@ -# RepPoints: Point Set Representation for Object Detection +# RepPoints -## Abstract +> [RepPoints: Point Set Representation for Object Detection](https://arxiv.org/abs/1904.11490) + + - +## Abstract Modern object detectors rely heavily on rectangular bounding boxes, such as anchors, proposals and the final predictions, to represent objects at various recognition stages. The bounding box is convenient to use but provides only a coarse localization of objects and leads to a correspondingly coarse extraction of object features. In this paper, we present RepPoints(representative points), a new finer representation of objects as a set of sample points useful for both localization and recognition. Given ground truth localization and recognition targets for training, RepPoints learn to automatically arrange themselves in a manner that bounds the spatial extent of an object and indicates semantically significant local areas. They furthermore do not require the use of anchors to sample a space of bounding boxes. We show that an anchor-free object detector based on RepPoints can be as effective as the state-of-the-art anchor-based detection methods, with 46.5 AP and 67.4 AP50 on the COCO test-dev detection benchmark, using ResNet-101 model. -
- - - ## Introdution By [Ze Yang](https://yangze.tech/), [Shaohui Liu](http://b1ueber2y.me/), and [Han Hu](https://ancientmooner.github.io/). @@ -21,38 +19,24 @@ By [Ze Yang](https://yangze.tech/), [Shaohui Liu](http://b1ueber2y.me/), and [Ha We provide code support and configuration files to reproduce the results in the paper for ["RepPoints: Point Set Representation for Object Detection"](https://arxiv.org/abs/1904.11490) on COCO object detection. - - **RepPoints**, initially described in [arXiv](https://arxiv.org/abs/1904.11490), is a new representation method for visual objects, on which visual understanding tasks are typically centered. Visual object representation, aiming at both geometric description and appearance feature extraction, is conventionally achieved by `bounding box + RoIPool (RoIAlign)`. The bounding box representation is convenient to use; however, it provides only a rectangular localization of objects that lacks geometric precision and may consequently degrade feature quality. Our new representation, RepPoints, models objects by a `point set` instead of a `bounding box`, which learns to adaptively position themselves over an object in a manner that circumscribes the object’s `spatial extent` and enables `semantically aligned feature extraction`. This richer and more flexible representation maintains the convenience of bounding boxes while facilitating various visual understanding applications. This repo demonstrated the effectiveness of RepPoints for COCO object detection. Another feature of this repo is the demonstration of an `anchor-free detector`, which can be as effective as state-of-the-art anchor-based detection methods. The anchor-free detector can utilize either `bounding box` or `RepPoints` as the basic object representation. -## Citation - -``` -@inproceedings{yang2019reppoints, - title={RepPoints: Point Set Representation for Object Detection}, - author={Yang, Ze and Liu, Shaohui and Hu, Han and Wang, Liwei and Lin, Stephen}, - booktitle={The IEEE International Conference on Computer Vision (ICCV)}, - month={Oct}, - year={2019} -} -``` - -## Results and models +## Results and Models The results on COCO 2017val are shown in the table below. -| Method | Backbone | GN | Anchor | convert func | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -|:---------:|:-------------:|:---:|:------:|:------------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| -| BBox | R-50-FPN | Y | single | - | 1x | 3.9 | 15.9 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/bbox_r50_grid_fpn_gn-neck+head_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/reppoints/bbox_r50_grid_fpn_gn-neck%2Bhead_1x_coco/bbox_r50_grid_fpn_gn-neck%2Bhead_1x_coco_20200329-c98bfa96.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/reppoints/bbox_r50_grid_fpn_gn-neck%2Bhead_1x_coco/bbox_r50_grid_fpn_gn-neck%2Bhead_1x_coco_20200329_145916.log.json) | -| BBox | R-50-FPN | Y | none | - | 1x | 3.9 | 15.4 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/bbox_r50_grid_center_fpn_gn-neck+Bhead_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/reppoints/bbox_r50_grid_center_fpn_gn-neck%2Bhead_1x_coco/bbox_r50_grid_center_fpn_gn-neck%2Bhead_1x_coco_20200330-00f73d58.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/reppoints/bbox_r50_grid_center_fpn_gn-neck%2Bhead_1x_coco/bbox_r50_grid_center_fpn_gn-neck%2Bhead_1x_coco_20200330_233609.log.json) | -| RepPoints | R-50-FPN | N | none | moment | 1x | 3.3 | 18.5 | 37.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_1x_coco/reppoints_moment_r50_fpn_1x_coco_20200330-b73db8d1.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_1x_coco/reppoints_moment_r50_fpn_1x_coco_20200330_233609.log.json) | -| RepPoints | R-50-FPN | Y | none | moment | 1x | 3.9 | 17.5 | 38.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/reppoints_moment_r50_fpn_gn-neck%2Bhead_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_gn-neck%2Bhead_1x_coco/reppoints_moment_r50_fpn_gn-neck%2Bhead_1x_coco_20200329-4b38409a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_gn-neck%2Bhead_1x_coco/reppoints_moment_r50_fpn_gn-neck%2Bhead_1x_coco_20200329_145952.log.json) | -| RepPoints | R-50-FPN | Y | none | moment | 2x | 3.9 | - | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_gn-neck%2Bhead_2x_coco/reppoints_moment_r50_fpn_gn-neck%2Bhead_2x_coco_20200329-91babaa2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_gn-neck%2Bhead_2x_coco/reppoints_moment_r50_fpn_gn-neck%2Bhead_2x_coco_20200329_150020.log.json) | -| RepPoints | R-101-FPN | Y | none | moment | 2x | 5.8 | 13.7 | 40.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/reppoints_moment_r101_fpn_gn-neck+head_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r101_fpn_gn-neck%2Bhead_2x_coco/reppoints_moment_r101_fpn_gn-neck%2Bhead_2x_coco_20200329-4fbc7310.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r101_fpn_gn-neck%2Bhead_2x_coco/reppoints_moment_r101_fpn_gn-neck%2Bhead_2x_coco_20200329_132205.log.json) | -| RepPoints | R-101-FPN-DCN | Y | none | moment | 2x | 5.9 | 12.1 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco_20200329-3309fbf2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco_20200329_132134.log.json) | -| RepPoints | X-101-FPN-DCN | Y | none | moment | 2x | 7.1 | 9.3 | 44.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco_20200329-f87da1ea.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco_20200329_132201.log.json) | +| Method | Backbone | GN | Anchor | convert func | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------: | :-----------: | :-: | :----: | :----------: | :-----: | :------: | :------------: | :----: | :---------------------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| BBox | R-50-FPN | Y | single | - | 1x | 3.9 | 15.9 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/bbox_r50_grid_fpn_gn-neck+head_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/reppoints/bbox_r50_grid_fpn_gn-neck%2Bhead_1x_coco/bbox_r50_grid_fpn_gn-neck%2Bhead_1x_coco_20200329-c98bfa96.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/reppoints/bbox_r50_grid_fpn_gn-neck%2Bhead_1x_coco/bbox_r50_grid_fpn_gn-neck%2Bhead_1x_coco_20200329_145916.log.json) | +| BBox | R-50-FPN | Y | none | - | 1x | 3.9 | 15.4 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/bbox_r50_grid_center_fpn_gn-neck+Bhead_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/reppoints/bbox_r50_grid_center_fpn_gn-neck%2Bhead_1x_coco/bbox_r50_grid_center_fpn_gn-neck%2Bhead_1x_coco_20200330-00f73d58.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/reppoints/bbox_r50_grid_center_fpn_gn-neck%2Bhead_1x_coco/bbox_r50_grid_center_fpn_gn-neck%2Bhead_1x_coco_20200330_233609.log.json) | +| RepPoints | R-50-FPN | N | none | moment | 1x | 3.3 | 18.5 | 37.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_1x_coco/reppoints_moment_r50_fpn_1x_coco_20200330-b73db8d1.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_1x_coco/reppoints_moment_r50_fpn_1x_coco_20200330_233609.log.json) | +| RepPoints | R-50-FPN | Y | none | moment | 1x | 3.9 | 17.5 | 38.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/reppoints_moment_r50_fpn_gn-neck%2Bhead_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_gn-neck%2Bhead_1x_coco/reppoints_moment_r50_fpn_gn-neck%2Bhead_1x_coco_20200329-4b38409a.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_gn-neck%2Bhead_1x_coco/reppoints_moment_r50_fpn_gn-neck%2Bhead_1x_coco_20200329_145952.log.json) | +| RepPoints | R-50-FPN | Y | none | moment | 2x | 3.9 | - | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_gn-neck%2Bhead_2x_coco/reppoints_moment_r50_fpn_gn-neck%2Bhead_2x_coco_20200329-91babaa2.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_gn-neck%2Bhead_2x_coco/reppoints_moment_r50_fpn_gn-neck%2Bhead_2x_coco_20200329_150020.log.json) | +| RepPoints | R-101-FPN | Y | none | moment | 2x | 5.8 | 13.7 | 40.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/reppoints_moment_r101_fpn_gn-neck+head_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r101_fpn_gn-neck%2Bhead_2x_coco/reppoints_moment_r101_fpn_gn-neck%2Bhead_2x_coco_20200329-4fbc7310.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r101_fpn_gn-neck%2Bhead_2x_coco/reppoints_moment_r101_fpn_gn-neck%2Bhead_2x_coco_20200329_132205.log.json) | +| RepPoints | R-101-FPN-DCN | Y | none | moment | 2x | 5.9 | 12.1 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco_20200329-3309fbf2.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco_20200329_132134.log.json) | +| RepPoints | X-101-FPN-DCN | Y | none | moment | 2x | 7.1 | 9.3 | 44.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco_20200329-f87da1ea.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco_20200329_132201.log.json) | **Notes:** @@ -61,3 +45,15 @@ The results on COCO 2017val are shown in the table below. - `none` in the `anchor` column means 2-d `center point` (x,y) is used to represent the initial object hypothesis. `single` denotes one 4-d anchor box (x,y,w,h) with IoU based label assign criterion is adopted. - `moment`, `partial MinMax`, `MinMax` in the `convert func` column are three functions to convert a point set to a pseudo box. - Note the results here are slightly different from those reported in the paper, due to framework change. While the original paper uses an [MXNet](https://mxnet.apache.org/) implementation, we re-implement the method in [PyTorch](https://pytorch.org/) based on mmdetection. + +## Citation + +```latex +@inproceedings{yang2019reppoints, + title={RepPoints: Point Set Representation for Object Detection}, + author={Yang, Ze and Liu, Shaohui and Hu, Han and Wang, Liwei and Lin, Stephen}, + booktitle={The IEEE International Conference on Computer Vision (ICCV)}, + month={Oct}, + year={2019} +} +``` diff --git a/configs/res2net/README.md b/configs/res2net/README.md index 3a76bef..1285870 100644 --- a/configs/res2net/README.md +++ b/configs/res2net/README.md @@ -1,31 +1,27 @@ -# Res2Net: A New Multi-scale Backbone Architecture +# Res2Net -## Abstract +> [Res2Net: A New Multi-scale Backbone Architecture](https://arxiv.org/abs/1904.01169) + + - +## Abstract Representing features at multiple scales is of great importance for numerous vision tasks. Recent advances in backbone convolutional neural networks (CNNs) continually demonstrate stronger multi-scale representation ability, leading to consistent performance gains on a wide range of applications. However, most existing methods represent the multi-scale features in a layer-wise manner. In this paper, we propose a novel building block for CNNs, namely Res2Net, by constructing hierarchical residual-like connections within one single residual block. The Res2Net represents multi-scale features at a granular level and increases the range of receptive fields for each network layer. The proposed Res2Net block can be plugged into the state-of-the-art backbone CNN models, e.g., ResNet, ResNeXt, and DLA. We evaluate the Res2Net block on all these models and demonstrate consistent performance gains over baseline models on widely-used datasets, e.g., CIFAR-100 and ImageNet. Further ablation studies and experimental results on representative computer vision tasks, i.e., object detection, class activation mapping, and salient object detection, further verify the superiority of the Res2Net over the state-of-the-art baseline methods. -
- - - ## Introduction - - We propose a novel building block for CNNs, namely Res2Net, by constructing hierarchical residual-like connections within one single residual block. The Res2Net represents multi-scale features at a granular level and increases the range of receptive fields for each network layer. -| Backbone |Params. | GFLOPs | top-1 err. | top-5 err. | -| :-------------: |:----: | :-----: | :--------: | :--------: | -| ResNet-101 |44.6 M | 7.8 | 22.63 | 6.44 | -| ResNeXt-101-64x4d |83.5M | 15.5 | 20.40 | - | -| HRNetV2p-W48 | 77.5M | 16.1 | 20.70 | 5.50 | -| Res2Net-101 | 45.2M | 8.3 | 18.77 | 4.64 | +| Backbone | Params. | GFLOPs | top-1 err. | top-5 err. | +| :---------------: | :-----: | :----: | :--------: | :--------: | +| ResNet-101 | 44.6 M | 7.8 | 22.63 | 6.44 | +| ResNeXt-101-64x4d | 83.5M | 15.5 | 20.40 | - | +| HRNetV2p-W48 | 77.5M | 16.1 | 20.70 | 5.50 | +| Res2Net-101 | 45.2M | 8.3 | 18.77 | 4.64 | Compared with other backbone networks, Res2Net requires fewer parameters and FLOPs. @@ -33,49 +29,49 @@ Compared with other backbone networks, Res2Net requires fewer parameters and FLO - GFLOPs for classification are calculated with image size (224x224). -## Citation - -```latex -@article{gao2019res2net, - title={Res2Net: A New Multi-scale Backbone Architecture}, - author={Gao, Shang-Hua and Cheng, Ming-Ming and Zhao, Kai and Zhang, Xin-Yu and Yang, Ming-Hsuan and Torr, Philip}, - journal={IEEE TPAMI}, - year={2020}, - doi={10.1109/TPAMI.2019.2938758}, -} -``` - ## Results and Models ### Faster R-CNN -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | -|R2-101-FPN | pytorch | 2x | 7.4 | - | 43.0 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/res2net/faster_rcnn_r2_101_fpn_2x_coco/faster_rcnn_r2_101_fpn_2x_coco-175f1da6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/res2net/faster_rcnn_r2_101_fpn_2x_coco/faster_rcnn_r2_101_fpn_2x_coco_20200514_231734.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :--------: | :-----: | :-----: | :------: | :------------: | :----: | :---------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R2-101-FPN | pytorch | 2x | 7.4 | - | 43.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/res2net/faster_rcnn_r2_101_fpn_2x_coco/faster_rcnn_r2_101_fpn_2x_coco-175f1da6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/res2net/faster_rcnn_r2_101_fpn_2x_coco/faster_rcnn_r2_101_fpn_2x_coco_20200514_231734.log.json) | ### Mask R-CNN -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | -|R2-101-FPN | pytorch | 2x | 7.9 | - | 43.6 | 38.7 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/res2net/mask_rcnn_r2_101_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/res2net/mask_rcnn_r2_101_fpn_2x_coco/mask_rcnn_r2_101_fpn_2x_coco-17f061e8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/res2net/mask_rcnn_r2_101_fpn_2x_coco/mask_rcnn_r2_101_fpn_2x_coco_20200515_002413.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :--------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :-------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R2-101-FPN | pytorch | 2x | 7.9 | - | 43.6 | 38.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/res2net/mask_rcnn_r2_101_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/res2net/mask_rcnn_r2_101_fpn_2x_coco/mask_rcnn_r2_101_fpn_2x_coco-17f061e8.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/res2net/mask_rcnn_r2_101_fpn_2x_coco/mask_rcnn_r2_101_fpn_2x_coco_20200515_002413.log.json) | ### Cascade R-CNN -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | -|R2-101-FPN | pytorch | 20e | 7.8 | - | 45.7 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/res2net/cascade_rcnn_r2_101_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/res2net/cascade_rcnn_r2_101_fpn_20e_coco/cascade_rcnn_r2_101_fpn_20e_coco-f4b7b7db.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/res2net/cascade_rcnn_r2_101_fpn_20e_coco/cascade_rcnn_r2_101_fpn_20e_coco_20200515_091644.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :--------: | :-----: | :-----: | :------: | :------------: | :----: | :-----------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R2-101-FPN | pytorch | 20e | 7.8 | - | 45.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/res2net/cascade_rcnn_r2_101_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/res2net/cascade_rcnn_r2_101_fpn_20e_coco/cascade_rcnn_r2_101_fpn_20e_coco-f4b7b7db.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/res2net/cascade_rcnn_r2_101_fpn_20e_coco/cascade_rcnn_r2_101_fpn_20e_coco_20200515_091644.log.json) | ### Cascade Mask R-CNN -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | -R2-101-FPN | pytorch | 20e | 9.5 | - | 46.4 | 40.0 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/res2net/cascade_mask_rcnn_r2_101_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/res2net/cascade_mask_rcnn_r2_101_fpn_20e_coco/cascade_mask_rcnn_r2_101_fpn_20e_coco-8a7b41e1.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/res2net/cascade_mask_rcnn_r2_101_fpn_20e_coco/cascade_mask_rcnn_r2_101_fpn_20e_coco_20200515_091645.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :--------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :----------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R2-101-FPN | pytorch | 20e | 9.5 | - | 46.4 | 40.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/res2net/cascade_mask_rcnn_r2_101_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/res2net/cascade_mask_rcnn_r2_101_fpn_20e_coco/cascade_mask_rcnn_r2_101_fpn_20e_coco-8a7b41e1.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/res2net/cascade_mask_rcnn_r2_101_fpn_20e_coco/cascade_mask_rcnn_r2_101_fpn_20e_coco_20200515_091645.log.json) | ### Hybrid Task Cascade (HTC) -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | -| R2-101-FPN | pytorch | 20e | - | - | 47.5 | 41.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/res2net/htc_r2_101_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/res2net/htc_r2_101_fpn_20e_coco/htc_r2_101_fpn_20e_coco-3a8d2112.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/res2net/htc_r2_101_fpn_20e_coco/htc_r2_101_fpn_20e_coco_20200515_150029.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :--------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :--------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R2-101-FPN | pytorch | 20e | - | - | 47.5 | 41.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/res2net/htc_r2_101_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/res2net/htc_r2_101_fpn_20e_coco/htc_r2_101_fpn_20e_coco-3a8d2112.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/res2net/htc_r2_101_fpn_20e_coco/htc_r2_101_fpn_20e_coco_20200515_150029.log.json) | - Res2Net ImageNet pretrained models are in [Res2Net-PretrainedModels](https://github.com/Res2Net/Res2Net-PretrainedModels). - More applications of Res2Net are in [Res2Net-Github](https://github.com/Res2Net/). + +## Citation + +```latex +@article{gao2019res2net, + title={Res2Net: A New Multi-scale Backbone Architecture}, + author={Gao, Shang-Hua and Cheng, Ming-Ming and Zhao, Kai and Zhang, Xin-Yu and Yang, Ming-Hsuan and Torr, Philip}, + journal={IEEE TPAMI}, + year={2020}, + doi={10.1109/TPAMI.2019.2938758}, +} +``` diff --git a/configs/res2net/metafile.yml b/configs/res2net/metafile.yml index 71809f3..27bac8c 100644 --- a/configs/res2net/metafile.yml +++ b/configs/res2net/metafile.yml @@ -1,6 +1,10 @@ -Collections: - - Name: Res2Net +Models: + - Name: faster_rcnn_r2_101_fpn_2x_coco + In Collection: Faster R-CNN + Config: configs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py Metadata: + Training Memory (GB): 7.4 + Epochs: 24 Training Data: COCO Training Techniques: - SGD with Momentum @@ -8,6 +12,12 @@ Collections: Training Resources: 8x V100 GPUs Architecture: - Res2Net + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/res2net/faster_rcnn_r2_101_fpn_2x_coco/faster_rcnn_r2_101_fpn_2x_coco-175f1da6.pth Paper: URL: https://arxiv.org/abs/1904.01169 Title: 'Res2Net for object detection and instance segmentation' @@ -16,26 +26,19 @@ Collections: URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/res2net.py#L239 Version: v2.1.0 -Models: - - Name: faster_rcnn_r2_101_fpn_2x_coco - In Collection: Res2Net - Config: configs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py - Metadata: - Training Memory (GB): 7.4 - Epochs: 24 - Results: - - Task: Object Detection - Dataset: COCO - Metrics: - box AP: 43.0 - Weights: https://download.openmmlab.com/mmdetection/v2.0/res2net/faster_rcnn_r2_101_fpn_2x_coco/faster_rcnn_r2_101_fpn_2x_coco-175f1da6.pth - - Name: mask_rcnn_r2_101_fpn_2x_coco - In Collection: Res2Net + In Collection: Mask R-CNN Config: configs/res2net/mask_rcnn_r2_101_fpn_2x_coco.py Metadata: Training Memory (GB): 7.9 Epochs: 24 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - Res2Net Results: - Task: Object Detection Dataset: COCO @@ -46,26 +49,54 @@ Models: Metrics: mask AP: 38.7 Weights: https://download.openmmlab.com/mmdetection/v2.0/res2net/mask_rcnn_r2_101_fpn_2x_coco/mask_rcnn_r2_101_fpn_2x_coco-17f061e8.pth + Paper: + URL: https://arxiv.org/abs/1904.01169 + Title: 'Res2Net for object detection and instance segmentation' + README: configs/res2net/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/res2net.py#L239 + Version: v2.1.0 - Name: cascade_rcnn_r2_101_fpn_20e_coco - In Collection: Res2Net + In Collection: Cascade R-CNN Config: configs/res2net/cascade_rcnn_r2_101_fpn_20e_coco.py Metadata: Training Memory (GB): 7.8 Epochs: 20 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - Res2Net Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 45.7 Weights: https://download.openmmlab.com/mmdetection/v2.0/res2net/cascade_rcnn_r2_101_fpn_20e_coco/cascade_rcnn_r2_101_fpn_20e_coco-f4b7b7db.pth + Paper: + URL: https://arxiv.org/abs/1904.01169 + Title: 'Res2Net for object detection and instance segmentation' + README: configs/res2net/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/res2net.py#L239 + Version: v2.1.0 - Name: cascade_mask_rcnn_r2_101_fpn_20e_coco - In Collection: Res2Net + In Collection: Cascade R-CNN Config: configs/res2net/cascade_mask_rcnn_r2_101_fpn_20e_coco.py Metadata: Training Memory (GB): 9.5 Epochs: 20 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - Res2Net Results: - Task: Object Detection Dataset: COCO @@ -76,12 +107,26 @@ Models: Metrics: mask AP: 40.0 Weights: https://download.openmmlab.com/mmdetection/v2.0/res2net/cascade_mask_rcnn_r2_101_fpn_20e_coco/cascade_mask_rcnn_r2_101_fpn_20e_coco-8a7b41e1.pth + Paper: + URL: https://arxiv.org/abs/1904.01169 + Title: 'Res2Net for object detection and instance segmentation' + README: configs/res2net/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/res2net.py#L239 + Version: v2.1.0 - Name: htc_r2_101_fpn_20e_coco - In Collection: Res2Net + In Collection: HTC Config: configs/res2net/htc_r2_101_fpn_20e_coco.py Metadata: Epochs: 20 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - Res2Net Results: - Task: Object Detection Dataset: COCO @@ -92,3 +137,10 @@ Models: Metrics: mask AP: 41.6 Weights: https://download.openmmlab.com/mmdetection/v2.0/res2net/htc_r2_101_fpn_20e_coco/htc_r2_101_fpn_20e_coco-3a8d2112.pth + Paper: + URL: https://arxiv.org/abs/1904.01169 + Title: 'Res2Net for object detection and instance segmentation' + README: configs/res2net/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/res2net.py#L239 + Version: v2.1.0 diff --git a/configs/resnest/README.md b/configs/resnest/README.md index de80430..3676e56 100644 --- a/configs/resnest/README.md +++ b/configs/resnest/README.md @@ -1,58 +1,54 @@ -# ResNeSt: Split-Attention Networks +# ResNeSt -## Abstract +> [ResNeSt: Split-Attention Networks](https://arxiv.org/abs/2004.08955) + + - +## Abstract It is well known that featuremap attention and multi-path representation are important for visual recognition. In this paper, we present a modularized architecture, which applies the channel-wise attention on different network branches to leverage their success in capturing cross-feature interactions and learning diverse representations. Our design results in a simple and unified computation block, which can be parameterized using only a few variables. Our model, named ResNeSt, outperforms EfficientNet in accuracy and latency trade-off on image classification. In addition, ResNeSt has achieved superior transfer learning results on several public benchmarks serving as the backbone, and has been adopted by the winning entries of COCO-LVIS challenge. -
- - - -## Citation - - - -```latex -@article{zhang2020resnest, -title={ResNeSt: Split-Attention Networks}, -author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander}, -journal={arXiv preprint arXiv:2004.08955}, -year={2020} -} -``` - ## Results and Models ### Faster R-CNN -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | -|S-50-FPN | pytorch | 1x | 4.8 | - | 42.0 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnest/faster_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/faster_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20200926_125502-20289c16.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/resnest/faster_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/faster_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco-20200926_125502.log.json) | -|S-101-FPN | pytorch | 1x | 7.1 | - | 44.5 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/faster_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnest/faster_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/faster_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20201006_021058-421517f1.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/resnest/faster_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/faster_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco-20201006_021058.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------: | :-----: | :-----: | :------: | :------------: | :----: | :------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| S-50-FPN | pytorch | 1x | 4.8 | - | 42.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnest/faster_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/faster_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20200926_125502-20289c16.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/resnest/faster_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/faster_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco-20200926_125502.log.json) | +| S-101-FPN | pytorch | 1x | 7.1 | - | 44.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/faster_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnest/faster_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/faster_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20201006_021058-421517f1.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/resnest/faster_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/faster_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco-20201006_021058.log.json) | ### Mask R-CNN -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | -|S-50-FPN | pytorch | 1x | 5.5 | - | 42.6 | 38.1 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnest/mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20200926_125503-8a2c3d47.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/resnest/mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco-20200926_125503.log.json) | -|S-101-FPN | pytorch | 1x | 7.8 | - | 45.2 | 40.2 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnest/mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20201005_215831-af60cdf9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/resnest/mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco-20201005_215831.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :----------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| S-50-FPN | pytorch | 1x | 5.5 | - | 42.6 | 38.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnest/mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20200926_125503-8a2c3d47.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/resnest/mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco-20200926_125503.log.json) | +| S-101-FPN | pytorch | 1x | 7.8 | - | 45.2 | 40.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnest/mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20201005_215831-af60cdf9.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/resnest/mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco-20201005_215831.log.json) | ### Cascade R-CNN -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | -|S-50-FPN | pytorch | 1x | - | - | 44.5 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/cascade_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/cascade_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20201122_213640-763cc7b5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/cascade_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco-20201005_113242.log.json) | -|S-101-FPN | pytorch | 1x | 8.4 | - | 46.8 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/cascade_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/cascade_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20201005_113242-b9459f8f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/cascade_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco-20201122_213640.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------: | :-----: | :-----: | :------: | :------------: | :----: | :-------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| S-50-FPN | pytorch | 1x | - | - | 44.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/cascade_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/cascade_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20201122_213640-763cc7b5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/cascade_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco-20201005_113242.log.json) | +| S-101-FPN | pytorch | 1x | 8.4 | - | 46.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/cascade_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/cascade_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20201005_113242-b9459f8f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/cascade_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco-20201122_213640.log.json) | ### Cascade Mask R-CNN -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | -|S-50-FPN | pytorch | 1x | - | - | 45.4 | 39.5 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/cascade_mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/cascade_mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20201122_104428-99eca4c7.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/cascade_mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco-20201122_104428.log.json) | -|S-101-FPN | pytorch | 1x | 10.5 | - | 47.7 | 41.4 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/cascade_mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/cascade_mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20201005_113243-42607475.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/cascade_mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco-20201005_113243.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| S-50-FPN | pytorch | 1x | - | - | 45.4 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/cascade_mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/cascade_mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20201122_104428-99eca4c7.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/cascade_mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco-20201122_104428.log.json) | +| S-101-FPN | pytorch | 1x | 10.5 | - | 47.7 | 41.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/cascade_mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/cascade_mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20201005_113243-42607475.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/cascade_mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco-20201005_113243.log.json) | + +## Citation + +```latex +@article{zhang2020resnest, +title={ResNeSt: Split-Attention Networks}, +author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander}, +journal={arXiv preprint arXiv:2004.08955}, +year={2020} +} +``` diff --git a/configs/resnest/metafile.yml b/configs/resnest/metafile.yml index 3323fad..cfeec71 100644 --- a/configs/resnest/metafile.yml +++ b/configs/resnest/metafile.yml @@ -1,6 +1,10 @@ -Collections: - - Name: ResNeSt +Models: + - Name: faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco + In Collection: Faster R-CNN + Config: configs/resnest/faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py Metadata: + Training Memory (GB): 4.8 + Epochs: 12 Training Data: COCO Training Techniques: - SGD with Momentum @@ -8,6 +12,12 @@ Collections: Training Resources: 8x V100 GPUs Architecture: - ResNeSt + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/resnest/faster_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/faster_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20200926_125502-20289c16.pth Paper: URL: https://arxiv.org/abs/2004.08955 Title: 'ResNeSt: Split-Attention Networks' @@ -16,39 +26,46 @@ Collections: URL: https://github.com/open-mmlab/mmdetection/blob/v2.7.0/mmdet/models/backbones/resnest.py#L273 Version: v2.7.0 -Models: - - Name: faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco - In Collection: ResNeSt - Config: configs/resnest/faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py - Metadata: - Training Memory (GB): 4.8 - Epochs: 12 - Results: - - Task: Object Detection - Dataset: COCO - Metrics: - box AP: 42.0 - Weights: https://download.openmmlab.com/mmdetection/v2.0/resnest/faster_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/faster_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20200926_125502-20289c16.pth - - Name: faster_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco - In Collection: ResNeSt + In Collection: Faster R-CNN Config: configs/resnest/faster_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py Metadata: Training Memory (GB): 7.1 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - ResNeSt Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 44.5 Weights: https://download.openmmlab.com/mmdetection/v2.0/resnest/faster_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/faster_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20201006_021058-421517f1.pth + Paper: + URL: https://arxiv.org/abs/2004.08955 + Title: 'ResNeSt: Split-Attention Networks' + README: configs/resnest/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.7.0/mmdet/models/backbones/resnest.py#L273 + Version: v2.7.0 - Name: mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco - In Collection: ResNeSt + In Collection: Mask R-CNN Config: configs/resnest/mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py Metadata: Training Memory (GB): 5.5 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - ResNeSt Results: - Task: Object Detection Dataset: COCO @@ -59,13 +76,27 @@ Models: Metrics: mask AP: 38.1 Weights: https://download.openmmlab.com/mmdetection/v2.0/resnest/mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20200926_125503-8a2c3d47.pth + Paper: + URL: https://arxiv.org/abs/2004.08955 + Title: 'ResNeSt: Split-Attention Networks' + README: configs/resnest/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.7.0/mmdet/models/backbones/resnest.py#L273 + Version: v2.7.0 - Name: mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco - In Collection: ResNeSt + In Collection: Mask R-CNN Config: configs/resnest/mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py Metadata: Training Memory (GB): 7.8 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - ResNeSt Results: - Task: Object Detection Dataset: COCO @@ -76,37 +107,79 @@ Models: Metrics: mask AP: 40.2 Weights: https://download.openmmlab.com/mmdetection/v2.0/resnest/mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20201005_215831-af60cdf9.pth + Paper: + URL: https://arxiv.org/abs/2004.08955 + Title: 'ResNeSt: Split-Attention Networks' + README: configs/resnest/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.7.0/mmdet/models/backbones/resnest.py#L273 + Version: v2.7.0 - Name: cascade_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco - In Collection: ResNeSt + In Collection: Cascade R-CNN Config: configs/resnest/cascade_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py Metadata: Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - ResNeSt Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 44.5 Weights: https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/cascade_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20201122_213640-763cc7b5.pth + Paper: + URL: https://arxiv.org/abs/2004.08955 + Title: 'ResNeSt: Split-Attention Networks' + README: configs/resnest/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.7.0/mmdet/models/backbones/resnest.py#L273 + Version: v2.7.0 - Name: cascade_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco - In Collection: ResNeSt + In Collection: Cascade R-CNN Config: configs/resnest/cascade_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py Metadata: Training Memory (GB): 8.4 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - ResNeSt Results: - Task: Object Detection Dataset: COCO Metrics: box AP: 46.8 Weights: https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/cascade_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20201005_113242-b9459f8f.pth + Paper: + URL: https://arxiv.org/abs/2004.08955 + Title: 'ResNeSt: Split-Attention Networks' + README: configs/resnest/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.7.0/mmdet/models/backbones/resnest.py#L273 + Version: v2.7.0 - Name: cascade_mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco - In Collection: ResNeSt + In Collection: Cascade R-CNN Config: configs/resnest/cascade_mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py Metadata: Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - ResNeSt Results: - Task: Object Detection Dataset: COCO @@ -117,13 +190,27 @@ Models: Metrics: mask AP: 39.5 Weights: https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/cascade_mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20201122_104428-99eca4c7.pth + Paper: + URL: https://arxiv.org/abs/2004.08955 + Title: 'ResNeSt: Split-Attention Networks' + README: configs/resnest/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.7.0/mmdet/models/backbones/resnest.py#L273 + Version: v2.7.0 - Name: cascade_mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco - In Collection: ResNeSt + In Collection: Cascade R-CNN Config: configs/resnest/cascade_mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py Metadata: Training Memory (GB): 10.5 Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - ResNeSt Results: - Task: Object Detection Dataset: COCO @@ -134,3 +221,10 @@ Models: Metrics: mask AP: 41.4 Weights: https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/cascade_mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20201005_113243-42607475.pth + Paper: + URL: https://arxiv.org/abs/2004.08955 + Title: 'ResNeSt: Split-Attention Networks' + README: configs/resnest/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.7.0/mmdet/models/backbones/resnest.py#L273 + Version: v2.7.0 diff --git a/configs/resnet_strikes_back/README.md b/configs/resnet_strikes_back/README.md new file mode 100644 index 0000000..dd00b20 --- /dev/null +++ b/configs/resnet_strikes_back/README.md @@ -0,0 +1,40 @@ +# ResNet strikes back + +> [ResNet strikes back: An improved training procedure in timm](https://arxiv.org/abs/2110.00476) + + + +## Abstract + +The influential Residual Networks designed by He et al. remain the gold-standard architecture in numerous scientific publications. They typically serve as the default architecture in studies, or as baselines when new architectures are proposed. Yet there has been significant progress on best practices for training neural networks since the inception of the ResNet architecture in 2015. Novel optimization & dataaugmentation have increased the effectiveness of the training recipes. + +In this paper, we re-evaluate the performance of the vanilla ResNet-50 when trained with a procedure that integrates such advances. We share competitive training settings and pre-trained models in the timm open-source library, with the hope that they will serve as better baselines for future work. For instance, with our more demanding training setting, a vanilla ResNet-50 reaches 80.4% top-1 accuracy at resolution 224×224 on ImageNet-val without extra data or distillation. We also report the performance achieved with popular models with our training procedure. + +
+ +
+ +## Results and Models + +| Method | Backbone | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :----------------: | :------: | :-----: | :------: | :------------: | :---------: | :---------: | :-----------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Faster R-CNN | R-50 rsb | 1x | 3.9 | - | 40.8 (+3.4) | - | [Config](./faster_rcnn_r50_fpn_rsb-pretrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnet_strikes_back/faster_rcnn_r50_fpn_rsb-pretrain_1x_coco/faster_rcnn_r50_fpn_rsb-pretrain_1x_coco_20220113_162229-32ae82a9.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/resnet_strikes_back/faster_rcnn_r50_fpn_rsb-pretrain_1x_coco/faster_rcnn_r50_fpn_rsb-pretrain_1x_coco_20220113_162229.log.json) | +| Mask R-CNN | R-50 rsb | 1x | 4.5 | - | 41.2 (+3.0) | 38.2 (+3.0) | [Config](./mask_rcnn_r50_fpn_rsb-pretrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnet_strikes_back/mask_rcnn_r50_fpn_rsb-pretrain_1x_coco/mask_rcnn_r50_fpn_rsb-pretrain_1x_coco_20220113_174054-06ce8ba0.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/resnet_strikes_back/mask_rcnn_r50_fpn_rsb-pretrain_1x_coco/mask_rcnn_r50_fpn_rsb-pretrain_1x_coco_20220113_174054.log.json) | +| Cascade Mask R-CNN | R-50 rsb | 1x | 6.2 | - | 44.8 (+3.6) | 39.9 (+3.6) | [Config](./cascade_mask_rcnn_r50_fpn_rsb-pretrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnet_strikes_back/cascade_mask_rcnn_r50_fpn_rsb-pretrain_1x_coco/cascade_mask_rcnn_r50_fpn_rsb-pretrain_1x_coco_20220113_193636-8b9ad50f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/resnet_strikes_back/cascade_mask_rcnn_r50_fpn_rsb-pretrain_1x_coco/cascade_mask_rcnn_r50_fpn_rsb-pretrain_1x_coco_20220113_193636.log.json) | +| RetinaNet | R-50 rsb | 1x | 3.8 | - | 39.0 (+2.5) | - | [Config](./retinanet_r50_fpn_rsb-pretrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/resnet_strikes_back/retinanet_r50_fpn_rsb-pretrain_1x_coco/retinanet_r50_fpn_rsb-pretrain_1x_coco_20220113_175432-bd24aae9.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/resnet_strikes_back/retinanet_r50_fpn_rsb-pretrain_1x_coco/retinanet_r50_fpn_rsb-pretrain_1x_coco_20220113_175432.log.json) | + +**Notes:** + +- 'rsb' is short for 'resnet strikes back' +- We have done some grid searches on learning rate and weight decay and get these optimal hyper-parameters. + +## Citation + +```latex +@article{wightman2021resnet, +title={Resnet strikes back: An improved training procedure in timm}, +author={Ross Wightman, Hugo Touvron, Hervé Jégou}, +journal={arXiv preprint arXiv:2110.00476}, +year={2021} +} +``` diff --git a/configs/resnet_strikes_back/cascade_mask_rcnn_r50_fpn_rsb-pretrain_1x_coco.py b/configs/resnet_strikes_back/cascade_mask_rcnn_r50_fpn_rsb-pretrain_1x_coco.py new file mode 100644 index 0000000..8b601f0 --- /dev/null +++ b/configs/resnet_strikes_back/cascade_mask_rcnn_r50_fpn_rsb-pretrain_1x_coco.py @@ -0,0 +1,18 @@ +_base_ = [ + '../_base_/models/cascade_mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +checkpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a1-600e_in1k_20211228-20e21305.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint))) + +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.0002, + weight_decay=0.05, + paramwise_cfg=dict(norm_decay_mult=0., bypass_duplicate=True)) diff --git a/configs/resnet_strikes_back/faster_rcnn_r50_fpn_rsb-pretrain_1x_coco.py b/configs/resnet_strikes_back/faster_rcnn_r50_fpn_rsb-pretrain_1x_coco.py new file mode 100644 index 0000000..fe86684 --- /dev/null +++ b/configs/resnet_strikes_back/faster_rcnn_r50_fpn_rsb-pretrain_1x_coco.py @@ -0,0 +1,18 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +checkpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a1-600e_in1k_20211228-20e21305.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint))) + +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.0002, + weight_decay=0.05, + paramwise_cfg=dict(norm_decay_mult=0., bypass_duplicate=True)) diff --git a/configs/resnet_strikes_back/mask_rcnn_r50_fpn_rsb-pretrain_1x_coco.py b/configs/resnet_strikes_back/mask_rcnn_r50_fpn_rsb-pretrain_1x_coco.py new file mode 100644 index 0000000..321d98e --- /dev/null +++ b/configs/resnet_strikes_back/mask_rcnn_r50_fpn_rsb-pretrain_1x_coco.py @@ -0,0 +1,18 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +checkpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a1-600e_in1k_20211228-20e21305.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint))) + +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.0002, + weight_decay=0.05, + paramwise_cfg=dict(norm_decay_mult=0., bypass_duplicate=True)) diff --git a/configs/resnet_strikes_back/metafile.yml b/configs/resnet_strikes_back/metafile.yml new file mode 100644 index 0000000..4c85a16 --- /dev/null +++ b/configs/resnet_strikes_back/metafile.yml @@ -0,0 +1,116 @@ +Models: + - Name: faster_rcnn_r50_fpn_rsb-pretrain_1x_coco + In Collection: Faster R-CNN + Config: configs/resnet_strikes_back/faster_rcnn_r50_fpn_rsb-pretrain_1x_coco.py + Metadata: + Training Memory (GB): 3.9 + Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - ResNet + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/resnet_strikes_back/faster_rcnn_r50_fpn_rsb-pretrain_1x_coco/faster_rcnn_r50_fpn_rsb-pretrain_1x_coco_20220113_162229-32ae82a9.pth + Paper: + URL: https://arxiv.org/abs/2110.00476 + Title: 'ResNet strikes back: An improved training procedure in timm' + README: configs/resnet_strikes_back/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.22.0/configs/resnet_strikes_back/README.md + Version: v2.22.0 + + - Name: cascade_mask_rcnn_r50_fpn_rsb-pretrain_1x_coco + In Collection: Cascade R-CNN + Config: configs/resnet_strikes_back/cascade_mask_rcnn_r50_fpn_rsb-pretrain_1x_coco.py + Metadata: + Training Memory (GB): 6.2 + Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - ResNet + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/resnet_strikes_back/cascade_mask_rcnn_r50_fpn_rsb-pretrain_1x_coco/cascade_mask_rcnn_r50_fpn_rsb-pretrain_1x_coco_20220113_193636-8b9ad50f.pth + Paper: + URL: https://arxiv.org/abs/2110.00476 + Title: 'ResNet strikes back: An improved training procedure in timm' + README: configs/resnet_strikes_back/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.22.0/configs/resnet_strikes_back/README.md + Version: v2.22.0 + + - Name: retinanet_r50_fpn_rsb-pretrain_1x_coco + In Collection: RetinaNet + Config: configs/resnet_strikes_back/retinanet_r50_fpn_rsb-pretrain_1x_coco.py + Metadata: + Training Memory (GB): 3.8 + Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - ResNet + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/resnet_strikes_back/retinanet_r50_fpn_rsb-pretrain_1x_coco/retinanet_r50_fpn_rsb-pretrain_1x_coco_20220113_175432-bd24aae9.pth + Paper: + URL: https://arxiv.org/abs/2110.00476 + Title: 'ResNet strikes back: An improved training procedure in timm' + README: configs/resnet_strikes_back/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.22.0/configs/resnet_strikes_back/README.md + Version: v2.22.0 + + - Name: mask_rcnn_r50_fpn_rsb-pretrain_1x_coco + In Collection: Mask R-CNN + Config: configs/resnet_strikes_back/mask_rcnn_r50_fpn_rsb-pretrain_1x_coco.py + Metadata: + Training Memory (GB): 4.5 + Epochs: 12 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - ResNet + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.2 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/resnet_strikes_back/mask_rcnn_r50_fpn_rsb-pretrain_1x_coco/mask_rcnn_r50_fpn_rsb-pretrain_1x_coco_20220113_174054-06ce8ba0.pth + Paper: + URL: https://arxiv.org/abs/2110.00476 + Title: 'ResNet strikes back: An improved training procedure in timm' + README: configs/resnet_strikes_back/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.22.0/configs/resnet_strikes_back/README.md + Version: v2.22.0 diff --git a/configs/resnet_strikes_back/retinanet_r50_fpn_rsb-pretrain_1x_coco.py b/configs/resnet_strikes_back/retinanet_r50_fpn_rsb-pretrain_1x_coco.py new file mode 100644 index 0000000..480697a --- /dev/null +++ b/configs/resnet_strikes_back/retinanet_r50_fpn_rsb-pretrain_1x_coco.py @@ -0,0 +1,18 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +checkpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a1-600e_in1k_20211228-20e21305.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint))) + +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.0001, + weight_decay=0.05, + paramwise_cfg=dict(norm_decay_mult=0., bypass_duplicate=True)) diff --git a/configs/retinanet/README.md b/configs/retinanet/README.md index 34dd55e..b9e0a2a 100644 --- a/configs/retinanet/README.md +++ b/configs/retinanet/README.md @@ -1,22 +1,47 @@ -# Focal Loss for Dense Object Detection +# RetinaNet -## Abstract +> [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002) - + + +## Abstract The highest accuracy object detectors to date are based on a two-stage approach popularized by R-CNN, where a classifier is applied to a sparse set of candidate object locations. In contrast, one-stage detectors that are applied over a regular, dense sampling of possible object locations have the potential to be faster and simpler, but have trailed the accuracy of two-stage detectors thus far. In this paper, we investigate why this is the case. We discover that the extreme foreground-background class imbalance encountered during training of dense detectors is the central cause. We propose to address this class imbalance by reshaping the standard cross entropy loss such that it down-weights the loss assigned to well-classified examples. Our novel Focal Loss focuses training on a sparse set of hard examples and prevents the vast number of easy negatives from overwhelming the detector during training. To evaluate the effectiveness of our loss, we design and train a simple dense detector we call RetinaNet. Our results show that when trained with the focal loss, RetinaNet is able to match the speed of previous one-stage detectors while surpassing the accuracy of all existing state-of-the-art two-stage detectors. -
- - +## Results and Models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :----------: | :------: | :------------: | :----: | :-------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-18-FPN | pytorch | 1x | 1.7 | | 31.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r18_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r18_fpn_1x_coco/retinanet_r18_fpn_1x_coco_20220407_171055-614fd399.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r18_fpn_1x_coco/retinanet_r18_fpn_1x_coco_20220407_171055.log.json) | +| R-18-FPN | pytorch | 1x(1 x 8 BS) | 5.0 | | 31.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r18_fpn_1x8_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r18_fpn_1x8_1x_coco/retinanet_r18_fpn_1x8_1x_coco_20220407_171255-4ea310d7.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r18_fpn_1x8_1x_coco/retinanet_r18_fpn_1x8_1x_coco_20220407_171255.log.json) | +| R-50-FPN | caffe | 1x | 3.5 | 18.6 | 36.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_caffe_fpn_1x_coco/retinanet_r50_caffe_fpn_1x_coco_20200531-f11027c5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_caffe_fpn_1x_coco/retinanet_r50_caffe_fpn_1x_coco_20200531_012518.log.json) | +| R-50-FPN | pytorch | 1x | 3.8 | 19.0 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_1x_coco/retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_1x_coco/retinanet_r50_fpn_1x_coco_20200130_002941.log.json) | +| R-50-FPN (FP16) | pytorch | 1x | 2.8 | 31.6 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r50_fpn_fp16_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fp16/retinanet_r50_fpn_fp16_1x_coco/retinanet_r50_fpn_fp16_1x_coco_20200702-0dbfb212.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/fp16/retinanet_r50_fpn_fp16_1x_coco/retinanet_r50_fpn_fp16_1x_coco_20200702_020127.log.json) | +| R-50-FPN | pytorch | 2x | - | - | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r50_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_2x_coco/retinanet_r50_fpn_2x_coco_20200131-fdb43119.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_2x_coco/retinanet_r50_fpn_2x_coco_20200131_114738.log.json) | +| R-101-FPN | caffe | 1x | 5.5 | 14.7 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_caffe_fpn_1x_coco/retinanet_r101_caffe_fpn_1x_coco_20200531-b428fa0f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_caffe_fpn_1x_coco/retinanet_r101_caffe_fpn_1x_coco_20200531_012536.log.json) | +| R-101-FPN | pytorch | 1x | 5.7 | 15.0 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_fpn_1x_coco/retinanet_r101_fpn_1x_coco_20200130-7a93545f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_fpn_1x_coco/retinanet_r101_fpn_1x_coco_20200130_003055.log.json) | +| R-101-FPN | pytorch | 2x | - | - | 38.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r101_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_fpn_2x_coco/retinanet_r101_fpn_2x_coco_20200131-5560aee8.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_fpn_2x_coco/retinanet_r101_fpn_2x_coco_20200131_114859.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 7.0 | 12.1 | 39.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_32x4d_fpn_1x_coco/retinanet_x101_32x4d_fpn_1x_coco_20200130-5c8b7ec4.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_32x4d_fpn_1x_coco/retinanet_x101_32x4d_fpn_1x_coco_20200130_003004.log.json) | +| X-101-32x4d-FPN | pytorch | 2x | - | - | 40.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_x101_32x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_32x4d_fpn_2x_coco/retinanet_x101_32x4d_fpn_2x_coco_20200131-237fc5e1.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_32x4d_fpn_2x_coco/retinanet_x101_32x4d_fpn_2x_coco_20200131_114812.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 10.0 | 8.7 | 41.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_64x4d_fpn_1x_coco/retinanet_x101_64x4d_fpn_1x_coco_20200130-366f5af1.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_64x4d_fpn_1x_coco/retinanet_x101_64x4d_fpn_1x_coco_20200130_003008.log.json) | +| X-101-64x4d-FPN | pytorch | 2x | - | - | 40.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_x101_64x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_64x4d_fpn_2x_coco/retinanet_x101_64x4d_fpn_2x_coco_20200131-bca068ab.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_64x4d_fpn_2x_coco/retinanet_x101_64x4d_fpn_2x_coco_20200131_114833.log.json) | -## Citation +## Pre-trained Models - +We also train some models with longer schedules and multi-scale training. The users could finetune them for downstream tasks. + +| Backbone | Style | Lr schd | Mem (GB) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :----: | :-----------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | pytorch | 3x | 3.5 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r50_fpn_mstrain_640-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_mstrain_3x_coco/retinanet_r50_fpn_mstrain_3x_coco_20210718_220633-88476508.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_mstrain_3x_coco/retinanet_r50_fpn_mstrain_3x_coco_20210718_220633-88476508.log.json) | +| R-101-FPN | caffe | 3x | 5.4 | 40.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r101_caffe_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_caffe_fpn_mstrain_3x_coco/retinanet_r101_caffe_fpn_mstrain_3x_coco_20210721_063439-88a8a944.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_caffe_fpn_mstrain_3x_coco/retinanet_r101_caffe_fpn_mstrain_3x_coco_20210721_063439-88a8a944.log.json) | +| R-101-FPN | pytorch | 3x | 5.4 | 41 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r101_fpn_mstrain_640-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_fpn_mstrain_3x_coco/retinanet_r101_fpn_mstrain_3x_coco_20210720_214650-7ee888e0.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_fpn_mstrain_3x_coco/retinanet_r101_fpn_mstrain_3x_coco_20210720_214650-7ee888e0.log.json) | +| X-101-64x4d-FPN | pytorch | 3x | 9.8 | 41.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_x101_64x4d_fpn_mstrain_640-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_64x4d_fpn_mstrain_3x_coco/retinanet_x101_64x4d_fpn_mstrain_3x_coco_20210719_051838-022c2187.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_64x4d_fpn_mstrain_3x_coco/retinanet_x101_64x4d_fpn_mstrain_3x_coco_20210719_051838-022c2187.log.json) | + +## Citation ```latex @inproceedings{lin2017focal, @@ -26,30 +51,3 @@ The highest accuracy object detectors to date are based on a two-stage approach year={2017} } ``` - -## Results and models - -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | -| R-50-FPN | caffe | 1x | 3.5 | 18.6 | 36.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_caffe_fpn_1x_coco/retinanet_r50_caffe_fpn_1x_coco_20200531-f11027c5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_caffe_fpn_1x_coco/retinanet_r50_caffe_fpn_1x_coco_20200531_012518.log.json) | -| R-50-FPN | pytorch | 1x | 3.8 | 19.0 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_1x_coco/retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_1x_coco/retinanet_r50_fpn_1x_coco_20200130_002941.log.json) | -| R-50-FPN (FP16) | pytorch | 1x | 2.8 | 31.6 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fp16/retinanet_r50_fpn_fp16_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fp16/retinanet_r50_fpn_fp16_1x_coco/retinanet_r50_fpn_fp16_1x_coco_20200702-0dbfb212.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fp16/retinanet_r50_fpn_fp16_1x_coco/retinanet_r50_fpn_fp16_1x_coco_20200702_020127.log.json) | -| R-50-FPN | pytorch | 2x | - | - | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r50_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_2x_coco/retinanet_r50_fpn_2x_coco_20200131-fdb43119.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_2x_coco/retinanet_r50_fpn_2x_coco_20200131_114738.log.json) | -| R-101-FPN | caffe | 1x | 5.5 | 14.7 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_caffe_fpn_1x_coco/retinanet_r101_caffe_fpn_1x_coco_20200531-b428fa0f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_caffe_fpn_1x_coco/retinanet_r101_caffe_fpn_1x_coco_20200531_012536.log.json) | -| R-101-FPN | pytorch | 1x | 5.7 | 15.0 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_fpn_1x_coco/retinanet_r101_fpn_1x_coco_20200130-7a93545f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_fpn_1x_coco/retinanet_r101_fpn_1x_coco_20200130_003055.log.json) | -| R-101-FPN | pytorch | 2x | - | - | 38.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r101_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_fpn_2x_coco/retinanet_r101_fpn_2x_coco_20200131-5560aee8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_fpn_2x_coco/retinanet_r101_fpn_2x_coco_20200131_114859.log.json) | -| X-101-32x4d-FPN | pytorch | 1x | 7.0 | 12.1 | 39.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_32x4d_fpn_1x_coco/retinanet_x101_32x4d_fpn_1x_coco_20200130-5c8b7ec4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_32x4d_fpn_1x_coco/retinanet_x101_32x4d_fpn_1x_coco_20200130_003004.log.json) | -| X-101-32x4d-FPN | pytorch | 2x | - | - | 40.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_x101_32x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_32x4d_fpn_2x_coco/retinanet_x101_32x4d_fpn_2x_coco_20200131-237fc5e1.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_32x4d_fpn_2x_coco/retinanet_x101_32x4d_fpn_2x_coco_20200131_114812.log.json) | -| X-101-64x4d-FPN | pytorch | 1x | 10.0 | 8.7 | 41.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_64x4d_fpn_1x_coco/retinanet_x101_64x4d_fpn_1x_coco_20200130-366f5af1.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_64x4d_fpn_1x_coco/retinanet_x101_64x4d_fpn_1x_coco_20200130_003008.log.json) | -| X-101-64x4d-FPN | pytorch | 2x | - | - | 40.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_x101_64x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_64x4d_fpn_2x_coco/retinanet_x101_64x4d_fpn_2x_coco_20200131-bca068ab.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_64x4d_fpn_2x_coco/retinanet_x101_64x4d_fpn_2x_coco_20200131_114833.log.json) | - -## Pre-trained Models - -We also train some models with longer schedules and multi-scale training. The users could finetune them for downstream tasks. - -| Backbone | Style | Lr schd | Mem (GB) | box AP | Config | Download | -| :----------------: | :-----: | :-----: | :------: | :----: | :------: | :--------: | -| R-50-FPN | pytorch| 3x | 3.5 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r50_fpn_mstrain_640-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_mstrain_3x_coco/retinanet_r50_fpn_mstrain_3x_coco_20210718_220633-88476508.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_mstrain_3x_coco/retinanet_r50_fpn_mstrain_3x_coco_20210718_220633-88476508.log.json) -| R-101-FPN | caffe | 3x | 5.4 | 40.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r101_caffe_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_caffe_fpn_mstrain_3x_coco/retinanet_r101_caffe_fpn_mstrain_3x_coco_20210721_063439-88a8a944.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_caffe_fpn_mstrain_3x_coco/retinanet_r101_caffe_fpn_mstrain_3x_coco_20210721_063439-88a8a944.log.json) -| R-101-FPN | pytorch| 3x | 5.4 | 41 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r101_fpn_mstrain_640-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_fpn_mstrain_3x_coco/retinanet_r101_fpn_mstrain_3x_coco_20210720_214650-7ee888e0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_fpn_mstrain_3x_coco/retinanet_r101_fpn_mstrain_3x_coco_20210720_214650-7ee888e0.log.json) -| X-101-64x4d-FPN | pytorch| 3x | 9.8 | 41.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_x101_64x4d_fpn_mstrain_640-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_64x4d_fpn_mstrain_3x_coco/retinanet_x101_64x4d_fpn_mstrain_3x_coco_20210719_051838-022c2187.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_64x4d_fpn_mstrain_3x_coco/retinanet_x101_64x4d_fpn_mstrain_3x_coco_20210719_051838-022c2187.log.json) diff --git a/configs/retinanet/metafile.yml b/configs/retinanet/metafile.yml index b04a06c..2080771 100644 --- a/configs/retinanet/metafile.yml +++ b/configs/retinanet/metafile.yml @@ -19,6 +19,34 @@ Collections: Version: v2.0.0 Models: + - Name: retinanet_r18_fpn_1x_coco + In Collection: RetinaNet + Config: configs/retinanet/retinanet_r18_fpn_1x_coco.py + Metadata: + Training Memory (GB): 1.7 + Training Resources: 8x V100 GPUs + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 31.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r18_fpn_1x_coco/retinanet_r18_fpn_1x_coco_20220407_171055-614fd399.pth + + - Name: retinanet_r18_fpn_1x8_1x_coco + In Collection: RetinaNet + Config: configs/retinanet/retinanet_r18_fpn_1x8_1x_coco.py + Metadata: + Training Memory (GB): 5.0 + Training Resources: 1x V100 GPUs + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 31.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r18_fpn_1x8_1x_coco/retinanet_r18_fpn_1x8_1x_coco_20220407_171255-4ea310d7.pth + - Name: retinanet_r50_caffe_fpn_1x_coco In Collection: RetinaNet Config: configs/retinanet/retinanet_r50_caffe_fpn_1x_coco.py @@ -61,7 +89,7 @@ Models: - Name: retinanet_r50_fpn_fp16_1x_coco In Collection: RetinaNet - Config: configs/fp16/retinanet_r50_fpn_fp16_1x_coco.py + Config: configs/retinanet/retinanet_r50_fpn_fp16_1x_coco.py Metadata: Training Memory (GB): 2.8 Training Techniques: diff --git a/configs/retinanet/retinanet_r18_fpn_1x8_1x_coco.py b/configs/retinanet/retinanet_r18_fpn_1x8_1x_coco.py new file mode 100644 index 0000000..01a35f2 --- /dev/null +++ b/configs/retinanet/retinanet_r18_fpn_1x8_1x_coco.py @@ -0,0 +1,23 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +# data +data = dict(samples_per_gpu=8) + +# optimizer +model = dict( + backbone=dict( + depth=18, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18')), + neck=dict(in_channels=[64, 128, 256, 512])) + +# Note: If the learning rate is set to 0.0025, the mAP will be 32.4. +optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (1 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=8) diff --git a/configs/retinanet/retinanet_r18_fpn_1x_coco.py b/configs/retinanet/retinanet_r18_fpn_1x_coco.py new file mode 100644 index 0000000..6197b32 --- /dev/null +++ b/configs/retinanet/retinanet_r18_fpn_1x_coco.py @@ -0,0 +1,18 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +# optimizer +model = dict( + backbone=dict( + depth=18, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18')), + neck=dict(in_channels=[64, 128, 256, 512])) +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (2 samples per GPU) +auto_scale_lr = dict(base_batch_size=16) diff --git a/configs/rpn/README.md b/configs/rpn/README.md index d1bf674..99addc0 100644 --- a/configs/rpn/README.md +++ b/configs/rpn/README.md @@ -1,23 +1,34 @@ -# Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks +# RPN -## Abstract +> [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](https://arxiv.org/abs/1506.01497) + + - +## Abstract State-of-the-art object detection networks depend on region proposal algorithms to hypothesize object locations. Advances like SPPnet and Fast R-CNN have reduced the running time of these detection networks, exposing region proposal computation as a bottleneck. In this work, we introduce a Region Proposal Network (RPN) that shares full-image convolutional features with the detection network, thus enabling nearly cost-free region proposals. An RPN is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position. The RPN is trained end-to-end to generate high-quality region proposals, which are used by Fast R-CNN for detection. We further merge RPN and Fast R-CNN into a single network by sharing their convolutional features---using the recently popular terminology of neural networks with 'attention' mechanisms, the RPN component tells the unified network where to look. For the very deep VGG-16 model, our detection system has a frame rate of 5fps (including all steps) on a GPU, while achieving state-of-the-art object detection accuracy on PASCAL VOC 2007, 2012, and MS COCO datasets with only 300 proposals per image. In ILSVRC and COCO 2015 competitions, Faster R-CNN and RPN are the foundations of the 1st-place winning entries in several tracks. -
- - +## Results and Models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | AR1000 | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | caffe | 1x | 3.5 | 22.6 | 58.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_caffe_fpn_1x_coco/rpn_r50_caffe_fpn_1x_coco_20200531-5b903a37.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_caffe_fpn_1x_coco/rpn_r50_caffe_fpn_1x_coco_20200531_012334.log.json) | +| R-50-FPN | pytorch | 1x | 3.8 | 22.3 | 58.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_fpn_1x_coco/rpn_r50_fpn_1x_coco_20200218-5525fa2e.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_fpn_1x_coco/rpn_r50_fpn_1x_coco_20200218_151240.log.json) | +| R-50-FPN | pytorch | 2x | - | - | 58.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_r50_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_fpn_2x_coco/rpn_r50_fpn_2x_coco_20200131-0728c9b3.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_fpn_2x_coco/rpn_r50_fpn_2x_coco_20200131_190631.log.json) | +| R-101-FPN | caffe | 1x | 5.4 | 17.3 | 60.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r101_caffe_fpn_1x_coco/rpn_r101_caffe_fpn_1x_coco_20200531-0629a2e2.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r101_caffe_fpn_1x_coco/rpn_r101_caffe_fpn_1x_coco_20200531_012345.log.json) | +| R-101-FPN | pytorch | 1x | 5.8 | 16.5 | 59.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r101_fpn_1x_coco/rpn_r101_fpn_1x_coco_20200131-2ace2249.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r101_fpn_1x_coco/rpn_r101_fpn_1x_coco_20200131_191000.log.json) | +| R-101-FPN | pytorch | 2x | - | - | 60.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_r101_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r101_fpn_2x_coco/rpn_r101_fpn_2x_coco_20200131-24e3db1a.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r101_fpn_2x_coco/rpn_r101_fpn_2x_coco_20200131_191106.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 7.0 | 13.0 | 60.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_32x4d_fpn_1x_coco/rpn_x101_32x4d_fpn_1x_coco_20200219-b02646c6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_32x4d_fpn_1x_coco/rpn_x101_32x4d_fpn_1x_coco_20200219_012037.log.json) | +| X-101-32x4d-FPN | pytorch | 2x | - | - | 61.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_x101_32x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_32x4d_fpn_2x_coco/rpn_x101_32x4d_fpn_2x_coco_20200208-d22bd0bb.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_32x4d_fpn_2x_coco/rpn_x101_32x4d_fpn_2x_coco_20200208_200752.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 10.1 | 9.1 | 61.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_64x4d_fpn_1x_coco/rpn_x101_64x4d_fpn_1x_coco_20200208-cde6f7dd.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_64x4d_fpn_1x_coco/rpn_x101_64x4d_fpn_1x_coco_20200208_200752.log.json) | +| X-101-64x4d-FPN | pytorch | 2x | - | - | 61.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_x101_64x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_64x4d_fpn_2x_coco/rpn_x101_64x4d_fpn_2x_coco_20200208-c65f524f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_64x4d_fpn_2x_coco/rpn_x101_64x4d_fpn_2x_coco_20200208_200752.log.json) | ## Citation - - ```latex @inproceedings{ren2015faster, title={Faster r-cnn: Towards real-time object detection with region proposal networks}, @@ -26,18 +37,3 @@ State-of-the-art object detection networks depend on region proposal algorithms year={2015} } ``` - -## Results and models - -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | AR1000 | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | -| R-50-FPN | caffe | 1x | 3.5 | 22.6 | 58.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_caffe_fpn_1x_coco/rpn_r50_caffe_fpn_1x_coco_20200531-5b903a37.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_caffe_fpn_1x_coco/rpn_r50_caffe_fpn_1x_coco_20200531_012334.log.json) | -| R-50-FPN | pytorch | 1x | 3.8 | 22.3 | 58.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_fpn_1x_coco/rpn_r50_fpn_1x_coco_20200218-5525fa2e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_fpn_1x_coco/rpn_r50_fpn_1x_coco_20200218_151240.log.json) | -| R-50-FPN | pytorch | 2x | - | - | 58.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_r50_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_fpn_2x_coco/rpn_r50_fpn_2x_coco_20200131-0728c9b3.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_fpn_2x_coco/rpn_r50_fpn_2x_coco_20200131_190631.log.json) | -| R-101-FPN | caffe | 1x | 5.4 | 17.3 | 60.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r101_caffe_fpn_1x_coco/rpn_r101_caffe_fpn_1x_coco_20200531-0629a2e2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r101_caffe_fpn_1x_coco/rpn_r101_caffe_fpn_1x_coco_20200531_012345.log.json) | -| R-101-FPN | pytorch | 1x | 5.8 | 16.5 | 59.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r101_fpn_1x_coco/rpn_r101_fpn_1x_coco_20200131-2ace2249.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r101_fpn_1x_coco/rpn_r101_fpn_1x_coco_20200131_191000.log.json) | -| R-101-FPN | pytorch | 2x | - | - | 60.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_r101_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r101_fpn_2x_coco/rpn_r101_fpn_2x_coco_20200131-24e3db1a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r101_fpn_2x_coco/rpn_r101_fpn_2x_coco_20200131_191106.log.json) | -| X-101-32x4d-FPN | pytorch | 1x | 7.0 | 13.0 | 60.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_32x4d_fpn_1x_coco/rpn_x101_32x4d_fpn_1x_coco_20200219-b02646c6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_32x4d_fpn_1x_coco/rpn_x101_32x4d_fpn_1x_coco_20200219_012037.log.json) | -| X-101-32x4d-FPN | pytorch | 2x | - | - | 61.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_x101_32x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_32x4d_fpn_2x_coco/rpn_x101_32x4d_fpn_2x_coco_20200208-d22bd0bb.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_32x4d_fpn_2x_coco/rpn_x101_32x4d_fpn_2x_coco_20200208_200752.log.json) | -| X-101-64x4d-FPN | pytorch | 1x | 10.1 | 9.1 | 61.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_64x4d_fpn_1x_coco/rpn_x101_64x4d_fpn_1x_coco_20200208-cde6f7dd.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_64x4d_fpn_1x_coco/rpn_x101_64x4d_fpn_1x_coco_20200208_200752.log.json) | -| X-101-64x4d-FPN | pytorch | 2x | - | - | 61.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_x101_64x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_64x4d_fpn_2x_coco/rpn_x101_64x4d_fpn_2x_coco_20200208-c65f524f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_64x4d_fpn_2x_coco/rpn_x101_64x4d_fpn_2x_coco_20200208_200752.log.json) | diff --git a/configs/sabl/README.md b/configs/sabl/README.md index c090d00..03992be 100644 --- a/configs/sabl/README.md +++ b/configs/sabl/README.md @@ -1,23 +1,39 @@ -# Side-Aware Boundary Localization for More Precise Object Detection +# SABL -## Abstract +> [Side-Aware Boundary Localization for More Precise Object Detection](https://arxiv.org/abs/1912.04260) - + + +## Abstract Current object detection frameworks mainly rely on bounding box regression to localize objects. Despite the remarkable progress in recent years, the precision of bounding box regression remains unsatisfactory, hence limiting performance in object detection. We observe that precise localization requires careful placement of each side of the bounding box. However, the mainstream approach, which focuses on predicting centers and sizes, is not the most effective way to accomplish this task, especially when there exists displacements with large variance between the anchors and the targets. In this paper, we propose an alternative approach, named as Side-Aware Boundary Localization (SABL), where each side of the bounding box is respectively localized with a dedicated network branch. To tackle the difficulty of precise localization in the presence of displacements with large variance, we further propose a two-step localization scheme, which first predicts a range of movement through bucket prediction and then pinpoints the precise position within the predicted bucket. We test the proposed method on both two-stage and single-stage detection frameworks. Replacing the standard bounding box regression branch with the proposed design leads to significant improvements on Faster R-CNN, RetinaNet, and Cascade R-CNN, by 3.0%, 1.7%, and 0.9%, respectively. -
- - +## Results and Models -## Citation +The results on COCO 2017 val is shown in the below table. (results on test-dev are usually slightly higher than val). +Single-scale testing (1333x800) is adopted in all results. - +| Method | Backbone | Lr schd | ms-train | box AP | Config | Download | +| :----------------: | :-------: | :-----: | :------: | :----: | :----------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| SABL Faster R-CNN | R-50-FPN | 1x | N | 39.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_faster_rcnn_r50_fpn_1x_coco/sabl_faster_rcnn_r50_fpn_1x_coco-e867595b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_faster_rcnn_r50_fpn_1x_coco/20200830_130324.log.json) | +| SABL Faster R-CNN | R-101-FPN | 1x | N | 41.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_faster_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_faster_rcnn_r101_fpn_1x_coco/sabl_faster_rcnn_r101_fpn_1x_coco-f804c6c1.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_faster_rcnn_r101_fpn_1x_coco/20200830_183949.log.json) | +| SABL Cascade R-CNN | R-50-FPN | 1x | N | 41.6 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_cascade_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_cascade_rcnn_r50_fpn_1x_coco/sabl_cascade_rcnn_r50_fpn_1x_coco-e1748e5e.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_cascade_rcnn_r50_fpn_1x_coco/20200831_033726.log.json) | +| SABL Cascade R-CNN | R-101-FPN | 1x | N | 43.0 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_cascade_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_cascade_rcnn_r101_fpn_1x_coco/sabl_cascade_rcnn_r101_fpn_1x_coco-2b83e87c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_cascade_rcnn_r101_fpn_1x_coco/20200831_141745.log.json) | + +| Method | Backbone | GN | Lr schd | ms-train | box AP | Config | Download | +| :------------: | :-------: | :-: | :-----: | :---------: | :----: | :---------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| SABL RetinaNet | R-50-FPN | N | 1x | N | 37.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_retinanet_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r50_fpn_1x_coco/sabl_retinanet_r50_fpn_1x_coco-6c54fd4f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r50_fpn_1x_coco/20200830_053451.log.json) | +| SABL RetinaNet | R-50-FPN | Y | 1x | N | 38.8 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_retinanet_r50_fpn_gn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r50_fpn_gn_1x_coco/sabl_retinanet_r50_fpn_gn_1x_coco-e16dfcf1.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r50_fpn_gn_1x_coco/20200831_141955.log.json) | +| SABL RetinaNet | R-101-FPN | N | 1x | N | 39.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_retinanet_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_1x_coco/sabl_retinanet_r101_fpn_1x_coco-42026904.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_1x_coco/20200831_034256.log.json) | +| SABL RetinaNet | R-101-FPN | Y | 1x | N | 40.5 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_retinanet_r101_fpn_gn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_1x_coco/sabl_retinanet_r101_fpn_gn_1x_coco-40a893e8.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_1x_coco/20200830_201422.log.json) | +| SABL RetinaNet | R-101-FPN | Y | 2x | Y (640~800) | 42.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco-1e63382c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco/20200830_144807.log.json) | +| SABL RetinaNet | R-101-FPN | Y | 2x | Y (480~960) | 43.6 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco-5342f857.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco/20200830_164537.log.json) | + +## Citation We provide config files to reproduce the object detection results in the ECCV 2020 Spotlight paper for [Side-Aware Boundary Localization for More Precise Object Detection](https://arxiv.org/abs/1912.04260). @@ -29,24 +45,3 @@ We provide config files to reproduce the object detection results in the ECCV 20 year = {2020} } ``` - -## Results and Models - -The results on COCO 2017 val is shown in the below table. (results on test-dev are usually slightly higher than val). -Single-scale testing (1333x800) is adopted in all results. - -| Method | Backbone | Lr schd | ms-train | box AP | Config | Download | -| :----------------: | :-------: | :-----: | :------: | :----: | :----------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| SABL Faster R-CNN | R-50-FPN | 1x | N | 39.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_faster_rcnn_r50_fpn_1x_coco/sabl_faster_rcnn_r50_fpn_1x_coco-e867595b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_faster_rcnn_r50_fpn_1x_coco/20200830_130324.log.json) | -| SABL Faster R-CNN | R-101-FPN | 1x | N | 41.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_faster_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_faster_rcnn_r101_fpn_1x_coco/sabl_faster_rcnn_r101_fpn_1x_coco-f804c6c1.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_faster_rcnn_r101_fpn_1x_coco/20200830_183949.log.json) | -| SABL Cascade R-CNN | R-50-FPN | 1x | N | 41.6 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_cascade_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_cascade_rcnn_r50_fpn_1x_coco/sabl_cascade_rcnn_r50_fpn_1x_coco-e1748e5e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_cascade_rcnn_r50_fpn_1x_coco/20200831_033726.log.json) | -| SABL Cascade R-CNN | R-101-FPN | 1x | N | 43.0 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_cascade_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_cascade_rcnn_r101_fpn_1x_coco/sabl_cascade_rcnn_r101_fpn_1x_coco-2b83e87c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_cascade_rcnn_r101_fpn_1x_coco/20200831_141745.log.json) | - -| Method | Backbone | GN | Lr schd | ms-train | box AP | Config | Download | -| :------------: | :-------: | :---: | :-----: | :---------: | :----: | :---------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| SABL RetinaNet | R-50-FPN | N | 1x | N | 37.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_retinanet_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r50_fpn_1x_coco/sabl_retinanet_r50_fpn_1x_coco-6c54fd4f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r50_fpn_1x_coco/20200830_053451.log.json) | -| SABL RetinaNet | R-50-FPN | Y | 1x | N | 38.8 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_retinanet_r50_fpn_gn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r50_fpn_gn_1x_coco/sabl_retinanet_r50_fpn_gn_1x_coco-e16dfcf1.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r50_fpn_gn_1x_coco/20200831_141955.log.json) | -| SABL RetinaNet | R-101-FPN | N | 1x | N | 39.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_retinanet_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_1x_coco/sabl_retinanet_r101_fpn_1x_coco-42026904.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_1x_coco/20200831_034256.log.json) | -| SABL RetinaNet | R-101-FPN | Y | 1x | N | 40.5 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_retinanet_r101_fpn_gn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_1x_coco/sabl_retinanet_r101_fpn_gn_1x_coco-40a893e8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_1x_coco/20200830_201422.log.json) | -| SABL RetinaNet | R-101-FPN | Y | 2x | Y (640~800) | 42.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco-1e63382c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco/20200830_144807.log.json) | -| SABL RetinaNet | R-101-FPN | Y | 2x | Y (480~960) | 43.6 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco-5342f857.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco/20200830_164537.log.json) | diff --git a/configs/scnet/README.md b/configs/scnet/README.md index 52f3b49..773874a 100644 --- a/configs/scnet/README.md +++ b/configs/scnet/README.md @@ -1,34 +1,19 @@ -# SCNet: Training Inference Sample Consistency for Instance Segmentation +# SCNet + +> [SCNet: Training Inference Sample Consistency for Instance Segmentation](https://arxiv.org/abs/2012.10150) + + ## Abstract -Cascaded architectures have brought significant performance improvement in object detection and instance segmentation. However, there are lingering issues regarding the disparity in the Intersection-over-Union (IoU) distribution of the samples between training and inference. This disparity can potentially exacerbate detection accuracy. This paper proposes an architecture referred to as Sample Consistency Network (SCNet) to ensure that the IoU distribution of the samples at training time is close to that at inference time. Furthermore, SCNet incorporates feature relay and utilizes global contextual information to further reinforce the reciprocal relationships among classifying, detecting, and segmenting sub-tasks. Extensive experiments on the standard COCO dataset reveal the effectiveness of the proposed method over multiple evaluation metrics, including box AP, mask AP, and inference speed. In particular, while running 38\% faster, the proposed SCNet improves the AP of the box and mask predictions by respectively 1.3 and 2.3 points compared to the strong Cascade Mask R-CNN baseline. +Cascaded architectures have brought significant performance improvement in object detection and instance segmentation. However, there are lingering issues regarding the disparity in the Intersection-over-Union (IoU) distribution of the samples between training and inference. This disparity can potentially exacerbate detection accuracy. This paper proposes an architecture referred to as Sample Consistency Network (SCNet) to ensure that the IoU distribution of the samples at training time is close to that at inference time. Furthermore, SCNet incorporates feature relay and utilizes global contextual information to further reinforce the reciprocal relationships among classifying, detecting, and segmenting sub-tasks. Extensive experiments on the standard COCO dataset reveal the effectiveness of the proposed method over multiple evaluation metrics, including box AP, mask AP, and inference speed. In particular, while running 38% faster, the proposed SCNet improves the AP of the box and mask predictions by respectively 1.3 and 2.3 points compared to the strong Cascade Mask R-CNN baseline. -
- - - -## Citation - - - -We provide the code for reproducing experiment results of [SCNet](https://arxiv.org/abs/2012.10150). - -``` -@inproceedings{vu2019cascade, - title={SCNet: Training Inference Sample Consistency for Instance Segmentation}, - author={Vu, Thang and Haeyong, Kang and Yoo, Chang D}, - booktitle={AAAI}, - year={2021} -} -``` - ## Dataset SCNet requires COCO and [COCO-stuff](http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/stuffthingmaps_trainval2017.zip) dataset for training. You need to download and extract it in the COCO dataset path. @@ -52,14 +37,27 @@ mmdetection The results on COCO 2017val are shown in the below table. (results on test-dev are usually slightly higher than val) -| Backbone | Style | Lr schd | Mem (GB) | Inf speed (fps) | box AP | mask AP | TTA box AP | TTA mask AP | Config | Download | -|:---------------:|:-------:|:-------:|:--------:|:---------------:|:------:|:-------:|:----------:|:-----------:|:------:|:------------:| -| R-50-FPN | pytorch | 1x | 7.0 | 6.2 | 43.5 | 39.2 | 44.8 | 40.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scnet/scnet_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_r50_fpn_1x_coco/scnet_r50_fpn_1x_coco-c3f09857.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_r50_fpn_1x_coco/scnet_r50_fpn_1x_coco_20210117_192725.log.json) | -| R-50-FPN | pytorch | 20e | 7.0 | 6.2 | 44.5 | 40.0 | 45.8 | 41.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scnet/scnet_r50_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_r50_fpn_20e_coco/scnet_r50_fpn_20e_coco-a569f645.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_r50_fpn_20e_coco/scnet_r50_fpn_20e_coco_20210116_060148.log.json) | -| R-101-FPN | pytorch | 20e | 8.9 | 5.8 | 45.8 | 40.9 | 47.3 | 42.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scnet/scnet_r101_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_r101_fpn_20e_coco/scnet_r101_fpn_20e_coco-294e312c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_r101_fpn_20e_coco/scnet_r101_fpn_20e_coco_20210118_175824.log.json) | -| X-101-64x4d-FPN | pytorch | 20e | 13.2 | 4.9 | 47.5 | 42.3 | 48.9 | 44.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scnet/scnet_x101_64x4d_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_x101_64x4d_fpn_20e_coco/scnet_x101_64x4d_fpn_20e_coco-fb09dec9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_x101_64x4d_fpn_20e_coco/scnet_x101_64x4d_fpn_20e_coco_20210120_045959.log.json) | +| Backbone | Style | Lr schd | Mem (GB) | Inf speed (fps) | box AP | mask AP | TTA box AP | TTA mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :-------------: | :----: | :-----: | :--------: | :---------: | :------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-FPN | pytorch | 1x | 7.0 | 6.2 | 43.5 | 39.2 | 44.8 | 40.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scnet/scnet_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_r50_fpn_1x_coco/scnet_r50_fpn_1x_coco-c3f09857.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_r50_fpn_1x_coco/scnet_r50_fpn_1x_coco_20210117_192725.log.json) | +| R-50-FPN | pytorch | 20e | 7.0 | 6.2 | 44.5 | 40.0 | 45.8 | 41.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scnet/scnet_r50_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_r50_fpn_20e_coco/scnet_r50_fpn_20e_coco-a569f645.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_r50_fpn_20e_coco/scnet_r50_fpn_20e_coco_20210116_060148.log.json) | +| R-101-FPN | pytorch | 20e | 8.9 | 5.8 | 45.8 | 40.9 | 47.3 | 42.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scnet/scnet_r101_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_r101_fpn_20e_coco/scnet_r101_fpn_20e_coco-294e312c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_r101_fpn_20e_coco/scnet_r101_fpn_20e_coco_20210118_175824.log.json) | +| X-101-64x4d-FPN | pytorch | 20e | 13.2 | 4.9 | 47.5 | 42.3 | 48.9 | 44.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scnet/scnet_x101_64x4d_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_x101_64x4d_fpn_20e_coco/scnet_x101_64x4d_fpn_20e_coco-fb09dec9.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_x101_64x4d_fpn_20e_coco/scnet_x101_64x4d_fpn_20e_coco_20210120_045959.log.json) | ### Notes - Training hyper-parameters are identical to those of [HTC](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc). - TTA means Test Time Augmentation, which applies horizontal flip and multi-scale testing. Refer to [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scnet/scnet_r50_fpn_1x_coco.py). + +## Citation + +We provide the code for reproducing experiment results of [SCNet](https://arxiv.org/abs/2012.10150). + +```latex +@inproceedings{vu2019cascade, + title={SCNet: Training Inference Sample Consistency for Instance Segmentation}, + author={Vu, Thang and Haeyong, Kang and Yoo, Chang D}, + booktitle={AAAI}, + year={2021} +} +``` diff --git a/configs/scnet/scnet_x101_64x4d_fpn_8x1_20e_coco.py b/configs/scnet/scnet_x101_64x4d_fpn_8x1_20e_coco.py index 9f3ce6d..be8ddc5 100644 --- a/configs/scnet/scnet_x101_64x4d_fpn_8x1_20e_coco.py +++ b/configs/scnet/scnet_x101_64x4d_fpn_8x1_20e_coco.py @@ -1,3 +1,8 @@ _base_ = './scnet_x101_64x4d_fpn_20e_coco.py' data = dict(samples_per_gpu=1, workers_per_gpu=1) optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (1 samples per GPU) +auto_scale_lr = dict(base_batch_size=8) diff --git a/configs/scratch/README.md b/configs/scratch/README.md index 68d3a72..189f181 100644 --- a/configs/scratch/README.md +++ b/configs/scratch/README.md @@ -1,22 +1,29 @@ -# Rethinking ImageNet Pre-training +# Scratch -## Abstract +> [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883) - + -We report competitive results on object detection and instance segmentation on the COCO dataset using standard models trained from random initialization. The results are no worse than their ImageNet pre-training counterparts even when using the hyper-parameters of the baseline system (Mask R-CNN) that were optimized for fine-tuning pre-trained models, with the sole exception of increasing the number of training iterations so the randomly initialized models may converge. Training from random initialization is surprisingly robust; our results hold even when: (i) using only 10% of the training data, (ii) for deeper and wider models, and (iii) for multiple tasks and metrics. Experiments show that ImageNet pre-training speeds up convergence early in training, but does not necessarily provide regularization or improve final target task accuracy. To push the envelope we demonstrate 50.9 AP on COCO object detection without using any external data---a result on par with the top COCO 2017 competition results that used ImageNet pre-training. These observations challenge the conventional wisdom of ImageNet pre-training for dependent tasks and we expect these discoveries will encourage people to rethink the current de facto paradigm of `pre-training and fine-tuning' in computer vision. +## Abstract + +We report competitive results on object detection and instance segmentation on the COCO dataset using standard models trained from random initialization. The results are no worse than their ImageNet pre-training counterparts even when using the hyper-parameters of the baseline system (Mask R-CNN) that were optimized for fine-tuning pre-trained models, with the sole exception of increasing the number of training iterations so the randomly initialized models may converge. Training from random initialization is surprisingly robust; our results hold even when: (i) using only 10% of the training data, (ii) for deeper and wider models, and (iii) for multiple tasks and metrics. Experiments show that ImageNet pre-training speeds up convergence early in training, but does not necessarily provide regularization or improve final target task accuracy. To push the envelope we demonstrate 50.9 AP on COCO object detection without using any external data---a result on par with the top COCO 2017 competition results that used ImageNet pre-training. These observations challenge the conventional wisdom of ImageNet pre-training for dependent tasks and we expect these discoveries will encourage people to rethink the current de facto paradigm of \`pre-training and fine-tuning' in computer vision. -
- - +## Results and Models + +| Model | Backbone | Style | Lr schd | box AP | mask AP | Config | Download | +| :----------: | :------: | :-----: | :-----: | :----: | :-----: | :---------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Faster R-CNN | R-50-FPN | pytorch | 6x | 40.7 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco/scratch_faster_rcnn_r50_fpn_gn_6x_bbox_mAP-0.407_20200201_193013-90813d01.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco/scratch_faster_rcnn_r50_fpn_gn_6x_20200201_193013.log.json) | +| Mask R-CNN | R-50-FPN | pytorch | 6x | 41.2 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scratch/mask_rcnn_r50_fpn_gn-all_scratch_6x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/scratch/mask_rcnn_r50_fpn_gn-all_scratch_6x_coco/scratch_mask_rcnn_r50_fpn_gn_6x_bbox_mAP-0.412__segm_mAP-0.374_20200201_193051-1e190a40.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/scratch/mask_rcnn_r50_fpn_gn-all_scratch_6x_coco/scratch_mask_rcnn_r50_fpn_gn_6x_20200201_193051.log.json) | -## Citation +Note: - +- The above models are trained with 16 GPUs. + +## Citation ```latex @article{he2018rethinking, @@ -26,14 +33,3 @@ We report competitive results on object detection and instance segmentation on t year={2018} } ``` - -## Results and Models - -| Model | Backbone | Style | Lr schd | box AP | mask AP | Config | Download | -|:------------:|:---------:|:-------:|:-------:|:------:|:-------:|:------:|:--------:| -| Faster R-CNN | R-50-FPN | pytorch | 6x | 40.7 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco/scratch_faster_rcnn_r50_fpn_gn_6x_bbox_mAP-0.407_20200201_193013-90813d01.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco/scratch_faster_rcnn_r50_fpn_gn_6x_20200201_193013.log.json) | -| Mask R-CNN | R-50-FPN | pytorch | 6x | 41.2 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scratch/mask_rcnn_r50_fpn_gn-all_scratch_6x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/scratch/mask_rcnn_r50_fpn_gn-all_scratch_6x_coco/scratch_mask_rcnn_r50_fpn_gn_6x_bbox_mAP-0.412__segm_mAP-0.374_20200201_193051-1e190a40.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/scratch/mask_rcnn_r50_fpn_gn-all_scratch_6x_coco/scratch_mask_rcnn_r50_fpn_gn_6x_20200201_193051.log.json) | - -Note: - -- The above models are trained with 16 GPUs. diff --git a/configs/seesaw_loss/README.md b/configs/seesaw_loss/README.md index 9c1fcb0..696b008 100644 --- a/configs/seesaw_loss/README.md +++ b/configs/seesaw_loss/README.md @@ -1,22 +1,39 @@ -# Seesaw Loss for Long-Tailed Instance Segmentation +# Seesaw Loss -## Abstract +> [Seesaw Loss for Long-Tailed Instance Segmentation](https://arxiv.org/abs/2008.10032) - + + +## Abstract Instance segmentation has witnessed a remarkable progress on class-balanced benchmarks. However, they fail to perform as accurately in real-world scenarios, where the category distribution of objects naturally comes with a long tail. Instances of head classes dominate a long-tailed dataset and they serve as negative samples of tail categories. The overwhelming gradients of negative samples on tail classes lead to a biased learning process for classifiers. Consequently, objects of tail categories are more likely to be misclassified as backgrounds or head categories. To tackle this problem, we propose Seesaw Loss to dynamically re-balance gradients of positive and negative samples for each category, with two complementary factors, i.e., mitigation factor and compensation factor. The mitigation factor reduces punishments to tail categories w.r.t. the ratio of cumulative training instances between different categories. Meanwhile, the compensation factor increases the penalty of misclassified instances to avoid false positives of tail categories. We conduct extensive experiments on Seesaw Loss with mainstream frameworks and different data sampling strategies. With a simple end-to-end training pipeline, Seesaw Loss obtains significant gains over Cross-Entropy Loss, and achieves state-of-the-art performance on LVIS dataset without bells and whistles. -
- - +- Please setup [LVIS dataset](../lvis/README.md) for MMDetection. -## Citation +- RFS indicates to use oversample strategy [here](../../docs/tutorials/customize_dataset.md#class-balanced-dataset) with oversample threshold `1e-3`. - +## Results and models of Seasaw Loss on LVIS v1 dataset + +| Method | Backbone | Style | Lr schd | Data Sampler | Norm Mask | box AP | mask AP | Config | Download | +| :----------------: | :-------: | :-----: | :-----: | :----------: | :-------: | :----: | :-----: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Mask R-CNN | R-50-FPN | pytorch | 2x | random | N | 25.6 | 25.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_mstrain_2x_lvis_v1-a698dd3d.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.log.json) | +| Mask R-CNN | R-50-FPN | pytorch | 2x | random | Y | 25.6 | 25.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1-a1c11314.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.log.json) | +| Mask R-CNN | R-101-FPN | pytorch | 2x | random | N | 27.4 | 26.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1-8e6e6dd5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.log.json) | +| Mask R-CNN | R-101-FPN | pytorch | 2x | random | Y | 27.2 | 27.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1-a0b59c42.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.log.json) | +| Mask R-CNN | R-50-FPN | pytorch | 2x | RFS | N | 27.6 | 26.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1-392a804b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.log.json) | +| Mask R-CNN | R-50-FPN | pytorch | 2x | RFS | Y | 27.6 | 26.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1-cd0f6a12.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.log.json) | +| Mask R-CNN | R-101-FPN | pytorch | 2x | RFS | N | 28.9 | 27.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1-e68eb464.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.log.json) | +| Mask R-CNN | R-101-FPN | pytorch | 2x | RFS | Y | 28.9 | 28.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1-1d817139.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.log.json) | +| Cascade Mask R-CNN | R-101-FPN | pytorch | 2x | random | N | 33.1 | 29.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1-71e2215e.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.log.json) | +| Cascade Mask R-CNN | R-101-FPN | pytorch | 2x | random | Y | 33.0 | 30.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1-8b5a6745.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.log.json) | +| Cascade Mask R-CNN | R-101-FPN | pytorch | 2x | RFS | N | 30.0 | 29.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1-5d8ca2a4.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.log.json) | +| Cascade Mask R-CNN | R-101-FPN | pytorch | 2x | RFS | Y | 32.8 | 30.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1-c8551505.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.log.json) | + +## Citation We provide config files to reproduce the instance segmentation performance in the CVPR 2021 paper for [Seesaw Loss for Long-Tailed Instance Segmentation](https://arxiv.org/abs/2008.10032). @@ -28,26 +45,3 @@ We provide config files to reproduce the instance segmentation performance in th year={2021} } ``` - - -* Please setup [LVIS dataset](../lvis/README.md) for MMDetection. - -* RFS indicates to use oversample strategy [here](../../docs/tutorials/customize_dataset.md#class-balanced-dataset) with oversample threshold `1e-3`. - -## Results and models of Seasaw Loss on LVIS v1 dataset - - -| Method | Backbone | Style | Lr schd | Data Sampler | Norm Mask | box AP | mask AP | Config | Download | -| :----------------: | :-------: | :-----: | :-----: | :----------: | :-------: | :----: | :-----: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| Mask R-CNN | R-50-FPN | pytorch | 2x | random | N | 25.6 | 25.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_mstrain_2x_lvis_v1-a698dd3d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.log.json) | -| Mask R-CNN | R-50-FPN | pytorch | 2x | random | Y | 25.6 | 25.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1-a1c11314.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.log.json) | -| Mask R-CNN | R-101-FPN | pytorch | 2x | random | N | 27.4 | 26.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1-8e6e6dd5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.log.json) | -| Mask R-CNN | R-101-FPN | pytorch | 2x | random | Y | 27.2 | 27.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1-a0b59c42.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.log.json) | -| Mask R-CNN | R-50-FPN | pytorch | 2x | RFS | N | 27.6 | 26.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1-392a804b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.log.json) | -| Mask R-CNN | R-50-FPN | pytorch | 2x | RFS | Y | 27.6 | 26.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1-cd0f6a12.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.log.json) | -| Mask R-CNN | R-101-FPN | pytorch | 2x | RFS | N | 28.9 | 27.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1-e68eb464.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.log.json) | -| Mask R-CNN | R-101-FPN | pytorch | 2x | RFS | Y | 28.9 | 28.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1-1d817139.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.log.json) | -| Cascade Mask R-CNN | R-101-FPN | pytorch | 2x | random | N | 33.1 | 29.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1-71e2215e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.log.json) | -| Cascade Mask R-CNN | R-101-FPN | pytorch | 2x | random | Y | 33.0 | 30.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1-8b5a6745.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.log.json) | -| Cascade Mask R-CNN | R-101-FPN | pytorch | 2x | RFS | N | 30.0 | 29.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1-5d8ca2a4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.log.json) | -| Cascade Mask R-CNN | R-101-FPN | pytorch | 2x | RFS | Y | 32.8 | 30.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1-c8551505.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.log.json) | diff --git a/configs/selfsup_pretrain/README.md b/configs/selfsup_pretrain/README.md index 2c61bd6..9bd92cb 100644 --- a/configs/selfsup_pretrain/README.md +++ b/configs/selfsup_pretrain/README.md @@ -1,62 +1,21 @@ # Backbones Trained by Self-Supervise Algorithms -## Abstract + - +## Abstract Unsupervised image representations have significantly reduced the gap with supervised pretraining, notably with the recent achievements of contrastive learning methods. These contrastive methods typically work online and rely on a large number of explicit pairwise feature comparisons, which is computationally challenging. In this paper, we propose an online algorithm, SwAV, that takes advantage of contrastive methods without requiring to compute pairwise comparisons. Specifically, our method simultaneously clusters the data while enforcing consistency between cluster assignments produced for different augmentations (or views) of the same image, instead of comparing features directly as in contrastive learning. Simply put, we use a swapped prediction mechanism where we predict the cluster assignment of a view from the representation of another view. Our method can be trained with large and small batches and can scale to unlimited amounts of data. Compared to previous contrastive methods, our method is more memory efficient since it does not require a large memory bank or a special momentum network. In addition, we also propose a new data augmentation strategy, multi-crop, that uses a mix of views with different resolutions in place of two full-resolution views, without increasing the memory or compute requirements much. We validate our findings by achieving 75.3% top-1 accuracy on ImageNet with ResNet-50, as well as surpassing supervised pretraining on all the considered transfer tasks. -
We present Momentum Contrast (MoCo) for unsupervised visual representation learning. From a perspective on contrastive learning as dictionary look-up, we build a dynamic dictionary with a queue and a moving-averaged encoder. This enables building a large and consistent dictionary on-the-fly that facilitates contrastive unsupervised learning. MoCo provides competitive results under the common linear protocol on ImageNet classification. More importantly, the representations learned by MoCo transfer well to downstream tasks. MoCo can outperform its supervised pre-training counterpart in 7 detection/segmentation tasks on PASCAL VOC, COCO, and other datasets, sometimes surpassing it by large margins. This suggests that the gap between unsupervised and supervised representation learning has been largely closed in many vision tasks. -
- - - -## Citation - - - -We support to apply the backbone models pre-trained by different self-supervised methods in detection systems and provide their results on Mask R-CNN. - -The pre-trained models are converted from [MoCo](https://github.com/facebookresearch/moco) and downloaded from [SwAV](https://github.com/facebookresearch/swav). - -For SwAV, please cite - -```latex -@article{caron2020unsupervised, - title={Unsupervised Learning of Visual Features by Contrasting Cluster Assignments}, - author={Caron, Mathilde and Misra, Ishan and Mairal, Julien and Goyal, Priya and Bojanowski, Piotr and Joulin, Armand}, - booktitle={Proceedings of Advances in Neural Information Processing Systems (NeurIPS)}, - year={2020} -} -``` - -For MoCo, please cite - -```latex -@Article{he2019moco, - author = {Kaiming He and Haoqi Fan and Yuxin Wu and Saining Xie and Ross Girshick}, - title = {Momentum Contrast for Unsupervised Visual Representation Learning}, - journal = {arXiv preprint arXiv:1911.05722}, - year = {2019}, -} -@Article{chen2020mocov2, - author = {Xinlei Chen and Haoqi Fan and Ross Girshick and Kaiming He}, - title = {Improved Baselines with Momentum Contrastive Learning}, - journal = {arXiv preprint arXiv:2003.04297}, - year = {2020}, -} -``` - ## Usage To use a self-supervisely pretrained backbone, there are two steps to do: @@ -84,7 +43,7 @@ To use the ResNet-50 backbone released by SwAV, you can download it from [here]( ### Modify config -The backbone requires SyncBN and the `fronzen_stages` need to be changed. A config that use the moco backbone is as below +The backbone requires SyncBN and the `frozen_stages` need to be changed. A config that use the moco backbone is as below ```python _base_ = [ @@ -102,15 +61,49 @@ model = dict( ``` -## Results +## Results and Models -| Method | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-----: | :-----: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | -|Mask RCNN |[R50 by MoCo v2](./mask_rcnn_r50_fpn_mocov2-pretrain_1x_coco.py)| pytorch |1x|| |38.0|34.3|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_1x_coco/mask_rcnn_r50_fpn_mocov2-pretrain_1x_coco_20210604_114614-a8b63483.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_1x_coco/mask_rcnn_r50_fpn_mocov2-pretrain_1x_coco_20210604_114614.log.json)| -|Mask RCNN |[R50 by MoCo v2](./mask_rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco.py)| pytorch | multi-scale 2x || |40.8|36.8|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco/mask_rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco_20210605_163717-d95df20a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco/mask_rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco_20210605_163717.log.json)| -|Mask RCNN |[R50 by SwAV](./mask_rcnn_r50_fpn_swav-pretrain_1x_coco.py)| pytorch | 1x || |39.1 | 35.7|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_1x_coco/mask_rcnn_r50_fpn_swav-pretrain_1x_coco_20210604_114640-7b9baf28.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_1x_coco/mask_rcnn_r50_fpn_swav-pretrain_1x_coco_20210604_114640.log.json)| -|Mask RCNN |[R50 by SwAV](./mask_rcnn_r50_fpn_swav-pretrain_ms-2x_coco.py)| pytorch | multi-scale 2x || |41.3|37.3|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_ms-2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_ms-2x_coco/mask_rcnn_r50_fpn_swav-pretrain_ms-2x_coco_20210605_163717-08e26fca.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_ms-2x_coco/mask_rcnn_r50_fpn_swav-pretrain_ms-2x_coco_20210605_163717.log.json)| +| Method | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------: | :-----------------------------------------------------------------: | :-----: | :------------: | :------: | :------------: | :----: | :-----: | :--------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Mask RCNN | [R50 by MoCo v2](./mask_rcnn_r50_fpn_mocov2-pretrain_1x_coco.py) | pytorch | 1x | | | 38.0 | 34.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_1x_coco/mask_rcnn_r50_fpn_mocov2-pretrain_1x_coco_20210604_114614-a8b63483.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_1x_coco/mask_rcnn_r50_fpn_mocov2-pretrain_1x_coco_20210604_114614.log.json) | +| Mask RCNN | [R50 by MoCo v2](./mask_rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco.py) | pytorch | multi-scale 2x | | | 40.8 | 36.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco/mask_rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco_20210605_163717-d95df20a.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco/mask_rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco_20210605_163717.log.json) | +| Mask RCNN | [R50 by SwAV](./mask_rcnn_r50_fpn_swav-pretrain_1x_coco.py) | pytorch | 1x | | | 39.1 | 35.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_1x_coco/mask_rcnn_r50_fpn_swav-pretrain_1x_coco_20210604_114640-7b9baf28.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_1x_coco/mask_rcnn_r50_fpn_swav-pretrain_1x_coco_20210604_114640.log.json) | +| Mask RCNN | [R50 by SwAV](./mask_rcnn_r50_fpn_swav-pretrain_ms-2x_coco.py) | pytorch | multi-scale 2x | | | 41.3 | 37.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_ms-2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_ms-2x_coco/mask_rcnn_r50_fpn_swav-pretrain_ms-2x_coco_20210605_163717-08e26fca.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_ms-2x_coco/mask_rcnn_r50_fpn_swav-pretrain_ms-2x_coco_20210605_163717.log.json) | ### Notice 1. We only provide single-scale 1x and multi-scale 2x configs as examples to show how to use backbones trained by self-supervised algorithms. We will try to reproduce the results in their corresponding paper using the released backbone in the future. Please stay tuned. + +## Citation + +We support to apply the backbone models pre-trained by different self-supervised methods in detection systems and provide their results on Mask R-CNN. + +The pre-trained models are converted from [MoCo](https://github.com/facebookresearch/moco) and downloaded from [SwAV](https://github.com/facebookresearch/swav). + +For SwAV, please cite + +```latex +@article{caron2020unsupervised, + title={Unsupervised Learning of Visual Features by Contrasting Cluster Assignments}, + author={Caron, Mathilde and Misra, Ishan and Mairal, Julien and Goyal, Priya and Bojanowski, Piotr and Joulin, Armand}, + booktitle={Proceedings of Advances in Neural Information Processing Systems (NeurIPS)}, + year={2020} +} +``` + +For MoCo, please cite + +```latex +@Article{he2019moco, + author = {Kaiming He and Haoqi Fan and Yuxin Wu and Saining Xie and Ross Girshick}, + title = {Momentum Contrast for Unsupervised Visual Representation Learning}, + journal = {arXiv preprint arXiv:1911.05722}, + year = {2019}, +} +@Article{chen2020mocov2, + author = {Xinlei Chen and Haoqi Fan and Ross Girshick and Kaiming He}, + title = {Improved Baselines with Momentum Contrastive Learning}, + journal = {arXiv preprint arXiv:2003.04297}, + year = {2020}, +} +``` diff --git a/configs/simple_copy_paste/README.md b/configs/simple_copy_paste/README.md new file mode 100644 index 0000000..46162aa --- /dev/null +++ b/configs/simple_copy_paste/README.md @@ -0,0 +1,38 @@ +# SimpleCopyPaste + +> [Simple Copy-Paste is a Strong Data Augmentation Method for Instance Segmentation](https://arxiv.org/abs/2012.07177) + + + +## Abstract + +Building instance segmentation models that are data-efficient and can handle rare object categories is an important challenge in computer vision. Leveraging data augmentations is a promising direction towards addressing this challenge. Here, we perform a systematic study of the Copy-Paste augmentation (\[13, 12\]) for instance segmentation where we randomly paste objects onto an image. Prior studies on Copy-Paste relied on modeling the surrounding visual context for pasting the objects. However, we find that the simple mechanism of pasting objects randomly is good enough and can provide solid gains on top of strong baselines. Furthermore, we show Copy-Paste is additive with semi-supervised methods that leverage extra data through pseudo labeling (e.g. self-training). On COCO instance segmentation, we achieve 49.1 mask AP and 57.3 box AP, an improvement of +0.6 mask AP and +1.5 box AP over the previous state-of-the-art. We further demonstrate that Copy-Paste can lead to significant improvements on the LVIS benchmark. Our baseline model outperforms the LVIS 2020 Challenge winning entry by +3.6 mask AP on rare categories. + +
+ +
+ +## Results and Models + +### Mask R-CNN with Standard Scale Jittering (SSJ) and Simple Copy-Paste(SCP) + +Standard Scale Jittering(SSJ) resizes and crops an image with a resize range of 0.8 to 1.25 of the original image size, and Simple Copy-Paste(SCP) selects a random subset of objects from one of the images and pastes them onto the other image. + +| Backbone | Training schedule | Augmentation | batch size | box AP | mask AP | Config | Download | +| :------: | :---------------: | :----------: | :--------: | :----: | :-----: | :--------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | 90k | SSJ | 64 | 43.3 | 39.0 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_90k_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_90k_coco/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_90k_coco_20220316_181409-f79c84c5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_90k_coco/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_90k_coco_20220316_181409.log.json) | +| R-50 | 90k | SSJ+SCP | 64 | 43.8 | 39.2 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_90k_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_90k_coco/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_90k_coco_20220316_181307-6bc5726f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_90k_coco/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_90k_coco_20220316_181307.log.json) | +| R-50 | 270k | SSJ | 64 | 43.5 | 39.1 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco_20220324_182940-33a100c5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco_20220324_182940.log.json) | +| R-50 | 270k | SSJ+SCP | 64 | 45.1 | 40.3 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco_20220324_201229-80ee90b7.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco_20220324_201229.log.json) | + +## Citation + +```latex +@inproceedings{ghiasi2021simple, + title={Simple copy-paste is a strong data augmentation method for instance segmentation}, + author={Ghiasi, Golnaz and Cui, Yin and Srinivas, Aravind and Qian, Rui and Lin, Tsung-Yi and Cubuk, Ekin D and Le, Quoc V and Zoph, Barret}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={2918--2928}, + year={2021} +} +``` diff --git a/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco.py b/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco.py new file mode 100644 index 0000000..d0ce917 --- /dev/null +++ b/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco.py @@ -0,0 +1,20 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + # 270k iterations with batch_size 64 is roughly equivalent to 144 epochs + '../common/ssj_270k_coco_instance.py', +] + +norm_cfg = dict(type='SyncBN', requires_grad=True) +# Use MMSyncBN that handles empty tensor in head. It can be changed to +# SyncBN after https://github.com/pytorch/pytorch/issues/36530 is fixed. +head_norm_cfg = dict(type='MMSyncBN', requires_grad=True) +model = dict( + backbone=dict(frozen_stages=-1, norm_eval=False, norm_cfg=norm_cfg), + neck=dict(norm_cfg=norm_cfg), + rpn_head=dict(num_convs=2), # leads to 0.1+ mAP + roi_head=dict( + bbox_head=dict( + type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + norm_cfg=head_norm_cfg), + mask_head=dict(norm_cfg=head_norm_cfg))) diff --git a/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_90k_coco.py b/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_90k_coco.py new file mode 100644 index 0000000..1eee95f --- /dev/null +++ b/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_90k_coco.py @@ -0,0 +1,7 @@ +_base_ = 'mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco.py' + +# lr steps at [0.9, 0.95, 0.975] of the maximum iterations +lr_config = dict( + warmup_iters=500, warmup_ratio=0.067, step=[81000, 85500, 87750]) +# 90k iterations with batch_size 64 is roughly equivalent to 48 epochs +runner = dict(type='IterBasedRunner', max_iters=90000) diff --git a/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco.py b/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco.py new file mode 100644 index 0000000..bd28ddd --- /dev/null +++ b/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco.py @@ -0,0 +1,20 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + # 270k iterations with batch_size 64 is roughly equivalent to 144 epochs + '../common/ssj_scp_270k_coco_instance.py' +] + +norm_cfg = dict(type='SyncBN', requires_grad=True) +# Use MMSyncBN that handles empty tensor in head. It can be changed to +# SyncBN after https://github.com/pytorch/pytorch/issues/36530 is fixed. +head_norm_cfg = dict(type='MMSyncBN', requires_grad=True) +model = dict( + backbone=dict(frozen_stages=-1, norm_eval=False, norm_cfg=norm_cfg), + neck=dict(norm_cfg=norm_cfg), + rpn_head=dict(num_convs=2), # leads to 0.1+ mAP + roi_head=dict( + bbox_head=dict( + type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + norm_cfg=head_norm_cfg), + mask_head=dict(norm_cfg=head_norm_cfg))) diff --git a/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_90k_coco.py b/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_90k_coco.py new file mode 100644 index 0000000..b632c13 --- /dev/null +++ b/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_90k_coco.py @@ -0,0 +1,7 @@ +_base_ = 'mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco.py' + +# lr steps at [0.9, 0.95, 0.975] of the maximum iterations +lr_config = dict( + warmup_iters=500, warmup_ratio=0.067, step=[81000, 85500, 87750]) +# 90k iterations with batch_size 64 is roughly equivalent to 48 epochs +runner = dict(type='IterBasedRunner', max_iters=90000) diff --git a/configs/simple_copy_paste/metafile.yml b/configs/simple_copy_paste/metafile.yml new file mode 100644 index 0000000..bb6106c --- /dev/null +++ b/configs/simple_copy_paste/metafile.yml @@ -0,0 +1,92 @@ +Collections: + - Name: SimpleCopyPaste + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 32x A100 GPUs + Architecture: + - Softmax + - RPN + - Convolution + - Dense Connections + - FPN + - ResNet + - RoIAlign + Paper: + URL: https://arxiv.org/abs/2012.07177 + Title: "Simple Copy-Paste is a Strong Data Augmentation Method for Instance Segmentation" + README: configs/simple_copy_paste/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.25.0/mmdet/datasets/pipelines/transforms.py#L2762 + Version: v2.25.0 + +Models: + - Name: mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco + In Collection: SimpleCopyPaste + Config: configs/simplecopypaste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco.py + Metadata: + Training Memory (GB): 7.2 + Iterations: 270000 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.5 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco_20220324_182940-33a100c5.pth + + - Name: mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_90k_coco + In Collection: SimpleCopyPaste + Config: configs/simplecopypaste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_90k_coco.py + Metadata: + Training Memory (GB): 7.2 + Iterations: 90000 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.3 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_90k_coco/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_90k_coco_20220316_181409-f79c84c5.pth + + - Name: mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco + In Collection: SimpleCopyPaste + Config: configs/simplecopypaste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco.py + Metadata: + Training Memory (GB): 7.2 + Iterations: 270000 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.1 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 40.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco_20220324_201229-80ee90b7.pth + + - Name: mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_90k_coco + In Collection: SimpleCopyPaste + Config: configs/simplecopypaste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_90k_coco.py + Metadata: + Training Memory (GB): 7.2 + Iterations: 90000 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_90k_coco/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_90k_coco_20220316_181307-6bc5726f.pth diff --git a/configs/solo/README.md b/configs/solo/README.md index 76664b7..4a36676 100644 --- a/configs/solo/README.md +++ b/configs/solo/README.md @@ -1,56 +1,54 @@ -# SOLO: Segmenting Objects by Locations +# SOLO -## Abstract +> [SOLO: Segmenting Objects by Locations](https://arxiv.org/abs/1912.04488) + + - +## Abstract We present a new, embarrassingly simple approach to instance segmentation in images. Compared to many other dense prediction tasks, e.g., semantic segmentation, it is the arbitrary number of instances that have made instance segmentation much more challenging. In order to predict a mask for each instance, mainstream approaches either follow the 'detect-thensegment' strategy as used by Mask R-CNN, or predict category masks first then use clustering techniques to group pixels into individual instances. We view the task of instance segmentation from a completely new perspective by introducing the notion of "instance categories", which assigns categories to each pixel within an instance according to the instance's location and size, thus nicely converting instance mask segmentation into a classification-solvable problem. Now instance segmentation is decomposed into two classification tasks. We demonstrate a much simpler and flexible instance segmentation framework with strong performance, achieving on par accuracy with Mask R-CNN and outperforming recent singleshot instance segmenters in accuracy. We hope that this very simple and strong framework can serve as a baseline for many instance-level recognition tasks besides instance segmentation. -
- - - -## Citation - -``` -@inproceedings{wang2020solo, - title = {{SOLO}: Segmenting Objects by Locations}, - author = {Wang, Xinlong and Kong, Tao and Shen, Chunhua and Jiang, Yuning and Li, Lei}, - booktitle = {Proc. Eur. Conf. Computer Vision (ECCV)}, - year = {2020} -} -``` - ## Results and Models ### SOLO -| Backbone | Style | MS train | Lr schd | Mem (GB) | Inf time (fps) | mask AP | Download | -|:---------:|:-------:|:--------:|:-------:|:--------:|:--------------:|:------:|:--------:| -| R-50 | pytorch | N | 1x | 8.0 | 14.0 | 33.1 | [model](https://download.openmmlab.com/mmdetection/v2.0/solo/solo_r50_fpn_1x_coco/solo_r50_fpn_1x_coco_20210821_035055-2290a6b8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/solo/solo_r50_fpn_1x_coco/solo_r50_fpn_1x_coco_20210821_035055.log.json) | -| R-50 | pytorch | Y | 3x | 7.4 | 14.0 | 35.9 | [model](https://download.openmmlab.com/mmdetection/v2.0/solo/solo_r50_fpn_3x_coco/solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/solo/solo_r50_fpn_3x_coco/solo_r50_fpn_3x_coco_20210901_012353.log.json) | +| Backbone | Style | MS train | Lr schd | Mem (GB) | Inf time (fps) | mask AP | Download | +| :------: | :-----: | :------: | :-----: | :------: | :------------: | :-----: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | pytorch | N | 1x | 8.0 | 14.0 | 33.1 | [model](https://download.openmmlab.com/mmdetection/v2.0/solo/solo_r50_fpn_1x_coco/solo_r50_fpn_1x_coco_20210821_035055-2290a6b8.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/solo/solo_r50_fpn_1x_coco/solo_r50_fpn_1x_coco_20210821_035055.log.json) | +| R-50 | pytorch | Y | 3x | 7.4 | 14.0 | 35.9 | [model](https://download.openmmlab.com/mmdetection/v2.0/solo/solo_r50_fpn_3x_coco/solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/solo/solo_r50_fpn_3x_coco/solo_r50_fpn_3x_coco_20210901_012353.log.json) | ### Decoupled SOLO -| Backbone | Style | MS train | Lr schd | Mem (GB) | Inf time (fps) | mask AP | Download | -|:---------:|:-------:|:--------:|:-------:|:--------:|:--------------:|:-------:|:--------:| -| R-50 | pytorch | N | 1x | 7.8 | 12.5 | 33.9 | [model](https://download.openmmlab.com/mmdetection/v2.0/solo/decoupled_solo_r50_fpn_1x_coco/decoupled_solo_r50_fpn_1x_coco_20210820_233348-6337c589.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/solo/decoupled_solo_r50_fpn_1x_coco/decoupled_solo_r50_fpn_1x_coco_20210820_233348.log.json) | -| R-50 | pytorch | Y | 3x | 7.9 | 12.5 | 36.7 | [model](https://download.openmmlab.com/mmdetection/v2.0/solo/decoupled_solo_r50_fpn_3x_coco/decoupled_solo_r50_fpn_3x_coco_20210821_042504-7b3301ec.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/solo/decoupled_solo_r50_fpn_3x_coco/decoupled_solo_r50_fpn_3x_coco_20210821_042504.log.json) | +| Backbone | Style | MS train | Lr schd | Mem (GB) | Inf time (fps) | mask AP | Download | +| :------: | :-----: | :------: | :-----: | :------: | :------------: | :-----: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | pytorch | N | 1x | 7.8 | 12.5 | 33.9 | [model](https://download.openmmlab.com/mmdetection/v2.0/solo/decoupled_solo_r50_fpn_1x_coco/decoupled_solo_r50_fpn_1x_coco_20210820_233348-6337c589.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/solo/decoupled_solo_r50_fpn_1x_coco/decoupled_solo_r50_fpn_1x_coco_20210820_233348.log.json) | +| R-50 | pytorch | Y | 3x | 7.9 | 12.5 | 36.7 | [model](https://download.openmmlab.com/mmdetection/v2.0/solo/decoupled_solo_r50_fpn_3x_coco/decoupled_solo_r50_fpn_3x_coco_20210821_042504-7b3301ec.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/solo/decoupled_solo_r50_fpn_3x_coco/decoupled_solo_r50_fpn_3x_coco_20210821_042504.log.json) | - Decoupled SOLO has a decoupled head which is different from SOLO head. -Decoupled SOLO serves as an efficient and equivalent variant in accuracy -of SOLO. Please refer to the corresponding config files for details. + Decoupled SOLO serves as an efficient and equivalent variant in accuracy + of SOLO. Please refer to the corresponding config files for details. ### Decoupled Light SOLO -| Backbone | Style | MS train | Lr schd | Mem (GB) | Inf time (fps) | mask AP | Download | -|:---------:|:-------:|:--------:|:-------:|:--------:|:--------------:|:------:|:--------:| -| R-50 | pytorch | Y | 3x | 2.2 | 31.2 | 32.9 | [model](https://download.openmmlab.com/mmdetection/v2.0/solo/decoupled_solo_light_r50_fpn_3x_coco/decoupled_solo_light_r50_fpn_3x_coco_20210906_142703-e70e226f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/solo/decoupled_solo_light_r50_fpn_3x_coco/decoupled_solo_light_r50_fpn_3x_coco_20210906_142703.log.json) | +| Backbone | Style | MS train | Lr schd | Mem (GB) | Inf time (fps) | mask AP | Download | +| :------: | :-----: | :------: | :-----: | :------: | :------------: | :-----: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | pytorch | Y | 3x | 2.2 | 31.2 | 32.9 | [model](https://download.openmmlab.com/mmdetection/v2.0/solo/decoupled_solo_light_r50_fpn_3x_coco/decoupled_solo_light_r50_fpn_3x_coco_20210906_142703-e70e226f.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/solo/decoupled_solo_light_r50_fpn_3x_coco/decoupled_solo_light_r50_fpn_3x_coco_20210906_142703.log.json) | - Decoupled Light SOLO using decoupled structure similar to Decoupled -SOLO head, with light-weight head and smaller input size, Please refer -to the corresponding config files for details. + SOLO head, with light-weight head and smaller input size, Please refer + to the corresponding config files for details. + +## Citation + +```latex +@inproceedings{wang2020solo, + title = {{SOLO}: Segmenting Objects by Locations}, + author = {Wang, Xinlong and Kong, Tao and Shen, Chunhua and Jiang, Yuning and Li, Lei}, + booktitle = {Proc. Eur. Conf. Computer Vision (ECCV)}, + year = {2020} +} +``` diff --git a/configs/solov2/README.md b/configs/solov2/README.md new file mode 100644 index 0000000..2ffe70f --- /dev/null +++ b/configs/solov2/README.md @@ -0,0 +1,59 @@ +# SOLOv2 + +> [SOLOv2: Dynamic and Fast Instance Segmentation](https://arxiv.org/abs/2003.10152) + + + +## Abstract + +In this work, we aim at building a simple, direct, and fast instance segmentation +framework with strong performance. We follow the principle of the SOLO method of +Wang et al. "SOLO: segmenting objects by locations". Importantly, we take one +step further by dynamically learning the mask head of the object segmenter such +that the mask head is conditioned on the location. Specifically, the mask branch +is decoupled into a mask kernel branch and mask feature branch, which are +responsible for learning the convolution kernel and the convolved features +respectively. Moreover, we propose Matrix NMS (non maximum suppression) to +significantly reduce the inference time overhead due to NMS of masks. Our +Matrix NMS performs NMS with parallel matrix operations in one shot, and +yields better results. We demonstrate a simple direct instance segmentation +system, outperforming a few state-of-the-art methods in both speed and accuracy. +A light-weight version of SOLOv2 executes at 31.3 FPS and yields 37.1% AP. +Moreover, our state-of-the-art results in object detection (from our mask byproduct) +and panoptic segmentation show the potential to serve as a new strong baseline +for many instance-level recognition tasks besides instance segmentation. + +
+ +
+ +## Results and Models + +### SOLOv2 + +| Backbone | Style | MS train | Lr schd | Mem (GB) | mask AP | Config | Download | +| :--------: | :-----: | :------: | :-----: | :------: | :-----: | :-----------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | pytorch | N | 1x | 5.1 | 34.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/solov2/solov2_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r50_fpn_1x_coco/solov2_r50_fpn_1x_coco_20220512_125858-a357fa23.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r50_fpn_1x_coco/solov2_r50_fpn_1x_coco_20220512_125858.log.json) | +| R-50 | pytorch | Y | 3x | 5.1 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/solov2/solov2_r50_fpn_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r50_fpn_3x_coco/solov2_r50_fpn_3x_coco_20220512_125856-fed092d4.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r50_fpn_3x_coco/solov2_r50_fpn_3x_coco_20220512_125856.log.json) | +| R-101 | pytorch | Y | 3x | 6.9 | 39.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/solov2/solov2_r101_fpn_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r101_fpn_3x_coco/solov2_r101_fpn_3x_coco_20220511_095119-c559a076.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r101_fpn_3x_coco/solov2_r101_fpn_3x_coco_20220511_095119.log.json) | +| R-101(DCN) | pytorch | Y | 3x | 7.1 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/solov2/solov2_r101_dcn_fpn_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r101_dcn_fpn_3x_coco/solov2_r101_dcn_fpn_3x_coco_20220513_214734-16c966cb.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r101_dcn_fpn_3x_coco/solov2_r101_dcn_fpn_3x_coco_20220513_214734.log.json) | +| X-101(DCN) | pytorch | Y | 3x | 11.3 | 42.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/solov2/solov2_x101_dcn_fpn_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_x101_dcn_fpn_3x_coco/solov2_x101_dcn_fpn_3x_coco_20220513_214337-aef41095.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_x101_dcn_fpn_3x_coco/solov2_x101_dcn_fpn_3x_coco_20220513_214337.log.json) | + +### Light SOLOv2 + +| Backbone | Style | MS train | Lr schd | Mem (GB) | mask AP | Config | Download | +| :------: | :-----: | :------: | :-----: | :------: | :-----: | :------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-18 | pytorch | Y | 3x | 9.1 | 29.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/solov2/solov2_light_r18_fpn_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_light_r18_fpn_3x_coco/solov2_light_r18_fpn_3x_coco_20220511_083717-75fa355b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_light_r18_fpn_3x_coco/solov2_light_r18_fpn_3x_coco_20220511_083717.log.json) | +| R-34 | pytorch | Y | 3x | 9.3 | 31.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/solov2/solov2_light_r34_fpn_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_light_r34_fpn_3x_coco/solov2_light_r34_fpn_3x_coco_20220511_091839-e51659d3.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_light_r34_fpn_3x_coco/solov2_light_r34_fpn_3x_coco_20220511_091839.log.json) | +| R-50 | pytorch | Y | 3x | 9.9 | 33.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/solov2/solov2_light_r50_fpn_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_light_r50_fpn_3x_coco/solov2_light_r50_fpn_3x_coco_20220512_165256-c93a6074.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_light_r50_fpn_3x_coco/solov2_light_r50_fpn_3x_coco_20220512_165256.log.json) | + +## Citation + +```latex +@article{wang2020solov2, + title={SOLOv2: Dynamic and Fast Instance Segmentation}, + author={Wang, Xinlong and Zhang, Rufeng and Kong, Tao and Li, Lei and Shen, Chunhua}, + journal={Proc. Advances in Neural Information Processing Systems (NeurIPS)}, + year={2020} +} +``` diff --git a/configs/solov2/metafile.yml b/configs/solov2/metafile.yml new file mode 100644 index 0000000..656f66f --- /dev/null +++ b/configs/solov2/metafile.yml @@ -0,0 +1,119 @@ +Collections: + - Name: SOLOv2 + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x A100 GPUs + Architecture: + - FPN + - Convolution + - ResNet + Paper: https://arxiv.org/abs/2003.10152 + README: configs/solov2/README.md + +Models: + - Name: solov2_r50_fpn_1x_coco + In Collection: SOLOv2 + Config: configs/solov2/solov2_r50_fpn_1x_coco.py + Metadata: + Training Memory (GB): 5.1 + Epochs: 12 + Results: + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 34.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r50_fpn_1x_coco/solov2_r50_fpn_1x_coco_20220512_125858-a357fa23.pth + + - Name: solov2_r50_fpn_3x_coco + In Collection: SOLOv2 + Config: configs/solov2/solov2_r50_fpn_3x_coco.py + Metadata: + Training Memory (GB): 5.1 + Epochs: 36 + Results: + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r50_fpn_3x_coco/solov2_r50_fpn_3x_coco_20220512_125856-fed092d4.pth + + - Name: solov2_r101_fpn_3x_coco + In Collection: SOLOv2 + Config: configs/solov2/solov2_r101_fpn_3x_coco.py + Metadata: + Training Memory (GB): 6.9 + Epochs: 36 + Results: + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r101_fpn_3x_coco/solov2_r101_fpn_3x_coco_20220511_095119-c559a076.pth + + - Name: solov2_r101_dcn_fpn_3x_coco + In Collection: SOLOv2 + Config: configs/solov2/solov2_r101_dcn_fpn_3x_coco.py + Metadata: + Training Memory (GB): 7.1 + Epochs: 36 + Results: + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 41.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r101_dcn_fpn_3x_coco/solov2_r101_dcn_fpn_3x_coco_20220513_214734-16c966cb.pth + + - Name: solov2_x101_dcn_fpn_3x_coco + In Collection: SOLOv2 + Config: configs/solov2/solov2_x101_dcn_fpn_3x_coco.py + Metadata: + Training Memory (GB): 11.3 + Epochs: 36 + Results: + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 42.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_x101_dcn_fpn_3x_coco/solov2_x101_dcn_fpn_3x_coco_20220513_214337-aef41095.pth + + - Name: solov2_light_r18_fpn_3x_coco + In Collection: SOLOv2 + Config: configs/solov2/solov2_light_r18_fpn_3x_coco.py + Metadata: + Training Memory (GB): 9.1 + Epochs: 36 + Results: + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 29.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_light_r18_fpn_3x_coco/solov2_light_r18_fpn_3x_coco_20220511_083717-75fa355b.pth + + - Name: solov2_light_r34_fpn_3x_coco + In Collection: SOLOv2 + Config: configs/solov2/solov2_light_r34_fpn_3x_coco.py + Metadata: + Training Memory (GB): 9.3 + Epochs: 36 + Results: + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 31.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_light_r34_fpn_3x_coco/solov2_light_r34_fpn_3x_coco_20220511_091839-e51659d3.pth + + - Name: solov2_light_r50_fpn_3x_coco + In Collection: SOLOv2 + Config: configs/solov2/solov2_light_r50_fpn_3x_coco.py + Metadata: + Training Memory (GB): 9.9 + Epochs: 36 + Results: + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 33.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_light_r50_fpn_3x_coco/solov2_light_r50_fpn_3x_coco_20220512_165256-c93a6074.pth diff --git a/configs/solov2/solov2_light_r18_fpn_3x_coco.py b/configs/solov2/solov2_light_r18_fpn_3x_coco.py new file mode 100644 index 0000000..6fb33b0 --- /dev/null +++ b/configs/solov2/solov2_light_r18_fpn_3x_coco.py @@ -0,0 +1,7 @@ +_base_ = 'solov2_light_r50_fpn_3x_coco.py' + +# model settings +model = dict( + backbone=dict( + depth=18, init_cfg=dict(checkpoint='torchvision://resnet18')), + neck=dict(in_channels=[64, 128, 256, 512])) diff --git a/configs/solov2/solov2_light_r34_fpn_3x_coco.py b/configs/solov2/solov2_light_r34_fpn_3x_coco.py new file mode 100644 index 0000000..ea082a1 --- /dev/null +++ b/configs/solov2/solov2_light_r34_fpn_3x_coco.py @@ -0,0 +1,7 @@ +_base_ = 'solov2_light_r50_fpn_3x_coco.py' + +# model settings +model = dict( + backbone=dict( + depth=34, init_cfg=dict(checkpoint='torchvision://resnet34')), + neck=dict(in_channels=[64, 128, 256, 512])) diff --git a/configs/solov2/solov2_light_r50_dcn_fpn_3x_coco.py b/configs/solov2/solov2_light_r50_dcn_fpn_3x_coco.py new file mode 100644 index 0000000..4d758e2 --- /dev/null +++ b/configs/solov2/solov2_light_r50_dcn_fpn_3x_coco.py @@ -0,0 +1,62 @@ +_base_ = 'solov2_r50_fpn_3x_coco.py' + +# model settings +model = dict( + backbone=dict( + dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True)), + mask_head=dict( + feat_channels=256, + stacked_convs=3, + scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)), + mask_feature_head=dict(out_channels=128), + dcn_cfg=dict(type='DCNv2'), + dcn_apply_to_all_conv=False)) # light solov2 head + +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[27, 33]) +runner = dict(type='EpochBasedRunner', max_epochs=36) + +# data +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=[(768, 512), (768, 480), (768, 448), (768, 416), (768, 384), + (768, 352)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(448, 768), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/solov2/solov2_light_r50_fpn_3x_coco.py b/configs/solov2/solov2_light_r50_fpn_3x_coco.py new file mode 100644 index 0000000..e08f1db --- /dev/null +++ b/configs/solov2/solov2_light_r50_fpn_3x_coco.py @@ -0,0 +1,57 @@ +_base_ = 'solov2_r50_fpn_1x_coco.py' + +# model settings +model = dict( + mask_head=dict( + stacked_convs=2, + feat_channels=256, + scale_ranges=((1, 56), (28, 112), (56, 224), (112, 448), (224, 896)), + mask_feature_head=dict(out_channels=128))) + +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[27, 33]) +runner = dict(type='EpochBasedRunner', max_epochs=36) + +# data +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=[(768, 512), (768, 480), (768, 448), (768, 416), (768, 384), + (768, 352)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(448, 768), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/solov2/solov2_r101_dcn_fpn_3x_coco.py b/configs/solov2/solov2_r101_dcn_fpn_3x_coco.py new file mode 100644 index 0000000..1594118 --- /dev/null +++ b/configs/solov2/solov2_r101_dcn_fpn_3x_coco.py @@ -0,0 +1,13 @@ +_base_ = 'solov2_r50_fpn_3x_coco.py' + +# model settings +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(checkpoint='torchvision://resnet101'), + dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True)), + mask_head=dict( + mask_feature_head=dict(conv_cfg=dict(type='DCNv2')), + dcn_cfg=dict(type='DCNv2'), + dcn_apply_to_all_conv=True)) diff --git a/configs/solov2/solov2_r101_fpn_3x_coco.py b/configs/solov2/solov2_r101_fpn_3x_coco.py new file mode 100644 index 0000000..6c248e5 --- /dev/null +++ b/configs/solov2/solov2_r101_fpn_3x_coco.py @@ -0,0 +1,6 @@ +_base_ = 'solov2_r50_fpn_3x_coco.py' + +# model settings +model = dict( + backbone=dict( + depth=101, init_cfg=dict(checkpoint='torchvision://resnet101'))) diff --git a/configs/solov2/solov2_r50_fpn_1x_coco.py b/configs/solov2/solov2_r50_fpn_1x_coco.py new file mode 100644 index 0000000..9aee571 --- /dev/null +++ b/configs/solov2/solov2_r50_fpn_1x_coco.py @@ -0,0 +1,61 @@ +_base_ = [ + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +# model settings +model = dict( + type='SOLOv2', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=0, + num_outs=5), + mask_head=dict( + type='SOLOV2Head', + num_classes=80, + in_channels=256, + feat_channels=512, + stacked_convs=4, + strides=[8, 8, 16, 32, 32], + scale_ranges=((1, 96), (48, 192), (96, 384), (192, 768), (384, 2048)), + pos_scale=0.2, + num_grids=[40, 36, 24, 16, 12], + cls_down_index=0, + mask_feature_head=dict( + feat_channels=128, + start_level=0, + end_level=3, + out_channels=256, + mask_stride=4, + norm_cfg=dict(type='GN', num_groups=32, requires_grad=True)), + loss_mask=dict(type='DiceLoss', use_sigmoid=True, loss_weight=3.0), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0)), + # model training and testing settings + test_cfg=dict( + nms_pre=500, + score_thr=0.1, + mask_thr=0.5, + filter_thr=0.05, + kernel='gaussian', # gaussian/linear + sigma=2.0, + max_per_img=100)) + +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/solov2/solov2_r50_fpn_3x_coco.py b/configs/solov2/solov2_r50_fpn_3x_coco.py new file mode 100644 index 0000000..640c730 --- /dev/null +++ b/configs/solov2/solov2_r50_fpn_3x_coco.py @@ -0,0 +1,28 @@ +_base_ = 'solov2_r50_fpn_1x_coco.py' + +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=[(1333, 800), (1333, 768), (1333, 736), (1333, 704), + (1333, 672), (1333, 640)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +data = dict(train=dict(pipeline=train_pipeline)) + +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[27, 33]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/solov2/solov2_x101_dcn_fpn_3x_coco.py b/configs/solov2/solov2_x101_dcn_fpn_3x_coco.py new file mode 100644 index 0000000..6115fed --- /dev/null +++ b/configs/solov2/solov2_x101_dcn_fpn_3x_coco.py @@ -0,0 +1,17 @@ +_base_ = 'solov2_r50_fpn_3x_coco.py' + +# model settings +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')), + mask_head=dict( + mask_feature_head=dict(conv_cfg=dict(type='DCNv2')), + dcn_cfg=dict(type='DCNv2'), + dcn_apply_to_all_conv=True)) diff --git a/configs/sparse_rcnn/README.md b/configs/sparse_rcnn/README.md index 48c8aef..d7912e0 100644 --- a/configs/sparse_rcnn/README.md +++ b/configs/sparse_rcnn/README.md @@ -1,24 +1,34 @@ -# Sparse R-CNN: End-to-End Object Detection with Learnable Proposals +# Sparse R-CNN -## Abstract +> [Sparse R-CNN: End-to-End Object Detection with Learnable Proposals](https://arxiv.org/abs/2011.12450) - + + +## Abstract We present Sparse R-CNN, a purely sparse method for object detection in images. Existing works on object detection heavily rely on dense object candidates, such as k anchor boxes pre-defined on all grids of image feature map of size H×W. In our method, however, a fixed sparse set of learned object proposals, total length of N, are provided to object recognition head to perform classification and location. By eliminating HWk (up to hundreds of thousands) hand-designed object candidates to N (e.g. 100) learnable proposals, Sparse R-CNN completely avoids all efforts related to object candidates design and many-to-one label assignment. More importantly, final predictions are directly output without non-maximum suppression post-procedure. Sparse R-CNN demonstrates accuracy, run-time and training convergence performance on par with the well-established detector baselines on the challenging COCO dataset, e.g., achieving 45.0 AP in standard 3× training schedule and running at 22 fps using ResNet-50 FPN model. We hope our work could inspire re-thinking the convention of dense prior in object detectors. -
- - +## Results and Models -## Citation +| Model | Backbone | Style | Lr schd | Number of Proposals | Multi-Scale | RandomCrop | box AP | Config | Download | +| :----------: | :-------: | :-----: | :-----: | :-----------------: | :---------: | :--------: | :----: | :----------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Sparse R-CNN | R-50-FPN | pytorch | 1x | 100 | False | False | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco/sparse_rcnn_r50_fpn_1x_coco_20201222_214453-dc79b137.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco/sparse_rcnn_r50_fpn_1x_coco_20201222_214453-dc79b137.log.json) | +| Sparse R-CNN | R-50-FPN | pytorch | 3x | 100 | True | False | 42.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/sparse_rcnn/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco_20201218_154234-7bc5c054.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco_20201218_154234-7bc5c054.log.json) | +| Sparse R-CNN | R-50-FPN | pytorch | 3x | 300 | True | True | 45.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/sparse_rcnn/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20201223_024605-9fe92701.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20201223_024605-9fe92701.log.json) | +| Sparse R-CNN | R-101-FPN | pytorch | 3x | 100 | True | False | 44.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/sparse_rcnn/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco_20201223_121552-6c46c9d6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco_20201223_121552-6c46c9d6.log.json) | +| Sparse R-CNN | R-101-FPN | pytorch | 3x | 300 | True | True | 46.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/sparse_rcnn/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20201223_023452-c23c3564.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20201223_023452-c23c3564.log.json) | - +### Notes -``` +We observe about 0.3 AP noise especially when using ResNet-101 as the backbone. + +## Citation + +```latex @article{peize2020sparse, title = {{SparseR-CNN}: End-to-End Object Detection with Learnable Proposals}, author = {Peize Sun and Rufeng Zhang and Yi Jiang and Tao Kong and Chenfeng Xu and Wei Zhan and Masayoshi Tomizuka and Lei Li and Zehuan Yuan and Changhu Wang and Ping Luo}, @@ -26,17 +36,3 @@ We present Sparse R-CNN, a purely sparse method for object detection in images. year = {2020} } ``` - -## Results and Models - -| Model | Backbone | Style | Lr schd | Number of Proposals |Multi-Scale| RandomCrop | box AP | Config | Download | -|:------------:|:---------:|:-------:|:-------:|:-------: |:-------: |:---------:|:------:|:------:|:--------:| -| Sparse R-CNN | R-50-FPN | pytorch | 1x | 100 | False | False | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco/sparse_rcnn_r50_fpn_1x_coco_20201222_214453-dc79b137.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco/sparse_rcnn_r50_fpn_1x_coco_20201222_214453-dc79b137.log.json) | -| Sparse R-CNN | R-50-FPN | pytorch | 3x | 100 | True | False | 42.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/sparse_rcnn/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco_20201218_154234-7bc5c054.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco_20201218_154234-7bc5c054.log.json) | -| Sparse R-CNN | R-50-FPN | pytorch | 3x | 300 | True | True | 45.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/sparse_rcnn/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20201223_024605-9fe92701.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20201223_024605-9fe92701.log.json) | -| Sparse R-CNN | R-101-FPN | pytorch | 3x | 100 | True | False | 44.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/sparse_rcnn/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco_20201223_121552-6c46c9d6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco_20201223_121552-6c46c9d6.log.json) | -| Sparse R-CNN | R-101-FPN | pytorch | 3x | 300 | True | True | 46.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/sparse_rcnn/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20201223_023452-c23c3564.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20201223_023452-c23c3564.log.json) | - -### Notes - -We observe about 0.3 AP noise especially when using ResNet-101 as the backbone. diff --git a/configs/ssd/README.md b/configs/ssd/README.md index f668cd5..463926b 100644 --- a/configs/ssd/README.md +++ b/configs/ssd/README.md @@ -1,44 +1,29 @@ -# SSD: Single Shot MultiBox Detector +# SSD -## Abstract +> [SSD: Single Shot MultiBox Detector](https://arxiv.org/abs/1512.02325) + + - +## Abstract We present a method for detecting objects in images using a single deep neural network. Our approach, named SSD, discretizes the output space of bounding boxes into a set of default boxes over different aspect ratios and scales per feature map location. At prediction time, the network generates scores for the presence of each object category in each default box and produces adjustments to the box to better match the object shape. Additionally, the network combines predictions from multiple feature maps with different resolutions to naturally handle objects of various sizes. Our SSD model is simple relative to methods that require object proposals because it completely eliminates proposal generation and subsequent pixel or feature resampling stage and encapsulates all computation in a single network. This makes SSD easy to train and straightforward to integrate into systems that require a detection component. Experimental results on the PASCAL VOC, MS COCO, and ILSVRC datasets confirm that SSD has comparable accuracy to methods that utilize an additional object proposal step and is much faster, while providing a unified framework for both training and inference. Compared to other single stage methods, SSD has much better accuracy, even with a smaller input image size. For 300×300 input, SSD achieves 72.1% mAP on VOC2007 test at 58 FPS on a Nvidia Titan X and for 500×500 input, SSD achieves 75.1% mAP, outperforming a comparable state of the art Faster R-CNN model. -
- - - -## Citation - - - -```latex -@article{Liu_2016, - title={SSD: Single Shot MultiBox Detector}, - journal={ECCV}, - author={Liu, Wei and Anguelov, Dragomir and Erhan, Dumitru and Szegedy, Christian and Reed, Scott and Fu, Cheng-Yang and Berg, Alexander C.}, - year={2016}, -} -``` - ## Results and models of SSD -| Backbone | Size | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :------: | :---: | :---: | :-----: | :------: | :------------: | :----: | :------: | :--------: | -| VGG16 | 300 | caffe | 120e | 9.9 | 43.7 | 25.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd/ssd300_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ssd/ssd300_coco/ssd300_coco_20210803_015428-d231a06e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ssd/ssd300_coco/ssd300_coco_20210803_015428.log.json) | -| VGG16 | 512 | caffe | 120e | 19.4 | 30.7 | 29.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd/ssd512_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ssd/ssd512_coco/ssd512_coco_20210803_022849-0a47a1ca.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ssd/ssd512_coco/ssd512_coco_20210803_022849.log.json) | +| Backbone | Size | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :------: | :--: | :---: | :-----: | :------: | :------------: | :----: | :----------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| VGG16 | 300 | caffe | 120e | 9.9 | 43.7 | 25.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd/ssd300_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ssd/ssd300_coco/ssd300_coco_20210803_015428-d231a06e.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/ssd/ssd300_coco/ssd300_coco_20210803_015428.log.json) | +| VGG16 | 512 | caffe | 120e | 19.4 | 30.7 | 29.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd/ssd512_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ssd/ssd512_coco/ssd512_coco_20210803_022849-0a47a1ca.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/ssd/ssd512_coco/ssd512_coco_20210803_022849.log.json) | ## Results and models of SSD-Lite -| Backbone | Size | Training from scratch | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :------------: | :---: | :-------------------: | :-----: | :------: | :------------: | :----: | :------: | :--------: | -| MobileNetV2 | 320 | yes | 600e | 4.0 | 69.9 | 21.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd/ssdlite_mobilenetv2_scratch_600e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ssd/ssdlite_mobilenetv2_scratch_600e_coco/ssdlite_mobilenetv2_scratch_600e_coco_20210629_110627-974d9307.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ssd/ssdlite_mobilenetv2_scratch_600e_coco/ssdlite_mobilenetv2_scratch_600e_coco_20210629_110627.log.json) | +| Backbone | Size | Training from scratch | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :---------: | :--: | :-------------------: | :-----: | :------: | :------------: | :----: | :------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| MobileNetV2 | 320 | yes | 600e | 4.0 | 69.9 | 21.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd/ssdlite_mobilenetv2_scratch_600e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ssd/ssdlite_mobilenetv2_scratch_600e_coco/ssdlite_mobilenetv2_scratch_600e_coco_20210629_110627-974d9307.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/ssd/ssdlite_mobilenetv2_scratch_600e_coco/ssdlite_mobilenetv2_scratch_600e_coco_20210629_110627.log.json) | ## Notice @@ -64,3 +49,14 @@ There are some differences between our implementation of MobileNetV2 SSD-Lite an 2. The anchor sizes are different. 3. The C4 feature map is taken from the last layer of stage 4 instead of the middle of the block. 4. The model in TensorFlow1.x is trained on coco 2014 and validated on coco minival2014, but we trained and validated the model on coco 2017. The mAP on val2017 is usually a little lower than minival2014 (refer to the results in TensorFlow Object Detection API, e.g., MobileNetV2 SSD gets 22 mAP on minival2014 but 20.2 mAP on val2017). + +## Citation + +```latex +@article{Liu_2016, + title={SSD: Single Shot MultiBox Detector}, + journal={ECCV}, + author={Liu, Wei and Anguelov, Dragomir and Erhan, Dumitru and Szegedy, Christian and Reed, Scott and Fu, Cheng-Yang and Berg, Alexander C.}, + year={2016}, +} +``` diff --git a/configs/ssd/ssd300_coco.py b/configs/ssd/ssd300_coco.py index 2884658..1891bad 100644 --- a/configs/ssd/ssd300_coco.py +++ b/configs/ssd/ssd300_coco.py @@ -64,3 +64,8 @@ dict(type='NumClassCheckHook'), dict(type='CheckInvalidLossHook', interval=50, priority='VERY_LOW') ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/ssd/ssd512_coco.py b/configs/ssd/ssd512_coco.py index 82271c6..117777f 100644 --- a/configs/ssd/ssd512_coco.py +++ b/configs/ssd/ssd512_coco.py @@ -77,3 +77,8 @@ dict(type='NumClassCheckHook'), dict(type='CheckInvalidLossHook', interval=50, priority='VERY_LOW') ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/ssd/ssdlite_mobilenetv2_scratch_600e_coco.py b/configs/ssd/ssdlite_mobilenetv2_scratch_600e_coco.py index c22d0a7..929eb6c 100644 --- a/configs/ssd/ssdlite_mobilenetv2_scratch_600e_coco.py +++ b/configs/ssd/ssdlite_mobilenetv2_scratch_600e_coco.py @@ -143,3 +143,8 @@ dict(type='NumClassCheckHook'), dict(type='CheckInvalidLossHook', interval=50, priority='VERY_LOW') ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (24 samples per GPU) +auto_scale_lr = dict(base_batch_size=192) diff --git a/configs/strong_baselines/README.md b/configs/strong_baselines/README.md index 5ada104..aa2550d 100644 --- a/configs/strong_baselines/README.md +++ b/configs/strong_baselines/README.md @@ -1,16 +1,18 @@ # Strong Baselines + + We train Mask R-CNN with large-scale jitter and longer schedule as strong baselines. The modifications follow those in [Detectron2](https://github.com/facebookresearch/detectron2/tree/master/configs/new_baselines). -## Results and models +## Results and Models -| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | -| R-50-FPN | pytorch | 50e | | | | | [config](./mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_lsj_50e_coco.py) | [model]() | [log]() | -| R-50-FPN | pytorch | 100e | | | | | [config](./mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_lsj_100e_coco.py) | [model]() | [log]() | -| R-50-FPN | caffe | 100e | | | 44.7 | 40.4 | [config](./mask_rcnn_r50_caffe_fpn_syncbn-all_rpn-2conv_lsj_100e_coco.py) | [model]() | [log]() | -| R-50-FPN | caffe | 400e | | | | | [config](./mask_rcnn_r50_caffe_fpn_syncbn-all_rpn-2conv_lsj_400e_coco.py) | [model]() | [log]() | +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :-----------------------------------------------------------------------: | :----------------------: | +| R-50-FPN | pytorch | 50e | | | | | [config](./mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_lsj_50e_coco.py) | [model](<>) \| [log](<>) | +| R-50-FPN | pytorch | 100e | | | | | [config](./mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_lsj_100e_coco.py) | [model](<>) \| [log](<>) | +| R-50-FPN | caffe | 100e | | | 44.7 | 40.4 | [config](./mask_rcnn_r50_caffe_fpn_syncbn-all_rpn-2conv_lsj_100e_coco.py) | [model](<>) \| [log](<>) | +| R-50-FPN | caffe | 400e | | | | | [config](./mask_rcnn_r50_caffe_fpn_syncbn-all_rpn-2conv_lsj_400e_coco.py) | [model](<>) \| [log](<>) | ## Notice diff --git a/configs/swin/README.md b/configs/swin/README.md index 9a4ef02..2136134 100644 --- a/configs/swin/README.md +++ b/configs/swin/README.md @@ -1,22 +1,35 @@ -# Swin Transformer: Hierarchical Vision Transformer using Shifted Windows +# Swin -## Abstract +> [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) + + - +## Abstract This paper presents a new vision Transformer, called Swin Transformer, that capably serves as a general-purpose backbone for computer vision. Challenges in adapting Transformer from language to vision arise from differences between the two domains, such as large variations in the scale of visual entities and the high resolution of pixels in images compared to words in text. To address these differences, we propose a hierarchical Transformer whose representation is computed with Shifted windows. The shifted windowing scheme brings greater efficiency by limiting self-attention computation to non-overlapping local windows while also allowing for cross-window connection. This hierarchical architecture has the flexibility to model at various scales and has linear computational complexity with respect to image size. These qualities of Swin Transformer make it compatible with a broad range of vision tasks, including image classification (87.3 top-1 accuracy on ImageNet-1K) and dense prediction tasks such as object detection (58.7 box AP and 51.1 mask AP on COCO test-dev) and semantic segmentation (53.5 mIoU on ADE20K val). Its performance surpasses the previous state-of-the-art by a large margin of +2.7 box AP and +2.6 mask AP on COCO, and +3.2 mIoU on ADE20K, demonstrating the potential of Transformer-based models as vision backbones. The hierarchical design and the shifted window approach also prove beneficial for all-MLP architectures. -
- - +## Results and Models -## Citation +### Mask R-CNN + +| Backbone | Pretrain | Lr schd | Multi-scale crop | FP16 | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :------: | :---------: | :-----: | :--------------: | :--: | :------: | :------------: | :----: | :-----: | :------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Swin-T | ImageNet-1K | 1x | no | no | 7.6 | | 42.7 | 39.3 | [config](./mask_rcnn_swin-t-p4-w7_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_1x_coco/mask_rcnn_swin-t-p4-w7_fpn_1x_coco_20210902_120937-9d6b7cfa.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_1x_coco/mask_rcnn_swin-t-p4-w7_fpn_1x_coco_20210902_120937.log.json) | +| Swin-T | ImageNet-1K | 3x | yes | no | 10.2 | | 46.0 | 41.6 | [config](./mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco_20210906_131725-bacf6f7b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco_20210906_131725.log.json) | +| Swin-T | ImageNet-1K | 3x | yes | yes | 7.8 | | 46.0 | 41.7 | [config](./mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco_20210908_165006-90a4008c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco_20210908_165006.log.json) | +| Swin-S | ImageNet-1K | 3x | yes | yes | 11.9 | | 48.2 | 43.2 | [config](./mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco/mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco_20210903_104808-b92c91f1.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco/mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco_20210903_104808.log.json) | + +### Notice + +Please follow the example +of `retinanet_swin-t-p4-w7_fpn_1x_coco.py` when you want to combine Swin Transformer with +the one-stage detector. Because there is a layer norm at the outs of Swin Transformer, you must set `start_level` as 0 in FPN, so we have to set the `out_indices` of backbone as `[1,2,3]`. - +## Citation ```latex @article{liu2021Swin, @@ -26,19 +39,3 @@ This paper presents a new vision Transformer, called Swin Transformer, that capa year={2021} } ``` - -## Results and models - -### Mask R-CNN - -| Backbone | Pretrain | Lr schd | Multi-scale crop | FP16 |Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | -| :------: | :---------: | :-----: | :-------------------:| :------: |:------: | :------------: | :----: | :-----: | :------: | :--------: | -| Swin-T | ImageNet-1K | 1x | no | no | 7.6 | | 42.7 | 39.3 | [config](./mask_rcnn_swin-t-p4-w7_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_1x_coco/mask_rcnn_swin-t-p4-w7_fpn_1x_coco_20210902_120937-9d6b7cfa.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_1x_coco/mask_rcnn_swin-t-p4-w7_fpn_1x_coco_20210902_120937.log.json) | -| Swin-T | ImageNet-1K | 3x | yes | no | 10.2 | | 46.0 | 41.6 | [config](./mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco_20210906_131725-bacf6f7b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco_20210906_131725.log.json) | -| Swin-T | ImageNet-1K | 3x | yes | yes | 7.8 | | 46.0 | 41.7 | [config](./mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco.py)| [model](https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco_20210908_165006-90a4008c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco_20210908_165006.log.json) | -| Swin-S | ImageNet-1K | 3x | yes | yes | 11.9 | | 48.2 | 43.2 | [config](./mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco.py)| [model](https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco/mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco_20210903_104808-b92c91f1.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco/mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco_20210903_104808.log.json) | - -### Notice -Please follow the example -of `retinanet_swin-t-p4-w7_fpn_1x_coco.py` when you want to combine Swin Transformer with -the one-stage detector. Because there is a layer norm at the outs of Swin Transformer, you must set `start_level` as 0 in FPN, so we have to set the `out_indices` of backbone as `[1,2,3]`. diff --git a/configs/swin/metafile.yml b/configs/swin/metafile.yml index b265afe..6c07f17 100644 --- a/configs/swin/metafile.yml +++ b/configs/swin/metafile.yml @@ -1,27 +1,16 @@ -Collections: - - Name: Swin Transformer +Models: + - Name: mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco + In Collection: Mask R-CNN + Config: configs/swin/mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco.py Metadata: + Training Memory (GB): 11.9 + Epochs: 36 Training Data: COCO Training Techniques: - AdamW Training Resources: 8x V100 GPUs Architecture: - Swin Transformer - Paper: - URL: https://arxiv.org/abs/2107.08430 - Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' - README: configs/swin/README.md - Code: - URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465 - Version: v2.16.0 - -Models: - - Name: mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco - In Collection: Swin Transformer - Config: configs/swin/mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco.py - Metadata: - Training Memory (GB): 11.9 - Epochs: 36 Results: - Task: Object Detection Dataset: COCO @@ -32,13 +21,26 @@ Models: Metrics: mask AP: 43.2 Weights: https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco/mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco_20210903_104808-b92c91f1.pth + Paper: + URL: https://arxiv.org/abs/2107.08430 + Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' + README: configs/swin/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465 + Version: v2.16.0 - Name: mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco - In Collection: Swin Transformer + In Collection: Mask R-CNN Config: configs/swin/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py Metadata: Training Memory (GB): 10.2 Epochs: 36 + Training Data: COCO + Training Techniques: + - AdamW + Training Resources: 8x V100 GPUs + Architecture: + - Swin Transformer Results: - Task: Object Detection Dataset: COCO @@ -49,13 +51,26 @@ Models: Metrics: mask AP: 41.6 Weights: https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco_20210906_131725-bacf6f7b.pth + Paper: + URL: https://arxiv.org/abs/2107.08430 + Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' + README: configs/swin/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465 + Version: v2.16.0 - Name: mask_rcnn_swin-t-p4-w7_fpn_1x_coco - In Collection: Swin Transformer + In Collection: Mask R-CNN Config: configs/swin/mask_rcnn_swin-t-p4-w7_fpn_1x_coco.py Metadata: Training Memory (GB): 7.6 Epochs: 12 + Training Data: COCO + Training Techniques: + - AdamW + Training Resources: 8x V100 GPUs + Architecture: + - Swin Transformer Results: - Task: Object Detection Dataset: COCO @@ -66,13 +81,26 @@ Models: Metrics: mask AP: 39.3 Weights: https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_1x_coco/mask_rcnn_swin-t-p4-w7_fpn_1x_coco_20210902_120937-9d6b7cfa.pth + Paper: + URL: https://arxiv.org/abs/2107.08430 + Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' + README: configs/swin/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465 + Version: v2.16.0 - Name: mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco - In Collection: Swin Transformer + In Collection: Mask R-CNN Config: configs/swin/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco.py Metadata: Training Memory (GB): 7.8 Epochs: 36 + Training Data: COCO + Training Techniques: + - AdamW + Training Resources: 8x V100 GPUs + Architecture: + - Swin Transformer Results: - Task: Object Detection Dataset: COCO @@ -83,3 +111,10 @@ Models: Metrics: mask AP: 41.7 Weights: https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco_20210908_165006-90a4008c.pth + Paper: + URL: https://arxiv.org/abs/2107.08430 + Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' + README: configs/swin/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465 + Version: v2.16.0 diff --git a/configs/swin/retinanet_swin-t-p4-w7_fpn_1x_coco.py b/configs/swin/retinanet_swin-t-p4-w7_fpn_1x_coco.py index 9d620fd..3315093 100644 --- a/configs/swin/retinanet_swin-t-p4-w7_fpn_1x_coco.py +++ b/configs/swin/retinanet_swin-t-p4-w7_fpn_1x_coco.py @@ -23,6 +23,7 @@ # Please only add indices that would be used # in FPN, otherwise some parameter will not be used with_cp=False, + convert_weights=True, init_cfg=dict(type='Pretrained', checkpoint=pretrained)), neck=dict(in_channels=[192, 384, 768], start_level=0, num_outs=5)) diff --git a/configs/timm_example/README.md b/configs/timm_example/README.md new file mode 100644 index 0000000..4374855 --- /dev/null +++ b/configs/timm_example/README.md @@ -0,0 +1,62 @@ +# Timm Example + +> [PyTorch Image Models](https://github.com/rwightman/pytorch-image-models) + + + +## Abstract + +Py**T**orch **Im**age **M**odels (`timm`) is a collection of image models, layers, utilities, optimizers, schedulers, data-loaders / augmentations, and reference training / validation scripts that aim to pull together a wide variety of SOTA models with ability to reproduce ImageNet training results. + + + +## Results and Models + +### RetinaNet + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-------------------------------------------------------: | :------: | +| R-50 | pytorch | 1x | | | | [config](./retinanet_timm_tv_resnet50_fpn_1x_coco.py) | | +| EfficientNet-B1 | - | 1x | | | | [config](./retinanet_timm_efficientnet_b1_fpn_1x_coco.py) | | + +## Usage + +### Install additional requirements + +MMDetection supports timm backbones via `TIMMBackbone`, a wrapper class in MMClassification. +Thus, you need to install `mmcls` in addition to timm. +If you have already installed requirements for mmdet, run + +```shell +pip install 'dataclasses; python_version<"3.7"' +pip install timm +pip install 'mmcls>=0.20.0' +``` + +See [this document](https://mmclassification.readthedocs.io/en/latest/install.html) for the details of MMClassification installation. + +### Edit config + +- See example configs for basic usage. +- See the documents of [timm feature extraction](https://rwightman.github.io/pytorch-image-models/feature_extraction/#multi-scale-feature-maps-feature-pyramid) and [TIMMBackbone](https://mmclassification.readthedocs.io/en/latest/api.html#mmcls.models.backbones.TIMMBackbone) for details. +- Which feature map is output depends on the backbone. + Please check `backbone out_channels` and `backbone out_strides` in your log, and modify `model.neck.in_channels` and `model.backbone.out_indices` if necessary. +- If you use Vision Transformer models that do not support `features_only=True`, add `custom_hooks = []` to your config to disable `NumClassCheckHook`. + +## Citation + +```latex +@misc{rw2019timm, + author = {Ross Wightman}, + title = {PyTorch Image Models}, + year = {2019}, + publisher = {GitHub}, + journal = {GitHub repository}, + doi = {10.5281/zenodo.4414861}, + howpublished = {\url{https://github.com/rwightman/pytorch-image-models}} +} +``` diff --git a/configs/timm_example/retinanet_timm_efficientnet_b1_fpn_1x_coco.py b/configs/timm_example/retinanet_timm_efficientnet_b1_fpn_1x_coco.py new file mode 100644 index 0000000..6500116 --- /dev/null +++ b/configs/timm_example/retinanet_timm_efficientnet_b1_fpn_1x_coco.py @@ -0,0 +1,20 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +# please install mmcls>=0.20.0 +# import mmcls.models to trigger register_module in mmcls +custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False) +model = dict( + backbone=dict( + _delete_=True, + type='mmcls.TIMMBackbone', + model_name='efficientnet_b1', + features_only=True, + pretrained=True, + out_indices=(1, 2, 3, 4)), + neck=dict(in_channels=[24, 40, 112, 320])) + +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/timm_example/retinanet_timm_tv_resnet50_fpn_1x_coco.py b/configs/timm_example/retinanet_timm_tv_resnet50_fpn_1x_coco.py new file mode 100644 index 0000000..0c5b7a8 --- /dev/null +++ b/configs/timm_example/retinanet_timm_tv_resnet50_fpn_1x_coco.py @@ -0,0 +1,19 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +# please install mmcls>=0.20.0 +# import mmcls.models to trigger register_module in mmcls +custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False) +model = dict( + backbone=dict( + _delete_=True, + type='mmcls.TIMMBackbone', + model_name='tv_resnet50', # ResNet-50 with torchvision weights + features_only=True, + pretrained=True, + out_indices=(1, 2, 3, 4))) + +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/tood/README.md b/configs/tood/README.md index b1522e7..925f0ed 100644 --- a/configs/tood/README.md +++ b/configs/tood/README.md @@ -1,22 +1,34 @@ -# TOOD: Task-aligned One-stage Object Detection +# TOOD -## Abstract +> [TOOD: Task-aligned One-stage Object Detection](https://arxiv.org/abs/2108.07755) + + - +## Abstract One-stage object detection is commonly implemented by optimizing two sub-tasks: object classification and localization, using heads with two parallel branches, which might lead to a certain level of spatial misalignment in predictions between the two tasks. In this work, we propose a Task-aligned One-stage Object Detection (TOOD) that explicitly aligns the two tasks in a learning-based manner. First, we design a novel Task-aligned Head (T-Head) which offers a better balance between learning task-interactive and task-specific features, as well as a greater flexibility to learn the alignment via a task-aligned predictor. Second, we propose Task Alignment Learning (TAL) to explicitly pull closer (or even unify) the optimal anchors for the two tasks during training via a designed sample assignment scheme and a task-aligned loss. Extensive experiments are conducted on MS-COCO, where TOOD achieves a 51.1 AP at single-model single-scale testing. This surpasses the recent one-stage detectors by a large margin, such as ATSS (47.7 AP), GFL (48.2 AP), and PAA (49.0 AP), with fewer parameters and FLOPs. Qualitative results also demonstrate the effectiveness of TOOD for better aligning the tasks of object classification and localization. -
- - +## Results and Models -## Citation +| Backbone | Style | Anchor Type | Lr schd | Multi-scale Training | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :---------------: | :-----: | :----------: | :-----: | :------------------: | :------: | :------------: | :----: | :------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | pytorch | Anchor-free | 1x | N | 4.1 | | 42.4 | [config](./tood_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_1x_coco/tood_r50_fpn_1x_coco_20211210_103425-20e20746.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_1x_coco/tood_r50_fpn_1x_coco_20211210_103425.log) | +| R-50 | pytorch | Anchor-based | 1x | N | 4.1 | | 42.4 | [config](./tood_r50_fpn_anchor_based_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_anchor_based_1x_coco/tood_r50_fpn_anchor_based_1x_coco_20211214_100105-b776c134.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_anchor_based_1x_coco/tood_r50_fpn_anchor_based_1x_coco_20211214_100105.log) | +| R-50 | pytorch | Anchor-free | 2x | Y | 4.1 | | 44.5 | [config](./tood_r50_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_mstrain_2x_coco/tood_r50_fpn_mstrain_2x_coco_20211210_144231-3b23174c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_mstrain_2x_coco/tood_r50_fpn_mstrain_2x_coco_20211210_144231.log) | +| R-101 | pytorch | Anchor-free | 2x | Y | 6.0 | | 46.1 | [config](./tood_r101_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r101_fpn_mstrain_2x_coco/tood_r101_fpn_mstrain_2x_coco_20211210_144232-a18f53c8.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r101_fpn_mstrain_2x_coco/tood_r101_fpn_mstrain_2x_coco_20211210_144232.log) | +| R-101-dcnv2 | pytorch | Anchor-free | 2x | Y | 6.2 | | 49.3 | [config](./tood_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r101_fpn_dconv_c3-c5_mstrain_2x_coco/tood_r101_fpn_dconv_c3-c5_mstrain_2x_coco_20211210_213728-4a824142.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r101_fpn_dconv_c3-c5_mstrain_2x_coco/tood_r101_fpn_dconv_c3-c5_mstrain_2x_coco_20211210_213728.log) | +| X-101-64x4d | pytorch | Anchor-free | 2x | Y | 10.2 | | 47.6 | [config](./tood_x101_64x4d_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_x101_64x4d_fpn_mstrain_2x_coco/tood_x101_64x4d_fpn_mstrain_2x_coco_20211211_003519-a4f36113.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_x101_64x4d_fpn_mstrain_2x_coco/tood_x101_64x4d_fpn_mstrain_2x_coco_20211211_003519.log) | +| X-101-64x4d-dcnv2 | pytorch | Anchor-free | 2x | Y | | | | [config](./tood_x101_64x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py) | [model](<>) \| [log](<>) | - +\[1\] *1x and 2x mean the model is trained for 90K and 180K iterations, respectively.* \ +\[2\] *All results are obtained with a single model and without any test time data augmentation such as multi-scale, flipping and etc..* \ +\[3\] *`dcnv2` denotes deformable convolutional networks v2.* \\ + +## Citation ```latex @inproceedings{feng2021tood, @@ -26,19 +38,3 @@ One-stage object detection is commonly implemented by optimizing two sub-tasks: year={2021} } ``` - -## Results and Models - -| Backbone | Style | Anchor Type | Lr schd | Multi-scale Training| Mem (GB)| Inf time (fps) | box AP | Config | Download | -|:-----------------:|:-------:|:------------:|:-------:|:-------------------:|:-------:|:--------------:|:------:|:------:|:--------:| -| R-50 | pytorch | Anchor-free | 1x | N | 4.1 | | 42.4 | [config](./tood_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_1x_coco/tood_r50_fpn_1x_coco_20211210_103425-20e20746.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_1x_coco/tood_r50_fpn_1x_coco_20211210_103425.log) | -| R-50 | pytorch | Anchor-based | 1x | N | 4.1 | | 42.4 | [config](./tood_r50_fpn_anchor_based_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_anchor_based_1x_coco/tood_r50_fpn_anchor_based_1x_coco_20211214_100105-b776c134.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_anchor_based_1x_coco/tood_r50_fpn_anchor_based_1x_coco_20211214_100105.log) | -| R-50 | pytorch | Anchor-free | 2x | Y | 4.1 | | 44.5 | [config](./tood_r50_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_mstrain_2x_coco/tood_r50_fpn_mstrain_2x_coco_20211210_144231-3b23174c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_mstrain_2x_coco/tood_r50_fpn_mstrain_2x_coco_20211210_144231.log) | -| R-101 | pytorch | Anchor-free | 2x | Y | 6.0 | | 46.1 | [config](./tood_r101_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r101_fpn_mstrain_2x_coco/tood_r101_fpn_mstrain_2x_coco_20211210_144232-a18f53c8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r101_fpn_mstrain_2x_coco/tood_r101_fpn_mstrain_2x_coco_20211210_144232.log) | -| R-101-dcnv2 | pytorch | Anchor-free | 2x | Y | 6.2 | | 49.3 | [config](./tood_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r101_fpn_dconv_c3-c5_mstrain_2x_coco/tood_r101_fpn_dconv_c3-c5_mstrain_2x_coco_20211210_213728-4a824142.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r101_fpn_dconv_c3-c5_mstrain_2x_coco/tood_r101_fpn_dconv_c3-c5_mstrain_2x_coco_20211210_213728.log) | -| X-101-64x4d | pytorch | Anchor-free | 2x | Y | 10.2 | | 47.6 | [config](./tood_x101_64x4d_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_x101_64x4d_fpn_mstrain_2x_coco/tood_x101_64x4d_fpn_mstrain_2x_coco_20211211_003519-a4f36113.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/tood/tood_x101_64x4d_fpn_mstrain_2x_coco/tood_x101_64x4d_fpn_mstrain_2x_coco_20211211_003519.log) | -| X-101-64x4d-dcnv2 | pytorch | Anchor-free | 2x | Y | | | | [config](./tood_x101_64x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py) | [model]() | [log]() | - -[1] *1x and 2x mean the model is trained for 90K and 180K iterations, respectively.* \ -[2] *All results are obtained with a single model and without any test time data augmentation such as multi-scale, flipping and etc..* \ -[3] *`dcnv2` denotes deformable convolutional networks v2.* \ diff --git a/configs/tridentnet/README.md b/configs/tridentnet/README.md index 6c43a3b..b972b3a 100644 --- a/configs/tridentnet/README.md +++ b/configs/tridentnet/README.md @@ -1,24 +1,34 @@ -# Scale-Aware Trident Networks for Object Detection +# TridentNet -## Abstract +> [Scale-Aware Trident Networks for Object Detection](https://arxiv.org/abs/1901.01892) + + - +## Abstract Scale variation is one of the key challenges in object detection. In this work, we first present a controlled experiment to investigate the effect of receptive fields for scale variation in object detection. Based on the findings from the exploration experiments, we propose a novel Trident Network (TridentNet) aiming to generate scale-specific feature maps with a uniform representational power. We construct a parallel multi-branch architecture in which each branch shares the same transformation parameters but with different receptive fields. Then, we adopt a scale-aware training scheme to specialize each branch by sampling object instances of proper scales for training. As a bonus, a fast approximation version of TridentNet could achieve significant improvements without any additional parameters and computational cost compared with the vanilla detector. On the COCO dataset, our TridentNet with ResNet-101 backbone achieves state-of-the-art single-model results of 48.4 mAP. -
- - +## Results and Models -## Citation +We reports the test results using only one branch for inference. - +| Backbone | Style | mstrain | Lr schd | Mem (GB) | Inf time (fps) | box AP | Download | +| :------: | :---: | :-----: | :-----: | :------: | :------------: | :----: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | caffe | N | 1x | | | 37.7 | [model](https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_1x_coco/tridentnet_r50_caffe_1x_coco_20201230_141838-2ec0b530.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_1x_coco/tridentnet_r50_caffe_1x_coco_20201230_141838.log.json) | +| R-50 | caffe | Y | 1x | | | 37.6 | [model](https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_mstrain_1x_coco/tridentnet_r50_caffe_mstrain_1x_coco_20201230_141839-6ce55ccb.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_mstrain_1x_coco/tridentnet_r50_caffe_mstrain_1x_coco_20201230_141839.log.json) | +| R-50 | caffe | Y | 3x | | | 40.3 | [model](https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_mstrain_3x_coco/tridentnet_r50_caffe_mstrain_3x_coco_20201130_100539-46d227ba.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_mstrain_3x_coco/tridentnet_r50_caffe_mstrain_3x_coco_20201130_100539.log.json) | -``` +**Note** + +Similar to [Detectron2](https://github.com/facebookresearch/detectron2/tree/master/projects/TridentNet), we haven't implemented the Scale-aware Training Scheme in section 4.2 of the paper. + +## Citation + +```latex @InProceedings{li2019scale, title={Scale-Aware Trident Networks for Object Detection}, author={Li, Yanghao and Chen, Yuntao and Wang, Naiyan and Zhang, Zhaoxiang}, @@ -26,17 +36,3 @@ Scale variation is one of the key challenges in object detection. In this work, year={2019} } ``` - -## Results and models - -We reports the test results using only one branch for inference. - -| Backbone | Style | mstrain | Lr schd | Mem (GB) | Inf time (fps) | box AP | Download | -| :-------------: | :-----: | :-----: | :-----: | :------: | :------------: | :----: | :------: | -| R-50 | caffe | N | 1x | | | 37.7 |[model](https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_1x_coco/tridentnet_r50_caffe_1x_coco_20201230_141838-2ec0b530.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_1x_coco/tridentnet_r50_caffe_1x_coco_20201230_141838.log.json) | -| R-50 | caffe | Y | 1x | | | 37.6 |[model](https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_mstrain_1x_coco/tridentnet_r50_caffe_mstrain_1x_coco_20201230_141839-6ce55ccb.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_mstrain_1x_coco/tridentnet_r50_caffe_mstrain_1x_coco_20201230_141839.log.json) | -| R-50 | caffe | Y | 3x | | | 40.3 |[model](https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_mstrain_3x_coco/tridentnet_r50_caffe_mstrain_3x_coco_20201130_100539-46d227ba.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_mstrain_3x_coco/tridentnet_r50_caffe_mstrain_3x_coco_20201130_100539.log.json) | - -**Note** - -Similar to [Detectron2](https://github.com/facebookresearch/detectron2/tree/master/projects/TridentNet), we haven't implemented the Scale-aware Training Scheme in section 4.2 of the paper. diff --git a/configs/vfnet/README.md b/configs/vfnet/README.md index e57a1be..a492bec 100644 --- a/configs/vfnet/README.md +++ b/configs/vfnet/README.md @@ -1,25 +1,40 @@ -# VarifocalNet: An IoU-aware Dense Object Detector +# VarifocalNet -## Abstract +> [VarifocalNet: An IoU-aware Dense Object Detector](https://arxiv.org/abs/2008.13367) + + - +## Abstract Accurately ranking the vast number of candidate detections is crucial for dense object detectors to achieve high performance. Prior work uses the classification score or a combination of classification and predicted localization scores to rank candidates. However, neither option results in a reliable ranking, thus degrading detection performance. In this paper, we propose to learn an Iou-aware Classification Score (IACS) as a joint representation of object presence confidence and localization accuracy. We show that dense object detectors can achieve a more accurate ranking of candidate detections based on the IACS. We design a new loss function, named Varifocal Loss, to train a dense object detector to predict the IACS, and propose a new star-shaped bounding box feature representation for IACS prediction and bounding box refinement. Combining these two new components and a bounding box refinement branch, we build an IoU-aware dense object detector based on the FCOS+ATSS architecture, that we call VarifocalNet or VFNet for short. Extensive experiments on MS COCO show that our VFNet consistently surpasses the strong baseline by ∼2.0 AP with different backbones. Our best model VFNet-X-1200 with Res2Net-101-DCN achieves a single-model single-scale AP of 55.1 on COCO test-dev, which is state-of-the-art among various object detectors. -
- - - ## Introduction - - **VarifocalNet (VFNet)** learns to predict the IoU-aware classification score which mixes the object presence confidence and localization accuracy together as the detection score for a bounding box. The learning is supervised by the proposed Varifocal Loss (VFL), based on a new star-shaped bounding box feature representation (the features at nine yellow sampling points). Given the new representation, the object localization accuracy is further improved by refining the initially regressed bounding box. The full paper is available at: [https://arxiv.org/abs/2008.13367](https://arxiv.org/abs/2008.13367). +## Results and Models + +| Backbone | Style | DCN | MS train | Lr schd | Inf time (fps) | box AP (val) | box AP (test-dev) | Config | Download | +| :---------: | :-----: | :-: | :------: | :-----: | :------------: | :----------: | :---------------: | :--------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | pytorch | N | N | 1x | - | 41.6 | 41.6 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet/vfnet_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_1x_coco/vfnet_r50_fpn_1x_coco_20201027-38db6f58.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_1x_coco/vfnet_r50_fpn_1x_coco.json) | +| R-50 | pytorch | N | Y | 2x | - | 44.5 | 44.8 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet/vfnet_r50_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_mstrain_2x_coco/vfnet_r50_fpn_mstrain_2x_coco_20201027-7cc75bd2.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_mstrain_2x_coco/vfnet_r50_fpn_mstrain_2x_coco.json) | +| R-50 | pytorch | Y | Y | 2x | - | 47.8 | 48.0 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-6879c318.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.json) | +| R-101 | pytorch | N | N | 1x | - | 43.0 | 43.6 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet/vfnet_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_1x_coco/vfnet_r101_fpn_1x_coco_20201027pth-c831ece7.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_1x_coco/vfnet_r101_fpn_1x_coco.json) | +| R-101 | pytorch | N | Y | 2x | - | 46.2 | 46.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet/vfnet_r101_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_mstrain_2x_coco/vfnet_r101_fpn_mstrain_2x_coco_20201027pth-4a5d53f1.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_mstrain_2x_coco/vfnet_r101_fpn_mstrain_2x_coco.json) | +| R-101 | pytorch | Y | Y | 2x | - | 49.0 | 49.2 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-7729adb5.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco.json) | +| X-101-32x4d | pytorch | Y | Y | 2x | - | 49.7 | 50.0 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-d300a6fc.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.json) | +| X-101-64x4d | pytorch | Y | Y | 2x | - | 50.4 | 50.8 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-b5f6da5e.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.json) | + +**Notes:** + +- The MS-train scale range is 1333x\[480:960\] (`range` mode) and the inference scale keeps 1333x800. +- DCN means using `DCNv2` in both backbone and head. +- Inference time will be updated soon. +- More results and pre-trained models can be found in [VarifocalNet-Github](https://github.com/hyz-xmaster/VarifocalNet) ## Citation @@ -31,23 +46,3 @@ Accurately ranking the vast number of candidate detections is crucial for dense year={2020} } ``` - -## Results and Models - -| Backbone | Style | DCN | MS train | Lr schd |Inf time (fps) | box AP (val) | box AP (test-dev) | Config | Download | -|:------------:|:---------:|:-------:|:--------:|:-------:|:-------------:|:------------:|:-----------------:|:------:|:--------:| -| R-50 | pytorch | N | N | 1x | - | 41.6 | 41.6 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet/vfnet_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_1x_coco/vfnet_r50_fpn_1x_coco_20201027-38db6f58.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_1x_coco/vfnet_r50_fpn_1x_coco.json)| -| R-50 | pytorch | N | Y | 2x | - | 44.5 | 44.8 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet/vfnet_r50_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_mstrain_2x_coco/vfnet_r50_fpn_mstrain_2x_coco_20201027-7cc75bd2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_mstrain_2x_coco/vfnet_r50_fpn_mstrain_2x_coco.json)| -| R-50 | pytorch | Y | Y | 2x | - | 47.8 | 48.0 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-6879c318.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.json)| -| R-101 | pytorch | N | N | 1x | - | 43.0 | 43.6 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet/vfnet_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_1x_coco/vfnet_r101_fpn_1x_coco_20201027pth-c831ece7.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_1x_coco/vfnet_r101_fpn_1x_coco.json)| -| R-101 | pytorch | N | Y | 2x | - | 46.2 | 46.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet/vfnet_r101_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_mstrain_2x_coco/vfnet_r101_fpn_mstrain_2x_coco_20201027pth-4a5d53f1.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_mstrain_2x_coco/vfnet_r101_fpn_mstrain_2x_coco.json)| -| R-101 | pytorch | Y | Y | 2x | - | 49.0 | 49.2 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-7729adb5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco.json)| -| X-101-32x4d | pytorch | Y | Y | 2x | - | 49.7 | 50.0 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-d300a6fc.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.json)| -| X-101-64x4d | pytorch | Y | Y | 2x | - | 50.4 | 50.8 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-b5f6da5e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.json)| - -**Notes:** - -- The MS-train scale range is 1333x[480:960] (`range` mode) and the inference scale keeps 1333x800. -- DCN means using `DCNv2` in both backbone and head. -- Inference time will be updated soon. -- More results and pre-trained models can be found in [VarifocalNet-Github](https://github.com/hyz-xmaster/VarifocalNet) diff --git a/configs/wider_face/README.md b/configs/wider_face/README.md index 2e6124c..1904506 100644 --- a/configs/wider_face/README.md +++ b/configs/wider_face/README.md @@ -1,23 +1,19 @@ -# WIDER FACE: A Face Detection Benchmark +# WIDER FACE -## Abstract +> [WIDER FACE: A Face Detection Benchmark](https://arxiv.org/abs/1511.06523) + + - +## Abstract Face detection is one of the most studied topics in the computer vision community. Much of the progresses have been made by the availability of face detection benchmark datasets. We show that there is a gap between current face detection performance and the real world requirements. To facilitate future face detection research, we introduce the WIDER FACE dataset, which is 10 times larger than existing datasets. The dataset contains rich annotations, including occlusions, poses, event categories, and face bounding boxes. Faces in the proposed dataset are extremely challenging due to large variations in scale, pose and occlusion, as shown in Fig. 1. Furthermore, we show that WIDER FACE dataset is an effective training source for face detection. We benchmark several representative detection systems, providing an overview of state-of-the-art performance and propose a solution to deal with large scale variation. Finally, we discuss common failure cases that worth to be further investigated. -
- - - ## Introduction - - To use the WIDER Face dataset you need to download it and extract to the `data/WIDERFace` folder. Annotation in the VOC format can be found in this [repo](https://github.com/sovrasov/wider-face-pascal-voc-annotations.git). @@ -51,7 +47,7 @@ create your own config based on the presented one. ## Citation -``` +```latex @inproceedings{yang2016wider, Author = {Yang, Shuo and Luo, Ping and Loy, Chen Change and Tang, Xiaoou}, Booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, diff --git a/configs/yolact/README.md b/configs/yolact/README.md index c4e3903..9eb51b4 100644 --- a/configs/yolact/README.md +++ b/configs/yolact/README.md @@ -1,27 +1,24 @@ -# **Y**ou **O**nly **L**ook **A**t **C**oefficien**T**s +# YOLACT -## Abstract +> [YOLACT: Real-time Instance Segmentation](https://arxiv.org/abs/1904.02689) + + - +## Abstract We present a simple, fully-convolutional model for real-time instance segmentation that achieves 29.8 mAP on MS COCO at 33.5 fps evaluated on a single Titan Xp, which is significantly faster than any previous competitive approach. Moreover, we obtain this result after training on only one GPU. We accomplish this by breaking instance segmentation into two parallel subtasks: (1) generating a set of prototype masks and (2) predicting per-instance mask coefficients. Then we produce instance masks by linearly combining the prototypes with the mask coefficients. We find that because this process doesn't depend on repooling, this approach produces very high-quality masks and exhibits temporal stability for free. Furthermore, we analyze the emergent behavior of our prototypes and show they learn to localize instances on their own in a translation variant manner, despite being fully-convolutional. Finally, we also propose Fast NMS, a drop-in 12 ms faster replacement for standard NMS that only has a marginal performance penalty. -
- - - ## Introduction - - A simple, fully convolutional model for real-time instance segmentation. This is the code for our paper: - [YOLACT: Real-time Instance Segmentation](https://arxiv.org/abs/1904.02689) - + + For a real-time demo, check out our ICCV video: [![IMAGE ALT TEXT HERE](https://img.youtube.com/vi/0pMfmo8qfpQ/0.jpg)](https://www.youtube.com/watch?v=0pMfmo8qfpQ) @@ -30,13 +27,13 @@ For a real-time demo, check out our ICCV video: Here are our YOLACT models along with their FPS on a Titan Xp and mAP on COCO's `val`: -| Image Size | GPU x BS | Backbone | *FPS | mAP | Weights | Configs | Download | -|:----------:|:--------:|:-------------:|:-----:|:----:|:-------:|:------:|:--------:| -| 550 | 1x8 | Resnet50-FPN | 42.5 | 29.0 | | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/yolact/yolact_r50_1x8_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/yolact/yolact_r50_1x8_coco/yolact_r50_1x8_coco_20200908-f38d58df.pth) | -| 550 | 8x8 | Resnet50-FPN | 42.5 | 28.4 | | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/yolact/yolact_r50_8x8_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolact/yolact_r50_8x8_coco/yolact_r50_8x8_coco_20200908-ca34f5db.pth) | -| 550 | 1x8 | Resnet101-FPN | 33.5 | 30.4 | | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/yolact/yolact_r101_1x8_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolact/yolact_r101_1x8_coco/yolact_r101_1x8_coco_20200908-4cbe9101.pth) | +| Image Size | GPU x BS | Backbone | \*FPS | mAP | Weights | Configs | Download | +| :--------: | :------: | :-----------: | :---: | :--: | :-----: | :----------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------: | +| 550 | 1x8 | Resnet50-FPN | 42.5 | 29.0 | | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/yolact/yolact_r50_1x8_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolact/yolact_r50_1x8_coco/yolact_r50_1x8_coco_20200908-f38d58df.pth) | +| 550 | 8x8 | Resnet50-FPN | 42.5 | 28.4 | | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/yolact/yolact_r50_8x8_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolact/yolact_r50_8x8_coco/yolact_r50_8x8_coco_20200908-ca34f5db.pth) | +| 550 | 1x8 | Resnet101-FPN | 33.5 | 30.4 | | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/yolact/yolact_r101_1x8_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolact/yolact_r101_1x8_coco/yolact_r101_1x8_coco_20200908-4cbe9101.pth) | -*Note: The FPS is evaluated by the [original implementation](https://github.com/dbolya/yolact). When calculating FPS, only the model inference time is taken into account. Data loading and post-processing operations such as converting masks to RLE code, generating COCO JSON results, image rendering are not included. +\*Note: The FPS is evaluated by the [original implementation](https://github.com/dbolya/yolact). When calculating FPS, only the model inference time is taken into account. Data loading and post-processing operations such as converting masks to RLE code, generating COCO JSON results, image rendering are not included. ## Training @@ -49,7 +46,7 @@ All the aforementioned models are trained with a single GPU. It typically takes ## Testing -Please refer to [mmdetection/docs/getting_started.md](https://github.com/open-mmlab/mmdetection/blob/master/docs/getting_started.md#inference-with-pretrained-models). +Please refer to [mmdetection/docs/getting_started.md](https://mmdetection.readthedocs.io/en/latest/1_exist_data_model.html#test-existing-models). ## Citation diff --git a/configs/yolact/yolact_r50_1x8_coco.py b/configs/yolact/yolact_r50_1x8_coco.py index ceed0ca..dbced5a 100644 --- a/configs/yolact/yolact_r50_1x8_coco.py +++ b/configs/yolact/yolact_r50_1x8_coco.py @@ -158,3 +158,8 @@ runner = dict(type='EpochBasedRunner', max_epochs=55) cudnn_benchmark = True evaluation = dict(metric=['bbox', 'segm']) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (1 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=8) diff --git a/configs/yolact/yolact_r50_8x8_coco.py b/configs/yolact/yolact_r50_8x8_coco.py index b3adcb7..41003ab 100644 --- a/configs/yolact/yolact_r50_8x8_coco.py +++ b/configs/yolact/yolact_r50_8x8_coco.py @@ -9,3 +9,8 @@ warmup_iters=1000, warmup_ratio=0.1, step=[20, 42, 49, 52]) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/yolo/README.md b/configs/yolo/README.md index 93d14bf..c9eb8a6 100644 --- a/configs/yolo/README.md +++ b/configs/yolo/README.md @@ -1,59 +1,55 @@ -# YOLOv3: An Incremental Improvement +# YOLOv3 -## Abstract +> [YOLOv3: An Incremental Improvement](https://arxiv.org/abs/1804.02767) - + + +## Abstract We present some updates to YOLO! We made a bunch of little design changes to make it better. We also trained this new network that's pretty swell. It's a little bigger than last time but more accurate. It's still fast though, don't worry. At 320x320 YOLOv3 runs in 22 ms at 28.2 mAP, as accurate as SSD but three times faster. When we look at the old .5 IOU mAP detection metric YOLOv3 is quite good. It achieves 57.9 mAP@50 in 51 ms on a Titan X, compared to 57.5 mAP@50 in 198 ms by RetinaNet, similar performance but 3.8x faster. -
- - - -## Citation - - - -```latex -@misc{redmon2018yolov3, - title={YOLOv3: An Incremental Improvement}, - author={Joseph Redmon and Ali Farhadi}, - year={2018}, - eprint={1804.02767}, - archivePrefix={arXiv}, - primaryClass={cs.CV} -} -``` - ## Results and Models -| Backbone | Scale | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | -| DarkNet-53 | 320 | 273e | 2.7 | 63.9 | 27.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo/yolov3_d53_320_273e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_320_273e_coco/yolov3_d53_320_273e_coco-421362b6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_320_273e_coco/yolov3_d53_320_273e_coco-20200819_172101.log.json) | -| DarkNet-53 | 416 | 273e | 3.8 | 61.2 | 30.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo/yolov3_d53_mstrain-416_273e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_mstrain-416_273e_coco/yolov3_d53_mstrain-416_273e_coco-2b60fcd9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_mstrain-416_273e_coco/yolov3_d53_mstrain-416_273e_coco-20200819_173424.log.json) | -| DarkNet-53 | 608 | 273e | 7.4 | 48.1 | 33.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo/yolov3_d53_mstrain-608_273e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_mstrain-608_273e_coco/yolov3_d53_mstrain-608_273e_coco_20210518_115020-a2c3acb8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_mstrain-608_273e_coco/yolov3_d53_mstrain-608_273e_coco_20210518_115020.log.json) | +| Backbone | Scale | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :--------: | :---: | :-----: | :------: | :------------: | :----: | :--------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| DarkNet-53 | 320 | 273e | 2.7 | 63.9 | 27.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo/yolov3_d53_320_273e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_320_273e_coco/yolov3_d53_320_273e_coco-421362b6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_320_273e_coco/yolov3_d53_320_273e_coco-20200819_172101.log.json) | +| DarkNet-53 | 416 | 273e | 3.8 | 61.2 | 30.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo/yolov3_d53_mstrain-416_273e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_mstrain-416_273e_coco/yolov3_d53_mstrain-416_273e_coco-2b60fcd9.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_mstrain-416_273e_coco/yolov3_d53_mstrain-416_273e_coco-20200819_173424.log.json) | +| DarkNet-53 | 608 | 273e | 7.4 | 48.1 | 33.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo/yolov3_d53_mstrain-608_273e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_mstrain-608_273e_coco/yolov3_d53_mstrain-608_273e_coco_20210518_115020-a2c3acb8.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_mstrain-608_273e_coco/yolov3_d53_mstrain-608_273e_coco_20210518_115020.log.json) | ## Mixed Precision Training We also train YOLOv3 with mixed precision training. -| Backbone | Scale | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | -| DarkNet-53 | 608 | 273e | 4.7 | 48.1 | 33.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo/yolov3_d53_fp16_mstrain-608_273e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_fp16_mstrain-608_273e_coco/yolov3_d53_fp16_mstrain-608_273e_coco_20210517_213542-4bc34944.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_fp16_mstrain-608_273e_coco/yolov3_d53_fp16_mstrain-608_273e_coco_20210517_213542.log.json) | +| Backbone | Scale | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :--------: | :---: | :-----: | :------: | :------------: | :----: | :-------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| DarkNet-53 | 608 | 273e | 4.7 | 48.1 | 33.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo/yolov3_d53_fp16_mstrain-608_273e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_fp16_mstrain-608_273e_coco/yolov3_d53_fp16_mstrain-608_273e_coco_20210517_213542-4bc34944.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_fp16_mstrain-608_273e_coco/yolov3_d53_fp16_mstrain-608_273e_coco_20210517_213542.log.json) | ## Lightweight models -| Backbone | Scale | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | -| MobileNetV2 | 416 | 300e | 5.3 | | 23.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo/yolov3_mobilenetv2_mstrain-416_300e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_mobilenetv2_mstrain-416_300e_coco/yolov3_mobilenetv2_mstrain-416_300e_coco_20210718_010823-f68a07b3.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_mobilenetv2_mstrain-416_300e_coco/yolov3_mobilenetv2_mstrain-416_300e_coco_20210718_010823.log.json) | -| MobileNetV2 | 320 | 300e | 3.2 | | 22.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo/yolov3_mobilenetv2_320_300e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_mobilenetv2_320_300e_coco/yolov3_mobilenetv2_320_300e_coco_20210719_215349-d18dff72.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_mobilenetv2_320_300e_coco/yolov3_mobilenetv2_320_300e_coco_20210719_215349.log.json) | +| Backbone | Scale | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :---------: | :---: | :-----: | :------: | :------------: | :----: | :----------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| MobileNetV2 | 416 | 300e | 5.3 | | 23.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo/yolov3_mobilenetv2_mstrain-416_300e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_mobilenetv2_mstrain-416_300e_coco/yolov3_mobilenetv2_mstrain-416_300e_coco_20210718_010823-f68a07b3.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_mobilenetv2_mstrain-416_300e_coco/yolov3_mobilenetv2_mstrain-416_300e_coco_20210718_010823.log.json) | +| MobileNetV2 | 320 | 300e | 3.2 | | 22.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo/yolov3_mobilenetv2_320_300e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_mobilenetv2_320_300e_coco/yolov3_mobilenetv2_320_300e_coco_20210719_215349-d18dff72.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_mobilenetv2_320_300e_coco/yolov3_mobilenetv2_320_300e_coco_20210719_215349.log.json) | Notice: We reduce the number of channels to 96 in both head and neck. It can reduce the flops and parameters, which makes these models more suitable for edge devices. ## Credit This implementation originates from the project of Haoyu Wu(@wuhy08) at Western Digital. + +## Citation + +```latex +@misc{redmon2018yolov3, + title={YOLOv3: An Incremental Improvement}, + author={Joseph Redmon and Ali Farhadi}, + year={2018}, + eprint={1804.02767}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` diff --git a/configs/yolo/yolov3_d53_mstrain-608_273e_coco.py b/configs/yolo/yolov3_d53_mstrain-608_273e_coco.py index 601f0fa..43aa2f0 100644 --- a/configs/yolo/yolov3_d53_mstrain-608_273e_coco.py +++ b/configs/yolo/yolov3_d53_mstrain-608_273e_coco.py @@ -125,3 +125,8 @@ # runtime settings runner = dict(type='EpochBasedRunner', max_epochs=273) evaluation = dict(interval=1, metric=['bbox']) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/yolo/yolov3_mobilenetv2_mstrain-416_300e_coco.py b/configs/yolo/yolov3_mobilenetv2_mstrain-416_300e_coco.py index fc3663e..18e0622 100644 --- a/configs/yolo/yolov3_mobilenetv2_mstrain-416_300e_coco.py +++ b/configs/yolo/yolov3_mobilenetv2_mstrain-416_300e_coco.py @@ -135,3 +135,8 @@ runner = dict(type='EpochBasedRunner', max_epochs=30) evaluation = dict(interval=1, metric=['bbox']) find_unused_parameters = True + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (24 samples per GPU) +auto_scale_lr = dict(base_batch_size=192) diff --git a/configs/yolof/README.md b/configs/yolof/README.md index 6a53ed5..e88da02 100644 --- a/configs/yolof/README.md +++ b/configs/yolof/README.md @@ -1,24 +1,31 @@ -# You Only Look One-level Feature +# YOLOF -## Abstract +> [You Only Look One-level Feature](https://arxiv.org/abs/2103.09460) + + - +## Abstract -This paper revisits feature pyramids networks (FPN) for one-stage detectors and points out that the success of FPN is due to its divide-and-conquer solution to the optimization problem in object detection rather than multi-scale feature fusion. From the perspective of optimization, we introduce an alternative way to address the problem instead of adopting the complex feature pyramids - {\em utilizing only one-level feature for detection}. Based on the simple and efficient solution, we present You Only Look One-level Feature (YOLOF). In our method, two key components, Dilated Encoder and Uniform Matching, are proposed and bring considerable improvements. Extensive experiments on the COCO benchmark prove the effectiveness of the proposed model. Our YOLOF achieves comparable results with its feature pyramids counterpart RetinaNet while being 2.5× faster. Without transformer layers, YOLOF can match the performance of DETR in a single-level feature manner with 7× less training epochs. With an image size of 608×608, YOLOF achieves 44.3 mAP running at 60 fps on 2080Ti, which is 13% faster than YOLOv4. +This paper revisits feature pyramids networks (FPN) for one-stage detectors and points out that the success of FPN is due to its divide-and-conquer solution to the optimization problem in object detection rather than multi-scale feature fusion. From the perspective of optimization, we introduce an alternative way to address the problem instead of adopting the complex feature pyramids - {\\em utilizing only one-level feature for detection}. Based on the simple and efficient solution, we present You Only Look One-level Feature (YOLOF). In our method, two key components, Dilated Encoder and Uniform Matching, are proposed and bring considerable improvements. Extensive experiments on the COCO benchmark prove the effectiveness of the proposed model. Our YOLOF achieves comparable results with its feature pyramids counterpart RetinaNet while being 2.5× faster. Without transformer layers, YOLOF can match the performance of DETR in a single-level feature manner with 7× less training epochs. With an image size of 608×608, YOLOF achieves 44.3 mAP running at 60 fps on 2080Ti, which is 13% faster than YOLOv4. -
- - +## Results and Models -## Citation +| Backbone | Style | Epoch | Lr schd | Mem (GB) | box AP | Config | Download | +| :------: | :---: | :---: | :-----: | :------: | :----: | :-------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50-C5 | caffe | Y | 1x | 8.3 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolof/yolof_r50_c5_8x8_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolof/yolof_r50_c5_8x8_1x_coco/yolof_r50_c5_8x8_1x_coco_20210425_024427-8e864411.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/yolof/yolof_r50_c5_8x8_1x_coco/yolof_r50_c5_8x8_1x_coco_20210425_024427.log.json) | - +**Note**: -``` +1. We find that the performance is unstable and may fluctuate by about 0.3 mAP. mAP 37.4 ~ 37.7 is acceptable in YOLOF_R_50_C5_1x. Such fluctuation can also be found in the [original implementation](https://github.com/chensnathan/YOLOF). +2. In addition to instability issues, sometimes there are large loss fluctuations and NAN, so there may still be problems with this project, which will be improved subsequently. + +## Citation + +```latex @inproceedings{chen2021you, title={You Only Look One-level Feature}, author={Chen, Qiang and Wang, Yingming and Yang, Tong and Zhang, Xiangyu and Cheng, Jian and Sun, Jian}, @@ -26,14 +33,3 @@ This paper revisits feature pyramids networks (FPN) for one-stage detectors and year={2021} } ``` - -## Results and Models - -| Backbone | Style | Epoch | Lr schd | Mem (GB) | box AP | Config | Download | -|:---------:|:-------:|:-------:|:-------:|:--------:|:------:|:------:|:--------:| -| R-50-C5 | caffe | Y | 1x | 8.3 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolof/yolof_r50_c5_8x8_1x_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/yolof/yolof_r50_c5_8x8_1x_coco/yolof_r50_c5_8x8_1x_coco_20210425_024427-8e864411.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/yolof/yolof_r50_c5_8x8_1x_coco/yolof_r50_c5_8x8_1x_coco_20210425_024427.log.json) | - -**Note**: - -1. We find that the performance is unstable and may fluctuate by about 0.3 mAP. mAP 37.4 ~ 37.7 is acceptable in YOLOF_R_50_C5_1x. Such fluctuation can also be found in the [original implementation](https://github.com/chensnathan/YOLOF). -2. In addition to instability issues, sometimes there are large loss fluctuations and NAN, so there may still be problems with this project, which will be improved subsequently. diff --git a/configs/yolof/yolof_r50_c5_8x8_1x_coco.py b/configs/yolof/yolof_r50_c5_8x8_1x_coco.py index 0e3b501..d0b9649 100644 --- a/configs/yolof/yolof_r50_c5_8x8_1x_coco.py +++ b/configs/yolof/yolof_r50_c5_8x8_1x_coco.py @@ -21,7 +21,8 @@ in_channels=2048, out_channels=512, block_mid_channels=128, - num_residual_blocks=4), + num_residual_blocks=4, + block_dilations=[2, 4, 6, 8]), bbox_head=dict( type='YOLOFHead', num_classes=80, @@ -103,3 +104,8 @@ train=dict(pipeline=train_pipeline), val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/yolox/README.md b/configs/yolox/README.md index b836b82..4890fbd 100644 --- a/configs/yolox/README.md +++ b/configs/yolox/README.md @@ -1,22 +1,33 @@ -# YOLOX: Exceeding YOLO Series in 2021 +# YOLOX -## Abstract +> [YOLOX: Exceeding YOLO Series in 2021](https://arxiv.org/abs/2107.08430) + + - +## Abstract In this report, we present some experienced improvements to YOLO series, forming a new high-performance detector -- YOLOX. We switch the YOLO detector to an anchor-free manner and conduct other advanced detection techniques, i.e., a decoupled head and the leading label assignment strategy SimOTA to achieve state-of-the-art results across a large scale range of models: For YOLO-Nano with only 0.91M parameters and 1.08G FLOPs, we get 25.3% AP on COCO, surpassing NanoDet by 1.8% AP; for YOLOv3, one of the most widely used detectors in industry, we boost it to 47.3% AP on COCO, outperforming the current best practice by 3.0% AP; for YOLOX-L with roughly the same amount of parameters as YOLOv4-CSP, YOLOv5-L, we achieve 50.0% AP on COCO at a speed of 68.9 FPS on Tesla V100, exceeding YOLOv5-L by 1.8% AP. Further, we won the 1st Place on Streaming Perception Challenge (Workshop on Autonomous Driving at CVPR 2021) using a single YOLOX-L model. We hope this report can provide useful experience for developers and researchers in practical scenes, and we also provide deploy versions with ONNX, TensorRT, NCNN, and Openvino supported. -
- - +## Results and Models -## Citation +| Backbone | size | Mem (GB) | box AP | Config | Download | +| :--------: | :--: | :------: | :----: | :-------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| YOLOX-tiny | 416 | 3.5 | 32.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox/yolox_tiny_8x8_300e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234.log.json) | +| YOLOX-s | 640 | 7.6 | 40.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox/yolox_s_8x8_300e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711.log.json) | +| YOLOX-l | 640 | 19.9 | 49.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox/yolox_l_8x8_300e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236.log.json) | +| YOLOX-x | 640 | 28.1 | 50.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox/yolox_x_8x8_300e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254.log.json) | - +**Note**: + +1. The test score threshold is 0.001, and the box AP indicates the best AP. +2. Due to the need for pre-training weights, we cannot reproduce the performance of the `yolox-nano` model. Please refer to https://github.com/Megvii-BaseDetection/YOLOX/issues/674 for more information. +3. We also trained the model by the official release of YOLOX based on [Megvii-BaseDetection/YOLOX#735](https://github.com/Megvii-BaseDetection/YOLOX/issues/735) with commit ID [38c633](https://github.com/Megvii-BaseDetection/YOLOX/tree/38c633bf176462ee42b110c70e4ffe17b5753208). We found that the best AP of `YOLOX-tiny`, `YOLOX-s`, `YOLOX-l`, and `YOLOX-x` is 31.8, 40.3, 49.2, and 50.9, respectively. The performance is consistent with that of our re-implementation (see Table above) but still has a gap (0.3~0.8 AP) in comparison with the reported performance in their [README](https://github.com/Megvii-BaseDetection/YOLOX/blob/38c633bf176462ee42b110c70e4ffe17b5753208/README.md#benchmark). + +## Citation ```latex @article{yolox2021, @@ -26,19 +37,3 @@ In this report, we present some experienced improvements to YOLO series, forming year={2021} } ``` - -## Results and Models - -| Backbone | size | Mem (GB) | box AP | Config | Download | -|:---------:|:-------:|:-------:|:-------:|:--------:|:------:| -| YOLOX-tiny | 416 | 3.5 | 32.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox/yolox_tiny_8x8_300e_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234.log.json) | -| YOLOX-s | 640 | 7.6 | 40.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox/yolox_s_8x8_300e_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711.log.json) | -| YOLOX-l | 640 | 19.9 | 49.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox/yolox_l_8x8_300e_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236.log.json) | -| YOLOX-x | 640 | 28.1 | 50.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox/yolox_x_8x8_300e_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254.log.json) | - - -**Note**: - -1. The test score threshold is 0.001, and the box AP indicates the best AP. -2. Due to the need for pre-training weights, we cannot reproduce the performance of the `yolox-nano` model. Please refer to https://github.com/Megvii-BaseDetection/YOLOX/issues/674 for more information. -3. We also trained the model by the official release of YOLOX based on [Megvii-BaseDetection/YOLOX#735](https://github.com/Megvii-BaseDetection/YOLOX/issues/735) with commit ID [38c633](https://github.com/Megvii-BaseDetection/YOLOX/tree/38c633bf176462ee42b110c70e4ffe17b5753208). We found that the best AP of `YOLOX-tiny`, `YOLOX-s`, `YOLOX-l`, and `YOLOX-x` is 31.8, 40.3, 49.2, and 50.9, respectively. The performance is consistent with that of our re-implementation (see Table above) but still has a gap (0.3~0.8 AP) in comparison with the reported performance in their [README](https://github.com/Megvii-BaseDetection/YOLOX/blob/38c633bf176462ee42b110c70e4ffe17b5753208/README.md#benchmark). diff --git a/configs/yolox/yolox_s_8x8_300e_coco.py b/configs/yolox/yolox_s_8x8_300e_coco.py index cc73051..97ff23e 100644 --- a/configs/yolox/yolox_s_8x8_300e_coco.py +++ b/configs/yolox/yolox_s_8x8_300e_coco.py @@ -1,6 +1,6 @@ _base_ = ['../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'] -img_scale = (640, 640) +img_scale = (640, 640) # height, width # model settings model = dict( @@ -158,3 +158,8 @@ dynamic_intervals=[(max_epochs - num_last_epochs, 1)], metric='bbox') log_config = dict(interval=50) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/yolox/yolox_tiny_8x8_300e_coco.py b/configs/yolox/yolox_tiny_8x8_300e_coco.py index 3aee99a..75931ba 100644 --- a/configs/yolox/yolox_tiny_8x8_300e_coco.py +++ b/configs/yolox/yolox_tiny_8x8_300e_coco.py @@ -7,7 +7,7 @@ neck=dict(in_channels=[96, 192, 384], out_channels=96), bbox_head=dict(in_channels=96, feat_channels=96)) -img_scale = (640, 640) +img_scale = (640, 640) # height, width train_pipeline = [ dict(type='Mosaic', img_scale=img_scale, pad_val=114.0), @@ -51,3 +51,8 @@ train=train_dataset, val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=64)