diff --git a/configs/_base_/datasets/cityscapes_detection.py b/configs/_base_/datasets/cityscapes_detection.py new file mode 100644 index 0000000..156aca0 --- /dev/null +++ b/configs/_base_/datasets/cityscapes_detection.py @@ -0,0 +1,55 @@ +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=1, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=8, + dataset=dict( + type=dataset_type, + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_train.json', + img_prefix=data_root + 'leftImg8bit/train/', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_val.json', + img_prefix=data_root + 'leftImg8bit/val/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_test.json', + img_prefix=data_root + 'leftImg8bit/test/', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric='bbox') diff --git a/configs/_base_/datasets/cityscapes_instance.py b/configs/_base_/datasets/cityscapes_instance.py new file mode 100644 index 0000000..3c5472a --- /dev/null +++ b/configs/_base_/datasets/cityscapes_instance.py @@ -0,0 +1,55 @@ +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=1, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=8, + dataset=dict( + type=dataset_type, + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_train.json', + img_prefix=data_root + 'leftImg8bit/train/', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_val.json', + img_prefix=data_root + 'leftImg8bit/val/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_test.json', + img_prefix=data_root + 'leftImg8bit/test/', + pipeline=test_pipeline)) +evaluation = dict(metric=['bbox', 'segm']) diff --git a/configs/_base_/datasets/coco_detection.py b/configs/_base_/datasets/coco_detection.py new file mode 100644 index 0000000..09a75c4 --- /dev/null +++ b/configs/_base_/datasets/coco_detection.py @@ -0,0 +1,48 @@ +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric='bbox') diff --git a/configs/_base_/datasets/coco_instance.py b/configs/_base_/datasets/coco_instance.py new file mode 100644 index 0000000..f6ea4f4 --- /dev/null +++ b/configs/_base_/datasets/coco_instance.py @@ -0,0 +1,48 @@ +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +evaluation = dict(metric=['bbox', 'segm']) diff --git a/configs/_base_/datasets/coco_instance_semantic.py b/configs/_base_/datasets/coco_instance_semantic.py new file mode 100644 index 0000000..f7c072e --- /dev/null +++ b/configs/_base_/datasets/coco_instance_semantic.py @@ -0,0 +1,53 @@ +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='SegRescale', scale_factor=1 / 8), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + seg_prefix=data_root + 'stuffthingmaps/train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +evaluation = dict(metric=['bbox', 'segm']) diff --git a/configs/_base_/datasets/deepfashion.py b/configs/_base_/datasets/deepfashion.py new file mode 100644 index 0000000..308b4b2 --- /dev/null +++ b/configs/_base_/datasets/deepfashion.py @@ -0,0 +1,53 @@ +# dataset settings +dataset_type = 'DeepFashionDataset' +data_root = 'data/DeepFashion/In-shop/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(750, 1101), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(750, 1101), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + imgs_per_gpu=2, + workers_per_gpu=1, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json', + img_prefix=data_root + 'Img/', + pipeline=train_pipeline, + data_root=data_root), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json', + img_prefix=data_root + 'Img/', + pipeline=test_pipeline, + data_root=data_root), + test=dict( + type=dataset_type, + ann_file=data_root + + 'annotations/DeepFashion_segmentation_gallery.json', + img_prefix=data_root + 'Img/', + pipeline=test_pipeline, + data_root=data_root)) +evaluation = dict(interval=5, metric=['bbox', 'segm']) diff --git a/configs/_base_/datasets/lvis_v0.5_instance.py b/configs/_base_/datasets/lvis_v0.5_instance.py new file mode 100644 index 0000000..f3da861 --- /dev/null +++ b/configs/_base_/datasets/lvis_v0.5_instance.py @@ -0,0 +1,23 @@ +_base_ = 'coco_instance.py' +dataset_type = 'LVISV05Dataset' +data_root = 'data/lvis_v0.5/' +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + _delete_=True, + type='ClassBalancedDataset', + oversample_thr=1e-3, + dataset=dict( + type=dataset_type, + ann_file=data_root + 'annotations/lvis_v0.5_train.json', + img_prefix=data_root + 'train2017/')), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/lvis_v0.5_val.json', + img_prefix=data_root + 'val2017/'), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/lvis_v0.5_val.json', + img_prefix=data_root + 'val2017/')) +evaluation = dict(metric=['bbox', 'segm']) diff --git a/configs/_base_/datasets/lvis_v1_instance.py b/configs/_base_/datasets/lvis_v1_instance.py new file mode 100644 index 0000000..e8c5d1b --- /dev/null +++ b/configs/_base_/datasets/lvis_v1_instance.py @@ -0,0 +1,23 @@ +_base_ = 'coco_instance.py' +dataset_type = 'LVISV1Dataset' +data_root = 'data/lvis_v1/' +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + _delete_=True, + type='ClassBalancedDataset', + oversample_thr=1e-3, + dataset=dict( + type=dataset_type, + ann_file=data_root + 'annotations/lvis_v1_train.json', + img_prefix=data_root)), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/lvis_v1_val.json', + img_prefix=data_root), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/lvis_v1_val.json', + img_prefix=data_root)) +evaluation = dict(metric=['bbox', 'segm']) diff --git a/configs/_base_/datasets/voc0712.py b/configs/_base_/datasets/voc0712.py new file mode 100644 index 0000000..ae09acd --- /dev/null +++ b/configs/_base_/datasets/voc0712.py @@ -0,0 +1,55 @@ +# dataset settings +dataset_type = 'VOCDataset' +data_root = 'data/VOCdevkit/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1000, 600), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1000, 600), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=3, + dataset=dict( + type=dataset_type, + ann_file=[ + data_root + 'VOC2007/ImageSets/Main/trainval.txt', + data_root + 'VOC2012/ImageSets/Main/trainval.txt' + ], + img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'], + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', + img_prefix=data_root + 'VOC2007/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', + img_prefix=data_root + 'VOC2007/', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric='mAP') diff --git a/configs/_base_/datasets/wider_face.py b/configs/_base_/datasets/wider_face.py new file mode 100644 index 0000000..d1d649b --- /dev/null +++ b/configs/_base_/datasets/wider_face.py @@ -0,0 +1,63 @@ +# dataset settings +dataset_type = 'WIDERFaceDataset' +data_root = 'data/WIDERFace/' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(300, 300), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(300, 300), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=60, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=2, + dataset=dict( + type=dataset_type, + ann_file=data_root + 'train.txt', + img_prefix=data_root + 'WIDER_train/', + min_size=17, + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + ann_file=data_root + 'val.txt', + img_prefix=data_root + 'WIDER_val/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'val.txt', + img_prefix=data_root + 'WIDER_val/', + pipeline=test_pipeline)) diff --git a/configs/_base_/default_runtime.py b/configs/_base_/default_runtime.py new file mode 100644 index 0000000..55097c5 --- /dev/null +++ b/configs/_base_/default_runtime.py @@ -0,0 +1,16 @@ +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +custom_hooks = [dict(type='NumClassCheckHook')] + +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py b/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py new file mode 100644 index 0000000..9ef6673 --- /dev/null +++ b/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py @@ -0,0 +1,196 @@ +# model settings +model = dict( + type='CascadeRCNN', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), + roi_head=dict( + type='CascadeRoIHead', + num_stages=3, + stage_loss_weights=[1, 0.5, 0.25], + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=[ + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.05, 0.05, 0.1, 0.1]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.033, 0.033, 0.067, 0.067]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) + ], + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=dict( + type='FCNMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=[ + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.6, + neg_iou_thr=0.6, + min_pos_iou=0.6, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.7, + min_pos_iou=0.7, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False) + ]), + test_cfg=dict( + rpn=dict( + nms_pre=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100, + mask_thr_binary=0.5))) diff --git a/configs/_base_/models/cascade_rcnn_r50_fpn.py b/configs/_base_/models/cascade_rcnn_r50_fpn.py new file mode 100644 index 0000000..cde2a96 --- /dev/null +++ b/configs/_base_/models/cascade_rcnn_r50_fpn.py @@ -0,0 +1,179 @@ +# model settings +model = dict( + type='CascadeRCNN', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), + roi_head=dict( + type='CascadeRoIHead', + num_stages=3, + stage_loss_weights=[1, 0.5, 0.25], + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=[ + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.05, 0.05, 0.1, 0.1]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.033, 0.033, 0.067, 0.067]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) + ]), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=[ + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.6, + neg_iou_thr=0.6, + min_pos_iou=0.6, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.7, + min_pos_iou=0.7, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False) + ]), + test_cfg=dict( + rpn=dict( + nms_pre=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100))) diff --git a/configs/_base_/models/fast_rcnn_r50_fpn.py b/configs/_base_/models/fast_rcnn_r50_fpn.py new file mode 100644 index 0000000..1099165 --- /dev/null +++ b/configs/_base_/models/fast_rcnn_r50_fpn.py @@ -0,0 +1,62 @@ +# model settings +model = dict( + type='FastRCNN', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + roi_head=dict( + type='StandardRoIHead', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False)), + test_cfg=dict( + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100))) diff --git a/configs/_base_/models/faster_rcnn_r50_caffe_c4.py b/configs/_base_/models/faster_rcnn_r50_caffe_c4.py new file mode 100644 index 0000000..6e18f71 --- /dev/null +++ b/configs/_base_/models/faster_rcnn_r50_caffe_c4.py @@ -0,0 +1,112 @@ +# model settings +norm_cfg = dict(type='BN', requires_grad=False) +model = dict( + type='FasterRCNN', + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict( + type='ResNet', + depth=50, + num_stages=3, + strides=(1, 2, 2), + dilations=(1, 1, 1), + out_indices=(2, ), + frozen_stages=1, + norm_cfg=norm_cfg, + norm_eval=True, + style='caffe'), + rpn_head=dict( + type='RPNHead', + in_channels=1024, + feat_channels=1024, + anchor_generator=dict( + type='AnchorGenerator', + scales=[2, 4, 8, 16, 32], + ratios=[0.5, 1.0, 2.0], + strides=[16]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + roi_head=dict( + type='StandardRoIHead', + shared_head=dict( + type='ResLayer', + depth=50, + stage=3, + stride=2, + dilation=1, + style='caffe', + norm_cfg=norm_cfg, + norm_eval=True), + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), + out_channels=1024, + featmap_strides=[16]), + bbox_head=dict( + type='BBoxHead', + with_avg_pool=True, + roi_feat_size=7, + in_channels=2048, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=12000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False)), + test_cfg=dict( + rpn=dict( + nms_pre=6000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100))) diff --git a/configs/_base_/models/faster_rcnn_r50_caffe_dc5.py b/configs/_base_/models/faster_rcnn_r50_caffe_dc5.py new file mode 100644 index 0000000..5089f0e --- /dev/null +++ b/configs/_base_/models/faster_rcnn_r50_caffe_dc5.py @@ -0,0 +1,103 @@ +# model settings +norm_cfg = dict(type='BN', requires_grad=False) +model = dict( + type='FasterRCNN', + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + strides=(1, 2, 2, 1), + dilations=(1, 1, 1, 2), + out_indices=(3, ), + frozen_stages=1, + norm_cfg=norm_cfg, + norm_eval=True, + style='caffe'), + rpn_head=dict( + type='RPNHead', + in_channels=2048, + feat_channels=2048, + anchor_generator=dict( + type='AnchorGenerator', + scales=[2, 4, 8, 16, 32], + ratios=[0.5, 1.0, 2.0], + strides=[16]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + roi_head=dict( + type='StandardRoIHead', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=2048, + featmap_strides=[16]), + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=2048, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=12000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False)), + test_cfg=dict( + rpn=dict( + nms=dict(type='nms', iou_threshold=0.7), + nms_pre=6000, + max_per_img=1000, + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100))) diff --git a/configs/_base_/models/faster_rcnn_r50_fpn.py b/configs/_base_/models/faster_rcnn_r50_fpn.py new file mode 100644 index 0000000..0f038d1 --- /dev/null +++ b/configs/_base_/models/faster_rcnn_r50_fpn.py @@ -0,0 +1,107 @@ +model = dict( + type='FasterRCNN', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + roi_head=dict( + type='StandardRoIHead', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=2000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False)), + test_cfg=dict( + rpn=dict( + nms_pre=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100) + # soft-nms is also supported for rcnn testing + # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) + )) diff --git a/configs/_base_/models/mask_rcnn_r50_caffe_c4.py b/configs/_base_/models/mask_rcnn_r50_caffe_c4.py new file mode 100644 index 0000000..eaae134 --- /dev/null +++ b/configs/_base_/models/mask_rcnn_r50_caffe_c4.py @@ -0,0 +1,123 @@ +# model settings +norm_cfg = dict(type='BN', requires_grad=False) +model = dict( + type='MaskRCNN', + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict( + type='ResNet', + depth=50, + num_stages=3, + strides=(1, 2, 2), + dilations=(1, 1, 1), + out_indices=(2, ), + frozen_stages=1, + norm_cfg=norm_cfg, + norm_eval=True, + style='caffe'), + rpn_head=dict( + type='RPNHead', + in_channels=1024, + feat_channels=1024, + anchor_generator=dict( + type='AnchorGenerator', + scales=[2, 4, 8, 16, 32], + ratios=[0.5, 1.0, 2.0], + strides=[16]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + roi_head=dict( + type='StandardRoIHead', + shared_head=dict( + type='ResLayer', + depth=50, + stage=3, + stride=2, + dilation=1, + style='caffe', + norm_cfg=norm_cfg, + norm_eval=True), + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), + out_channels=1024, + featmap_strides=[16]), + bbox_head=dict( + type='BBoxHead', + with_avg_pool=True, + roi_feat_size=7, + in_channels=2048, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + mask_roi_extractor=None, + mask_head=dict( + type='FCNMaskHead', + num_convs=0, + in_channels=2048, + conv_out_channels=256, + num_classes=80, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=12000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=14, + pos_weight=-1, + debug=False)), + test_cfg=dict( + rpn=dict( + nms_pre=6000, + nms=dict(type='nms', iou_threshold=0.7), + max_per_img=1000, + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100, + mask_thr_binary=0.5))) diff --git a/configs/_base_/models/mask_rcnn_r50_fpn.py b/configs/_base_/models/mask_rcnn_r50_fpn.py new file mode 100644 index 0000000..6fc7908 --- /dev/null +++ b/configs/_base_/models/mask_rcnn_r50_fpn.py @@ -0,0 +1,120 @@ +# model settings +model = dict( + type='MaskRCNN', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + roi_head=dict( + type='StandardRoIHead', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=dict( + type='FCNMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=2000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False)), + test_cfg=dict( + rpn=dict( + nms_pre=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100, + mask_thr_binary=0.5))) diff --git a/configs/_base_/models/retinanet_r50_fpn.py b/configs/_base_/models/retinanet_r50_fpn.py new file mode 100644 index 0000000..47fe98c --- /dev/null +++ b/configs/_base_/models/retinanet_r50_fpn.py @@ -0,0 +1,60 @@ +# model settings +model = dict( + type='RetinaNet', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_input', + num_outs=5), + bbox_head=dict( + type='RetinaHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100)) diff --git a/configs/_base_/models/rpn_r50_caffe_c4.py b/configs/_base_/models/rpn_r50_caffe_c4.py new file mode 100644 index 0000000..9c32a55 --- /dev/null +++ b/configs/_base_/models/rpn_r50_caffe_c4.py @@ -0,0 +1,56 @@ +# model settings +model = dict( + type='RPN', + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict( + type='ResNet', + depth=50, + num_stages=3, + strides=(1, 2, 2), + dilations=(1, 1, 1), + out_indices=(2, ), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='caffe'), + neck=None, + rpn_head=dict( + type='RPNHead', + in_channels=1024, + feat_channels=1024, + anchor_generator=dict( + type='AnchorGenerator', + scales=[2, 4, 8, 16, 32], + ratios=[0.5, 1.0, 2.0], + strides=[16]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False)), + test_cfg=dict( + rpn=dict( + nms_pre=12000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0))) diff --git a/configs/_base_/models/rpn_r50_fpn.py b/configs/_base_/models/rpn_r50_fpn.py new file mode 100644 index 0000000..22193c1 --- /dev/null +++ b/configs/_base_/models/rpn_r50_fpn.py @@ -0,0 +1,59 @@ +# model settings + +model = dict( + type='RPN', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False)), + test_cfg=dict( + rpn=dict( + nms_pre=2000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0))) diff --git a/configs/_base_/models/ssd300.py b/configs/_base_/models/ssd300.py new file mode 100644 index 0000000..4ea7975 --- /dev/null +++ b/configs/_base_/models/ssd300.py @@ -0,0 +1,49 @@ +# model settings +input_size = 300 +model = dict( + type='SingleStageDetector', + pretrained='open-mmlab://vgg16_caffe', + backbone=dict( + type='SSDVGG', + input_size=input_size, + depth=16, + with_last_pool=False, + ceil_mode=True, + out_indices=(3, 4), + out_feature_indices=(22, 34), + l2_norm_scale=20), + neck=None, + bbox_head=dict( + type='SSDHead', + in_channels=(512, 1024, 512, 256, 256, 256), + num_classes=80, + anchor_generator=dict( + type='SSDAnchorGenerator', + scale_major=False, + input_size=input_size, + basesize_ratio_range=(0.15, 0.9), + strides=[8, 16, 32, 64, 100, 300], + ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2])), + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0., + ignore_iof_thr=-1, + gt_max_assign_all=False), + smoothl1_beta=1., + allowed_border=-1, + pos_weight=-1, + neg_pos_ratio=3, + debug=False), + test_cfg=dict( + nms=dict(type='nms', iou_threshold=0.45), + min_bbox_size=0, + score_thr=0.02, + max_per_img=200)) +cudnn_benchmark = True diff --git a/configs/_base_/schedules/schedule_1x.py b/configs/_base_/schedules/schedule_1x.py new file mode 100644 index 0000000..13b3783 --- /dev/null +++ b/configs/_base_/schedules/schedule_1x.py @@ -0,0 +1,11 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[8, 11]) +runner = dict(type='EpochBasedRunner', max_epochs=12) diff --git a/configs/_base_/schedules/schedule_20e.py b/configs/_base_/schedules/schedule_20e.py new file mode 100644 index 0000000..00e8590 --- /dev/null +++ b/configs/_base_/schedules/schedule_20e.py @@ -0,0 +1,11 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/_base_/schedules/schedule_2x.py b/configs/_base_/schedules/schedule_2x.py new file mode 100644 index 0000000..69dc9ee --- /dev/null +++ b/configs/_base_/schedules/schedule_2x.py @@ -0,0 +1,11 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/albu_example/README.md b/configs/albu_example/README.md new file mode 100644 index 0000000..bf35a9b --- /dev/null +++ b/configs/albu_example/README.md @@ -0,0 +1,19 @@ +# Albu Example + +[OTHERS] + +``` +@article{2018arXiv180906839B, + author = {A. Buslaev, A. Parinov, E. Khvedchenya, V.~I. Iglovikov and A.~A. Kalinin}, + title = "{Albumentations: fast and flexible image augmentations}", + journal = {ArXiv e-prints}, + eprint = {1809.06839}, + year = 2018 +} +``` + +## Results and Models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:---------:|:-------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| +| R-50 | pytorch | 1x | 4.4 | 16.6 | 38.0 | 34.5 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/albu_example/mask_rcnn_r50_fpn_albu_1x_coco/mask_rcnn_r50_fpn_albu_1x_coco_20200208-ab203bcd.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/albu_example/mask_rcnn_r50_fpn_albu_1x_coco/mask_rcnn_r50_fpn_albu_1x_coco_20200208_225520.log.json) | diff --git a/configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py b/configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py new file mode 100644 index 0000000..b3f879a --- /dev/null +++ b/configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py @@ -0,0 +1,73 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +albu_train_transforms = [ + dict( + type='ShiftScaleRotate', + shift_limit=0.0625, + scale_limit=0.0, + rotate_limit=0, + interpolation=1, + p=0.5), + dict( + type='RandomBrightnessContrast', + brightness_limit=[0.1, 0.3], + contrast_limit=[0.1, 0.3], + p=0.2), + dict( + type='OneOf', + transforms=[ + dict( + type='RGBShift', + r_shift_limit=10, + g_shift_limit=10, + b_shift_limit=10, + p=1.0), + dict( + type='HueSaturationValue', + hue_shift_limit=20, + sat_shift_limit=30, + val_shift_limit=20, + p=1.0) + ], + p=0.1), + dict(type='JpegCompression', quality_lower=85, quality_upper=95, p=0.2), + dict(type='ChannelShuffle', p=0.1), + dict( + type='OneOf', + transforms=[ + dict(type='Blur', blur_limit=3, p=1.0), + dict(type='MedianBlur', blur_limit=3, p=1.0) + ], + p=0.1), +] +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='Pad', size_divisor=32), + dict( + type='Albu', + transforms=albu_train_transforms, + bbox_params=dict( + type='BboxParams', + format='pascal_voc', + label_fields=['gt_labels'], + min_visibility=0.0, + filter_lost_elements=True), + keymap={ + 'img': 'image', + 'gt_masks': 'masks', + 'gt_bboxes': 'bboxes' + }, + update_pad_shape=False, + skip_img_without_anno=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'], + meta_keys=('filename', 'ori_shape', 'img_shape', 'img_norm_cfg', + 'pad_shape', 'scale_factor')) +] +data = dict(train=dict(pipeline=train_pipeline)) diff --git a/configs/atss/README.md b/configs/atss/README.md new file mode 100644 index 0000000..4ba9150 --- /dev/null +++ b/configs/atss/README.md @@ -0,0 +1,21 @@ +# Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection + +## Introduction + +[ALGORITHM] + +```latex +@article{zhang2019bridging, + title = {Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection}, + author = {Zhang, Shifeng and Chi, Cheng and Yao, Yongqiang and Lei, Zhen and Li, Stan Z.}, + journal = {arXiv preprint arXiv:1912.02424}, + year = {2019} +} +``` + +## Results and Models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:---------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | pytorch | 1x | 3.7 | 19.7 | 39.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss/atss_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/atss/atss_r50_fpn_1x_coco/atss_r50_fpn_1x_coco_20200209-985f7bd0.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/atss/atss_r50_fpn_1x_coco/atss_r50_fpn_1x_coco_20200209_102539.log.json) | +| R-101 | pytorch | 1x | 5.6 | 12.3 | 41.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss/atss_r101_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/atss/atss_r101_fpn_1x_coco/atss_r101_fpn_1x_20200825-dfcadd6f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/atss/atss_r101_fpn_1x_coco/atss_r101_fpn_1x_20200825-dfcadd6f.log.json) | diff --git a/configs/atss/atss_r101_fpn_1x_coco.py b/configs/atss/atss_r101_fpn_1x_coco.py new file mode 100644 index 0000000..695779a --- /dev/null +++ b/configs/atss/atss_r101_fpn_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = './atss_r50_fpn_1x_coco.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(depth=101), +) diff --git a/configs/atss/atss_r50_fpn_1x_coco.py b/configs/atss/atss_r50_fpn_1x_coco.py new file mode 100644 index 0000000..cfd70ed --- /dev/null +++ b/configs/atss/atss_r50_fpn_1x_coco.py @@ -0,0 +1,62 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + type='ATSS', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5), + bbox_head=dict( + type='ATSSHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + octave_base_scale=8, + scales_per_octave=1, + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=2.0), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + # training and testing settings + train_cfg=dict( + assigner=dict(type='ATSSAssigner', topk=9), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/carafe/README.md b/configs/carafe/README.md new file mode 100644 index 0000000..d9ca664 --- /dev/null +++ b/configs/carafe/README.md @@ -0,0 +1,32 @@ +# CARAFE: Content-Aware ReAssembly of FEatures + +## Introduction + +[ALGORITHM] + +We provide config files to reproduce the object detection & instance segmentation results in the ICCV 2019 Oral paper for [CARAFE: Content-Aware ReAssembly of FEatures](https://arxiv.org/abs/1905.02188). + +``` +@inproceedings{Wang_2019_ICCV, + title = {CARAFE: Content-Aware ReAssembly of FEatures}, + author = {Wang, Jiaqi and Chen, Kai and Xu, Rui and Liu, Ziwei and Loy, Chen Change and Lin, Dahua}, + booktitle = {The IEEE International Conference on Computer Vision (ICCV)}, + month = {October}, + year = {2019} +} +``` + +## Results and Models + +The results on COCO 2017 val is shown in the below table. + +| Method | Backbone | Style | Lr schd | Test Proposal Num | Inf time (fps) | Box AP | Mask AP | Config | Download | +|:--------------------:|:--------:|:-------:|:-------:|:-----------------:|:--------------:|:------:|:-------:|:------:|:--------:| +| Faster R-CNN w/ CARAFE | R-50-FPN | pytorch | 1x | 1000 | 16.5 | 38.6 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/carafe/faster_rcnn_r50_fpn_carafe_1x_coco/faster_rcnn_r50_fpn_carafe_1x_coco_bbox_mAP-0.386_20200504_175733-385a75b7.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/carafe/faster_rcnn_r50_fpn_carafe_1x_coco/faster_rcnn_r50_fpn_carafe_1x_coco_20200504_175733.log.json) | +| - | - | - | - | 2000 | | | | | +| Mask R-CNN w/ CARAFE | R-50-FPN | pytorch | 1x | 1000 | 14.0 | 39.3 | 35.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/carafe/mask_rcnn_r50_fpn_carafe_1x_coco/mask_rcnn_r50_fpn_carafe_1x_coco_bbox_mAP-0.393__segm_mAP-0.358_20200503_135957-8687f195.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/carafe/mask_rcnn_r50_fpn_carafe_1x_coco/mask_rcnn_r50_fpn_carafe_1x_coco_20200503_135957.log.json) | +| - | - | - | - | 2000 | | | | | + +## Implementation + +The CUDA implementation of CARAFE can be find at https://github.com/myownskyW7/CARAFE. diff --git a/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py b/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py new file mode 100644 index 0000000..dedac3f --- /dev/null +++ b/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py @@ -0,0 +1,50 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + neck=dict( + type='FPN_CARAFE', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5, + start_level=0, + end_level=-1, + norm_cfg=None, + act_cfg=None, + order=('conv', 'norm', 'act'), + upsample_cfg=dict( + type='carafe', + up_kernel=5, + up_group=1, + encoder_kernel=3, + encoder_dilation=1, + compressed_channels=64))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=64), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=64), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py b/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py new file mode 100644 index 0000000..668c023 --- /dev/null +++ b/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py @@ -0,0 +1,60 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + neck=dict( + type='FPN_CARAFE', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5, + start_level=0, + end_level=-1, + norm_cfg=None, + act_cfg=None, + order=('conv', 'norm', 'act'), + upsample_cfg=dict( + type='carafe', + up_kernel=5, + up_group=1, + encoder_kernel=3, + encoder_dilation=1, + compressed_channels=64)), + roi_head=dict( + mask_head=dict( + upsample_cfg=dict( + type='carafe', + scale_factor=2, + up_kernel=5, + up_group=1, + encoder_kernel=3, + encoder_dilation=1, + compressed_channels=64)))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=64), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=64), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/cascade_rcnn/README.md b/configs/cascade_rcnn/README.md new file mode 100644 index 0000000..15e6191 --- /dev/null +++ b/configs/cascade_rcnn/README.md @@ -0,0 +1,55 @@ +# Cascade R-CNN: High Quality Object Detection and Instance Segmentation + +## Introduction + +[ALGORITHM] + +```latex +@article{Cai_2019, + title={Cascade R-CNN: High Quality Object Detection and Instance Segmentation}, + ISSN={1939-3539}, + url={http://dx.doi.org/10.1109/tpami.2019.2956516}, + DOI={10.1109/tpami.2019.2956516}, + journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, + publisher={Institute of Electrical and Electronics Engineers (IEEE)}, + author={Cai, Zhaowei and Vasconcelos, Nuno}, + year={2019}, + pages={1–1} +} +``` + +## Results and models + +### Cascade R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: |:------:|:--------:| +| R-50-FPN | caffe | 1x | 4.2 | | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco/cascade_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.404_20200504_174853-b857be87.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco/cascade_rcnn_r50_caffe_fpn_1x_coco_20200504_174853.log.json) | +| R-50-FPN | pytorch | 1x | 4.4 | 16.1 | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco/cascade_rcnn_r50_fpn_1x_coco_20200316-3dc56deb.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco/cascade_rcnn_r50_fpn_1x_coco_20200316_214748.log.json) | +| R-50-FPN | pytorch | 20e | - | - | 41.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco/cascade_rcnn_r50_fpn_20e_coco_bbox_mAP-0.41_20200504_175131-e9872a90.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco/cascade_rcnn_r50_fpn_20e_coco_20200504_175131.log.json) | +| R-101-FPN | caffe | 1x | 6.2 | | 42.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco/cascade_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.423_20200504_175649-cab8dbd5.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco/cascade_rcnn_r101_caffe_fpn_1x_coco_20200504_175649.log.json) | +| R-101-FPN | pytorch | 1x | 6.4 | 13.5 | 42.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco/cascade_rcnn_r101_fpn_1x_coco_20200317-0b6a2fbf.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco/cascade_rcnn_r101_fpn_1x_coco_20200317_101744.log.json) | +| R-101-FPN | pytorch | 20e | - | - | 42.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco/cascade_rcnn_r101_fpn_20e_coco_bbox_mAP-0.425_20200504_231812-5057dcc5.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco/cascade_rcnn_r101_fpn_20e_coco_20200504_231812.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 7.6 | 10.9 | 43.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco/cascade_rcnn_x101_32x4d_fpn_1x_coco_20200316-95c2deb6.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco/cascade_rcnn_x101_32x4d_fpn_1x_coco_20200316_055608.log.json) | +| X-101-32x4d-FPN | pytorch | 20e | 7.6 | | 43.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco/cascade_rcnn_x101_32x4d_fpn_20e_coco_20200906_134608-9ae0a720.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco/cascade_rcnn_x101_32x4d_fpn_20e_coco_20200906_134608.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 10.7 | | 44.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco/cascade_rcnn_x101_64x4d_fpn_1x_coco_20200515_075702-43ce6a30.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco/cascade_rcnn_x101_64x4d_fpn_1x_coco_20200515_075702.log.json) | +| X-101-64x4d-FPN | pytorch | 20e | 10.7 | | 44.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357-051557b1.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357.log.json)| + +### Cascade Mask R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +| R-50-FPN | caffe | 1x | 5.9 | | 41.2 | 36.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco/cascade_mask_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.412__segm_mAP-0.36_20200504_174659-5004b251.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco/cascade_mask_rcnn_r50_caffe_fpn_1x_coco_20200504_174659.log.json) | +| R-50-FPN | pytorch | 1x | 6.0 | 11.2 | 41.2 | 35.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco/cascade_mask_rcnn_r50_fpn_1x_coco_20200203-9d4dcb24.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco/cascade_mask_rcnn_r50_fpn_1x_coco_20200203_170449.log.json) | +| R-50-FPN | pytorch | 20e | - | - | 41.9 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco/cascade_mask_rcnn_r50_fpn_20e_coco_bbox_mAP-0.419__segm_mAP-0.365_20200504_174711-4af8e66e.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco/cascade_mask_rcnn_r50_fpn_20e_coco_20200504_174711.log.json)| +| R-101-FPN | caffe | 1x | 7.8 | | 43.2 | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco/cascade_mask_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.432__segm_mAP-0.376_20200504_174813-5c1e9599.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco/cascade_mask_rcnn_r101_caffe_fpn_1x_coco_20200504_174813.log.json)| +| R-101-FPN | pytorch | 1x | 7.9 | 9.8 | 42.9 | 37.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco/cascade_mask_rcnn_r101_fpn_1x_coco_20200203-befdf6ee.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco/cascade_mask_rcnn_r101_fpn_1x_coco_20200203_092521.log.json) | +| R-101-FPN | pytorch | 20e | - | - | 43.4 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco/cascade_mask_rcnn_r101_fpn_20e_coco_bbox_mAP-0.434__segm_mAP-0.378_20200504_174836-005947da.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco/cascade_mask_rcnn_r101_fpn_20e_coco_20200504_174836.log.json)| +| X-101-32x4d-FPN | pytorch | 1x | 9.2 | 8.6 | 44.3 | 38.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco_20200201-0f411b1f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco_20200201_052416.log.json) | +| X-101-32x4d-FPN | pytorch | 20e | 9.2 | - | 45.0 | 39.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco_20200528_083917-ed1f4751.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco_20200528_083917.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 12.2 | 6.7 | 45.3 | 39.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco_20200203-9a2db89d.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco_20200203_044059.log.json) | +| X-101-64x4d-FPN | pytorch | 20e | 12.2 | | 45.6 |39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco_20200512_161033-bdb5126a.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco_20200512_161033.log.json)| + +**Notes:** + +- The `20e` schedule in Cascade (Mask) R-CNN indicates decreasing the lr at 16 and 19 epochs, with a total of 20 epochs. diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..f42165d --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = './cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet101_caffe', + backbone=dict(depth=101)) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py new file mode 100644 index 0000000..9212dda --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco.py new file mode 100644 index 0000000..d069f8c --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco.py @@ -0,0 +1,2 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_20e_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..b371ed7 --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,38 @@ +_base_ = ['./cascade_mask_rcnn_r50_fpn_1x_coco.py'] + +model = dict( + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict( + norm_cfg=dict(requires_grad=False), norm_eval=True, style='caffe')) + +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..49ab539 --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/cascade_mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco.py new file mode 100644 index 0000000..1296dc4 --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/cascade_mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_20e.py', '../_base_/default_runtime.py' +] diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..d05eb50 --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco.py new file mode 100644 index 0000000..0cfc7d7 --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco.py @@ -0,0 +1,13 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_20e_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..33629ee --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco.py new file mode 100644 index 0000000..e64c22c --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco.py @@ -0,0 +1,13 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_20e_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..8e8b830 --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = './cascade_rcnn_r50_caffe_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet101_caffe', + backbone=dict(depth=101)) diff --git a/configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py new file mode 100644 index 0000000..6666651 --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './cascade_rcnn_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco.py b/configs/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco.py new file mode 100644 index 0000000..9cb3581 --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco.py @@ -0,0 +1,2 @@ +_base_ = './cascade_rcnn_r50_fpn_20e_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..c576c74 --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,38 @@ +_base_ = './cascade_rcnn_r50_fpn_1x_coco.py' + +model = dict( + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict(norm_cfg=dict(requires_grad=False), style='caffe')) + +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..87e21fb --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/cascade_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] diff --git a/configs/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco.py b/configs/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco.py new file mode 100644 index 0000000..6f886e1 --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco.py @@ -0,0 +1,4 @@ +_base_ = './cascade_rcnn_r50_fpn_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..1fbe6ce --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './cascade_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco.py b/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco.py new file mode 100644 index 0000000..1afeeef --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco.py @@ -0,0 +1,13 @@ +_base_ = './cascade_rcnn_r50_fpn_20e_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..b249bfa --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './cascade_rcnn_r50_fpn_1x_coco.py' +model = dict( + type='CascadeRCNN', + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py b/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py new file mode 100644 index 0000000..500b48c --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py @@ -0,0 +1,14 @@ +_base_ = './cascade_rcnn_r50_fpn_20e_coco.py' +model = dict( + type='CascadeRCNN', + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/cascade_rpn/README.md b/configs/cascade_rpn/README.md new file mode 100644 index 0000000..2b0c6de --- /dev/null +++ b/configs/cascade_rpn/README.md @@ -0,0 +1,29 @@ +# Cascade RPN + +[ALGORITHM] + +We provide the code for reproducing experiment results of [Cascade RPN](https://arxiv.org/abs/1909.06720). + +``` +@inproceedings{vu2019cascade, + title={Cascade RPN: Delving into High-Quality Region Proposal Network with Adaptive Convolution}, + author={Vu, Thang and Jang, Hyunjun and Pham, Trung X and Yoo, Chang D}, + booktitle={Conference on Neural Information Processing Systems (NeurIPS)}, + year={2019} +} +``` + +## Benchmark + +### Region proposal performance + +| Method | Backbone | Style | Mem (GB) | Train time (s/iter) | Inf time (fps) | AR 1000 | Download | +|:------:|:--------:|:-----:|:--------:|:-------------------:|:--------------:|:-------:|:--------------------------------------:| +| CRPN | R-50-FPN | caffe | - | - | - | 72.0 | [model](https://drive.google.com/file/d/1qxVdOnCgK-ee7_z0x6mvAir_glMu2Ihi/view?usp=sharing) | + +### Detection performance + +| Method | Proposal | Backbone | Style | Schedule | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | +|:-------------:|:-----------:|:--------:|:-------:|:--------:|:--------:|:-------------------:|:--------------:|:------:|:--------------------------------------------:| +| Fast R-CNN | Cascade RPN | R-50-FPN | caffe | 1x | - | - | - | 39.9 | [model](https://drive.google.com/file/d/1NmbnuY5VHi8I9FE8xnp5uNvh2i-t-6_L/view?usp=sharing) | +| Faster R-CNN | Cascade RPN | R-50-FPN | caffe | 1x | - | - | - | 40.4 | [model](https://drive.google.com/file/d/1dS3Q66qXMJpcuuQgDNkLp669E5w1UMuZ/view?usp=sharing) | diff --git a/configs/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco.py b/configs/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..68c57df --- /dev/null +++ b/configs/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,75 @@ +_base_ = '../fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='caffe'), + roi_head=dict( + bbox_head=dict( + bbox_coder=dict(target_stds=[0.04, 0.04, 0.08, 0.08]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.5), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rcnn=dict( + assigner=dict( + pos_iou_thr=0.65, neg_iou_thr=0.65, min_pos_iou=0.65), + sampler=dict(num=256))), + test_cfg=dict(rcnn=dict(score_thr=1e-3))) +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=300), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=300), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='ToTensor', keys=['proposals']), + dict( + type='ToDataContainer', + fields=[dict(key='proposals', stack=False)]), + dict(type='Collect', keys=['img', 'proposals']), + ]) +] +data = dict( + train=dict( + proposal_file=data_root + + 'proposals/crpn_r50_caffe_fpn_1x_train2017.pkl', + pipeline=train_pipeline), + val=dict( + proposal_file=data_root + + 'proposals/crpn_r50_caffe_fpn_1x_val2017.pkl', + pipeline=test_pipeline), + test=dict( + proposal_file=data_root + + 'proposals/crpn_r50_caffe_fpn_1x_val2017.pkl', + pipeline=test_pipeline)) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py b/configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..bad86e6 --- /dev/null +++ b/configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,92 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py' +rpn_weight = 0.7 +model = dict( + rpn_head=dict( + _delete_=True, + type='CascadeRPNHead', + num_stages=2, + stages=[ + dict( + type='StageCascadeRPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[1.0], + strides=[4, 8, 16, 32, 64]), + adapt_cfg=dict(type='dilation', dilation=3), + bridged_feature=True, + sampling=False, + with_cls=False, + reg_decoded_bbox=True, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=(.0, .0, .0, .0), + target_stds=(0.1, 0.1, 0.5, 0.5)), + loss_bbox=dict( + type='IoULoss', linear=True, + loss_weight=10.0 * rpn_weight)), + dict( + type='StageCascadeRPNHead', + in_channels=256, + feat_channels=256, + adapt_cfg=dict(type='offset'), + bridged_feature=False, + sampling=True, + with_cls=True, + reg_decoded_bbox=True, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=(.0, .0, .0, .0), + target_stds=(0.05, 0.05, 0.1, 0.1)), + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + loss_weight=1.0 * rpn_weight), + loss_bbox=dict( + type='IoULoss', linear=True, + loss_weight=10.0 * rpn_weight)) + ]), + roi_head=dict( + bbox_head=dict( + bbox_coder=dict(target_stds=[0.04, 0.04, 0.08, 0.08]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.5), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=[ + dict( + assigner=dict( + type='RegionAssigner', center_ratio=0.2, ignore_ratio=0.5), + allowed_border=-1, + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.7, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + pos_weight=-1, + debug=False) + ], + rpn_proposal=dict(max_per_img=300, nms=dict(iou_threshold=0.8)), + rcnn=dict( + assigner=dict( + pos_iou_thr=0.65, neg_iou_thr=0.65, min_pos_iou=0.65), + sampler=dict(type='RandomSampler', num=256))), + test_cfg=dict( + rpn=dict(max_per_img=300, nms=dict(iou_threshold=0.8)), + rcnn=dict(score_thr=1e-3))) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/cascade_rpn/crpn_r50_caffe_fpn_1x_coco.py b/configs/cascade_rpn/crpn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..5562e69 --- /dev/null +++ b/configs/cascade_rpn/crpn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,77 @@ +_base_ = '../rpn/rpn_r50_caffe_fpn_1x_coco.py' +model = dict( + rpn_head=dict( + _delete_=True, + type='CascadeRPNHead', + num_stages=2, + stages=[ + dict( + type='StageCascadeRPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[1.0], + strides=[4, 8, 16, 32, 64]), + adapt_cfg=dict(type='dilation', dilation=3), + bridged_feature=True, + sampling=False, + with_cls=False, + reg_decoded_bbox=True, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=(.0, .0, .0, .0), + target_stds=(0.1, 0.1, 0.5, 0.5)), + loss_bbox=dict(type='IoULoss', linear=True, loss_weight=10.0)), + dict( + type='StageCascadeRPNHead', + in_channels=256, + feat_channels=256, + adapt_cfg=dict(type='offset'), + bridged_feature=False, + sampling=True, + with_cls=True, + reg_decoded_bbox=True, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=(.0, .0, .0, .0), + target_stds=(0.05, 0.05, 0.1, 0.1)), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, + loss_weight=1.0), + loss_bbox=dict(type='IoULoss', linear=True, loss_weight=10.0)) + ]), + train_cfg=dict(rpn=[ + dict( + assigner=dict( + type='RegionAssigner', center_ratio=0.2, ignore_ratio=0.5), + allowed_border=-1, + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.7, + min_pos_iou=0.3, + ignore_iof_thr=-1, + iou_calculator=dict(type='BboxOverlaps2D')), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + pos_weight=-1, + debug=False) + ]), + test_cfg=dict( + rpn=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.8), + min_bbox_size=0))) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/centripetalnet/README.md b/configs/centripetalnet/README.md new file mode 100644 index 0000000..18631da --- /dev/null +++ b/configs/centripetalnet/README.md @@ -0,0 +1,26 @@ +# CentripetalNet + +## Introduction + +[ALGORITHM] + +```latex +@InProceedings{Dong_2020_CVPR, +author = {Dong, Zhiwei and Li, Guoxuan and Liao, Yue and Wang, Fei and Ren, Pengju and Qian, Chen}, +title = {CentripetalNet: Pursuing High-Quality Keypoint Pairs for Object Detection}, +booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, +month = {June}, +year = {2020} +} +``` + +## Results and models + +| Backbone | Batch Size | Step/Total Epochs | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :--------: |:----------------: | :------: | :------------: | :----: | :------: | :--------: | +| HourglassNet-104 | [16 x 6](./centripetalnet_hourglass104_mstest_16x6_210e_coco.py) | 190/210 | 16.7 | 3.7 | 44.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco/centripetalnet_hourglass104_mstest_16x6_210e_coco_20200915_204804-3ccc61e5.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco/centripetalnet_hourglass104_mstest_16x6_210e_coco_20200915_204804.log.json) | + +Note: + +- TTA setting is single-scale and `flip=True`. +- The model we released is the best checkpoint rather than the latest checkpoint (box AP 44.8 vs 44.6 in our experiment). diff --git a/configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py b/configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py new file mode 100644 index 0000000..e9c5def --- /dev/null +++ b/configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py @@ -0,0 +1,105 @@ +_base_ = [ + '../_base_/default_runtime.py', '../_base_/datasets/coco_detection.py' +] + +# model settings +model = dict( + type='CornerNet', + backbone=dict( + type='HourglassNet', + downsample_times=5, + num_stacks=2, + stage_channels=[256, 256, 384, 384, 384, 512], + stage_blocks=[2, 2, 2, 2, 2, 4], + norm_cfg=dict(type='BN', requires_grad=True)), + neck=None, + bbox_head=dict( + type='CentripetalHead', + num_classes=80, + in_channels=256, + num_feat_levels=2, + corner_emb_channels=0, + loss_heatmap=dict( + type='GaussianFocalLoss', alpha=2.0, gamma=4.0, loss_weight=1), + loss_offset=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1), + loss_guiding_shift=dict( + type='SmoothL1Loss', beta=1.0, loss_weight=0.05), + loss_centripetal_shift=dict( + type='SmoothL1Loss', beta=1.0, loss_weight=1)), + # training and testing settings + train_cfg=None, + test_cfg=dict( + corner_topk=100, + local_maximum_kernel=3, + distance_threshold=0.5, + score_thr=0.05, + max_per_img=100, + nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian'))) +# data settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='RandomCenterCropPad', + crop_size=(511, 511), + ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3), + test_mode=False, + test_pad_mode=None, + **img_norm_cfg), + dict(type='Resize', img_scale=(511, 511), keep_ratio=False), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict( + type='MultiScaleFlipAug', + scale_factor=1.0, + flip=True, + transforms=[ + dict(type='Resize'), + dict( + type='RandomCenterCropPad', + crop_size=None, + ratios=None, + border=None, + test_mode=True, + test_pad_mode=['logical_or', 127], + **img_norm_cfg), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict( + type='Collect', + keys=['img'], + meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape', + 'scale_factor', 'flip', 'img_norm_cfg', 'border')), + ]) +] +data = dict( + samples_per_gpu=6, + workers_per_gpu=3, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='Adam', lr=0.0005) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[190]) +runner = dict(type='EpochBasedRunner', max_epochs=210) diff --git a/configs/cityscapes/README.md b/configs/cityscapes/README.md new file mode 100644 index 0000000..d892fc9 --- /dev/null +++ b/configs/cityscapes/README.md @@ -0,0 +1,33 @@ +# Cityscapes Dataset + +[DATASET] + +``` +@inproceedings{Cordts2016Cityscapes, + title={The Cityscapes Dataset for Semantic Urban Scene Understanding}, + author={Cordts, Marius and Omran, Mohamed and Ramos, Sebastian and Rehfeld, Timo and Enzweiler, Markus and Benenson, Rodrigo and Franke, Uwe and Roth, Stefan and Schiele, Bernt}, + booktitle={Proc. of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year={2016} +} +``` + +## Common settings + +- All baselines were trained using 8 GPU with a batch size of 8 (1 images per GPU) using the [linear scaling rule](https://arxiv.org/abs/1706.02677) to scale the learning rate. +- All models were trained on `cityscapes_train`, and tested on `cityscapes_val`. +- 1x training schedule indicates 64 epochs which corresponds to slightly less than the 24k iterations reported in the original schedule from the [Mask R-CNN paper](https://arxiv.org/abs/1703.06870) +- COCO pre-trained weights are used to initialize. +- A conversion [script](../../tools/dataset_converters/cityscapes.py) is provided to convert Cityscapes into COCO format. Please refer to [install.md](../../docs/1_exist_data_model.md#prepare-datasets) for details. +- `CityscapesDataset` implemented three evaluation methods. `bbox` and `segm` are standard COCO bbox/mask AP. `cityscapes` is the cityscapes dataset official evaluation, which may be slightly higher than COCO. + +### Faster R-CNN + +| Backbone | Style | Lr schd | Scale | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :---: | :------: | :------------: | :----: | :------: | :--------: | +| R-50-FPN | pytorch | 1x | 800-1024 | 5.2 | - | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes_20200502-829424c0.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes_20200502_114915.log.json) | + +### Mask R-CNN + +| Backbone | Style | Lr schd | Scale | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------: | :------------: | :----: | :-----: | :------: | :------: | +| R-50-FPN | pytorch | 1x | 800-1024 | 5.3 | - | 40.9 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes/mask_rcnn_r50_fpn_1x_cityscapes_20201211_133733-d2858245.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes/mask_rcnn_r50_fpn_1x_cityscapes_20201211_133733.log.json) | diff --git a/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py b/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py new file mode 100644 index 0000000..5b17451 --- /dev/null +++ b/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py @@ -0,0 +1,39 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/cityscapes_detection.py', + '../_base_/default_runtime.py' +] +model = dict( + pretrained=None, + roi_head=dict( + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=8, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)))) +# optimizer +# lr is set for a batch size of 8 +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + # [7] yields higher performance than [6] + step=[7]) +runner = dict( + type='EpochBasedRunner', max_epochs=8) # actual epoch = 8 * 8 = 64 +log_config = dict(interval=100) +# For better, more stable performance initialize from COCO +load_from = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth' # noqa diff --git a/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py b/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py new file mode 100644 index 0000000..0a4d7ca --- /dev/null +++ b/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py @@ -0,0 +1,46 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/cityscapes_instance.py', '../_base_/default_runtime.py' +] +model = dict( + pretrained=None, + roi_head=dict( + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=8, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), + mask_head=dict( + type='FCNMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=8, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)))) +# optimizer +# lr is set for a batch size of 8 +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + # [7] yields higher performance than [6] + step=[7]) +runner = dict( + type='EpochBasedRunner', max_epochs=8) # actual epoch = 8 * 8 = 64 +log_config = dict(interval=100) +# For better, more stable performance initialize from COCO +load_from = 'https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth' # noqa diff --git a/configs/cornernet/README.md b/configs/cornernet/README.md new file mode 100644 index 0000000..51e5e7a --- /dev/null +++ b/configs/cornernet/README.md @@ -0,0 +1,33 @@ +# CornerNet + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{law2018cornernet, + title={Cornernet: Detecting objects as paired keypoints}, + author={Law, Hei and Deng, Jia}, + booktitle={15th European Conference on Computer Vision, ECCV 2018}, + pages={765--781}, + year={2018}, + organization={Springer Verlag} +} +``` + +## Results and models + +| Backbone | Batch Size | Step/Total Epochs | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :--------: |:----------------: | :------: | :------------: | :----: | :------: | :--------: | +| HourglassNet-104 | [10 x 5](./cornernet_hourglass104_mstest_10x5_210e_coco.py) | 180/210 | 13.9 | 4.2 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco/cornernet_hourglass104_mstest_10x5_210e_coco_20200824_185720-5fefbf1c.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco/cornernet_hourglass104_mstest_10x5_210e_coco_20200824_185720.log.json) | +| HourglassNet-104 | [8 x 6](./cornernet_hourglass104_mstest_8x6_210e_coco.py) | 180/210 | 15.9 | 4.2 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco/cornernet_hourglass104_mstest_8x6_210e_coco_20200825_150618-79b44c30.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco/cornernet_hourglass104_mstest_8x6_210e_coco_20200825_150618.log.json) | +| HourglassNet-104 | [32 x 3](./cornernet_hourglass104_mstest_32x3_210e_coco.py) | 180/210 | 9.5 | 3.9 | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco/cornernet_hourglass104_mstest_32x3_210e_coco_20200819_203110-1efaea91.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco/cornernet_hourglass104_mstest_32x3_210e_coco_20200819_203110.log.json) | + +Note: + +- TTA setting is single-scale and `flip=True`. +- Experiments with `images_per_gpu=6` are conducted on Tesla V100-SXM2-32GB, `images_per_gpu=3` are conducted on GeForce GTX 1080 Ti. +- Here are the descriptions of each experiment setting: + - 10 x 5: 10 GPUs with 5 images per gpu. This is the same setting as that reported in the original paper. + - 8 x 6: 8 GPUs with 6 images per gpu. The total batchsize is similar to paper and only need 1 node to train. + - 32 x 3: 32 GPUs with 3 images per gpu. The default setting for 1080TI and need 4 nodes to train. diff --git a/configs/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco.py b/configs/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco.py new file mode 100644 index 0000000..89f3876 --- /dev/null +++ b/configs/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco.py @@ -0,0 +1,105 @@ +_base_ = [ + '../_base_/default_runtime.py', '../_base_/datasets/coco_detection.py' +] + +# model settings +model = dict( + type='CornerNet', + backbone=dict( + type='HourglassNet', + downsample_times=5, + num_stacks=2, + stage_channels=[256, 256, 384, 384, 384, 512], + stage_blocks=[2, 2, 2, 2, 2, 4], + norm_cfg=dict(type='BN', requires_grad=True)), + neck=None, + bbox_head=dict( + type='CornerHead', + num_classes=80, + in_channels=256, + num_feat_levels=2, + corner_emb_channels=1, + loss_heatmap=dict( + type='GaussianFocalLoss', alpha=2.0, gamma=4.0, loss_weight=1), + loss_embedding=dict( + type='AssociativeEmbeddingLoss', + pull_weight=0.10, + push_weight=0.10), + loss_offset=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1)), + # training and testing settings + train_cfg=None, + test_cfg=dict( + corner_topk=100, + local_maximum_kernel=3, + distance_threshold=0.5, + score_thr=0.05, + max_per_img=100, + nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian'))) +# data settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='RandomCenterCropPad', + crop_size=(511, 511), + ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3), + test_mode=False, + test_pad_mode=None, + **img_norm_cfg), + dict(type='Resize', img_scale=(511, 511), keep_ratio=False), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict( + type='MultiScaleFlipAug', + scale_factor=1.0, + flip=True, + transforms=[ + dict(type='Resize'), + dict( + type='RandomCenterCropPad', + crop_size=None, + ratios=None, + border=None, + test_mode=True, + test_pad_mode=['logical_or', 127], + **img_norm_cfg), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict( + type='Collect', + keys=['img'], + meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape', + 'scale_factor', 'flip', 'img_norm_cfg', 'border')), + ]) +] +data = dict( + samples_per_gpu=5, + workers_per_gpu=3, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='Adam', lr=0.0005) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[180]) +runner = dict(type='EpochBasedRunner', max_epochs=210) diff --git a/configs/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco.py b/configs/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco.py new file mode 100644 index 0000000..873d598 --- /dev/null +++ b/configs/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco.py @@ -0,0 +1,105 @@ +_base_ = [ + '../_base_/default_runtime.py', '../_base_/datasets/coco_detection.py' +] + +# model settings +model = dict( + type='CornerNet', + backbone=dict( + type='HourglassNet', + downsample_times=5, + num_stacks=2, + stage_channels=[256, 256, 384, 384, 384, 512], + stage_blocks=[2, 2, 2, 2, 2, 4], + norm_cfg=dict(type='BN', requires_grad=True)), + neck=None, + bbox_head=dict( + type='CornerHead', + num_classes=80, + in_channels=256, + num_feat_levels=2, + corner_emb_channels=1, + loss_heatmap=dict( + type='GaussianFocalLoss', alpha=2.0, gamma=4.0, loss_weight=1), + loss_embedding=dict( + type='AssociativeEmbeddingLoss', + pull_weight=0.10, + push_weight=0.10), + loss_offset=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1)), + # training and testing settings + train_cfg=None, + test_cfg=dict( + corner_topk=100, + local_maximum_kernel=3, + distance_threshold=0.5, + score_thr=0.05, + max_per_img=100, + nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian'))) +# data settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='RandomCenterCropPad', + crop_size=(511, 511), + ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3), + test_mode=False, + test_pad_mode=None, + **img_norm_cfg), + dict(type='Resize', img_scale=(511, 511), keep_ratio=False), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict( + type='MultiScaleFlipAug', + scale_factor=1.0, + flip=True, + transforms=[ + dict(type='Resize'), + dict( + type='RandomCenterCropPad', + crop_size=None, + ratios=None, + border=None, + test_mode=True, + test_pad_mode=['logical_or', 127], + **img_norm_cfg), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict( + type='Collect', + keys=['img'], + meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape', + 'scale_factor', 'flip', 'img_norm_cfg', 'border')), + ]) +] +data = dict( + samples_per_gpu=3, + workers_per_gpu=3, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='Adam', lr=0.0005) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[180]) +runner = dict(type='EpochBasedRunner', max_epochs=210) diff --git a/configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py b/configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py new file mode 100644 index 0000000..ef749cc --- /dev/null +++ b/configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py @@ -0,0 +1,105 @@ +_base_ = [ + '../_base_/default_runtime.py', '../_base_/datasets/coco_detection.py' +] + +# model settings +model = dict( + type='CornerNet', + backbone=dict( + type='HourglassNet', + downsample_times=5, + num_stacks=2, + stage_channels=[256, 256, 384, 384, 384, 512], + stage_blocks=[2, 2, 2, 2, 2, 4], + norm_cfg=dict(type='BN', requires_grad=True)), + neck=None, + bbox_head=dict( + type='CornerHead', + num_classes=80, + in_channels=256, + num_feat_levels=2, + corner_emb_channels=1, + loss_heatmap=dict( + type='GaussianFocalLoss', alpha=2.0, gamma=4.0, loss_weight=1), + loss_embedding=dict( + type='AssociativeEmbeddingLoss', + pull_weight=0.10, + push_weight=0.10), + loss_offset=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1)), + # training and testing settings + train_cfg=None, + test_cfg=dict( + corner_topk=100, + local_maximum_kernel=3, + distance_threshold=0.5, + score_thr=0.05, + max_per_img=100, + nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian'))) +# data settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='RandomCenterCropPad', + crop_size=(511, 511), + ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3), + test_mode=False, + test_pad_mode=None, + **img_norm_cfg), + dict(type='Resize', img_scale=(511, 511), keep_ratio=False), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict( + type='MultiScaleFlipAug', + scale_factor=1.0, + flip=True, + transforms=[ + dict(type='Resize'), + dict( + type='RandomCenterCropPad', + crop_size=None, + ratios=None, + border=None, + test_mode=True, + test_pad_mode=['logical_or', 127], + **img_norm_cfg), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict( + type='Collect', + keys=['img'], + meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape', + 'scale_factor', 'flip', 'img_norm_cfg', 'border')), + ]) +] +data = dict( + samples_per_gpu=6, + workers_per_gpu=3, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='Adam', lr=0.0005) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[180]) +runner = dict(type='EpochBasedRunner', max_epochs=210) diff --git a/configs/dcn/README.md b/configs/dcn/README.md new file mode 100644 index 0000000..78e2dc1 --- /dev/null +++ b/configs/dcn/README.md @@ -0,0 +1,52 @@ +# Deformable Convolutional Networks + +## Introduction + +[ALGORITHM] + +```none +@inproceedings{dai2017deformable, + title={Deformable Convolutional Networks}, + author={Dai, Jifeng and Qi, Haozhi and Xiong, Yuwen and Li, Yi and Zhang, Guodong and Hu, Han and Wei, Yichen}, + booktitle={Proceedings of the IEEE international conference on computer vision}, + year={2017} +} +``` + +[ALGORITHM] + +``` +@article{zhu2018deformable, + title={Deformable ConvNets v2: More Deformable, Better Results}, + author={Zhu, Xizhou and Hu, Han and Lin, Stephen and Dai, Jifeng}, + journal={arXiv preprint arXiv:1811.11168}, + year={2018} +} +``` + +## Results and Models + +| Backbone | Model | Style | Conv | Pool | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:----------------:|:------------:|:-------:|:-------------:|:------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| +| R-50-FPN | Faster | pytorch | dconv(c3-c5) | - | 1x | 4.0 | 17.8 | 41.3 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130-d68aed1e.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130_212941.log.json) | +| R-50-FPN | Faster | pytorch | mdconv(c3-c5) | - | 1x | 4.1 | 17.6 | 41.4 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200130-d099253b.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200130_222144.log.json) | +| *R-50-FPN (dg=4) | Faster | pytorch | mdconv(c3-c5) | - | 1x | 4.2 | 17.4 | 41.5 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco_20200130-01262257.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco_20200130_222058.log.json) | +| R-50-FPN | Faster | pytorch | - | dpool | 1x | 5.0 | 17.2 | 38.9 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dpool_1x_coco/faster_rcnn_r50_fpn_dpool_1x_coco_20200307-90d3c01d.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dpool_1x_coco/faster_rcnn_r50_fpn_dpool_1x_coco_20200307_203250.log.json) | +| R-50-FPN | Faster | pytorch | - | mdpool | 1x | 5.8 | 16.6 | 38.7 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco/faster_rcnn_r50_fpn_mdpool_1x_coco_20200307-c0df27ff.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco/faster_rcnn_r50_fpn_mdpool_1x_coco_20200307_203304.log.json) | +| R-101-FPN | Faster | pytorch | dconv(c3-c5) | - | 1x | 6.0 | 12.5 | 42.7 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203-1377f13d.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203_230019.log.json) | +| X-101-32x4d-FPN | Faster | pytorch | dconv(c3-c5) | - | 1x | 7.3 | 10.0 | 44.5 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco_20200203-4f85c69c.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco_20200203_001325.log.json) | +| R-50-FPN | Mask | pytorch | dconv(c3-c5) | - | 1x | 4.5 | 15.4 | 41.8 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200203-4d9ad43b.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200203_061339.log.json) | +| R-50-FPN | Mask | pytorch | mdconv(c3-c5) | - | 1x | 4.5 | 15.1 | 41.5 | 37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200203-ad97591f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200203_063443.log.json) | +| R-101-FPN | Mask | pytorch | dconv(c3-c5) | - | 1x | 6.5 | 11.7 | 43.5 | 38.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200216-a71f5bce.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200216_191601.log.json) | +| R-50-FPN | Cascade | pytorch | dconv(c3-c5) | - | 1x | 4.5 | 14.6 | 43.8 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130-2f1fca44.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130_220843.log.json) | +| R-101-FPN | Cascade | pytorch | dconv(c3-c5) | - | 1x | 6.4 | 11.0 | 45.0 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203-3b2f0594.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203_224829.log.json) | +| R-50-FPN | Cascade Mask | pytorch | dconv(c3-c5) | - | 1x | 6.0 | 10.0 | 44.4 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200202-42e767a2.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200202_010309.log.json) | +| R-101-FPN | Cascade Mask | pytorch | dconv(c3-c5) | - | 1x | 8.0 | 8.6 | 45.8 | 39.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200204-df0c5f10.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200204_134006.log.json) | +| X-101-32x4d-FPN | Cascade Mask | pytorch | dconv(c3-c5) | - | 1x | 9.2 | | 47.3 | 41.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco-e75f90c8.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco-20200606_183737.log.json) | + +**Notes:** + +- `dconv` and `mdconv` denote (modulated) deformable convolution, `c3-c5` means adding dconv in resnet stage 3 to 5. `dpool` and `mdpool` denote (modulated) deformable roi pooling. +- The dcn ops are modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch, which should be more memory efficient and slightly faster. +- (*) For R-50-FPN (dg=4), dg is short for deformable_group. This model is trained and tested on Amazon EC2 p3dn.24xlarge instance. +- **Memory, Train/Inf time is outdated.** diff --git a/configs/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..081b998 --- /dev/null +++ b/configs/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..3b3683a --- /dev/null +++ b/configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..daaa472 --- /dev/null +++ b/configs/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..a01df33 --- /dev/null +++ b/configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..aa664bd --- /dev/null +++ b/configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..f5fee7e --- /dev/null +++ b/configs/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..8787088 --- /dev/null +++ b/configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py new file mode 100644 index 0000000..1b695f0 --- /dev/null +++ b/configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py @@ -0,0 +1,12 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + roi_head=dict( + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict( + _delete_=True, + type='DeformRoIPoolPack', + output_size=7, + output_channels=256), + out_channels=256, + featmap_strides=[4, 8, 16, 32]))) diff --git a/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..d1bcf3c --- /dev/null +++ b/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py new file mode 100644 index 0000000..d0ab89c --- /dev/null +++ b/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCNv2', deform_groups=4, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py new file mode 100644 index 0000000..ad7b034 --- /dev/null +++ b/configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py @@ -0,0 +1,12 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + roi_head=dict( + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict( + _delete_=True, + type='ModulatedDeformRoIPoolPack', + output_size=7, + output_channels=256), + out_channels=256, + featmap_strides=[4, 8, 16, 32]))) diff --git a/configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..8357766 --- /dev/null +++ b/configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,15 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..cb34002 --- /dev/null +++ b/configs/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..ababe58 --- /dev/null +++ b/configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py b/configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..5ca2a67 --- /dev/null +++ b/configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/deepfashion/README.md b/configs/deepfashion/README.md new file mode 100644 index 0000000..c182bea --- /dev/null +++ b/configs/deepfashion/README.md @@ -0,0 +1,56 @@ +# DeepFashion + +[DATASET] + +[MMFashion](https://github.com/open-mmlab/mmfashion) develops "fashion parsing and segmentation" module +based on the dataset +[DeepFashion-Inshop](https://drive.google.com/drive/folders/0B7EVK8r0v71pVDZFQXRsMDZCX1E?usp=sharing). +Its annotation follows COCO style. +To use it, you need to first download the data. Note that we only use "img_highres" in this task. +The file tree should be like this: + +```sh +mmdetection +├── mmdet +├── tools +├── configs +├── data +│ ├── DeepFashion +│ │ ├── In-shop +│ │ ├── Anno +│ │ │   ├── segmentation +│ │ │   | ├── DeepFashion_segmentation_train.json +│ │ │   | ├── DeepFashion_segmentation_query.json +│ │ │   | ├── DeepFashion_segmentation_gallery.json +│ │ │   ├── list_bbox_inshop.txt +│ │ │   ├── list_description_inshop.json +│ │ │   ├── list_item_inshop.txt +│ │ │   └── list_landmarks_inshop.txt +│ │ ├── Eval +│ │ │ └── list_eval_partition.txt +│ │ ├── Img +│ │ │ ├── img +│ │ │ │ ├──XXX.jpg +│ │ │ ├── img_highres +│ │ │ └── ├──XXX.jpg + +``` + +After that you can train the Mask RCNN r50 on DeepFashion-In-shop dataset by launching training with the `mask_rcnn_r50_fpn_1x.py` config +or creating your own config file. + +``` +@inproceedings{liuLQWTcvpr16DeepFashion, + author = {Liu, Ziwei and Luo, Ping and Qiu, Shi and Wang, Xiaogang and Tang, Xiaoou}, + title = {DeepFashion: Powering Robust Clothes Recognition and Retrieval with Rich Annotations}, + booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2016} +} +``` + +## Model Zoo + +| Backbone | Model type | Dataset | bbox detection Average Precision | segmentation Average Precision | Config | Download (Google) | +| :---------: | :----------: | :-----------------: | :--------------------------------: | :----------------------------: | :---------:| :-------------------------: | +| ResNet50 | Mask RCNN | DeepFashion-In-shop | 0.599 | 0.584 |[config](https://github.com/open-mmlab/mmdetection/blob/master/configs/deepfashion/mask_rcnn_r50_fpn_15e_deepfashion.py)| [model](https://drive.google.com/open?id=1q6zF7J6Gb-FFgM87oIORIt6uBozaXp5r) | [log](https://drive.google.com/file/d/1qTK4Dr4FFLa9fkdI6UVko408gkrfTRLP/view?usp=sharing) | diff --git a/configs/deepfashion/mask_rcnn_r50_fpn_15e_deepfashion.py b/configs/deepfashion/mask_rcnn_r50_fpn_15e_deepfashion.py new file mode 100644 index 0000000..c4e8638 --- /dev/null +++ b/configs/deepfashion/mask_rcnn_r50_fpn_15e_deepfashion.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/deepfashion.py', '../_base_/schedules/schedule_1x.py', + '../_base_/default_runtime.py' +] +model = dict( + roi_head=dict( + bbox_head=dict(num_classes=15), mask_head=dict(num_classes=15))) +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=15) diff --git a/configs/detectors/README.md b/configs/detectors/README.md new file mode 100644 index 0000000..46dee5e --- /dev/null +++ b/configs/detectors/README.md @@ -0,0 +1,58 @@ +# DetectoRS + +## Introduction + +[ALGORITHM] + +We provide the config files for [DetectoRS: Detecting Objects with Recursive Feature Pyramid and Switchable Atrous Convolution](https://arxiv.org/pdf/2006.02334.pdf). + +```BibTeX +@article{qiao2020detectors, + title={DetectoRS: Detecting Objects with Recursive Feature Pyramid and Switchable Atrous Convolution}, + author={Qiao, Siyuan and Chen, Liang-Chieh and Yuille, Alan}, + journal={arXiv preprint arXiv:2006.02334}, + year={2020} +} +``` + +## Dataset + +DetectoRS requires COCO and [COCO-stuff](http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/stuffthingmaps_trainval2017.zip) dataset for training. You need to download and extract it in the COCO dataset path. +The directory should be like this. + +```none +mmdetection +├── mmdet +├── tools +├── configs +├── data +│ ├── coco +│ │ ├── annotations +│ │ ├── train2017 +│ │ ├── val2017 +│ │ ├── test2017 +| | ├── stuffthingmaps +``` + +## Results and Models + +DetectoRS includes two major components: + +- Recursive Feature Pyramid (RFP). +- Switchable Atrous Convolution (SAC). + +They can be used independently. +Combining them together results in DetectoRS. +The results on COCO 2017 val are shown in the below table. + +| Method | Detector | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:------:|:--------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| +| RFP | Cascade + ResNet-50 | 1x | 7.5 | - | 44.8 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/cascade_rcnn_r50_rfp_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_rfp_1x_coco/cascade_rcnn_r50_rfp_1x_coco-8cf51bfd.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_rfp_1x_coco/cascade_rcnn_r50_rfp_1x_coco_20200624_104126.log.json) | +| SAC | Cascade + ResNet-50 | 1x | 5.6 | - | 45.0| | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/cascade_rcnn_r50_sac_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_sac_1x_coco/cascade_rcnn_r50_sac_1x_coco-24bfda62.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_sac_1x_coco/cascade_rcnn_r50_sac_1x_coco_20200624_104402.log.json) | +| DetectoRS | Cascade + ResNet-50 | 1x | 9.9 | - | 47.4 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/detectors_cascade_rcnn_r50_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_cascade_rcnn_r50_1x_coco/detectors_cascade_rcnn_r50_1x_coco-32a10ba0.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_cascade_rcnn_r50_1x_coco/detectors_cascade_rcnn_r50_1x_coco_20200706_001203.log.json) | +| RFP | HTC + ResNet-50 | 1x | 11.2 | - | 46.6 | 40.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/htc_r50_rfp_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_rfp_1x_coco/htc_r50_rfp_1x_coco-8ff87c51.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_rfp_1x_coco/htc_r50_rfp_1x_coco_20200624_103053.log.json) | +| SAC | HTC + ResNet-50 | 1x | 9.3 | - | 46.4 | 40.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/htc_r50_sac_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_sac_1x_coco/htc_r50_sac_1x_coco-bfa60c54.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_sac_1x_coco/htc_r50_sac_1x_coco_20200624_103111.log.json) | +| DetectoRS | HTC + ResNet-50 | 1x | 13.6 | - | 49.1 | 42.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/detectors_htc_r50_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_htc_r50_1x_coco/detectors_htc_r50_1x_coco-329b1453.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_htc_r50_1x_coco/detectors_htc_r50_1x_coco_20200624_103659.log.json) | + +*Note*: This is a re-implementation based on MMDetection-V2. +The original implementation is based on MMDetection-V1. diff --git a/configs/detectors/cascade_rcnn_r50_rfp_1x_coco.py b/configs/detectors/cascade_rcnn_r50_rfp_1x_coco.py new file mode 100644 index 0000000..4430d8a --- /dev/null +++ b/configs/detectors/cascade_rcnn_r50_rfp_1x_coco.py @@ -0,0 +1,28 @@ +_base_ = [ + '../_base_/models/cascade_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict( + backbone=dict( + type='DetectoRS_ResNet', + conv_cfg=dict(type='ConvAWS'), + output_img=True), + neck=dict( + type='RFP', + rfp_steps=2, + aspp_out_channels=64, + aspp_dilations=(1, 3, 6, 1), + rfp_backbone=dict( + rfp_inplanes=256, + type='DetectoRS_ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + conv_cfg=dict(type='ConvAWS'), + pretrained='torchvision://resnet50', + style='pytorch'))) diff --git a/configs/detectors/cascade_rcnn_r50_sac_1x_coco.py b/configs/detectors/cascade_rcnn_r50_sac_1x_coco.py new file mode 100644 index 0000000..ccd9319 --- /dev/null +++ b/configs/detectors/cascade_rcnn_r50_sac_1x_coco.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/cascade_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict( + backbone=dict( + type='DetectoRS_ResNet', + conv_cfg=dict(type='ConvAWS'), + sac=dict(type='SAC', use_deform=True), + stage_with_sac=(False, True, True, True))) diff --git a/configs/detectors/detectors_cascade_rcnn_r50_1x_coco.py b/configs/detectors/detectors_cascade_rcnn_r50_1x_coco.py new file mode 100644 index 0000000..f760404 --- /dev/null +++ b/configs/detectors/detectors_cascade_rcnn_r50_1x_coco.py @@ -0,0 +1,32 @@ +_base_ = [ + '../_base_/models/cascade_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict( + backbone=dict( + type='DetectoRS_ResNet', + conv_cfg=dict(type='ConvAWS'), + sac=dict(type='SAC', use_deform=True), + stage_with_sac=(False, True, True, True), + output_img=True), + neck=dict( + type='RFP', + rfp_steps=2, + aspp_out_channels=64, + aspp_dilations=(1, 3, 6, 1), + rfp_backbone=dict( + rfp_inplanes=256, + type='DetectoRS_ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + conv_cfg=dict(type='ConvAWS'), + sac=dict(type='SAC', use_deform=True), + stage_with_sac=(False, True, True, True), + pretrained='torchvision://resnet50', + style='pytorch'))) diff --git a/configs/detectors/detectors_htc_r50_1x_coco.py b/configs/detectors/detectors_htc_r50_1x_coco.py new file mode 100644 index 0000000..0d2fc4f --- /dev/null +++ b/configs/detectors/detectors_htc_r50_1x_coco.py @@ -0,0 +1,28 @@ +_base_ = '../htc/htc_r50_fpn_1x_coco.py' + +model = dict( + backbone=dict( + type='DetectoRS_ResNet', + conv_cfg=dict(type='ConvAWS'), + sac=dict(type='SAC', use_deform=True), + stage_with_sac=(False, True, True, True), + output_img=True), + neck=dict( + type='RFP', + rfp_steps=2, + aspp_out_channels=64, + aspp_dilations=(1, 3, 6, 1), + rfp_backbone=dict( + rfp_inplanes=256, + type='DetectoRS_ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + conv_cfg=dict(type='ConvAWS'), + sac=dict(type='SAC', use_deform=True), + stage_with_sac=(False, True, True, True), + pretrained='torchvision://resnet50', + style='pytorch'))) diff --git a/configs/detectors/htc_r50_rfp_1x_coco.py b/configs/detectors/htc_r50_rfp_1x_coco.py new file mode 100644 index 0000000..496104e --- /dev/null +++ b/configs/detectors/htc_r50_rfp_1x_coco.py @@ -0,0 +1,24 @@ +_base_ = '../htc/htc_r50_fpn_1x_coco.py' + +model = dict( + backbone=dict( + type='DetectoRS_ResNet', + conv_cfg=dict(type='ConvAWS'), + output_img=True), + neck=dict( + type='RFP', + rfp_steps=2, + aspp_out_channels=64, + aspp_dilations=(1, 3, 6, 1), + rfp_backbone=dict( + rfp_inplanes=256, + type='DetectoRS_ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + conv_cfg=dict(type='ConvAWS'), + pretrained='torchvision://resnet50', + style='pytorch'))) diff --git a/configs/detectors/htc_r50_sac_1x_coco.py b/configs/detectors/htc_r50_sac_1x_coco.py new file mode 100644 index 0000000..72d4db9 --- /dev/null +++ b/configs/detectors/htc_r50_sac_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = '../htc/htc_r50_fpn_1x_coco.py' + +model = dict( + backbone=dict( + type='DetectoRS_ResNet', + conv_cfg=dict(type='ConvAWS'), + sac=dict(type='SAC', use_deform=True), + stage_with_sac=(False, True, True, True))) diff --git a/configs/detr/README.md b/configs/detr/README.md new file mode 100644 index 0000000..711a308 --- /dev/null +++ b/configs/detr/README.md @@ -0,0 +1,27 @@ +# DETR + +## Introduction + +[ALGORITHM] + +We provide the config files for DETR: [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872). + +```BibTeX +@inproceedings{detr, + author = {Nicolas Carion and + Francisco Massa and + Gabriel Synnaeve and + Nicolas Usunier and + Alexander Kirillov and + Sergey Zagoruyko}, + title = {End-to-End Object Detection with Transformers}, + booktitle = {ECCV}, + year = {2020} +} +``` + +## Results and Models + +| Backbone | Model | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:------:|:--------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | DETR |150e |7.9| | 40.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detr/detr_r50_8x2_150e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/detr/detr_r50_8x2_150e_coco/detr_r50_8x2_150e_coco_20201130_194835-2c4b8974.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/detr/detr_r50_8x2_150e_coco/detr_r50_8x2_150e_coco_20201130_194835.log.json) | diff --git a/configs/detr/detr_r50_8x2_150e_coco.py b/configs/detr/detr_r50_8x2_150e_coco.py new file mode 100644 index 0000000..ba276f4 --- /dev/null +++ b/configs/detr/detr_r50_8x2_150e_coco.py @@ -0,0 +1,131 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py' +] +model = dict( + type='DETR', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(3, ), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='pytorch'), + bbox_head=dict( + type='TransformerHead', + num_classes=80, + in_channels=2048, + num_fcs=2, + transformer=dict( + type='Transformer', + embed_dims=256, + num_heads=8, + num_encoder_layers=6, + num_decoder_layers=6, + feedforward_channels=2048, + dropout=0.1, + act_cfg=dict(type='ReLU', inplace=True), + norm_cfg=dict(type='LN'), + num_fcs=2, + pre_norm=False, + return_intermediate_dec=True), + positional_encoding=dict( + type='SinePositionalEncoding', num_feats=128, normalize=True), + loss_cls=dict( + type='CrossEntropyLoss', + bg_cls_weight=0.1, + use_sigmoid=False, + loss_weight=1.0, + class_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=5.0), + loss_iou=dict(type='GIoULoss', loss_weight=2.0)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='HungarianAssigner', + cls_cost=dict(type='ClassificationCost', weight=1.), + reg_cost=dict(type='BBoxL1Cost', weight=5.0), + iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))), + test_cfg=dict(max_per_img=100)) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different +# from the default setting in mmdet. +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict( + type='AutoAugment', + policies=[[ + dict( + type='Resize', + img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), + (608, 1333), (640, 1333), (672, 1333), (704, 1333), + (736, 1333), (768, 1333), (800, 1333)], + multiscale_mode='value', + keep_ratio=True) + ], + [ + dict( + type='Resize', + img_scale=[(400, 1333), (500, 1333), (600, 1333)], + multiscale_mode='value', + keep_ratio=True), + dict( + type='RandomCrop', + crop_type='absolute_range', + crop_size=(384, 600), + allow_negative_crop=True), + dict( + type='Resize', + img_scale=[(480, 1333), (512, 1333), (544, 1333), + (576, 1333), (608, 1333), (640, 1333), + (672, 1333), (704, 1333), (736, 1333), + (768, 1333), (800, 1333)], + multiscale_mode='value', + override=True, + keep_ratio=True) + ]]), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=1), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] +# test_pipeline, NOTE the Pad's size_divisor is different from the default +# setting (size_divisor=32). While there is little effect on the performance +# whether we use the default setting or use size_divisor=1. +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=1), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='AdamW', + lr=0.0001, + weight_decay=0.0001, + paramwise_cfg=dict( + custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)})) +optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2)) +# learning policy +lr_config = dict(policy='step', step=[100]) +runner = dict(type='EpochBasedRunner', max_epochs=150) diff --git a/configs/double_heads/README.md b/configs/double_heads/README.md new file mode 100644 index 0000000..3ad4f49 --- /dev/null +++ b/configs/double_heads/README.md @@ -0,0 +1,22 @@ +# Rethinking Classification and Localization for Object Detection + +## Introduction + +[ALGORITHM] + +```latex +@article{wu2019rethinking, + title={Rethinking Classification and Localization for Object Detection}, + author={Yue Wu and Yinpeng Chen and Lu Yuan and Zicheng Liu and Lijuan Wang and Hongzhi Li and Yun Fu}, + year={2019}, + eprint={1904.06493}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +## Results and models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +| R-50-FPN | pytorch | 1x | 6.8 | 9.5 | 40.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/double_heads/dh_faster_rcnn_r50_fpn_1x_coco/dh_faster_rcnn_r50_fpn_1x_coco_20200130-586b67df.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/double_heads/dh_faster_rcnn_r50_fpn_1x_coco/dh_faster_rcnn_r50_fpn_1x_coco_20200130_220238.log.json) | diff --git a/configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py b/configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..9b8118b --- /dev/null +++ b/configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,23 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + roi_head=dict( + type='DoubleHeadRoIHead', + reg_roi_scale_factor=1.3, + bbox_head=dict( + _delete_=True, + type='DoubleConvFCBBoxHead', + num_convs=4, + num_fcs=2, + in_channels=256, + conv_out_channels=1024, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=2.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=2.0)))) diff --git a/configs/dynamic_rcnn/README.md b/configs/dynamic_rcnn/README.md new file mode 100644 index 0000000..ffdc42d --- /dev/null +++ b/configs/dynamic_rcnn/README.md @@ -0,0 +1,20 @@ +# Dynamic R-CNN: Towards High Quality Object Detection via Dynamic Training + +## Introduction + +[ALGORITHM] + +``` +@article{DynamicRCNN, + author = {Hongkai Zhang and Hong Chang and Bingpeng Ma and Naiyan Wang and Xilin Chen}, + title = {Dynamic {R-CNN}: Towards High Quality Object Detection via Dynamic Training}, + journal = {arXiv preprint arXiv:2004.06002}, + year = {2020} +} +``` + +## Results and Models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:---------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | pytorch | 1x | 3.8 | | 38.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x/dynamic_rcnn_r50_fpn_1x-62a3f276.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x/dynamic_rcnn_r50_fpn_1x_20200618_095048.log.json) | diff --git a/configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x.py b/configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x.py new file mode 100644 index 0000000..f2deb99 --- /dev/null +++ b/configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x.py @@ -0,0 +1,28 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + roi_head=dict( + type='DynamicRoIHead', + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))), + train_cfg=dict( + rpn_proposal=dict(nms=dict(iou_threshold=0.85)), + rcnn=dict( + dynamic_rcnn=dict( + iou_topk=75, + beta_topk=10, + update_iter_interval=100, + initial_iou=0.4, + initial_beta=1.0))), + test_cfg=dict(rpn=dict(nms=dict(iou_threshold=0.85)))) diff --git a/configs/empirical_attention/README.md b/configs/empirical_attention/README.md new file mode 100644 index 0000000..f9782d7 --- /dev/null +++ b/configs/empirical_attention/README.md @@ -0,0 +1,23 @@ +# An Empirical Study of Spatial Attention Mechanisms in Deep Networks + +## Introduction + +[ALGORITHM] + +```latex +@article{zhu2019empirical, + title={An Empirical Study of Spatial Attention Mechanisms in Deep Networks}, + author={Zhu, Xizhou and Cheng, Dazhi and Zhang, Zheng and Lin, Stephen and Dai, Jifeng}, + journal={arXiv preprint arXiv:1904.05873}, + year={2019} +} +``` + +## Results and Models + +| Backbone | Attention Component | DCN | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:---------:|:-------------------:|:----:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | 1111 | N | 1x | 8.0 | 13.8 | 40.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco/faster_rcnn_r50_fpn_attention_1111_1x_coco_20200130-403cccba.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco/faster_rcnn_r50_fpn_attention_1111_1x_coco_20200130_210344.log.json) | +| R-50 | 0010 | N | 1x | 4.2 | 18.4 | 39.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco/faster_rcnn_r50_fpn_attention_0010_1x_coco_20200130-7cb0c14d.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco/faster_rcnn_r50_fpn_attention_0010_1x_coco_20200130_210125.log.json) | +| R-50 | 1111 | Y | 1x | 8.0 | 12.7 | 42.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco_20200130-8b2523a6.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco_20200130_204442.log.json) | +| R-50 | 0010 | Y | 1x | 4.2 | 17.1 | 42.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco_20200130-1a2e831d.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco_20200130_210410.log.json) | diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco.py new file mode 100644 index 0000000..a544e3a --- /dev/null +++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict(plugins=[ + dict( + cfg=dict( + type='GeneralizedAttention', + spatial_range=-1, + num_heads=8, + attention_type='0010', + kv_stride=2), + stages=(False, False, True, True), + position='after_conv2') + ])) diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py new file mode 100644 index 0000000..bbefd27 --- /dev/null +++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + plugins=[ + dict( + cfg=dict( + type='GeneralizedAttention', + spatial_range=-1, + num_heads=8, + attention_type='0010', + kv_stride=2), + stages=(False, False, True, True), + position='after_conv2') + ], + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py new file mode 100644 index 0000000..13a4645 --- /dev/null +++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict(plugins=[ + dict( + cfg=dict( + type='GeneralizedAttention', + spatial_range=-1, + num_heads=8, + attention_type='1111', + kv_stride=2), + stages=(False, False, True, True), + position='after_conv2') + ])) diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py new file mode 100644 index 0000000..b1f26c0 --- /dev/null +++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + plugins=[ + dict( + cfg=dict( + type='GeneralizedAttention', + spatial_range=-1, + num_heads=8, + attention_type='1111', + kv_stride=2), + stages=(False, False, True, True), + position='after_conv2') + ], + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/fast_rcnn/README.md b/configs/fast_rcnn/README.md new file mode 100644 index 0000000..c756507 --- /dev/null +++ b/configs/fast_rcnn/README.md @@ -0,0 +1,16 @@ +# Fast R-CNN + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{girshick2015fast, + title={Fast r-cnn}, + author={Girshick, Ross}, + booktitle={Proceedings of the IEEE international conference on computer vision}, + year={2015} +} +``` + +## Results and models diff --git a/configs/fast_rcnn/fast_rcnn_r101_caffe_fpn_1x_coco.py b/configs/fast_rcnn/fast_rcnn_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..6db24b1 --- /dev/null +++ b/configs/fast_rcnn/fast_rcnn_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = './fast_rcnn_r50_caffe_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet101_caffe', + backbone=dict(depth=101)) diff --git a/configs/fast_rcnn/fast_rcnn_r101_fpn_1x_coco.py b/configs/fast_rcnn/fast_rcnn_r101_fpn_1x_coco.py new file mode 100644 index 0000000..9a76b39 --- /dev/null +++ b/configs/fast_rcnn/fast_rcnn_r101_fpn_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './fast_rcnn_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/fast_rcnn/fast_rcnn_r101_fpn_2x_coco.py b/configs/fast_rcnn/fast_rcnn_r101_fpn_2x_coco.py new file mode 100644 index 0000000..c9d5b4b --- /dev/null +++ b/configs/fast_rcnn/fast_rcnn_r101_fpn_2x_coco.py @@ -0,0 +1,2 @@ +_base_ = './fast_rcnn_r50_fpn_2x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/fast_rcnn/fast_rcnn_r50_caffe_fpn_1x_coco.py b/configs/fast_rcnn/fast_rcnn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..178deb6 --- /dev/null +++ b/configs/fast_rcnn/fast_rcnn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,45 @@ +_base_ = './fast_rcnn_r50_fpn_1x_coco.py' + +model = dict( + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict( + norm_cfg=dict(type='BN', requires_grad=False), style='caffe')) + +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=2000), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=None), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='ToTensor', keys=['proposals']), + dict( + type='ToDataContainer', + fields=[dict(key='proposals', stack=False)]), + dict(type='Collect', keys=['img', 'proposals']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py b/configs/fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..d2f080e --- /dev/null +++ b/configs/fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,52 @@ +_base_ = [ + '../_base_/models/fast_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=2000), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=None), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='ToTensor', keys=['proposals']), + dict( + type='ToDataContainer', + fields=[dict(key='proposals', stack=False)]), + dict(type='Collect', keys=['img', 'proposals']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', + pipeline=train_pipeline), + val=dict( + proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', + pipeline=test_pipeline), + test=dict( + proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', + pipeline=test_pipeline)) diff --git a/configs/fast_rcnn/fast_rcnn_r50_fpn_2x_coco.py b/configs/fast_rcnn/fast_rcnn_r50_fpn_2x_coco.py new file mode 100644 index 0000000..228e856 --- /dev/null +++ b/configs/fast_rcnn/fast_rcnn_r50_fpn_2x_coco.py @@ -0,0 +1,5 @@ +_base_ = './fast_rcnn_r50_fpn_1x_coco.py' + +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/faster_rcnn/README.md b/configs/faster_rcnn/README.md new file mode 100644 index 0000000..d43fc6d --- /dev/null +++ b/configs/faster_rcnn/README.md @@ -0,0 +1,61 @@ +# Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks + +## Introduction + +[ALGORITHM] + +```latex +@article{Ren_2017, + title={Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks}, + journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, + publisher={Institute of Electrical and Electronics Engineers (IEEE)}, + author={Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian}, + year={2017}, + month={Jun}, +} +``` + +## Results and models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +| R-50-DC5 | caffe | 1x | - | - | 37.2 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco/faster_rcnn_r50_caffe_dc5_1x_coco_20201030_151909-531f0f43.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco/faster_rcnn_r50_caffe_dc5_1x_coco_20201030_151909.log.json) | +| R-50-FPN | caffe | 1x | 3.8 | | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco/faster_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.378_20200504_180032-c5925ee5.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco/faster_rcnn_r50_caffe_fpn_1x_coco_20200504_180032.log.json) | +| R-50-FPN | pytorch | 1x | 4.0 | 21.4 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | +| R-50-FPN | pytorch | 2x | - | - | 38.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_20200504_210434.log.json) | +| R-101-FPN | caffe | 1x | 5.7 | | 39.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco/faster_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.398_20200504_180057-b269e9dd.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco/faster_rcnn_r101_caffe_fpn_1x_coco_20200504_180057.log.json) | +| R-101-FPN | pytorch | 1x | 6.0 | 15.6 | 39.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_1x_coco/faster_rcnn_r101_fpn_1x_coco_20200130-f513f705.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_1x_coco/faster_rcnn_r101_fpn_1x_coco_20200130_204655.log.json) | +| R-101-FPN | pytorch | 2x | - | - | 39.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_2x_coco/faster_rcnn_r101_fpn_2x_coco_bbox_mAP-0.398_20200504_210455-1d2dac9c.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_2x_coco/faster_rcnn_r101_fpn_2x_coco_20200504_210455.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 7.2 | 13.8 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco/faster_rcnn_x101_32x4d_fpn_1x_coco_20200203-cff10310.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco/faster_rcnn_x101_32x4d_fpn_1x_coco_20200203_000520.log.json) | +| X-101-32x4d-FPN | pytorch | 2x | - | - | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco/faster_rcnn_x101_32x4d_fpn_2x_coco_bbox_mAP-0.412_20200506_041400-64a12c0b.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco/faster_rcnn_x101_32x4d_fpn_2x_coco_20200506_041400.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 10.3 | 9.4 | 42.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco/faster_rcnn_x101_64x4d_fpn_1x_coco_20200204-833ee192.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco/faster_rcnn_x101_64x4d_fpn_1x_coco_20200204_134340.log.json) | +| X-101-64x4d-FPN | pytorch | 2x | - | - | 41.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco/faster_rcnn_x101_64x4d_fpn_2x_coco_20200512_161033-5961fa95.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco/faster_rcnn_x101_64x4d_fpn_2x_coco_20200512_161033.log.json) | + +## Different regression loss + +We trained with R-50-FPN pytorch style backbone for 1x schedule. + +| Backbone | Loss type | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-------: | :------: | :------------: | :----: | :------: | :--------: | +| R-50-FPN | L1Loss | 4.0 | 21.4 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | +| R-50-FPN | IoULoss | | | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_iou_1x_coco-fdd207f3.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_iou_1x_coco_20200506_095954.log.json) | +| R-50-FPN | GIoULoss | | | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_giou_1x_coco-0eada910.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_giou_1x_coco_20200505_161120.log.json) | +| R-50-FPN | BoundedIoULoss | | | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_bounded_iou_1x_coco-98ad993b.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_bounded_iou_1x_coco_20200505_160738.log.json) | + +## Pre-trained Models + +We also train some models with longer schedules and multi-scale training. The users could finetune them for downstream tasks. + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +| [R-50-DC5](./faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py) | caffe | 1x | - | | 37.4 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco_20201028_233851-b33d21b9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco_20201028_233851.log.json) +| [R-50-DC5](./faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py) | caffe | 3x | - | | 38.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco_20201028_002107-34a53b2c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco_20201028_002107.log.json) +| [R-50-FPN](./faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py) | caffe | 2x | 4.3 | | 39.7 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco_bbox_mAP-0.397_20200504_231813-10b2de58.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco_20200504_231813.log.json) +| [R-50-FPN](./faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py) | caffe | 3x | 4.3 | | 40.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_bbox_mAP-0.398_20200504_163323-30042637.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20200504_163323.log.json) + +We further finetune some pre-trained models on the COCO subsets, which only contain only a few of the 80 categories. + +| Backbone | Style | Class name | Pre-traind model | Mem (GB) | box AP | Config | Download | +| ------------------------------------------------------------ | ----- | ------------------ | ------------------------------------------------------------ | -------- | ------ | ------------------------------------------------------------ | ------------------------------------------------------------ | +| [R-50-FPN](./faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person.py) | caffe | person | [R-50-FPN-Caffe-3x](./faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py) | 3.7 | 55.8 | [config](./faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco-person/faster_rcnn_r50_fpn_1x_coco-person_20201216_175929-d022e227.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco-person/faster_rcnn_r50_fpn_1x_coco-person_20201216_175929.log.json) | +| [R-50-FPN](./faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person-bicycle-car.py) | caffe | person-bicycle-car | [R-50-FPN-Caffe-3x](./faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py) | 3.7 | 44.1 | [config](./faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person-bicycle-car.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco-person-bicycle-car/faster_rcnn_r50_fpn_1x_coco-person-bicycle-car_20201216_173117-6eda6d92.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco-person-bicycle-car/faster_rcnn_r50_fpn_1x_coco-person-bicycle-car_20201216_173117.log.json) | diff --git a/configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..95c7238 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = './faster_rcnn_r50_caffe_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet101_caffe', + backbone=dict(depth=101)) diff --git a/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py new file mode 100644 index 0000000..d2edab1 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.py b/configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.py new file mode 100644 index 0000000..9367a3c --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.py @@ -0,0 +1,2 @@ +_base_ = './faster_rcnn_r50_fpn_2x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco.py new file mode 100644 index 0000000..92344a1 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco.py @@ -0,0 +1,39 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_caffe_c4.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco.py new file mode 100644 index 0000000..ee2010c --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco.py @@ -0,0 +1,37 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_caffe_dc5.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py new file mode 100644 index 0000000..14eaef2 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py @@ -0,0 +1,42 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_caffe_dc5.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py new file mode 100644 index 0000000..403747f --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py @@ -0,0 +1,4 @@ +_base_ = './faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py' +# learning policy +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..762c72b --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,37 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict( + norm_cfg=dict(requires_grad=False), norm_eval=True, style='caffe')) +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person-bicycle-car.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person-bicycle-car.py new file mode 100644 index 0000000..23d7285 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person-bicycle-car.py @@ -0,0 +1,9 @@ +_base_ = './faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py' +model = dict(roi_head=dict(bbox_head=dict(num_classes=3))) +classes = ('person', 'bicycle', 'car') +data = dict( + train=dict(classes=classes), + val=dict(classes=classes), + test=dict(classes=classes)) + +load_from = 'http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_bbox_mAP-0.398_20200504_163323-30042637.pth' # noqa diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person.py new file mode 100644 index 0000000..b0164c7 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person.py @@ -0,0 +1,9 @@ +_base_ = './faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py' +model = dict(roi_head=dict(bbox_head=dict(num_classes=1))) +classes = ('person', ) +data = dict( + train=dict(classes=classes), + val=dict(classes=classes), + test=dict(classes=classes)) + +load_from = 'http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_bbox_mAP-0.398_20200504_163323-30042637.pth' # noqa diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py new file mode 100644 index 0000000..4b87b2c --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py @@ -0,0 +1,42 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict( + norm_cfg=dict(requires_grad=False), norm_eval=True, style='caffe')) +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py new file mode 100644 index 0000000..df58973 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 23]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..a0ba54d --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py @@ -0,0 +1,4 @@ +_base_ = './faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py' +# learning policy +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_90k_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_90k_coco.py new file mode 100644 index 0000000..74dca24 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_90k_coco.py @@ -0,0 +1,15 @@ +_base_ = 'faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py' + +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[60000, 80000]) + +# Runner type +runner = dict(_delete_=True, type='IterBasedRunner', max_iters=90000) + +checkpoint_config = dict(interval=10000) +evaluation = dict(interval=10000, metric='bbox') diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..009bd93 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.py new file mode 100644 index 0000000..e77a7fa --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py' +] diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_bounded_iou_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_bounded_iou_1x_coco.py new file mode 100644 index 0000000..648081f --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_bounded_iou_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + roi_head=dict( + bbox_head=dict( + reg_decoded_bbox=True, + loss_bbox=dict(type='BoundedIoULoss', loss_weight=10.0)))) diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_giou_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_giou_1x_coco.py new file mode 100644 index 0000000..5556c49 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_giou_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + roi_head=dict( + bbox_head=dict( + reg_decoded_bbox=True, + loss_bbox=dict(type='GIoULoss', loss_weight=10.0)))) diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_iou_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_iou_1x_coco.py new file mode 100644 index 0000000..ddf663e --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_iou_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + roi_head=dict( + bbox_head=dict( + reg_decoded_bbox=True, + loss_bbox=dict(type='IoULoss', loss_weight=10.0)))) diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py new file mode 100644 index 0000000..f897e7c --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict(train_cfg=dict(rcnn=dict(sampler=dict(type='OHEMSampler')))) diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_soft_nms_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_soft_nms_1x_coco.py new file mode 100644 index 0000000..759ae3a --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_soft_nms_1x_coco.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict( + test_cfg=dict( + rcnn=dict( + score_thr=0.05, + nms=dict(type='soft_nms', iou_threshold=0.5), + max_per_img=100))) diff --git a/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py b/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..c536fcc --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco.py b/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco.py new file mode 100644 index 0000000..9276092 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco.py @@ -0,0 +1,13 @@ +_base_ = './faster_rcnn_r50_fpn_2x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py b/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..b588b4e --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco.py b/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco.py new file mode 100644 index 0000000..e87d21a --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco.py @@ -0,0 +1,13 @@ +_base_ = './faster_rcnn_r50_fpn_2x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/fcos/README.md b/configs/fcos/README.md new file mode 100644 index 0000000..c6209d2 --- /dev/null +++ b/configs/fcos/README.md @@ -0,0 +1,35 @@ +# FCOS: Fully Convolutional One-Stage Object Detection + +## Introduction + +[ALGORITHM] + +```latex +@article{tian2019fcos, + title={FCOS: Fully Convolutional One-Stage Object Detection}, + author={Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong}, + journal={arXiv preprint arXiv:1904.01355}, + year={2019} +} +``` + +## Results and Models + +| Backbone | Style | GN | MS train | Tricks | DCN | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:---------:|:-------:|:-------:|:--------:|:-------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | caffe | Y | N | N | N | 1x | 3.6 | 22.7 | 36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco.py) | [model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco/fcos_r50_caffe_fpn_gn-head_1x_coco-821213aa.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco/20201227_180009.log.json) | +| R-50 | caffe | Y | N | Y | N | 1x | 3.7 | - | 38.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py) | [model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/20210105_135818.log.json)| +| R-50 | caffe | Y | N | Y | Y | 1x | 3.8 | - | 42.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco.py) | [model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco-ae4d8b3d.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco/20210105_224556.log.json)| +| R-101 | caffe | Y | N | N | N | 1x | 5.5 | 17.3 | 39.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco.py) | [model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco/fcos_r101_caffe_fpn_gn-head_1x_coco-0e37b982.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco/20210103_155046.log.json) | + +| Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:---------:|:-------:|:-------:|:--------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | caffe | Y | Y | 2x | 2.6 | 22.9 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py) | [model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco-d92ceeea.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco/20201227_161900.log.json) | +| R-101 | caffe | Y | Y | 2x | 5.5 | 17.3 | 40.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py) | [model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco-511424d6.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco/20210103_155046.log.json) | +| X-101 | pytorch | Y | Y | 2x | 10.0 | 9.7 | 42.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco.py) | [model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco-ede514a8.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco/20210114_133041.log.json) | + +**Notes:** + +- The X-101 backbone is X-101-64x4d. +- Tricks means setting `norm_on_bbox`, `centerness_on_reg`, `center_sampling` as `True`. +- DCN means using `DCNv2` in both backbone and head. diff --git a/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py b/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py new file mode 100644 index 0000000..c25561e --- /dev/null +++ b/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py @@ -0,0 +1,51 @@ +_base_ = 'fcos_r50_caffe_fpn_gn-head_1x_coco.py' + +model = dict( + pretrained='open-mmlab://detectron2/resnet50_caffe', + bbox_head=dict( + norm_on_bbox=True, + centerness_on_reg=True, + dcn_on_last_conv=False, + center_sampling=True, + conv_bias=True, + loss_bbox=dict(type='GIoULoss', loss_weight=1.0)), + # training and testing settings + test_cfg=dict(nms=dict(type='nms', iou_threshold=0.6))) + +# dataset settings +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +optimizer_config = dict(_delete_=True, grad_clip=None) + +lr_config = dict(warmup='linear') diff --git a/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco.py b/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco.py new file mode 100644 index 0000000..72b90f8 --- /dev/null +++ b/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco.py @@ -0,0 +1,54 @@ +_base_ = 'fcos_r50_caffe_fpn_gn-head_1x_coco.py' + +model = dict( + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict( + dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True)), + bbox_head=dict( + norm_on_bbox=True, + centerness_on_reg=True, + dcn_on_last_conv=True, + center_sampling=True, + conv_bias=True, + loss_bbox=dict(type='GIoULoss', loss_weight=1.0)), + # training and testing settings + test_cfg=dict(nms=dict(type='nms', iou_threshold=0.6))) + +# dataset settings +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +optimizer_config = dict(_delete_=True, grad_clip=None) + +lr_config = dict(warmup='linear') diff --git a/configs/fcos/fcos_center_r50_caffe_fpn_gn-head_1x_coco.py b/configs/fcos/fcos_center_r50_caffe_fpn_gn-head_1x_coco.py new file mode 100644 index 0000000..9f502e7 --- /dev/null +++ b/configs/fcos/fcos_center_r50_caffe_fpn_gn-head_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py' +model = dict(bbox_head=dict(center_sampling=True, center_sample_radius=1.5)) diff --git a/configs/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco.py b/configs/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco.py new file mode 100644 index 0000000..6c38266 --- /dev/null +++ b/configs/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron/resnet101_caffe', + backbone=dict(depth=101)) diff --git a/configs/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py b/configs/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py new file mode 100644 index 0000000..81f61c6 --- /dev/null +++ b/configs/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py @@ -0,0 +1,44 @@ +_base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron/resnet101_caffe', + backbone=dict(depth=101)) +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco.py b/configs/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco.py new file mode 100644 index 0000000..6e12411 --- /dev/null +++ b/configs/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco.py @@ -0,0 +1,105 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# model settings +model = dict( + type='FCOS', + pretrained='open-mmlab://detectron/resnet50_caffe', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='caffe'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs=True, + extra_convs_on_inputs=False, # use P5 + num_outs=5, + relu_before_extra_convs=True), + bbox_head=dict( + type='FCOSHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + strides=[8, 16, 32, 64, 128], + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='IoULoss', loss_weight=1.0), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100)) +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict( + lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.)) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='constant', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[8, 11]) +runner = dict(type='EpochBasedRunner', max_epochs=12) diff --git a/configs/fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py b/configs/fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py new file mode 100644 index 0000000..2816b16 --- /dev/null +++ b/configs/fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py @@ -0,0 +1,4 @@ +# TODO: Remove this config after benchmarking all related configs +_base_ = 'fcos_r50_caffe_fpn_gn-head_1x_coco.py' + +data = dict(samples_per_gpu=4, workers_per_gpu=4) diff --git a/configs/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py b/configs/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py new file mode 100644 index 0000000..497d03f --- /dev/null +++ b/configs/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py @@ -0,0 +1,39 @@ +_base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py' +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco.py b/configs/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco.py new file mode 100644 index 0000000..fc576f6 --- /dev/null +++ b/configs/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco.py @@ -0,0 +1,59 @@ +_base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch')) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict( + lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.)) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/foveabox/README.md b/configs/foveabox/README.md new file mode 100644 index 0000000..91a43c9 --- /dev/null +++ b/configs/foveabox/README.md @@ -0,0 +1,41 @@ +# FoveaBox: Beyond Anchor-based Object Detector + +[ALGORITHM] + +FoveaBox is an accurate, flexible and completely anchor-free object detection system for object detection framework, as presented in our paper [https://arxiv.org/abs/1904.03797](https://arxiv.org/abs/1904.03797): +Different from previous anchor-based methods, FoveaBox directly learns the object existing possibility and the bounding box coordinates without anchor reference. This is achieved by: (a) predicting category-sensitive semantic maps for the object existing possibility, and (b) producing category-agnostic bounding box for each position that potentially contains an object. + +## Main Results + +### Results on R50/101-FPN + +| Backbone | Style | align | ms-train| Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | pytorch | N | N | 1x | 5.6 | 24.1 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_1x_coco/fovea_r50_fpn_4x4_1x_coco_20200219-ee4d5303.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_1x_coco/fovea_r50_fpn_4x4_1x_coco_20200219_223025.log.json) | +| R-50 | pytorch | N | N | 2x | 5.6 | - | 37.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_r50_fpn_4x4_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_2x_coco/fovea_r50_fpn_4x4_2x_coco_20200203-2df792b1.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_2x_coco/fovea_r50_fpn_4x4_2x_coco_20200203_112043.log.json) | +| R-50 | pytorch | Y | N | 2x | 8.1 | 19.4 | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco/fovea_align_r50_fpn_gn-head_4x4_2x_coco_20200203-8987880d.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco/fovea_align_r50_fpn_gn-head_4x4_2x_coco_20200203_134252.log.json) | +| R-50 | pytorch | Y | Y | 2x | 8.1 | 18.3 | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200205-85ce26cb.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200205_112557.log.json) | +| R-101 | pytorch | N | N | 1x | 9.2 | 17.4 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_r101_fpn_4x4_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_1x_coco/fovea_r101_fpn_4x4_1x_coco_20200219-05e38f1c.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_1x_coco/fovea_r101_fpn_4x4_1x_coco_20200219_011740.log.json) | +| R-101 | pytorch | N | N | 2x | 11.7 | - | 40.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_r101_fpn_4x4_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_2x_coco/fovea_r101_fpn_4x4_2x_coco_20200208-02320ea4.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_2x_coco/fovea_r101_fpn_4x4_2x_coco_20200208_202059.log.json) | +| R-101 | pytorch | Y | N | 2x | 11.7 | 14.7 | 40.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco/fovea_align_r101_fpn_gn-head_4x4_2x_coco_20200208-c39a027a.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco/fovea_align_r101_fpn_gn-head_4x4_2x_coco_20200208_203337.log.json) | +| R-101 | pytorch | Y | Y | 2x | 11.7 | 14.7 | 42.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200208-649c5eb6.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200208_202124.log.json) | + +[1] *1x and 2x mean the model is trained for 12 and 24 epochs, respectively.* \ +[2] *Align means utilizing deformable convolution to align the cls branch.* \ +[3] *All results are obtained with a single model and without any test time data augmentation.*\ +[4] *We use 4 GPUs for training.* + +Any pull requests or issues are welcome. + +## Citations + +Please consider citing our paper in your publications if the project helps your research. BibTeX reference is as follows. + +```latex +@article{kong2019foveabox, + title={FoveaBox: Beyond Anchor-based Object Detector}, + author={Kong, Tao and Sun, Fuchun and Liu, Huaping and Jiang, Yuning and Shi, Jianbo}, + journal={arXiv preprint arXiv:1904.03797}, + year={2019} +} +``` diff --git a/configs/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco.py b/configs/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco.py new file mode 100644 index 0000000..30dca04 --- /dev/null +++ b/configs/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco.py @@ -0,0 +1,10 @@ +_base_ = './fovea_r50_fpn_4x4_1x_coco.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(depth=101), + bbox_head=dict( + with_deform=True, + norm_cfg=dict(type='GN', num_groups=32, requires_grad=True))) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py b/configs/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py new file mode 100644 index 0000000..a02a814 --- /dev/null +++ b/configs/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py @@ -0,0 +1,27 @@ +_base_ = './fovea_r50_fpn_4x4_1x_coco.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(depth=101), + bbox_head=dict( + with_deform=True, + norm_cfg=dict(type='GN', num_groups=32, requires_grad=True))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +data = dict(train=dict(pipeline=train_pipeline)) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py b/configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py new file mode 100644 index 0000000..e7265bc --- /dev/null +++ b/configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py @@ -0,0 +1,10 @@ +_base_ = './fovea_r50_fpn_4x4_1x_coco.py' +model = dict( + bbox_head=dict( + with_deform=True, + norm_cfg=dict(type='GN', num_groups=32, requires_grad=True))) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py b/configs/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py new file mode 100644 index 0000000..8fc39be --- /dev/null +++ b/configs/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py @@ -0,0 +1,25 @@ +_base_ = './fovea_r50_fpn_4x4_1x_coco.py' +model = dict( + bbox_head=dict( + with_deform=True, + norm_cfg=dict(type='GN', num_groups=32, requires_grad=True))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +data = dict(train=dict(pipeline=train_pipeline)) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/foveabox/fovea_r101_fpn_4x4_1x_coco.py b/configs/foveabox/fovea_r101_fpn_4x4_1x_coco.py new file mode 100644 index 0000000..907bede --- /dev/null +++ b/configs/foveabox/fovea_r101_fpn_4x4_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './fovea_r50_fpn_4x4_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/foveabox/fovea_r101_fpn_4x4_2x_coco.py b/configs/foveabox/fovea_r101_fpn_4x4_2x_coco.py new file mode 100644 index 0000000..9296393 --- /dev/null +++ b/configs/foveabox/fovea_r101_fpn_4x4_2x_coco.py @@ -0,0 +1,2 @@ +_base_ = './fovea_r50_fpn_4x4_2x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py b/configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py new file mode 100644 index 0000000..fd39257 --- /dev/null +++ b/configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py @@ -0,0 +1,52 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# model settings +model = dict( + type='FOVEA', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + num_outs=5, + add_extra_convs='on_input'), + bbox_head=dict( + type='FoveaHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + strides=[8, 16, 32, 64, 128], + base_edge_list=[16, 32, 64, 128, 256], + scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)), + sigma=0.4, + with_deform=False, + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=1.50, + alpha=0.4, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)), + # training and testing settings + train_cfg=dict(), + test_cfg=dict( + nms_pre=1000, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100)) +data = dict(samples_per_gpu=4, workers_per_gpu=4) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/foveabox/fovea_r50_fpn_4x4_2x_coco.py b/configs/foveabox/fovea_r50_fpn_4x4_2x_coco.py new file mode 100644 index 0000000..68ce4d2 --- /dev/null +++ b/configs/foveabox/fovea_r50_fpn_4x4_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './fovea_r50_fpn_4x4_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/fp16/README.md b/configs/fp16/README.md new file mode 100644 index 0000000..17eaa7d --- /dev/null +++ b/configs/fp16/README.md @@ -0,0 +1,22 @@ +# Mixed Precision Training + +## Introduction + +[OTHERS] + +```latex +@article{micikevicius2017mixed, + title={Mixed precision training}, + author={Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others}, + journal={arXiv preprint arXiv:1710.03740}, + year={2017} +} +``` + +## Results and Models + +| Architecture | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:------------:|:---------:|:-------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| +| Faster R-CNN | R-50 | pytorch | 1x | 3.4 | 28.8 | 37.5 | - |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fp16/faster_rcnn_r50_fpn_fp16_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/fp16/faster_rcnn_r50_fpn_fp16_1x_coco/faster_rcnn_r50_fpn_fp16_1x_coco_20200204-d4dc1471.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/fp16/faster_rcnn_r50_fpn_fp16_1x_coco/faster_rcnn_r50_fpn_fp16_1x_coco_20200204_143530.log.json) | +| Mask R-CNN | R-50 | pytorch | 1x | 3.6 | 24.1 | 38.1 | 34.7 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fp16/mask_rcnn_r50_fpn_fp16_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_1x_coco/mask_rcnn_r50_fpn_fp16_1x_coco_20200205-59faf7e4.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_1x_coco/mask_rcnn_r50_fpn_fp16_1x_coco_20200205_130539.log.json) | +| Retinanet | R-50 | pytorch | 1x | 2.8 | 31.6 | 36.4 | |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fp16/retinanet_r50_fpn_fp16_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/fp16/retinanet_r50_fpn_fp16_1x_coco/retinanet_r50_fpn_fp16_1x_coco_20200702-0dbfb212.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/fp16/retinanet_r50_fpn_fp16_1x_coco/retinanet_r50_fpn_fp16_1x_coco_20200702_020127.log.json) | diff --git a/configs/fp16/faster_rcnn_r50_fpn_fp16_1x_coco.py b/configs/fp16/faster_rcnn_r50_fpn_fp16_1x_coco.py new file mode 100644 index 0000000..78fa5b6 --- /dev/null +++ b/configs/fp16/faster_rcnn_r50_fpn_fp16_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +# fp16 settings +fp16 = dict(loss_scale=512.) diff --git a/configs/fp16/mask_rcnn_r50_fpn_fp16_1x_coco.py b/configs/fp16/mask_rcnn_r50_fpn_fp16_1x_coco.py new file mode 100644 index 0000000..f506ea8 --- /dev/null +++ b/configs/fp16/mask_rcnn_r50_fpn_fp16_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +# fp16 settings +fp16 = dict(loss_scale=512.) diff --git a/configs/fp16/retinanet_r50_fpn_fp16_1x_coco.py b/configs/fp16/retinanet_r50_fpn_fp16_1x_coco.py new file mode 100644 index 0000000..519c4db --- /dev/null +++ b/configs/fp16/retinanet_r50_fpn_fp16_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py' +# fp16 settings +fp16 = dict(loss_scale=512.) diff --git a/configs/fpg/README.md b/configs/fpg/README.md new file mode 100644 index 0000000..89f5adb --- /dev/null +++ b/configs/fpg/README.md @@ -0,0 +1,29 @@ +# Feature Pyramid Grids + +## Introduction + +```latex +@article{chen2020feature, + title={Feature pyramid grids}, + author={Chen, Kai and Cao, Yuhang and Loy, Chen Change and Lin, Dahua and Feichtenhofer, Christoph}, + journal={arXiv preprint arXiv:2004.03580}, + year={2020} +} +``` + +## Results and Models + +We benchmark the new training schedule (crop training, large batch, unfrozen BN, 50 epochs) introduced in NAS-FPN. +All backbones are Resnet-50 in pytorch style. + +| Method | Neck | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:------------:|:-----------:|:-------:|:--------:|:--------------:|:------:|:-------:|:-------:|:--------:| +| Faster R-CNN | FPG | 50e | 20.0 | - | 42.2 | - |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/faster_rcnn_r50_fpg_crop640_50e_coco.py) | +| Faster R-CNN | FPG-chn128 | 50e | 11.9 | - | 41.2 | - |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco.py) | +| Mask R-CNN | FPG | 50e | 23.2 | - | 42.7 | 37.8 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/mask_rcnn_r50_fpg_crop640_50e_coco.py) | +| Mask R-CNN | FPG-chn128 | 50e | 15.3 | - | 41.7 | 36.9 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco.py) | +| RetinaNet | FPG | 50e | 20.8 | - | 40.5 | - |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/retinanet_r50_fpg_crop640_50e_coco.py) | +| RetinaNet | FPG-chn128 | 50e | | - | | - |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco.py) | + +**Note**: Chn128 means to decrease the number of channels of features and convs from 256 (default) to 128 in +Neck and BBox Head, which can greatly decrease memory consumption without sacrificing much precision. diff --git a/configs/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco.py b/configs/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco.py new file mode 100644 index 0000000..4535034 --- /dev/null +++ b/configs/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco.py @@ -0,0 +1,9 @@ +_base_ = 'faster_rcnn_r50_fpg_crop640_50e_coco.py' + +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + neck=dict(out_channels=128, inter_channels=128), + rpn_head=dict(in_channels=128), + roi_head=dict( + bbox_roi_extractor=dict(out_channels=128), + bbox_head=dict(in_channels=128))) diff --git a/configs/fpg/faster_rcnn_r50_fpg_crop640_50e_coco.py b/configs/fpg/faster_rcnn_r50_fpg_crop640_50e_coco.py new file mode 100644 index 0000000..3ab2a2c --- /dev/null +++ b/configs/fpg/faster_rcnn_r50_fpg_crop640_50e_coco.py @@ -0,0 +1,48 @@ +_base_ = 'faster_rcnn_r50_fpn_crop640_50e_coco.py' + +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + neck=dict( + type='FPG', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + inter_channels=256, + num_outs=5, + stack_times=9, + paths=['bu'] * 9, + same_down_trans=None, + same_up_trans=dict( + type='conv', + kernel_size=3, + stride=2, + padding=1, + norm_cfg=norm_cfg, + inplace=False, + order=('act', 'conv', 'norm')), + across_lateral_trans=dict( + type='conv', + kernel_size=1, + norm_cfg=norm_cfg, + inplace=False, + order=('act', 'conv', 'norm')), + across_down_trans=dict( + type='interpolation_conv', + mode='nearest', + kernel_size=3, + norm_cfg=norm_cfg, + order=('act', 'conv', 'norm'), + inplace=False), + across_up_trans=None, + across_skip_trans=dict( + type='conv', + kernel_size=1, + norm_cfg=norm_cfg, + inplace=False, + order=('act', 'conv', 'norm')), + output_trans=dict( + type='last_conv', + kernel_size=3, + order=('act', 'conv', 'norm'), + inplace=False), + norm_cfg=norm_cfg, + skip_inds=[(0, 1, 2, 3), (0, 1, 2), (0, 1), (0, ), ()])) diff --git a/configs/fpg/faster_rcnn_r50_fpn_crop640_50e_coco.py b/configs/fpg/faster_rcnn_r50_fpn_crop640_50e_coco.py new file mode 100644 index 0000000..95f4e91 --- /dev/null +++ b/configs/fpg/faster_rcnn_r50_fpn_crop640_50e_coco.py @@ -0,0 +1,68 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + backbone=dict(norm_cfg=norm_cfg, norm_eval=False), + neck=dict(norm_cfg=norm_cfg), + roi_head=dict(bbox_head=dict(norm_cfg=norm_cfg))) +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=(640, 640), + ratio_range=(0.8, 1.2), + keep_ratio=True), + dict(type='RandomCrop', crop_size=(640, 640)), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=(640, 640)), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(640, 640), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=64), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# learning policy +optimizer = dict( + type='SGD', + lr=0.08, + momentum=0.9, + weight_decay=0.0001, + paramwise_cfg=dict(norm_decay_mult=0, bypass_duplicate=True)) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.1, + step=[30, 40]) +# runtime settings +runner = dict(max_epochs=50) +evaluation = dict(interval=2) diff --git a/configs/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco.py b/configs/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco.py new file mode 100644 index 0000000..baa4a5a --- /dev/null +++ b/configs/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco.py @@ -0,0 +1,10 @@ +_base_ = 'mask_rcnn_r50_fpg_crop640_50e_coco.py' + +model = dict( + neck=dict(out_channels=128, inter_channels=128), + rpn_head=dict(in_channels=128), + roi_head=dict( + bbox_roi_extractor=dict(out_channels=128), + bbox_head=dict(in_channels=128), + mask_roi_extractor=dict(out_channels=128), + mask_head=dict(in_channels=128))) diff --git a/configs/fpg/mask_rcnn_r50_fpg_crop640_50e_coco.py b/configs/fpg/mask_rcnn_r50_fpg_crop640_50e_coco.py new file mode 100644 index 0000000..3c9ea27 --- /dev/null +++ b/configs/fpg/mask_rcnn_r50_fpg_crop640_50e_coco.py @@ -0,0 +1,48 @@ +_base_ = 'mask_rcnn_r50_fpn_crop640_50e_coco.py' + +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + neck=dict( + type='FPG', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + inter_channels=256, + num_outs=5, + stack_times=9, + paths=['bu'] * 9, + same_down_trans=None, + same_up_trans=dict( + type='conv', + kernel_size=3, + stride=2, + padding=1, + norm_cfg=norm_cfg, + inplace=False, + order=('act', 'conv', 'norm')), + across_lateral_trans=dict( + type='conv', + kernel_size=1, + norm_cfg=norm_cfg, + inplace=False, + order=('act', 'conv', 'norm')), + across_down_trans=dict( + type='interpolation_conv', + mode='nearest', + kernel_size=3, + norm_cfg=norm_cfg, + order=('act', 'conv', 'norm'), + inplace=False), + across_up_trans=None, + across_skip_trans=dict( + type='conv', + kernel_size=1, + norm_cfg=norm_cfg, + inplace=False, + order=('act', 'conv', 'norm')), + output_trans=dict( + type='last_conv', + kernel_size=3, + order=('act', 'conv', 'norm'), + inplace=False), + norm_cfg=norm_cfg, + skip_inds=[(0, 1, 2, 3), (0, 1, 2), (0, 1), (0, ), ()])) diff --git a/configs/fpg/mask_rcnn_r50_fpn_crop640_50e_coco.py b/configs/fpg/mask_rcnn_r50_fpn_crop640_50e_coco.py new file mode 100644 index 0000000..8dfdbb4 --- /dev/null +++ b/configs/fpg/mask_rcnn_r50_fpn_crop640_50e_coco.py @@ -0,0 +1,74 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + backbone=dict(norm_cfg=norm_cfg, norm_eval=False), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + norm_cfg=norm_cfg, + num_outs=5), + roi_head=dict( + bbox_head=dict(norm_cfg=norm_cfg), mask_head=dict(norm_cfg=norm_cfg))) +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=(640, 640), + ratio_range=(0.8, 1.2), + keep_ratio=True), + dict(type='RandomCrop', crop_size=(640, 640)), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=(640, 640)), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(640, 640), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=64), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# learning policy +optimizer = dict( + type='SGD', + lr=0.08, + momentum=0.9, + weight_decay=0.0001, + paramwise_cfg=dict(norm_decay_mult=0, bypass_duplicate=True)) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.1, + step=[30, 40]) +# runtime settings +runner = dict(max_epochs=50) +evaluation = dict(interval=2) diff --git a/configs/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco.py b/configs/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco.py new file mode 100644 index 0000000..9a6cf7e --- /dev/null +++ b/configs/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco.py @@ -0,0 +1,5 @@ +_base_ = 'retinanet_r50_fpg_crop640_50e_coco.py' + +model = dict( + neck=dict(out_channels=128, inter_channels=128), + bbox_head=dict(in_channels=128)) diff --git a/configs/fpg/retinanet_r50_fpg_crop640_50e_coco.py b/configs/fpg/retinanet_r50_fpg_crop640_50e_coco.py new file mode 100644 index 0000000..504ed5e --- /dev/null +++ b/configs/fpg/retinanet_r50_fpg_crop640_50e_coco.py @@ -0,0 +1,53 @@ +_base_ = '../nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py' + +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + neck=dict( + _delete_=True, + type='FPG', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + inter_channels=256, + num_outs=5, + add_extra_convs=True, + start_level=1, + stack_times=9, + paths=['bu'] * 9, + same_down_trans=None, + same_up_trans=dict( + type='conv', + kernel_size=3, + stride=2, + padding=1, + norm_cfg=norm_cfg, + inplace=False, + order=('act', 'conv', 'norm')), + across_lateral_trans=dict( + type='conv', + kernel_size=1, + norm_cfg=norm_cfg, + inplace=False, + order=('act', 'conv', 'norm')), + across_down_trans=dict( + type='interpolation_conv', + mode='nearest', + kernel_size=3, + norm_cfg=norm_cfg, + order=('act', 'conv', 'norm'), + inplace=False), + across_up_trans=None, + across_skip_trans=dict( + type='conv', + kernel_size=1, + norm_cfg=norm_cfg, + inplace=False, + order=('act', 'conv', 'norm')), + output_trans=dict( + type='last_conv', + kernel_size=3, + order=('act', 'conv', 'norm'), + inplace=False), + norm_cfg=norm_cfg, + skip_inds=[(0, 1, 2, 3), (0, 1, 2), (0, 1), (0, ), ()])) + +evaluation = dict(interval=2) diff --git a/configs/free_anchor/README.md b/configs/free_anchor/README.md new file mode 100644 index 0000000..6d6474c --- /dev/null +++ b/configs/free_anchor/README.md @@ -0,0 +1,27 @@ +# FreeAnchor: Learning to Match Anchors for Visual Object Detection + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{zhang2019freeanchor, + title = {{FreeAnchor}: Learning to Match Anchors for Visual Object Detection}, + author = {Zhang, Xiaosong and Wan, Fang and Liu, Chang and Ji, Rongrong and Ye, Qixiang}, + booktitle = {Neural Information Processing Systems}, + year = {2019} +} +``` + +## Results and Models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:--------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | pytorch | 1x | 4.9 | 18.4 | 38.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco/retinanet_free_anchor_r50_fpn_1x_coco_20200130-0f67375f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco/retinanet_free_anchor_r50_fpn_1x_coco_20200130_095625.log.json) | +| R-101 | pytorch | 1x | 6.8 | 14.9 | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco/retinanet_free_anchor_r101_fpn_1x_coco_20200130-358324e6.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco/retinanet_free_anchor_r101_fpn_1x_coco_20200130_100723.log.json) | +| X-101-32x4d | pytorch | 1x | 8.1 | 11.1 | 41.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco/retinanet_free_anchor_x101_32x4d_fpn_1x_coco_20200130-d4846968.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco/retinanet_free_anchor_x101_32x4d_fpn_1x_coco_20200130_095627.log.json) | + +**Notes:** + +- We use 8 GPUs with 2 images/GPU. +- For more settings and models, please refer to the [official repo](https://github.com/zhangxiaosong18/FreeAnchor). diff --git a/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco.py b/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco.py new file mode 100644 index 0000000..9917d5c --- /dev/null +++ b/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './retinanet_free_anchor_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py b/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py new file mode 100644 index 0000000..28f983c --- /dev/null +++ b/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py @@ -0,0 +1,22 @@ +_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py' +model = dict( + bbox_head=dict( + _delete_=True, + type='FreeAnchorRetinaHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2]), + loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.75))) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco.py b/configs/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..e2640c0 --- /dev/null +++ b/configs/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,12 @@ +_base_ = './retinanet_free_anchor_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch')) diff --git a/configs/fsaf/README.md b/configs/fsaf/README.md new file mode 100644 index 0000000..42468c8 --- /dev/null +++ b/configs/fsaf/README.md @@ -0,0 +1,45 @@ +# Feature Selective Anchor-Free Module for Single-Shot Object Detection + +[ALGORITHM] + +FSAF is an anchor-free method published in CVPR2019 ([https://arxiv.org/pdf/1903.00621.pdf](https://arxiv.org/pdf/1903.00621.pdf)). +Actually it is equivalent to the anchor-based method with only one anchor at each feature map position in each FPN level. +And this is how we implemented it. +Only the anchor-free branch is released for its better compatibility with the current framework and less computational budget. + +In the original paper, feature maps within the central 0.2-0.5 area of a gt box are tagged as ignored. However, +it is empirically found that a hard threshold (0.2-0.2) gives a further gain on the performance. (see the table below) + +## Main Results + +### Results on R50/R101/X101-FPN + +| Backbone | ignore range | ms-train| Lr schd |Train Mem (GB)| Train time (s/iter) | Inf time (fps) | box AP | Config | Download | +|:----------:| :-------: |:-------:|:-------:|:------------:|:---------------:|:--------------:|:-------------:|:------:|:--------:| +| R-50 | 0.2-0.5 | N | 1x | 3.15 | 0.43 | 12.3 | 36.0 (35.9) | | [model](http://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_pscale0.2_nscale0.5_r50_fpn_1x_coco/fsaf_pscale0.2_nscale0.5_r50_fpn_1x_coco_20200715-b555b0e0.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_pscale0.2_nscale0.5_r50_fpn_1x_coco/fsaf_pscale0.2_nscale0.5_r50_fpn_1x_coco_20200715_094657.log.json) | +| R-50 | 0.2-0.2 | N | 1x | 3.15 | 0.43 | 13.0 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf/fsaf_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco-94ccc51f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco_20200428_072327.log.json)| +| R-101 | 0.2-0.2 | N | 1x | 5.08 | 0.58 | 10.8 | 39.3 (37.9) | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf/fsaf_r101_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r101_fpn_1x_coco/fsaf_r101_fpn_1x_coco-9e71098f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r101_fpn_1x_coco/fsaf_r101_fpn_1x_coco_20200428_160348.log.json)| +| X-101 | 0.2-0.2 | N | 1x | 9.38 | 1.23 | 5.6 | 42.4 (41.0) | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf/fsaf_x101_64x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_x101_64x4d_fpn_1x_coco/fsaf_x101_64x4d_fpn_1x_coco-e3f6e6fd.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_x101_64x4d_fpn_1x_coco/fsaf_x101_64x4d_fpn_1x_coco_20200428_160424.log.json)| + +**Notes:** + +- *1x means the model is trained for 12 epochs.* +- *AP values in the brackets represent those reported in the original paper.* +- *All results are obtained with a single model and single-scale test.* +- *X-101 backbone represents ResNext-101-64x4d.* +- *All pretrained backbones use pytorch style.* +- *All models are trained on 8 Titan-XP gpus and tested on a single gpu.* + +## Citations + +BibTeX reference is as follows. + +```latex +@inproceedings{zhu2019feature, + title={Feature Selective Anchor-Free Module for Single-Shot Object Detection}, + author={Zhu, Chenchen and He, Yihui and Savvides, Marios}, + booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, + pages={840--849}, + year={2019} +} +``` diff --git a/configs/fsaf/fsaf_r101_fpn_1x_coco.py b/configs/fsaf/fsaf_r101_fpn_1x_coco.py new file mode 100644 index 0000000..95a7ae2 --- /dev/null +++ b/configs/fsaf/fsaf_r101_fpn_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './fsaf_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/fsaf/fsaf_r50_fpn_1x_coco.py b/configs/fsaf/fsaf_r50_fpn_1x_coco.py new file mode 100644 index 0000000..67f3ec1 --- /dev/null +++ b/configs/fsaf/fsaf_r50_fpn_1x_coco.py @@ -0,0 +1,48 @@ +_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py' +# model settings +model = dict( + type='FSAF', + bbox_head=dict( + type='FSAFHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + reg_decoded_bbox=True, + # Only anchor-free branch is implemented. The anchor generator only + # generates 1 anchor at each feature point, as a substitute of the + # grid of features. + anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=1, + scales_per_octave=1, + ratios=[1.0], + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict(_delete_=True, type='TBLRBBoxCoder', normalizer=4.0), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0, + reduction='none'), + loss_bbox=dict( + _delete_=True, + type='IoULoss', + eps=1e-6, + loss_weight=1.0, + reduction='none')), + # training and testing settings + train_cfg=dict( + assigner=dict( + _delete_=True, + type='CenterRegionAssigner', + pos_scale=0.2, + neg_scale=0.2, + min_pos_iof=0.01), + allowed_border=-1, + pos_weight=-1, + debug=False)) +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=10, norm_type=2)) diff --git a/configs/fsaf/fsaf_x101_64x4d_fpn_1x_coco.py b/configs/fsaf/fsaf_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..b966f24 --- /dev/null +++ b/configs/fsaf/fsaf_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './fsaf_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/gcnet/README.md b/configs/gcnet/README.md new file mode 100644 index 0000000..0ef8db7 --- /dev/null +++ b/configs/gcnet/README.md @@ -0,0 +1,59 @@ +# GCNet for Object Detection + +By [Yue Cao](http://yue-cao.me), [Jiarui Xu](http://jerryxu.net), [Stephen Lin](https://scholar.google.com/citations?user=c3PYmxUAAAAJ&hl=en), Fangyun Wei, [Han Hu](https://sites.google.com/site/hanhushomepage/). + +We provide config files to reproduce the results in the paper for +["GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond"](https://arxiv.org/abs/1904.11492) on COCO object detection. + +## Introduction + +[ALGORITHM] + +**GCNet** is initially described in [arxiv](https://arxiv.org/abs/1904.11492). Via absorbing advantages of Non-Local Networks (NLNet) and Squeeze-Excitation Networks (SENet), GCNet provides a simple, fast and effective approach for global context modeling, which generally outperforms both NLNet and SENet on major benchmarks for various recognition tasks. + +## Citing GCNet + +```latex +@article{cao2019GCNet, + title={GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond}, + author={Cao, Yue and Xu, Jiarui and Lin, Stephen and Wei, Fangyun and Hu, Han}, + journal={arXiv preprint arXiv:1904.11492}, + year={2019} +} +``` + +## Results and models + +The results on COCO 2017val are shown in the below table. + +| Backbone | Model | Context | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------: | :--------------: | :------------: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +| R-50-FPN | Mask | GC(c3-c5, r16) | 1x | 5.0 | | 39.7 | 35.9 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco_20200515_211915-187da160.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco_20200515_211915.log.json) | +| R-50-FPN | Mask | GC(c3-c5, r4) | 1x | 5.1 | 15.0 | 39.9 | 36.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco_20200204-17235656.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco_20200204_024626.log.json) | +| R-101-FPN | Mask | GC(c3-c5, r16) | 1x | 7.6 | 11.4 | 41.3 | 37.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco_20200205-e58ae947.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco_20200205_192835.log.json) | +| R-101-FPN | Mask | GC(c3-c5, r4) | 1x | 7.8 | 11.6 | 42.2 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco_20200206-af22dc9d.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco_20200206_112128.log.json) | + +| Backbone | Model | Context | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------: | :--------------: | :------------: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :-------: | +| R-50-FPN | Mask | - | 1x | 4.4 | 16.6 | 38.4 | 34.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco_20200202-bb3eb55c.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco_20200202_214122.log.json) | +| R-50-FPN | Mask | GC(c3-c5, r16) | 1x | 5.0 | 15.5 | 40.4 | 36.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200202-587b99aa.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200202_174907.log.json) | +| R-50-FPN | Mask | GC(c3-c5, r4) | 1x | 5.1 | 15.1 | 40.7 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202-50b90e5c.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202_085547.log.json) | +| R-101-FPN | Mask | - | 1x | 6.4 | 13.3 | 40.5 | 36.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco_20200210-81658c8a.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco_20200210_220422.log.json) | +| R-101-FPN | Mask | GC(c3-c5, r16) | 1x | 7.6 | 12.0 | 42.2 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200207-945e77ca.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200207_015330.log.json) | +| R-101-FPN | Mask | GC(c3-c5, r4) | 1x | 7.8 | 11.8 | 42.2 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206-8407a3f0.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206_142508.log.json) | +| X-101-FPN | Mask | - | 1x | 7.6 | 11.3 | 42.4 | 37.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200211-7584841c.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200211_054326.log.json) | +| X-101-FPN | Mask | GC(c3-c5, r16) | 1x | 8.8 | 9.8 | 43.5 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211-cbed3d2c.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211_164715.log.json) | +| X-101-FPN | Mask | GC(c3-c5, r4) | 1x | 9.0 | 9.7 | 43.9 | 39.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200212-68164964.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200212_070942.log.json) | +| X-101-FPN | Cascade Mask | - | 1x | 9.2 | 8.4 | 44.7 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200310-d5ad2a5e.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200310_115217.log.json) | +| X-101-FPN | Cascade Mask | GC(c3-c5, r16) | 1x | 10.3 | 7.7 | 46.2 | 39.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211-10bf2463.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211_184154.log.json) | +| X-101-FPN | Cascade Mask | GC(c3-c5, r4) | 1x | 10.6 | | 46.4 | 40.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200703_180653-ed035291.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200703_180653.log.json) | +| X-101-FPN | DCN Cascade Mask | - | 1x | | | 44.9 | 38.9 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco_20200516_182249-680fc3f2.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco_20200516_182249.log.json)| +| X-101-FPN | DCN Cascade Mask | GC(c3-c5, r16) | 1x | | | 44.6 | |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco_20200516_015634-08f56b56.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco_20200516_015634.log.json) | +| X-101-FPN | DCN Cascade Mask | GC(c3-c5, r4) | 1x | | | 45.7 | 39.5 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco_20200518_041145-24cabcfd.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco_20200518_041145.log.json) | + +**Notes:** + +- The `SyncBN` is added in the backbone for all models in **Table 2**. +- `GC` denotes Global Context (GC) block is inserted after 1x1 conv of backbone. +- `DCN` denotes replace 3x3 conv with 3x3 Deformable Convolution in `c3-c5` stages of backbone. +- `r4` and `r16` denote ratio 4 and ratio 16 in GC block respectively. diff --git a/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py new file mode 100644 index 0000000..5118895 --- /dev/null +++ b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), norm_eval=False)) diff --git a/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco.py b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..464aef7 --- /dev/null +++ b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = '../dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), norm_eval=False)) diff --git a/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco.py b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..fa4b6f1 --- /dev/null +++ b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 16), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco.py b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..b76e3e6 --- /dev/null +++ b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 4), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..50883ff --- /dev/null +++ b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 16), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..31fdd07 --- /dev/null +++ b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 4), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..ad6ad47 --- /dev/null +++ b/configs/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = '../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict(plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 16), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..29f9167 --- /dev/null +++ b/configs/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = '../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict(plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 4), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco.py b/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco.py new file mode 100644 index 0000000..6e1c5d0 --- /dev/null +++ b/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = '../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), norm_eval=False)) diff --git a/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..781dba7 --- /dev/null +++ b/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 16), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..32972de --- /dev/null +++ b/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 4), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..d299b69 --- /dev/null +++ b/configs/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict(plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 16), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..5ac908e --- /dev/null +++ b/configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict(plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 4), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco.py b/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco.py new file mode 100644 index 0000000..0308a56 --- /dev/null +++ b/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), norm_eval=False)) diff --git a/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..e04780c --- /dev/null +++ b/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 16), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..980f819 --- /dev/null +++ b/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 4), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py b/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py new file mode 100644 index 0000000..f0c96e5 --- /dev/null +++ b/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = '../mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), norm_eval=False)) diff --git a/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..7fb8e82 --- /dev/null +++ b/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 16), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..b1ddbee --- /dev/null +++ b/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 4), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gfl/README.md b/configs/gfl/README.md new file mode 100644 index 0000000..53ae22b --- /dev/null +++ b/configs/gfl/README.md @@ -0,0 +1,32 @@ +# Generalized Focal Loss: Learning Qualified and Distributed Bounding Boxes for Dense Object Detection + +## Introduction + +[ALGORITHM] + +We provide config files to reproduce the object detection results in the paper [Generalized Focal Loss: Learning Qualified and Distributed Bounding Boxes for Dense Object Detection](https://arxiv.org/abs/2006.04388) + +```latex +@article{li2020generalized, + title={Generalized Focal Loss: Learning Qualified and Distributed Bounding Boxes for Dense Object Detection}, + author={Li, Xiang and Wang, Wenhai and Wu, Lijun and Chen, Shuo and Hu, Xiaolin and Li, Jun and Tang, Jinhui and Yang, Jian}, + journal={arXiv preprint arXiv:2006.04388}, + year={2020} +} +``` + +## Results and Models + +| Backbone | Style | Lr schd | Multi-scale Training| Inf time (fps) | box AP | Config | Download | +|:-----------------:|:-------:|:-------:|:-------------------:|:--------------:|:------:|:------:|:--------:| +| R-50 | pytorch | 1x | No | 19.5 | 40.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_1x_coco/gfl_r50_fpn_1x_coco_20200629_121244-25944287.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_1x_coco/gfl_r50_fpn_1x_coco_20200629_121244.log.json) | +| R-50 | pytorch | 2x | Yes | 19.5 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r50_fpn_mstrain_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_mstrain_2x_coco/gfl_r50_fpn_mstrain_2x_coco_20200629_213802-37bb1edc.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_mstrain_2x_coco/gfl_r50_fpn_mstrain_2x_coco_20200629_213802.log.json) | +| R-101 | pytorch | 2x | Yes | 14.7 | 44.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r101_fpn_mstrain_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126-dd12f847.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126.log.json) | +| R-101-dcnv2 | pytorch | 2x | Yes | 12.9 | 47.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco_20200630_102002-134b07df.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco_20200630_102002.log.json) | +| X-101-32x4d | pytorch | 2x | Yes | 12.1 | 45.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco/gfl_x101_32x4d_fpn_mstrain_2x_coco_20200630_102002-50c1ffdb.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco/gfl_x101_32x4d_fpn_mstrain_2x_coco_20200630_102002.log.json) | +| X-101-32x4d-dcnv2 | pytorch | 2x | Yes | 10.7 | 48.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco_20200630_102002-14a2bf25.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco_20200630_102002.log.json) | + +[1] *1x and 2x mean the model is trained for 90K and 180K iterations, respectively.* \ +[2] *All results are obtained with a single model and without any test time data augmentation such as multi-scale, flipping and etc..* \ +[3] *`dcnv2` denotes deformable convolutional networks v2.* \ +[4] *FPS is tested with a single GeForce RTX 2080Ti GPU, using a batch size of 1.* diff --git a/configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py b/configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py new file mode 100644 index 0000000..eab622b --- /dev/null +++ b/configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py @@ -0,0 +1,14 @@ +_base_ = './gfl_r50_fpn_mstrain_2x_coco.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict( + type='ResNet', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True), + norm_eval=True, + style='pytorch')) diff --git a/configs/gfl/gfl_r101_fpn_mstrain_2x_coco.py b/configs/gfl/gfl_r101_fpn_mstrain_2x_coco.py new file mode 100644 index 0000000..c972d0c --- /dev/null +++ b/configs/gfl/gfl_r101_fpn_mstrain_2x_coco.py @@ -0,0 +1,12 @@ +_base_ = './gfl_r50_fpn_mstrain_2x_coco.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict( + type='ResNet', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch')) diff --git a/configs/gfl/gfl_r50_fpn_1x_coco.py b/configs/gfl/gfl_r50_fpn_1x_coco.py new file mode 100644 index 0000000..29fb077 --- /dev/null +++ b/configs/gfl/gfl_r50_fpn_1x_coco.py @@ -0,0 +1,57 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + type='GFL', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5), + bbox_head=dict( + type='GFLHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + octave_base_scale=8, + scales_per_octave=1, + strides=[8, 16, 32, 64, 128]), + loss_cls=dict( + type='QualityFocalLoss', + use_sigmoid=True, + beta=2.0, + loss_weight=1.0), + loss_dfl=dict(type='DistributionFocalLoss', loss_weight=0.25), + reg_max=16, + loss_bbox=dict(type='GIoULoss', loss_weight=2.0)), + # training and testing settings + train_cfg=dict( + assigner=dict(type='ATSSAssigner', topk=9), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/gfl/gfl_r50_fpn_mstrain_2x_coco.py b/configs/gfl/gfl_r50_fpn_mstrain_2x_coco.py new file mode 100644 index 0000000..b8be601 --- /dev/null +++ b/configs/gfl/gfl_r50_fpn_mstrain_2x_coco.py @@ -0,0 +1,22 @@ +_base_ = './gfl_r50_fpn_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) +# multi-scale training +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 480), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +data = dict(train=dict(pipeline=train_pipeline)) diff --git a/configs/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py b/configs/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py new file mode 100644 index 0000000..a2370e2 --- /dev/null +++ b/configs/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py @@ -0,0 +1,17 @@ +_base_ = './gfl_r50_fpn_mstrain_2x_coco.py' +model = dict( + type='GFL', + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, False, True, True), + norm_eval=True, + style='pytorch')) diff --git a/configs/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco.py b/configs/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco.py new file mode 100644 index 0000000..4e00a05 --- /dev/null +++ b/configs/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco.py @@ -0,0 +1,15 @@ +_base_ = './gfl_r50_fpn_mstrain_2x_coco.py' +model = dict( + type='GFL', + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch')) diff --git a/configs/ghm/README.md b/configs/ghm/README.md new file mode 100644 index 0000000..6a70bcd --- /dev/null +++ b/configs/ghm/README.md @@ -0,0 +1,23 @@ +# Gradient Harmonized Single-stage Detector + +## Introduction + +[ALGORITHM] + +``` +@inproceedings{li2019gradient, + title={Gradient Harmonized Single-stage Detector}, + author={Li, Buyu and Liu, Yu and Wang, Xiaogang}, + booktitle={AAAI Conference on Artificial Intelligence}, + year={2019} +} +``` + +## Results and Models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +| R-50-FPN | pytorch | 1x | 4.0 | 3.3 | 37.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r50_fpn_1x_coco/retinanet_ghm_r50_fpn_1x_coco_20200130-a437fda3.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r50_fpn_1x_coco/retinanet_ghm_r50_fpn_1x_coco_20200130_004213.log.json) | +| R-101-FPN | pytorch | 1x | 6.0 | 4.4 | 39.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ghm/retinanet_ghm_r101_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r101_fpn_1x_coco/retinanet_ghm_r101_fpn_1x_coco_20200130-c148ee8f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r101_fpn_1x_coco/retinanet_ghm_r101_fpn_1x_coco_20200130_145259.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 7.2 | 5.1 | 40.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco/retinanet_ghm_x101_32x4d_fpn_1x_coco_20200131-e4333bd0.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco/retinanet_ghm_x101_32x4d_fpn_1x_coco_20200131_113653.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 10.3 | 5.2 | 41.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco/retinanet_ghm_x101_64x4d_fpn_1x_coco_20200131-dd381cef.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco/retinanet_ghm_x101_64x4d_fpn_1x_coco_20200131_113723.log.json) | diff --git a/configs/ghm/retinanet_ghm_r101_fpn_1x_coco.py b/configs/ghm/retinanet_ghm_r101_fpn_1x_coco.py new file mode 100644 index 0000000..18f899a --- /dev/null +++ b/configs/ghm/retinanet_ghm_r101_fpn_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './retinanet_ghm_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py b/configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py new file mode 100644 index 0000000..61b9751 --- /dev/null +++ b/configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py @@ -0,0 +1,19 @@ +_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py' +model = dict( + bbox_head=dict( + loss_cls=dict( + _delete_=True, + type='GHMC', + bins=30, + momentum=0.75, + use_sigmoid=True, + loss_weight=1.0), + loss_bbox=dict( + _delete_=True, + type='GHMR', + mu=0.02, + bins=10, + momentum=0.7, + loss_weight=10.0))) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco.py b/configs/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..a89fc13 --- /dev/null +++ b/configs/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './retinanet_ghm_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco.py b/configs/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..88013f5 --- /dev/null +++ b/configs/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './retinanet_ghm_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/gn+ws/README.md b/configs/gn+ws/README.md new file mode 100644 index 0000000..988fb13 --- /dev/null +++ b/configs/gn+ws/README.md @@ -0,0 +1,44 @@ +# Weight Standardization + +## Introduction + +[ALGORITHM] + +``` +@article{weightstandardization, + author = {Siyuan Qiao and Huiyu Wang and Chenxi Liu and Wei Shen and Alan Yuille}, + title = {Weight Standardization}, + journal = {arXiv preprint arXiv:1903.10520}, + year = {2019}, +} +``` + +## Results and Models + +Faster R-CNN + +| Backbone | Style | Normalization | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:---------:|:-------:|:-------------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| +| R-50-FPN | pytorch | GN+WS | 1x | 5.9 | 11.7 | 39.7 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco/faster_rcnn_r50_fpn_gn_ws-all_1x_coco_20200130-613d9fe2.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco/faster_rcnn_r50_fpn_gn_ws-all_1x_coco_20200130_210936.log.json) | +| R-101-FPN | pytorch | GN+WS | 1x | 8.9 | 9.0 | 41.7 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco/faster_rcnn_r101_fpn_gn_ws-all_1x_coco_20200205-a93b0d75.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco/faster_rcnn_r101_fpn_gn_ws-all_1x_coco_20200205_232146.log.json) | +| X-50-32x4d-FPN | pytorch | GN+WS | 1x | 7.0 | 10.3 | 40.7 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco_20200203-839c5d9d.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco_20200203_220113.log.json) | +| X-101-32x4d-FPN | pytorch | GN+WS | 1x | 10.8 | 7.6 | 42.1 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco_20200212-27da1bc2.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco_20200212_195302.log.json) | + +Mask R-CNN + +| Backbone | Style | Normalization | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:---------:|:-------:|:-------------:|:---------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| +| R-50-FPN | pytorch | GN+WS | 2x | 7.3 | 10.5 | 40.6 | 36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco/mask_rcnn_r50_fpn_gn_ws-all_2x_coco_20200226-16acb762.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco/mask_rcnn_r50_fpn_gn_ws-all_2x_coco_20200226_062128.log.json) | +| R-101-FPN | pytorch | GN+WS | 2x | 10.3 | 8.6 | 42.0 | 37.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco/mask_rcnn_r101_fpn_gn_ws-all_2x_coco_20200212-ea357cd9.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco/mask_rcnn_r101_fpn_gn_ws-all_2x_coco_20200212_213627.log.json) | +| X-50-32x4d-FPN | pytorch | GN+WS | 2x | 8.4 | 9.3 | 41.1 | 37.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco_20200216-649fdb6f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco_20200216_201500.log.json) | +| X-101-32x4d-FPN | pytorch | GN+WS | 2x | 12.2 | 7.1 | 42.1 | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco_20200319-33fb95b5.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco_20200319_104101.log.json) | +| R-50-FPN | pytorch | GN+WS | 20-23-24e | 7.3 | - | 41.1 | 37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco_20200213-487d1283.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco_20200213_035123.log.json) | +| R-101-FPN | pytorch | GN+WS | 20-23-24e | 10.3 | - | 43.1 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco_20200213-57b5a50f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco_20200213_130142.log.json) | +| X-50-32x4d-FPN | pytorch | GN+WS | 20-23-24e | 8.4 | - | 42.1 | 38.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200226-969bcb2c.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200226_093732.log.json) | +| X-101-32x4d-FPN | pytorch | GN+WS | 20-23-24e | 12.2 | - | 42.7 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200316-e6cd35ef.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200316_013741.log.json) | + +Note: + +- GN+WS requires about 5% more memory than GN, and it is only 5% slower than GN. +- In the paper, a 20-23-24e lr schedule is used instead of 2x. +- The X-50-GN and X-101-GN pretrained models are also shared by the authors. diff --git a/configs/gn+ws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco.py b/configs/gn+ws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco.py new file mode 100644 index 0000000..a5f6bd2 --- /dev/null +++ b/configs/gn+ws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = './faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py' +model = dict( + pretrained='open-mmlab://jhu/resnet101_gn_ws', backbone=dict(depth=101)) diff --git a/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py b/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py new file mode 100644 index 0000000..497267b --- /dev/null +++ b/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +conv_cfg = dict(type='ConvWS') +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + pretrained='open-mmlab://jhu/resnet50_gn_ws', + backbone=dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg), + neck=dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg), + roi_head=dict( + bbox_head=dict( + type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg))) diff --git a/configs/gn+ws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco.py b/configs/gn+ws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco.py new file mode 100644 index 0000000..061ca69 --- /dev/null +++ b/configs/gn+ws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = './faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py' +conv_cfg = dict(type='ConvWS') +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + pretrained='open-mmlab://jhu/resnext101_32x4d_gn_ws', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch', + conv_cfg=conv_cfg, + norm_cfg=norm_cfg)) diff --git a/configs/gn+ws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco.py b/configs/gn+ws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco.py new file mode 100644 index 0000000..1268980 --- /dev/null +++ b/configs/gn+ws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = './faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py' +conv_cfg = dict(type='ConvWS') +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + pretrained='open-mmlab://jhu/resnext50_32x4d_gn_ws', + backbone=dict( + type='ResNeXt', + depth=50, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch', + conv_cfg=conv_cfg, + norm_cfg=norm_cfg)) diff --git a/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py b/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py new file mode 100644 index 0000000..a790d93 --- /dev/null +++ b/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py @@ -0,0 +1,4 @@ +_base_ = './mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py' +# learning policy +lr_config = dict(step=[20, 23]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py b/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py new file mode 100644 index 0000000..4be6817 --- /dev/null +++ b/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py @@ -0,0 +1,3 @@ +_base_ = './mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py' +model = dict( + pretrained='open-mmlab://jhu/resnet101_gn_ws', backbone=dict(depth=101)) diff --git a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py new file mode 100644 index 0000000..5516808 --- /dev/null +++ b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py @@ -0,0 +1,4 @@ +_base_ = './mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py' +# learning policy +lr_config = dict(step=[20, 23]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py new file mode 100644 index 0000000..b83e7b5 --- /dev/null +++ b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py @@ -0,0 +1,17 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +conv_cfg = dict(type='ConvWS') +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + pretrained='open-mmlab://jhu/resnet50_gn_ws', + backbone=dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg), + neck=dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg), + roi_head=dict( + bbox_head=dict( + type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg), + mask_head=dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg))) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py b/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py new file mode 100644 index 0000000..cfa14c9 --- /dev/null +++ b/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py @@ -0,0 +1,4 @@ +_base_ = './mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py' +# learning policy +lr_config = dict(step=[20, 23]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py b/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py new file mode 100644 index 0000000..dbe8877 --- /dev/null +++ b/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py @@ -0,0 +1,17 @@ +_base_ = './mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py' +# model settings +conv_cfg = dict(type='ConvWS') +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + pretrained='open-mmlab://jhu/resnext101_32x4d_gn_ws', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch', + conv_cfg=conv_cfg, + norm_cfg=norm_cfg)) diff --git a/configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py b/configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py new file mode 100644 index 0000000..79ce0ad --- /dev/null +++ b/configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py @@ -0,0 +1,4 @@ +_base_ = './mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco.py' +# learning policy +lr_config = dict(step=[20, 23]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco.py b/configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco.py new file mode 100644 index 0000000..9bbc86e --- /dev/null +++ b/configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco.py @@ -0,0 +1,17 @@ +_base_ = './mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py' +# model settings +conv_cfg = dict(type='ConvWS') +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + pretrained='open-mmlab://jhu/resnext50_32x4d_gn_ws', + backbone=dict( + type='ResNeXt', + depth=50, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch', + conv_cfg=conv_cfg, + norm_cfg=norm_cfg)) diff --git a/configs/gn/README.md b/configs/gn/README.md new file mode 100644 index 0000000..48ee7ad --- /dev/null +++ b/configs/gn/README.md @@ -0,0 +1,31 @@ +# Group Normalization + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{wu2018group, + title={Group Normalization}, + author={Wu, Yuxin and He, Kaiming}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2018} +} +``` + +## Results and Models + +| Backbone | model | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:-------------:|:----------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| +| R-50-FPN (d) | Mask R-CNN | 2x | 7.1 | 11.0 | 40.2 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_2x_coco/mask_rcnn_r50_fpn_gn-all_2x_coco_20200206-8eee02a6.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_2x_coco/mask_rcnn_r50_fpn_gn-all_2x_coco_20200206_050355.log.json) | +| R-50-FPN (d) | Mask R-CNN | 3x | 7.1 | - | 40.5 | 36.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_3x_coco/mask_rcnn_r50_fpn_gn-all_3x_coco_20200214-8b23b1e5.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_3x_coco/mask_rcnn_r50_fpn_gn-all_3x_coco_20200214_063512.log.json) | +| R-101-FPN (d) | Mask R-CNN | 2x | 9.9 | 9.0 | 41.9 | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r101_fpn_gn-all_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_2x_coco/mask_rcnn_r101_fpn_gn-all_2x_coco_20200205-d96b1b50.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_2x_coco/mask_rcnn_r101_fpn_gn-all_2x_coco_20200205_234402.log.json) | +| R-101-FPN (d) | Mask R-CNN | 3x | 9.9 | | 42.1 | 38.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_3x_coco/mask_rcnn_r101_fpn_gn-all_3x_coco_20200513_181609-0df864f4.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_3x_coco/mask_rcnn_r101_fpn_gn-all_3x_coco_20200513_181609.log.json) | +| R-50-FPN (c) | Mask R-CNN | 2x | 7.1 | 10.9 | 40.0 | 36.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco_20200207-20d3e849.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco_20200207_225832.log.json) | +| R-50-FPN (c) | Mask R-CNN | 3x | 7.1 | - | 40.1 | 36.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco_20200225-542aefbc.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco_20200225_235135.log.json) | + +**Notes:** + +- (d) means pretrained model converted from Detectron, and (c) means the contributed model pretrained by [@thangvubk](https://github.com/thangvubk). +- The `3x` schedule is epoch [28, 34, 36]. +- **Memory, Train/Inf time is outdated.** diff --git a/configs/gn/mask_rcnn_r101_fpn_gn-all_2x_coco.py b/configs/gn/mask_rcnn_r101_fpn_gn-all_2x_coco.py new file mode 100644 index 0000000..0fcc558 --- /dev/null +++ b/configs/gn/mask_rcnn_r101_fpn_gn-all_2x_coco.py @@ -0,0 +1,3 @@ +_base_ = './mask_rcnn_r50_fpn_gn-all_2x_coco.py' +model = dict( + pretrained='open-mmlab://detectron/resnet101_gn', backbone=dict(depth=101)) diff --git a/configs/gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py b/configs/gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py new file mode 100644 index 0000000..12a9d17 --- /dev/null +++ b/configs/gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py @@ -0,0 +1,5 @@ +_base_ = './mask_rcnn_r101_fpn_gn-all_2x_coco.py' + +# learning policy +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py b/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py new file mode 100644 index 0000000..9c85d26 --- /dev/null +++ b/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py @@ -0,0 +1,46 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + pretrained='open-mmlab://detectron/resnet50_gn', + backbone=dict(norm_cfg=norm_cfg), + neck=dict(norm_cfg=norm_cfg), + roi_head=dict( + bbox_head=dict( + type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + norm_cfg=norm_cfg), + mask_head=dict(norm_cfg=norm_cfg))) +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py b/configs/gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py new file mode 100644 index 0000000..f917719 --- /dev/null +++ b/configs/gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py @@ -0,0 +1,5 @@ +_base_ = './mask_rcnn_r50_fpn_gn-all_2x_coco.py' + +# learning policy +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py b/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py new file mode 100644 index 0000000..89caaaf --- /dev/null +++ b/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py @@ -0,0 +1,15 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + pretrained='open-mmlab://contrib/resnet50_gn', + backbone=dict(norm_cfg=norm_cfg), + neck=dict(norm_cfg=norm_cfg), + roi_head=dict( + bbox_head=dict( + type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + norm_cfg=norm_cfg), + mask_head=dict(norm_cfg=norm_cfg))) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py b/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py new file mode 100644 index 0000000..66834f0 --- /dev/null +++ b/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py @@ -0,0 +1,5 @@ +_base_ = './mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py' + +# learning policy +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/grid_rcnn/README.md b/configs/grid_rcnn/README.md new file mode 100644 index 0000000..a1e8352 --- /dev/null +++ b/configs/grid_rcnn/README.md @@ -0,0 +1,35 @@ +# Grid R-CNN + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{lu2019grid, + title={Grid r-cnn}, + author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie}, + booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, + year={2019} +} + +@article{lu2019grid, + title={Grid R-CNN Plus: Faster and Better}, + author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie}, + journal={arXiv preprint arXiv:1906.05688}, + year={2019} +} +``` + +## Results and Models + +| Backbone | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:-----------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | 2x | 5.1 | 15.0 | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco/grid_rcnn_r50_fpn_gn-head_2x_coco_20200130-6cca8223.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco/grid_rcnn_r50_fpn_gn-head_2x_coco_20200130_221140.log.json) | +| R-101 | 2x | 7.0 | 12.6 | 41.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco/grid_rcnn_r101_fpn_gn-head_2x_coco_20200309-d6eca030.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco/grid_rcnn_r101_fpn_gn-head_2x_coco_20200309_164224.log.json) | +| X-101-32x4d | 2x | 8.3 | 10.8 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco_20200130-d8f0e3ff.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco_20200130_215413.log.json) | +| X-101-64x4d | 2x | 11.3 | 7.7 | 43.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco_20200204-ec76a754.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco_20200204_080641.log.json) | + +**Notes:** + +- All models are trained with 8 GPUs instead of 32 GPUs in the original paper. +- The warming up lasts for 1 epoch and `2x` here indicates 25 epochs. diff --git a/configs/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco.py b/configs/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco.py new file mode 100644 index 0000000..cf8b648 --- /dev/null +++ b/configs/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco.py @@ -0,0 +1,3 @@ +_base_ = './grid_rcnn_r50_fpn_gn-head_2x_coco.py' + +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_1x_coco.py b/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_1x_coco.py new file mode 100644 index 0000000..4aa00ec --- /dev/null +++ b/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = ['grid_rcnn_r50_fpn_gn-head_2x_coco.py'] +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[8, 11]) +checkpoint_config = dict(interval=1) +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=12) diff --git a/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py b/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py new file mode 100644 index 0000000..6ed5bcb --- /dev/null +++ b/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py @@ -0,0 +1,131 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py' +] +# model settings +model = dict( + type='GridRCNN', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), + roi_head=dict( + type='GridRoIHead', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='Shared2FCBBoxHead', + with_reg=False, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False), + grid_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + grid_head=dict( + type='GridHead', + grid_points=9, + num_convs=8, + in_channels=256, + point_feat_channels=64, + norm_cfg=dict(type='GN', num_groups=36), + loss_grid=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=15))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_radius=1, + pos_weight=-1, + max_num_grid=192, + debug=False)), + test_cfg=dict( + rpn=dict( + nms_pre=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.03, + nms=dict(type='nms', iou_threshold=0.3), + max_per_img=100))) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=3665, + warmup_ratio=1.0 / 80, + step=[17, 23]) +runner = dict(type='EpochBasedRunner', max_epochs=25) diff --git a/configs/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py b/configs/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py new file mode 100644 index 0000000..14c1eb2 --- /dev/null +++ b/configs/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py @@ -0,0 +1,23 @@ +_base_ = './grid_rcnn_r50_fpn_gn-head_2x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch')) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=3665, + warmup_ratio=1.0 / 80, + step=[17, 23]) +runner = dict(type='EpochBasedRunner', max_epochs=25) diff --git a/configs/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco.py b/configs/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco.py new file mode 100644 index 0000000..2fdc53c --- /dev/null +++ b/configs/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco.py @@ -0,0 +1,12 @@ +_base_ = './grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch')) diff --git a/configs/groie/README.md b/configs/groie/README.md new file mode 100644 index 0000000..490349d --- /dev/null +++ b/configs/groie/README.md @@ -0,0 +1,65 @@ +# GRoIE + +## A novel Region of Interest Extraction Layer for Instance Segmentation + +By Leonardo Rossi, Akbar Karimi and Andrea Prati from +[IMPLab](http://implab.ce.unipr.it/). + +We provide configs to reproduce the results in the paper for +"*A novel Region of Interest Extraction Layer for Instance Segmentation*" +on COCO object detection. + +## Introduction + +[ALGORITHM] + +This paper is motivated by the need to overcome to the limitations of existing +RoI extractors which select only one (the best) layer from FPN. + +Our intuition is that all the layers of FPN retain useful information. + +Therefore, the proposed layer (called Generic RoI Extractor - **GRoIE**) +introduces non-local building blocks and attention mechanisms to boost the +performance. + +## Results and models + +The results on COCO 2017 minival (5k images) are shown in the below table. +You can find +[here](https://drive.google.com/drive/folders/19ssstbq_h0Z1cgxHmJYFO8s1arf3QJbT) +the trained models. + +### Application of GRoIE to different architectures + +| Backbone | Method | Lr schd | box AP | mask AP | Config | Download| +| :-------: | :--------------: | :-----: | :----: | :-----: | :-------:| :--------:| +| R-50-FPN | Faster Original | 1x | 37.4 | | [config](../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | +| R-50-FPN | + GRoIE | 1x | 38.3 | | [config](./faster_rcnn_r50_fpn_groie_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/groie/faster_rcnn_r50_fpn_groie_1x_coco/faster_rcnn_r50_fpn_groie_1x_coco_20200604_211715-66ee9516.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/groie/faster_rcnn_r50_fpn_groie_1x_coco/faster_rcnn_r50_fpn_groie_1x_coco_20200604_211715.log.json) | +| R-50-FPN | Grid R-CNN | 1x | 39.1 | | [config](./grid_rcnn_r50_fpn_gn-head_1x_coco.py)| [model](http://download.openmmlab.com/mmdetection/v2.0/groie/grid_rcnn_r50_fpn_gn-head_1x_coco/grid_rcnn_r50_fpn_gn-head_1x_coco_20200605_202059-64f00ee8.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/groie/grid_rcnn_r50_fpn_gn-head_1x_coco/grid_rcnn_r50_fpn_gn-head_1x_coco_20200605_202059.log.json) | +| R-50-FPN | + GRoIE | 1x | | | [config](./grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py)|| +| R-50-FPN | Mask R-CNN | 1x | 38.2 | 34.7 | [config](../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py)| [model](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205_050542.log.json) | +| R-50-FPN | + GRoIE | 1x | 39.0 | 36.0 | [config](./mask_rcnn_r50_fpn_groie_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_groie_1x_coco/mask_rcnn_r50_fpn_groie_1x_coco_20200604_211715-50d90c74.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_groie_1x_coco/mask_rcnn_r50_fpn_groie_1x_coco_20200604_211715.log.json) | +| R-50-FPN | GC-Net | 1x | 40.7 | 36.5 | [config](../gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202-50b90e5c.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202_085547.log.json) | +| R-50-FPN | + GRoIE | 1x | 41.0 | 37.8 | [config](./mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py) |[model](http://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200604_211715-42eb79e1.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200604_211715-42eb79e1.pth) | +| R-101-FPN | GC-Net | 1x | 42.2 | 37.8 | [config](../gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206-8407a3f0.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206_142508.log.json) | +| R-101-FPN | + GRoIE | 1x | | | [config](./mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py)| [model](http://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200607_224507-8daae01c.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200607_224507.log.json) | + +## Citation + +If you use this work or benchmark in your research, please cite this project. + +```latex +@misc{rossi2020novel, + title={A novel Region of Interest Extraction Layer for Instance Segmentation}, + author={Leonardo Rossi and Akbar Karimi and Andrea Prati}, + year={2020}, + eprint={2004.13665}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +## Contact + +The implementation of GROI is currently maintained by +[Leonardo Rossi](https://github.com/hachreak/). diff --git a/configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py b/configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py new file mode 100644 index 0000000..0fc528b --- /dev/null +++ b/configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py @@ -0,0 +1,25 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +# model settings +model = dict( + roi_head=dict( + bbox_roi_extractor=dict( + type='GenericRoIExtractor', + aggregation='sum', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + pre_cfg=dict( + type='ConvModule', + in_channels=256, + out_channels=256, + kernel_size=5, + padding=2, + inplace=False, + ), + post_cfg=dict( + type='GeneralizedAttention', + in_channels=256, + spatial_range=-1, + num_heads=6, + attention_type='0100', + kv_stride=2)))) diff --git a/configs/groie/grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py b/configs/groie/grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py new file mode 100644 index 0000000..8e4b4ab --- /dev/null +++ b/configs/groie/grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py @@ -0,0 +1,45 @@ +_base_ = '../grid_rcnn/grid_rcnn_r50_fpn_gn-head_1x_coco.py' +# model settings +model = dict( + roi_head=dict( + bbox_roi_extractor=dict( + type='GenericRoIExtractor', + aggregation='sum', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + pre_cfg=dict( + type='ConvModule', + in_channels=256, + out_channels=256, + kernel_size=5, + padding=2, + inplace=False, + ), + post_cfg=dict( + type='GeneralizedAttention', + in_channels=256, + spatial_range=-1, + num_heads=6, + attention_type='0100', + kv_stride=2)), + grid_roi_extractor=dict( + type='GenericRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + pre_cfg=dict( + type='ConvModule', + in_channels=256, + out_channels=256, + kernel_size=5, + padding=2, + inplace=False, + ), + post_cfg=dict( + type='GeneralizedAttention', + in_channels=256, + spatial_range=-1, + num_heads=6, + attention_type='0100', + kv_stride=2)))) diff --git a/configs/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py b/configs/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py new file mode 100644 index 0000000..8b83722 --- /dev/null +++ b/configs/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py @@ -0,0 +1,45 @@ +_base_ = '../gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py' +# model settings +model = dict( + roi_head=dict( + bbox_roi_extractor=dict( + type='GenericRoIExtractor', + aggregation='sum', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + pre_cfg=dict( + type='ConvModule', + in_channels=256, + out_channels=256, + kernel_size=5, + padding=2, + inplace=False, + ), + post_cfg=dict( + type='GeneralizedAttention', + in_channels=256, + spatial_range=-1, + num_heads=6, + attention_type='0100', + kv_stride=2)), + mask_roi_extractor=dict( + type='GenericRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + pre_cfg=dict( + type='ConvModule', + in_channels=256, + out_channels=256, + kernel_size=5, + padding=2, + inplace=False, + ), + post_cfg=dict( + type='GeneralizedAttention', + in_channels=256, + spatial_range=-1, + num_heads=6, + attention_type='0100', + kv_stride=2)))) diff --git a/configs/groie/mask_rcnn_r50_fpn_groie_1x_coco.py b/configs/groie/mask_rcnn_r50_fpn_groie_1x_coco.py new file mode 100644 index 0000000..81dfb48 --- /dev/null +++ b/configs/groie/mask_rcnn_r50_fpn_groie_1x_coco.py @@ -0,0 +1,45 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +# model settings +model = dict( + roi_head=dict( + bbox_roi_extractor=dict( + type='GenericRoIExtractor', + aggregation='sum', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + pre_cfg=dict( + type='ConvModule', + in_channels=256, + out_channels=256, + kernel_size=5, + padding=2, + inplace=False, + ), + post_cfg=dict( + type='GeneralizedAttention', + in_channels=256, + spatial_range=-1, + num_heads=6, + attention_type='0100', + kv_stride=2)), + mask_roi_extractor=dict( + type='GenericRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + pre_cfg=dict( + type='ConvModule', + in_channels=256, + out_channels=256, + kernel_size=5, + padding=2, + inplace=False, + ), + post_cfg=dict( + type='GeneralizedAttention', + in_channels=256, + spatial_range=-1, + num_heads=6, + attention_type='0100', + kv_stride=2)))) diff --git a/configs/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py b/configs/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py new file mode 100644 index 0000000..852c5ca --- /dev/null +++ b/configs/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py @@ -0,0 +1,45 @@ +_base_ = '../gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py' +# model settings +model = dict( + roi_head=dict( + bbox_roi_extractor=dict( + type='GenericRoIExtractor', + aggregation='sum', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + pre_cfg=dict( + type='ConvModule', + in_channels=256, + out_channels=256, + kernel_size=5, + padding=2, + inplace=False, + ), + post_cfg=dict( + type='GeneralizedAttention', + in_channels=256, + spatial_range=-1, + num_heads=6, + attention_type='0100', + kv_stride=2)), + mask_roi_extractor=dict( + type='GenericRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + pre_cfg=dict( + type='ConvModule', + in_channels=256, + out_channels=256, + kernel_size=5, + padding=2, + inplace=False, + ), + post_cfg=dict( + type='GeneralizedAttention', + in_channels=256, + spatial_range=-1, + num_heads=6, + attention_type='0100', + kv_stride=2)))) diff --git a/configs/guided_anchoring/README.md b/configs/guided_anchoring/README.md new file mode 100644 index 0000000..4c3c86c --- /dev/null +++ b/configs/guided_anchoring/README.md @@ -0,0 +1,49 @@ +# Region Proposal by Guided Anchoring + +## Introduction + +[ALGORITHM] + +We provide config files to reproduce the results in the CVPR 2019 paper for [Region Proposal by Guided Anchoring](https://arxiv.org/abs/1901.03278). + +```latex +@inproceedings{wang2019region, + title={Region Proposal by Guided Anchoring}, + author={Jiaqi Wang and Kai Chen and Shuo Yang and Chen Change Loy and Dahua Lin}, + booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, + year={2019} +} +``` + +## Results and Models + +The results on COCO 2017 val is shown in the below table. (results on test-dev are usually slightly higher than val). + +| Method | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | AR 1000 | Config | Download | +| :----: | :-------------: | :-----: | :-----: | :------: | :------------: | :-----: | :------: | :--------: | +| GA-RPN | R-50-FPN | caffe | 1x | 5.3 | 15.8 | 68.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco/ga_rpn_r50_caffe_fpn_1x_coco_20200531-899008a6.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco/ga_rpn_r50_caffe_fpn_1x_coco_20200531_011819.log.json) | +| GA-RPN | R-101-FPN | caffe | 1x | 7.3 | 13.0 | 69.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco/ga_rpn_r101_caffe_fpn_1x_coco_20200531-ca9ba8fb.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco/ga_rpn_r101_caffe_fpn_1x_coco_20200531_011812.log.json) | +| GA-RPN | X-101-32x4d-FPN | pytorch | 1x | 8.5 | 10.0 | 70.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco/ga_rpn_x101_32x4d_fpn_1x_coco_20200220-c28d1b18.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco/ga_rpn_x101_32x4d_fpn_1x_coco_20200220_221326.log.json) | +| GA-RPN | X-101-64x4d-FPN | pytorch | 1x | 7.1 | 7.5 | 71.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco/ga_rpn_x101_64x4d_fpn_1x_coco_20200225-3c6e1aa2.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco/ga_rpn_x101_64x4d_fpn_1x_coco_20200225_152704.log.json) | + +| Method | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :------------: | :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +| GA-Faster RCNN | R-50-FPN | caffe | 1x | 5.5 | | 39.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco/ga_faster_r50_caffe_fpn_1x_coco_20200702_000718-a11ccfe6.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco/ga_faster_r50_caffe_fpn_1x_coco_20200702_000718.log.json) | +| GA-Faster RCNN | R-101-FPN | caffe | 1x | 7.5 | | 41.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco/ga_faster_r101_caffe_fpn_1x_coco_bbox_mAP-0.415_20200505_115528-fb82e499.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco/ga_faster_r101_caffe_fpn_1x_coco_20200505_115528.log.json) | +| GA-Faster RCNN | X-101-32x4d-FPN | pytorch | 1x | 8.7 | 9.7 | 43.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco/ga_faster_x101_32x4d_fpn_1x_coco_20200215-1ded9da3.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco/ga_faster_x101_32x4d_fpn_1x_coco_20200215_184547.log.json) | +| GA-Faster RCNN | X-101-64x4d-FPN | pytorch | 1x | 11.8 | 7.3 | 43.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco/ga_faster_x101_64x4d_fpn_1x_coco_20200215-0fa7bde7.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco/ga_faster_x101_64x4d_fpn_1x_coco_20200215_104455.log.json) | +| GA-RetinaNet | R-50-FPN | caffe | 1x | 3.5 | 16.8 | 36.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco/ga_retinanet_r50_caffe_fpn_1x_coco_20201020-39581c6f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco/ga_retinanet_r50_caffe_fpn_1x_coco_20201020_225450.log.json) | +| GA-RetinaNet | R-101-FPN | caffe | 1x | 5.5 | 12.9 | 39.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco/ga_retinanet_r101_caffe_fpn_1x_coco_20200531-6266453c.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco/ga_retinanet_r101_caffe_fpn_1x_coco_20200531_012847.log.json) | +| GA-RetinaNet | X-101-32x4d-FPN | pytorch | 1x | 6.9 | 10.6 | 40.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco/ga_retinanet_x101_32x4d_fpn_1x_coco_20200219-40c56caa.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco/ga_retinanet_x101_32x4d_fpn_1x_coco_20200219_223025.log.json) | +| GA-RetinaNet | X-101-64x4d-FPN | pytorch | 1x | 9.9 | 7.7 | 41.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco/ga_retinanet_x101_64x4d_fpn_1x_coco_20200226-ef9f7f1f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco/ga_retinanet_x101_64x4d_fpn_1x_coco_20200226_221123.log.json) | + +- In the Guided Anchoring paper, `score_thr` is set to 0.001 in Fast/Faster RCNN and 0.05 in RetinaNet for both baselines and Guided Anchoring. + +- Performance on COCO test-dev benchmark are shown as follows. + +| Method | Backbone | Style | Lr schd | Aug Train | Score thr | AP | AP_50 | AP_75 | AP_small | AP_medium | AP_large | Download | +| :------------: | :-------: | :---: | :-----: | :-------: | :-------: | :---: | :---: | :---: | :------: | :-------: | :------: | :------: | +| GA-Faster RCNN | R-101-FPN | caffe | 1x | F | 0.05 | | | | | | | | +| GA-Faster RCNN | R-101-FPN | caffe | 1x | F | 0.001 | | | | | | | | +| GA-RetinaNet | R-101-FPN | caffe | 1x | F | 0.05 | | | | | | | | +| GA-RetinaNet | R-101-FPN | caffe | 2x | T | 0.05 | | | | | | | | diff --git a/configs/guided_anchoring/ga_fast_r50_caffe_fpn_1x_coco.py b/configs/guided_anchoring/ga_fast_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..e15bc29 --- /dev/null +++ b/configs/guided_anchoring/ga_fast_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,63 @@ +_base_ = '../fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='caffe'), + roi_head=dict( + bbox_head=dict(bbox_coder=dict(target_stds=[0.05, 0.05, 0.1, 0.1]))), + # model training and testing settings + train_cfg=dict( + rcnn=dict( + assigner=dict(pos_iou_thr=0.6, neg_iou_thr=0.6, min_pos_iou=0.6), + sampler=dict(num=256))), + test_cfg=dict(rcnn=dict(score_thr=1e-3))) +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=300), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=None), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img', 'proposals']), + ]) +] +data = dict( + train=dict( + proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_train2017.pkl', + pipeline=train_pipeline), + val=dict( + proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_val2017.pkl', + pipeline=test_pipeline), + test=dict( + proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_val2017.pkl', + pipeline=test_pipeline)) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco.py b/configs/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..f438a47 --- /dev/null +++ b/configs/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = './ga_faster_r50_caffe_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet101_caffe', + backbone=dict(depth=101)) diff --git a/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py b/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..b0add92 --- /dev/null +++ b/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,65 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py' +model = dict( + rpn_head=dict( + _delete_=True, + type='GARPNHead', + in_channels=256, + feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=8, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + square_anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + scales=[8], + strides=[4, 8, 16, 32, 64]), + anchor_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.07, 0.07, 0.14, 0.14]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.07, 0.07, 0.11, 0.11]), + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), + roi_head=dict( + bbox_head=dict(bbox_coder=dict(target_stds=[0.05, 0.05, 0.1, 0.1]))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + ga_sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + center_ratio=0.2, + ignore_ratio=0.5), + rpn_proposal=dict(nms_post=1000, max_per_img=300), + rcnn=dict( + assigner=dict(pos_iou_thr=0.6, neg_iou_thr=0.6, min_pos_iou=0.6), + sampler=dict(type='RandomSampler', num=256))), + test_cfg=dict( + rpn=dict(nms_post=1000, max_per_img=300), rcnn=dict(score_thr=1e-3))) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/guided_anchoring/ga_faster_r50_fpn_1x_coco.py b/configs/guided_anchoring/ga_faster_r50_fpn_1x_coco.py new file mode 100644 index 0000000..e3d8238 --- /dev/null +++ b/configs/guided_anchoring/ga_faster_r50_fpn_1x_coco.py @@ -0,0 +1,65 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + rpn_head=dict( + _delete_=True, + type='GARPNHead', + in_channels=256, + feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=8, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + square_anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + scales=[8], + strides=[4, 8, 16, 32, 64]), + anchor_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.07, 0.07, 0.14, 0.14]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.07, 0.07, 0.11, 0.11]), + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), + roi_head=dict( + bbox_head=dict(bbox_coder=dict(target_stds=[0.05, 0.05, 0.1, 0.1]))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + ga_sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + center_ratio=0.2, + ignore_ratio=0.5), + rpn_proposal=dict(nms_post=1000, max_per_img=300), + rcnn=dict( + assigner=dict(pos_iou_thr=0.6, neg_iou_thr=0.6, min_pos_iou=0.6), + sampler=dict(type='RandomSampler', num=256))), + test_cfg=dict( + rpn=dict(nms_post=1000, max_per_img=300), rcnn=dict(score_thr=1e-3))) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco.py b/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..c9a035f --- /dev/null +++ b/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './ga_faster_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco.py b/configs/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..87bbfdc --- /dev/null +++ b/configs/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './ga_faster_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco.py b/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..0048965 --- /dev/null +++ b/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = './ga_retinanet_r50_caffe_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet101_caffe', + backbone=dict(depth=101)) diff --git a/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_mstrain_2x.py b/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_mstrain_2x.py new file mode 100644 index 0000000..85fa2f5 --- /dev/null +++ b/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_mstrain_2x.py @@ -0,0 +1,167 @@ +_base_ = '../_base_/default_runtime.py' + +# model settings +model = dict( + type='RetinaNet', + pretrained='open-mmlab://detectron2/resnet101_caffe', + backbone=dict( + type='ResNet', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='caffe'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs=True, + num_outs=5), + bbox_head=dict( + type='GARetinaHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + square_anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + scales=[4], + strides=[8, 16, 32, 64, 128]), + anchor_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=0.04, loss_weight=1.0))) +# training and testing settings +train_cfg = dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0.4, + ignore_iof_thr=-1), + ga_sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + center_ratio=0.2, + ignore_ratio=0.5, + debug=False) +test_cfg = dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 480), (1333, 960)], + keep_ratio=True, + multiscale_mode='range'), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric='bbox') +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[16, 22]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py b/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..3351201 --- /dev/null +++ b/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,62 @@ +_base_ = '../retinanet/retinanet_r50_caffe_fpn_1x_coco.py' +model = dict( + bbox_head=dict( + _delete_=True, + type='GARetinaHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + square_anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + scales=[4], + strides=[8, 16, 32, 64, 128]), + anchor_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=0.04, loss_weight=1.0)), + # training and testing settings + train_cfg=dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0.4, + ignore_iof_thr=-1), + ga_sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + assigner=dict(neg_iou_thr=0.5, min_pos_iou=0.0), + center_ratio=0.2, + ignore_ratio=0.5)) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/guided_anchoring/ga_retinanet_r50_fpn_1x_coco.py b/configs/guided_anchoring/ga_retinanet_r50_fpn_1x_coco.py new file mode 100644 index 0000000..7694723 --- /dev/null +++ b/configs/guided_anchoring/ga_retinanet_r50_fpn_1x_coco.py @@ -0,0 +1,62 @@ +_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py' +model = dict( + bbox_head=dict( + _delete_=True, + type='GARetinaHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + square_anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + scales=[4], + strides=[8, 16, 32, 64, 128]), + anchor_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=0.04, loss_weight=1.0)), + # training and testing settings + train_cfg=dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0.4, + ignore_iof_thr=-1), + ga_sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + assigner=dict(neg_iou_thr=0.5, min_pos_iou=0.0), + center_ratio=0.2, + ignore_ratio=0.5)) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco.py b/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..18daadd --- /dev/null +++ b/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './ga_retinanet_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco.py b/configs/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..1b18c2b --- /dev/null +++ b/configs/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './ga_retinanet_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco.py b/configs/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..8d15476 --- /dev/null +++ b/configs/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = './ga_rpn_r50_caffe_fpn_1x_coco.py' +# model settings +model = dict( + pretrained='open-mmlab://detectron2/resnet101_caffe', + backbone=dict(depth=101)) diff --git a/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco.py b/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..7830894 --- /dev/null +++ b/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,58 @@ +_base_ = '../rpn/rpn_r50_caffe_fpn_1x_coco.py' +model = dict( + rpn_head=dict( + _delete_=True, + type='GARPNHead', + in_channels=256, + feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=8, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + square_anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + scales=[8], + strides=[4, 8, 16, 32, 64]), + anchor_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.07, 0.07, 0.14, 0.14]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.07, 0.07, 0.11, 0.11]), + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict( + rpn=dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + ga_sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + center_ratio=0.2, + ignore_ratio=0.5)), + test_cfg=dict(rpn=dict(nms_post=1000))) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/guided_anchoring/ga_rpn_r50_fpn_1x_coco.py b/configs/guided_anchoring/ga_rpn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..27ab3e7 --- /dev/null +++ b/configs/guided_anchoring/ga_rpn_r50_fpn_1x_coco.py @@ -0,0 +1,58 @@ +_base_ = '../rpn/rpn_r50_fpn_1x_coco.py' +model = dict( + rpn_head=dict( + _delete_=True, + type='GARPNHead', + in_channels=256, + feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=8, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + square_anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + scales=[8], + strides=[4, 8, 16, 32, 64]), + anchor_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.07, 0.07, 0.14, 0.14]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.07, 0.07, 0.11, 0.11]), + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict( + rpn=dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + ga_sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + center_ratio=0.2, + ignore_ratio=0.5)), + test_cfg=dict(rpn=dict(nms_post=1000))) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco.py b/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..1e0fe49 --- /dev/null +++ b/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './ga_rpn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco.py b/configs/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..bf66b6b --- /dev/null +++ b/configs/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './ga_rpn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/hrnet/README.md b/configs/hrnet/README.md new file mode 100644 index 0000000..35b8878 --- /dev/null +++ b/configs/hrnet/README.md @@ -0,0 +1,88 @@ +# High-resolution networks (HRNets) for object detection + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{SunXLW19, + title={Deep High-Resolution Representation Learning for Human Pose Estimation}, + author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang}, + booktitle={CVPR}, + year={2019} +} + +@article{SunZJCXLMWLW19, + title={High-Resolution Representations for Labeling Pixels and Regions}, + author={Ke Sun and Yang Zhao and Borui Jiang and Tianheng Cheng and Bin Xiao + and Dong Liu and Yadong Mu and Xinggang Wang and Wenyu Liu and Jingdong Wang}, + journal = {CoRR}, + volume = {abs/1904.04514}, + year={2019} +} +``` + +## Results and Models + +### Faster R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :-------------:|:------:| :------:| :--------:| +| HRNetV2p-W18 | pytorch | 1x | 6.6 | 13.4 | 36.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco/faster_rcnn_hrnetv2p_w18_1x_coco_20200130-56651a6d.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco/faster_rcnn_hrnetv2p_w18_1x_coco_20200130_211246.log.json) | +| HRNetV2p-W18 | pytorch | 2x | 6.6 | | 38.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco/faster_rcnn_hrnetv2p_w18_2x_coco_20200702_085731-a4ec0611.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco/faster_rcnn_hrnetv2p_w18_2x_coco_20200702_085731.log.json) | +| HRNetV2p-W32 | pytorch | 1x | 9.0 | 12.4 | 40.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco/faster_rcnn_hrnetv2p_w32_1x_coco_20200130-6e286425.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco/faster_rcnn_hrnetv2p_w32_1x_coco_20200130_204442.log.json) | +| HRNetV2p-W32 | pytorch | 2x | 9.0 | | 41.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco/faster_rcnn_hrnetv2p_w32_2x_coco_20200529_015927-976a9c15.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco/faster_rcnn_hrnetv2p_w32_2x_coco_20200529_015927.log.json) | +| HRNetV2p-W40 | pytorch | 1x | 10.4 | 10.5 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco/faster_rcnn_hrnetv2p_w40_1x_coco_20200210-95c1f5ce.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco/faster_rcnn_hrnetv2p_w40_1x_coco_20200210_125315.log.json) | +| HRNetV2p-W40 | pytorch | 2x | 10.4 | | 42.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco/faster_rcnn_hrnetv2p_w40_2x_coco_20200512_161033-0f236ef4.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco/faster_rcnn_hrnetv2p_w40_2x_coco_20200512_161033.log.json) | + +### Mask R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :-------------:|:------:| :------:|:------:|:--------:| +| HRNetV2p-W18 | pytorch | 1x | 7.0 | 11.7 | 37.7 | 34.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco/mask_rcnn_hrnetv2p_w18_1x_coco_20200205-1c3d78ed.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco/mask_rcnn_hrnetv2p_w18_1x_coco_20200205_232523.log.json) | +| HRNetV2p-W18 | pytorch | 2x | 7.0 | - | 39.8 | 36.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco/mask_rcnn_hrnetv2p_w18_2x_coco_20200212-b3c825b1.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco/mask_rcnn_hrnetv2p_w18_2x_coco_20200212_134222.log.json) | +| HRNetV2p-W32 | pytorch | 1x | 9.4 | 11.3 | 41.2 | 37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco/mask_rcnn_hrnetv2p_w32_1x_coco_20200207-b29f616e.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco/mask_rcnn_hrnetv2p_w32_1x_coco_20200207_055017.log.json) | +| HRNetV2p-W32 | pytorch | 2x | 9.4 | - | 42.5 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco/mask_rcnn_hrnetv2p_w32_2x_coco_20200213-45b75b4d.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco/mask_rcnn_hrnetv2p_w32_2x_coco_20200213_150518.log.json) | +| HRNetV2p-W40 | pytorch | 1x | 10.9 | | 42.1 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco/mask_rcnn_hrnetv2p_w40_1x_coco_20200511_015646-66738b35.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco/mask_rcnn_hrnetv2p_w40_1x_coco_20200511_015646.log.json) | +| HRNetV2p-W40 | pytorch | 2x | 10.9 | | 42.8 | 38.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco/mask_rcnn_hrnetv2p_w40_2x_coco_20200512_163732-aed5e4ab.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco/mask_rcnn_hrnetv2p_w40_2x_coco_20200512_163732.log.json) | + +### Cascade R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :-------------:|:------:| :------: | :--------: | +| HRNetV2p-W18 | pytorch | 20e | 7.0 | 11.0 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco/cascade_rcnn_hrnetv2p_w18_20e_coco_20200210-434be9d7.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco/cascade_rcnn_hrnetv2p_w18_20e_coco_20200210_105632.log.json) | +| HRNetV2p-W32 | pytorch | 20e | 9.4 | 11.0 | 43.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco/cascade_rcnn_hrnetv2p_w32_20e_coco_20200208-928455a4.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco/cascade_rcnn_hrnetv2p_w32_20e_coco_20200208_160511.log.json) | +| HRNetV2p-W40 | pytorch | 20e | 10.8 | | 43.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco/cascade_rcnn_hrnetv2p_w40_20e_coco_20200512_161112-75e47b04.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco/cascade_rcnn_hrnetv2p_w40_20e_coco_20200512_161112.log.json) | + +### Cascade Mask R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :-------------:|:------:| :------:|:------:|:--------:| +| HRNetV2p-W18 | pytorch | 20e | 8.5 | 8.5 |41.6 |36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco/cascade_mask_rcnn_hrnetv2p_w18_20e_coco_20200210-b543cd2b.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco/cascade_mask_rcnn_hrnetv2p_w18_20e_coco_20200210_093149.log.json) | +| HRNetV2p-W32 | pytorch | 20e | | 8.3 |44.3 |38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco/cascade_mask_rcnn_hrnetv2p_w32_20e_coco_20200512_154043-39d9cf7b.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco/cascade_mask_rcnn_hrnetv2p_w32_20e_coco_20200512_154043.log.json) | +| HRNetV2p-W40 | pytorch | 20e | 12.5 | |45.1 |39.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco/cascade_mask_rcnn_hrnetv2p_w40_20e_coco_20200527_204922-969c4610.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco/cascade_mask_rcnn_hrnetv2p_w40_20e_coco_20200527_204922.log.json) | + +### Hybrid Task Cascade (HTC) + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :-------------:|:------:| :------:|:------:|:--------:| +| HRNetV2p-W18 | pytorch | 20e | 10.8 | 4.7 | 42.8 | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/htc_hrnetv2p_w18_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w18_20e_coco/htc_hrnetv2p_w18_20e_coco_20200210-b266988c.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w18_20e_coco/htc_hrnetv2p_w18_20e_coco_20200210_182735.log.json) | +| HRNetV2p-W32 | pytorch | 20e | 13.1 | 4.9 | 45.4 | 39.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/htc_hrnetv2p_w32_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w32_20e_coco/htc_hrnetv2p_w32_20e_coco_20200207-7639fa12.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w32_20e_coco/htc_hrnetv2p_w32_20e_coco_20200207_193153.log.json) | +| HRNetV2p-W40 | pytorch | 20e | 14.6 | | 46.4 | 40.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/htc_hrnetv2p_w40_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w40_20e_coco/htc_hrnetv2p_w40_20e_coco_20200529_183411-417c4d5b.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w40_20e_coco/htc_hrnetv2p_w40_20e_coco_20200529_183411.log.json) | + +### FCOS + +| Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:---------:|:-------:|:-------:|:--------:|:-------:|:------:|:------:|:------:|:------:|:--------:| +|HRNetV2p-W18| pytorch | Y | N | 1x | 13.0 | 12.9 | 35.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco_20201212_100710-4ad151de.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco_20201212_100710.log.json) | +|HRNetV2p-W18| pytorch | Y | N | 2x | 13.0 | - | 38.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco_20201212_101110-5c575fa5.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco_20201212_101110.log.json) | +|HRNetV2p-W32| pytorch | Y | N | 1x | 17.5 | 12.9 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco_20201211_134730-cb8055c0.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco_20201211_134730.log.json) | +|HRNetV2p-W32| pytorch | Y | N | 2x | 17.5 | - | 40.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco_20201212_112133-77b6b9bb.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco_20201212_112133.log.json) | +|HRNetV2p-W18| pytorch | Y | Y | 2x | 13.0 | 12.9 | 38.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco_20201212_111651-441e9d9f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco_20201212_111651.log.json) | +|HRNetV2p-W32| pytorch | Y | Y | 2x | 17.5 | 12.4 | 41.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco_20201212_090846-b6f2b49f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco_20201212_090846.log.json) | +|HRNetV2p-W48| pytorch | Y | Y | 2x | 20.3 | 10.8 | 42.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco_20201212_124752-f22d2ce5.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco_20201212_124752.log.json) | + +**Note:** + +- The `28e` schedule in HTC indicates decreasing the lr at 24 and 27 epochs, with a total of 28 epochs. +- HRNetV2 ImageNet pretrained models are in [HRNets for Image Classification](https://github.com/HRNet/HRNet-Image-Classification). diff --git a/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco.py b/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco.py new file mode 100644 index 0000000..e8df265 --- /dev/null +++ b/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco.py @@ -0,0 +1,10 @@ +_base_ = './cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py' +# model settings +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(18, 36)), + stage3=dict(num_channels=(18, 36, 72)), + stage4=dict(num_channels=(18, 36, 72, 144)))), + neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256)) diff --git a/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py b/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py new file mode 100644 index 0000000..d410f23 --- /dev/null +++ b/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py @@ -0,0 +1,39 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w32', + backbone=dict( + _delete_=True, + type='HRNet', + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256)))), + neck=dict( + _delete_=True, + type='HRFPN', + in_channels=[32, 64, 128, 256], + out_channels=256)) +# learning policy +lr_config = dict(step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco.py b/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco.py new file mode 100644 index 0000000..29b1469 --- /dev/null +++ b/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco.py @@ -0,0 +1,11 @@ +_base_ = './cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py' +# model settings +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w40', + backbone=dict( + type='HRNet', + extra=dict( + stage2=dict(num_channels=(40, 80)), + stage3=dict(num_channels=(40, 80, 160)), + stage4=dict(num_channels=(40, 80, 160, 320)))), + neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], out_channels=256)) diff --git a/configs/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco.py b/configs/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco.py new file mode 100644 index 0000000..9585a4f --- /dev/null +++ b/configs/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco.py @@ -0,0 +1,10 @@ +_base_ = './cascade_rcnn_hrnetv2p_w32_20e_coco.py' +# model settings +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(18, 36)), + stage3=dict(num_channels=(18, 36, 72)), + stage4=dict(num_channels=(18, 36, 72, 144)))), + neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256)) diff --git a/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco.py b/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco.py new file mode 100644 index 0000000..ec1bb76 --- /dev/null +++ b/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco.py @@ -0,0 +1,39 @@ +_base_ = '../cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w32', + backbone=dict( + _delete_=True, + type='HRNet', + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256)))), + neck=dict( + _delete_=True, + type='HRFPN', + in_channels=[32, 64, 128, 256], + out_channels=256)) +# learning policy +lr_config = dict(step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco.py b/configs/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco.py new file mode 100644 index 0000000..bd43e47 --- /dev/null +++ b/configs/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco.py @@ -0,0 +1,11 @@ +_base_ = './cascade_rcnn_hrnetv2p_w32_20e_coco.py' +# model settings +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w40', + backbone=dict( + type='HRNet', + extra=dict( + stage2=dict(num_channels=(40, 80)), + stage3=dict(num_channels=(40, 80, 160)), + stage4=dict(num_channels=(40, 80, 160, 320)))), + neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], out_channels=256)) diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py b/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py new file mode 100644 index 0000000..9907bcb --- /dev/null +++ b/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py @@ -0,0 +1,10 @@ +_base_ = './faster_rcnn_hrnetv2p_w32_1x_coco.py' +# model settings +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(18, 36)), + stage3=dict(num_channels=(18, 36, 72)), + stage4=dict(num_channels=(18, 36, 72, 144)))), + neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256)) diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.py b/configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.py new file mode 100644 index 0000000..a4b987a --- /dev/null +++ b/configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.py @@ -0,0 +1,5 @@ +_base_ = './faster_rcnn_hrnetv2p_w18_1x_coco.py' + +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py b/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py new file mode 100644 index 0000000..190e81c --- /dev/null +++ b/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py @@ -0,0 +1,36 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w32', + backbone=dict( + _delete_=True, + type='HRNet', + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256)))), + neck=dict( + _delete_=True, + type='HRFPN', + in_channels=[32, 64, 128, 256], + out_channels=256)) diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco.py b/configs/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco.py new file mode 100644 index 0000000..63c8717 --- /dev/null +++ b/configs/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './faster_rcnn_hrnetv2p_w32_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py b/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py new file mode 100644 index 0000000..d0fd9fa --- /dev/null +++ b/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py @@ -0,0 +1,10 @@ +_base_ = './faster_rcnn_hrnetv2p_w32_1x_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w40', + backbone=dict( + type='HRNet', + extra=dict( + stage2=dict(num_channels=(40, 80)), + stage3=dict(num_channels=(40, 80, 160)), + stage4=dict(num_channels=(40, 80, 160, 320)))), + neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], out_channels=256)) diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco.py b/configs/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco.py new file mode 100644 index 0000000..585cc2c --- /dev/null +++ b/configs/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './faster_rcnn_hrnetv2p_w40_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco.py b/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco.py new file mode 100644 index 0000000..20bffb9 --- /dev/null +++ b/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco.py @@ -0,0 +1,9 @@ +_base_ = './fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(18, 36)), + stage3=dict(num_channels=(18, 36, 72)), + stage4=dict(num_channels=(18, 36, 72, 144)))), + neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256)) diff --git a/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco.py b/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco.py new file mode 100644 index 0000000..3497595 --- /dev/null +++ b/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './fcos_hrnetv2p_w18_gn-head_4x4_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco.py b/configs/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco.py new file mode 100644 index 0000000..b845128 --- /dev/null +++ b/configs/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco.py @@ -0,0 +1,9 @@ +_base_ = './fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(18, 36)), + stage3=dict(num_channels=(18, 36, 72)), + stage4=dict(num_channels=(18, 36, 72, 144)))), + neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256)) diff --git a/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py b/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py new file mode 100644 index 0000000..98f1cb7 --- /dev/null +++ b/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py @@ -0,0 +1,69 @@ +_base_ = '../fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w32', + backbone=dict( + _delete_=True, + type='HRNet', + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256)))), + neck=dict( + _delete_=True, + type='HRFPN', + in_channels=[32, 64, 128, 256], + out_channels=256, + stride=2, + num_outs=5)) +img_norm_cfg = dict( + mean=[103.53, 116.28, 123.675], std=[57.375, 57.12, 58.395], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco.py b/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco.py new file mode 100644 index 0000000..7b38130 --- /dev/null +++ b/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py b/configs/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py new file mode 100644 index 0000000..482f887 --- /dev/null +++ b/configs/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py @@ -0,0 +1,39 @@ +_base_ = './fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py' +img_norm_cfg = dict( + mean=[103.53, 116.28, 123.675], std=[57.375, 57.12, 58.395], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco.py b/configs/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco.py new file mode 100644 index 0000000..452b0fe --- /dev/null +++ b/configs/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco.py @@ -0,0 +1,10 @@ +_base_ = './fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w40', + backbone=dict( + type='HRNet', + extra=dict( + stage2=dict(num_channels=(40, 80)), + stage3=dict(num_channels=(40, 80, 160)), + stage4=dict(num_channels=(40, 80, 160, 320)))), + neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], out_channels=256)) diff --git a/configs/hrnet/htc_hrnetv2p_w18_20e_coco.py b/configs/hrnet/htc_hrnetv2p_w18_20e_coco.py new file mode 100644 index 0000000..391636f --- /dev/null +++ b/configs/hrnet/htc_hrnetv2p_w18_20e_coco.py @@ -0,0 +1,9 @@ +_base_ = './htc_hrnetv2p_w32_20e_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(18, 36)), + stage3=dict(num_channels=(18, 36, 72)), + stage4=dict(num_channels=(18, 36, 72, 144)))), + neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256)) diff --git a/configs/hrnet/htc_hrnetv2p_w32_20e_coco.py b/configs/hrnet/htc_hrnetv2p_w32_20e_coco.py new file mode 100644 index 0000000..aee7808 --- /dev/null +++ b/configs/hrnet/htc_hrnetv2p_w32_20e_coco.py @@ -0,0 +1,36 @@ +_base_ = '../htc/htc_r50_fpn_20e_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w32', + backbone=dict( + _delete_=True, + type='HRNet', + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256)))), + neck=dict( + _delete_=True, + type='HRFPN', + in_channels=[32, 64, 128, 256], + out_channels=256)) diff --git a/configs/hrnet/htc_hrnetv2p_w40_20e_coco.py b/configs/hrnet/htc_hrnetv2p_w40_20e_coco.py new file mode 100644 index 0000000..abf6fb5 --- /dev/null +++ b/configs/hrnet/htc_hrnetv2p_w40_20e_coco.py @@ -0,0 +1,10 @@ +_base_ = './htc_hrnetv2p_w32_20e_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w40', + backbone=dict( + type='HRNet', + extra=dict( + stage2=dict(num_channels=(40, 80)), + stage3=dict(num_channels=(40, 80, 160)), + stage4=dict(num_channels=(40, 80, 160, 320)))), + neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], out_channels=256)) diff --git a/configs/hrnet/htc_hrnetv2p_w40_28e_coco.py b/configs/hrnet/htc_hrnetv2p_w40_28e_coco.py new file mode 100644 index 0000000..7067e8b --- /dev/null +++ b/configs/hrnet/htc_hrnetv2p_w40_28e_coco.py @@ -0,0 +1,4 @@ +_base_ = './htc_hrnetv2p_w40_20e_coco.py' +# learning policy +lr_config = dict(step=[24, 27]) +runner = dict(type='EpochBasedRunner', max_epochs=28) diff --git a/configs/hrnet/htc_x101_64x4d_fpn_16x1_28e_coco.py b/configs/hrnet/htc_x101_64x4d_fpn_16x1_28e_coco.py new file mode 100644 index 0000000..815f285 --- /dev/null +++ b/configs/hrnet/htc_x101_64x4d_fpn_16x1_28e_coco.py @@ -0,0 +1,4 @@ +_base_ = '../htc/htc_x101_64x4d_fpn_16x1_20e_coco.py' +# learning policy +lr_config = dict(step=[24, 27]) +runner = dict(type='EpochBasedRunner', max_epochs=28) diff --git a/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py b/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py new file mode 100644 index 0000000..82a5f46 --- /dev/null +++ b/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py @@ -0,0 +1,9 @@ +_base_ = './mask_rcnn_hrnetv2p_w32_1x_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(18, 36)), + stage3=dict(num_channels=(18, 36, 72)), + stage4=dict(num_channels=(18, 36, 72, 144)))), + neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256)) diff --git a/configs/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco.py b/configs/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco.py new file mode 100644 index 0000000..ca62682 --- /dev/null +++ b/configs/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './mask_rcnn_hrnetv2p_w18_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py b/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py new file mode 100644 index 0000000..f533af6 --- /dev/null +++ b/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py @@ -0,0 +1,36 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w32', + backbone=dict( + _delete_=True, + type='HRNet', + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256)))), + neck=dict( + _delete_=True, + type='HRFPN', + in_channels=[32, 64, 128, 256], + out_channels=256)) diff --git a/configs/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco.py b/configs/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco.py new file mode 100644 index 0000000..63d5d13 --- /dev/null +++ b/configs/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './mask_rcnn_hrnetv2p_w32_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco.py b/configs/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco.py new file mode 100644 index 0000000..5b10c16 --- /dev/null +++ b/configs/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco.py @@ -0,0 +1,10 @@ +_base_ = './mask_rcnn_hrnetv2p_w18_1x_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w40', + backbone=dict( + type='HRNet', + extra=dict( + stage2=dict(num_channels=(40, 80)), + stage3=dict(num_channels=(40, 80, 160)), + stage4=dict(num_channels=(40, 80, 160, 320)))), + neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], out_channels=256)) diff --git a/configs/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco.py b/configs/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco.py new file mode 100644 index 0000000..3a2a510 --- /dev/null +++ b/configs/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './mask_rcnn_hrnetv2p_w40_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/htc/README.md b/configs/htc/README.md new file mode 100644 index 0000000..6af02da --- /dev/null +++ b/configs/htc/README.md @@ -0,0 +1,57 @@ +# Hybrid Task Cascade for Instance Segmentation + +## Introduction + +[ALGORITHM] + +We provide config files to reproduce the results in the CVPR 2019 paper for [Hybrid Task Cascade](https://arxiv.org/abs/1901.07518). + +```latex +@inproceedings{chen2019hybrid, + title={Hybrid task cascade for instance segmentation}, + author={Chen, Kai and Pang, Jiangmiao and Wang, Jiaqi and Xiong, Yu and Li, Xiaoxiao and Sun, Shuyang and Feng, Wansen and Liu, Ziwei and Shi, Jianping and Ouyang, Wanli and Chen Change Loy and Dahua Lin}, + booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, + year={2019} +} +``` + +## Dataset + +HTC requires COCO and [COCO-stuff](http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/stuffthingmaps_trainval2017.zip) dataset for training. You need to download and extract it in the COCO dataset path. +The directory should be like this. + +```none +mmdetection +├── mmdet +├── tools +├── configs +├── data +│ ├── coco +│ │ ├── annotations +│ │ ├── train2017 +│ │ ├── val2017 +│ │ ├── test2017 +| | ├── stuffthingmaps +``` + +## Results and Models + +The results on COCO 2017val are shown in the below table. (results on test-dev are usually slightly higher than val) + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:---------:|:-------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| +| R-50-FPN | pytorch | 1x | 8.2 | 5.8 | 42.3 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_1x_coco/htc_r50_fpn_1x_coco_20200317-7332cf16.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_1x_coco/htc_r50_fpn_1x_coco_20200317_070435.log.json) | +| R-50-FPN | pytorch | 20e | 8.2 | - | 43.3 | 38.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_r50_fpn_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_20e_coco/htc_r50_fpn_20e_coco_20200319-fe28c577.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_20e_coco/htc_r50_fpn_20e_coco_20200319_070313.log.json) | +| R-101-FPN | pytorch | 20e | 10.2 | 5.5 | 44.8 | 39.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_r101_fpn_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/htc/htc_r101_fpn_20e_coco/htc_r101_fpn_20e_coco_20200317-9b41b48f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/htc/htc_r101_fpn_20e_coco/htc_r101_fpn_20e_coco_20200317_153107.log.json) | +| X-101-32x4d-FPN | pytorch |20e| 11.4 | 5.0 | 46.1 | 40.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_x101_32x4d_fpn_16x1_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_32x4d_fpn_16x1_20e_coco/htc_x101_32x4d_fpn_16x1_20e_coco_20200318-de97ae01.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_32x4d_fpn_16x1_20e_coco/htc_x101_32x4d_fpn_16x1_20e_coco_20200318_034519.log.json) | +| X-101-64x4d-FPN | pytorch |20e| 14.5 | 4.4 | 47.0 | 41.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_16x1_20e_coco/htc_x101_64x4d_fpn_16x1_20e_coco_20200318-b181fd7a.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_16x1_20e_coco/htc_x101_64x4d_fpn_16x1_20e_coco_20200318_081711.log.json) | + +- In the HTC paper and COCO 2018 Challenge, `score_thr` is set to 0.001 for both baselines and HTC. +- We use 8 GPUs with 2 images/GPU for R-50 and R-101 models, and 16 GPUs with 1 image/GPU for X-101 models. + If you would like to train X-101 HTC with 8 GPUs, you need to change the lr from 0.02 to 0.01. + +We also provide a powerful HTC with DCN and multi-scale training model. No testing augmentation is used. + +| Backbone | Style | DCN | training scales | Lr schd | box AP | mask AP | Config | Download | +|:----------------:|:-------:|:-----:|:---------------:|:-------:|:------:|:-------:|:------:|:--------:| +| X-101-64x4d-FPN | pytorch | c3-c5 | 400~1400 | 20e | 50.4 | 43.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco_20200312-946fd751.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco_20200312_203410.log.json) | diff --git a/configs/htc/htc_r101_fpn_20e_coco.py b/configs/htc/htc_r101_fpn_20e_coco.py new file mode 100644 index 0000000..de3d5b7 --- /dev/null +++ b/configs/htc/htc_r101_fpn_20e_coco.py @@ -0,0 +1,5 @@ +_base_ = './htc_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) +# learning policy +lr_config = dict(step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/htc/htc_r50_fpn_1x_coco.py b/configs/htc/htc_r50_fpn_1x_coco.py new file mode 100644 index 0000000..929cf46 --- /dev/null +++ b/configs/htc/htc_r50_fpn_1x_coco.py @@ -0,0 +1,56 @@ +_base_ = './htc_without_semantic_r50_fpn_1x_coco.py' +model = dict( + roi_head=dict( + semantic_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[8]), + semantic_head=dict( + type='FusedSemanticHead', + num_ins=5, + fusion_level=1, + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=183, + ignore_label=255, + loss_weight=0.2))) +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='SegRescale', scale_factor=1 / 8), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict( + seg_prefix=data_root + 'stuffthingmaps/train2017/', + pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/htc/htc_r50_fpn_20e_coco.py b/configs/htc/htc_r50_fpn_20e_coco.py new file mode 100644 index 0000000..7d2e011 --- /dev/null +++ b/configs/htc/htc_r50_fpn_20e_coco.py @@ -0,0 +1,4 @@ +_base_ = './htc_r50_fpn_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/htc/htc_without_semantic_r50_fpn_1x_coco.py b/configs/htc/htc_without_semantic_r50_fpn_1x_coco.py new file mode 100644 index 0000000..d028d98 --- /dev/null +++ b/configs/htc/htc_without_semantic_r50_fpn_1x_coco.py @@ -0,0 +1,236 @@ +_base_ = [ + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# model settings +model = dict( + type='HybridTaskCascade', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), + roi_head=dict( + type='HybridTaskCascadeRoIHead', + interleaved=True, + mask_info_flow=True, + num_stages=3, + stage_loss_weights=[1, 0.5, 0.25], + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=[ + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.05, 0.05, 0.1, 0.1]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.033, 0.033, 0.067, 0.067]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) + ], + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=[ + dict( + type='HTCMaskHead', + with_conv_res=False, + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), + dict( + type='HTCMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), + dict( + type='HTCMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)) + ]), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=[ + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.6, + neg_iou_thr=0.6, + min_pos_iou=0.6, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.7, + min_pos_iou=0.7, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False) + ]), + test_cfg=dict( + rpn=dict( + nms_pre=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.001, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100, + mask_thr_binary=0.5))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline)) diff --git a/configs/htc/htc_x101_32x4d_fpn_16x1_20e_coco.py b/configs/htc/htc_x101_32x4d_fpn_16x1_20e_coco.py new file mode 100644 index 0000000..b9e5524 --- /dev/null +++ b/configs/htc/htc_x101_32x4d_fpn_16x1_20e_coco.py @@ -0,0 +1,18 @@ +_base_ = './htc_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch')) +data = dict(samples_per_gpu=1, workers_per_gpu=1) +# learning policy +lr_config = dict(step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py b/configs/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py new file mode 100644 index 0000000..b140f75 --- /dev/null +++ b/configs/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py @@ -0,0 +1,18 @@ +_base_ = './htc_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch')) +data = dict(samples_per_gpu=1, workers_per_gpu=1) +# learning policy +lr_config = dict(step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py b/configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py new file mode 100644 index 0000000..da89e09 --- /dev/null +++ b/configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py @@ -0,0 +1,42 @@ +_base_ = './htc_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) +# dataset settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), + dict( + type='Resize', + img_scale=[(1600, 400), (1600, 1400)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='SegRescale', scale_factor=1 / 8), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +data = dict( + samples_per_gpu=1, workers_per_gpu=1, train=dict(pipeline=train_pipeline)) +# learning policy +lr_config = dict(step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/instaboost/README.md b/configs/instaboost/README.md new file mode 100644 index 0000000..5ab74a1 --- /dev/null +++ b/configs/instaboost/README.md @@ -0,0 +1,44 @@ +# InstaBoost for MMDetection + +[ALGORITHM] + +Configs in this directory is the implementation for ICCV2019 paper "InstaBoost: Boosting Instance Segmentation Via Probability Map Guided Copy-Pasting" and provided by the authors of the paper. InstaBoost is a data augmentation method for object detection and instance segmentation. The paper has been released on [`arXiv`](https://arxiv.org/abs/1908.07801). + +```latex +@inproceedings{fang2019instaboost, + title={Instaboost: Boosting instance segmentation via probability map guided copy-pasting}, + author={Fang, Hao-Shu and Sun, Jianhua and Wang, Runzhong and Gou, Minghao and Li, Yong-Lu and Lu, Cewu}, + booktitle={Proceedings of the IEEE International Conference on Computer Vision}, + pages={682--691}, + year={2019} +} +``` + +## Usage + +### Requirements + +You need to install `instaboostfast` before using it. + +```shell +pip install instaboostfast +``` + +The code and more details can be found [here](https://github.com/GothicAi/Instaboost). + +### Integration with MMDetection + +InstaBoost have been already integrated in the data pipeline, thus all you need is to add or change **InstaBoost** configurations after **LoadImageFromFile**. We have provided examples like [this](mask_rcnn_r50_fpn_instaboost_4x#L121). You can refer to [`InstaBoostConfig`](https://github.com/GothicAi/InstaBoost-pypi#instaboostconfig) for more details. + +## Results and Models + +- All models were trained on `coco_2017_train` and tested on `coco_2017_val` for conveinience of evaluation and comparison. In the paper, the results are obtained from `test-dev`. +- To balance accuracy and training time when using InstaBoost, models released in this page are all trained for 48 Epochs. Other training and testing configs strictly follow the original framework. +- For results and models in MMDetection V1.x, please refer to [Instaboost](https://github.com/GothicAi/Instaboost). + +| Network | Backbone | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :--------: | :-----: | :------: | :------------: | :------:| :-----: | :------: | :-----------------: | +| Mask R-CNN | R-50-FPN | 4x | 4.4 | 17.5 | 40.6 | 36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco/mask_rcnn_r50_fpn_instaboost_4x_coco_20200307-d025f83a.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco/mask_rcnn_r50_fpn_instaboost_4x_coco_20200307_223635.log.json) | +| Mask R-CNN | R-101-FPN | 4x | 6.4 | | 42.5 | 38.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco/mask_rcnn_r101_fpn_instaboost_4x_coco_20200703_235738-f23f3a5f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco/mask_rcnn_r101_fpn_instaboost_4x_coco_20200703_235738.log.json) | +| Mask R-CNN | X-101-64x4d-FPN | 4x | 10.7 | | 44.7 | 39.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco_20200515_080947-8ed58c1b.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco_20200515_080947.log.json) | +| Cascade R-CNN | R-101-FPN | 4x | 6.0 | 12.0 | 43.7 | 38.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco_20200307-c19d98d9.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco_20200307_223646.log.json) | diff --git a/configs/instaboost/cascade_mask_rcnn_r101_fpn_instaboost_4x_coco.py b/configs/instaboost/cascade_mask_rcnn_r101_fpn_instaboost_4x_coco.py new file mode 100644 index 0000000..723ab02 --- /dev/null +++ b/configs/instaboost/cascade_mask_rcnn_r101_fpn_instaboost_4x_coco.py @@ -0,0 +1,3 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py' + +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py b/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py new file mode 100644 index 0000000..a89a81f --- /dev/null +++ b/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py @@ -0,0 +1,28 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='InstaBoost', + action_candidate=('normal', 'horizontal', 'skip'), + action_prob=(1, 0, 0), + scale=(0.8, 1.2), + dx=15, + dy=15, + theta=(-1, 1), + color_prob=0.5, + hflag=False, + aug_ratio=0.5), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +data = dict(train=dict(pipeline=train_pipeline)) +# learning policy +lr_config = dict(step=[32, 44]) +runner = dict(type='EpochBasedRunner', max_epochs=48) diff --git a/configs/instaboost/cascade_mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py b/configs/instaboost/cascade_mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py new file mode 100644 index 0000000..7cf5f30 --- /dev/null +++ b/configs/instaboost/cascade_mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py @@ -0,0 +1,13 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco.py b/configs/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco.py new file mode 100644 index 0000000..c281947 --- /dev/null +++ b/configs/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco.py @@ -0,0 +1,2 @@ +_base_ = './mask_rcnn_r50_fpn_instaboost_4x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py b/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py new file mode 100644 index 0000000..55ca62b --- /dev/null +++ b/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py @@ -0,0 +1,28 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='InstaBoost', + action_candidate=('normal', 'horizontal', 'skip'), + action_prob=(1, 0, 0), + scale=(0.8, 1.2), + dx=15, + dy=15, + theta=(-1, 1), + color_prob=0.5, + hflag=False, + aug_ratio=0.5), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +data = dict(train=dict(pipeline=train_pipeline)) +# learning policy +lr_config = dict(step=[32, 44]) +runner = dict(type='EpochBasedRunner', max_epochs=48) diff --git a/configs/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py b/configs/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py new file mode 100644 index 0000000..0acd088 --- /dev/null +++ b/configs/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py @@ -0,0 +1,13 @@ +_base_ = './mask_rcnn_r50_fpn_instaboost_4x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/legacy_1.x/README.md b/configs/legacy_1.x/README.md new file mode 100644 index 0000000..61b2aa8 --- /dev/null +++ b/configs/legacy_1.x/README.md @@ -0,0 +1,53 @@ +# Legacy Configs in MMDetection V1.x + +[OTHERS] + +Configs in this directory implement the legacy configs used by MMDetection V1.x and its model zoos. + +To help users convert their models from V1.x to MMDetection V2.0, we provide v1.x configs to inference the converted v1.x models. +Due to the BC-breaking changes in MMDetection V2.0 from MMDetection V1.x, running inference with the same model weights in these two version will produce different results. The difference will cause within 1% AP absolute difference as can be found in the following table. + +## Usage + +To upgrade the model version, the users need to do the following steps. + +### 1. Convert model weights + +There are three main difference in the model weights between V1.x and V2.0 codebases. + +1. Since the class order in all the detector's classification branch is reordered, all the legacy model weights need to go through the conversion process. +2. The regression and segmentation head no longer contain the background channel. Weights in these background channels should be removed to fix in the current codebase. +3. For two-stage detectors, their wegihts need to be upgraded since MMDetection V2.0 refactors all the two-stage detectors with `RoIHead`. + +The users can do the same modification as mentioned above for the self-implemented +detectors. We provide a scripts `tools/model_converters/upgrade_model_version.py` to convert the model weights in the V1.x model zoo. + +```bash +python tools/model_converters/upgrade_model_version.py ${OLD_MODEL_PATH} ${NEW_MODEL_PATH} --num-classes ${NUM_CLASSES} + +``` + +- OLD_MODEL_PATH: the path to load the model weights in 1.x version. +- NEW_MODEL_PATH: the path to save the converted model weights in 2.0 version. +- NUM_CLASSES: number of classes of the original model weights. Usually it is 81 for COCO dataset, 21 for VOC dataset. + The number of classes in V2.0 models should be equal to that in V1.x models - 1. + +### 2. Use configs with legacy settings + +After converting the model weights, checkout to the v1.2 release to find the corresponding config file that uses the legacy settings. +The V1.x models usually need these three legacy modules: `LegacyAnchorGenerator`, `LegacyDeltaXYWHBBoxCoder`, and `RoIAlign(align=False)`. +For models using ResNet Caffe backbones, they also need to change the pretrain name and the corresponding `img_norm_cfg`. +An example is in [`retinanet_r50_caffe_fpn_1x_coco_v1.py`](retinanet_r50_caffe_fpn_1x_coco_v1.py) +Then use the config to test the model weights. For most models, the obtained results should be close to that in V1.x. +We provide configs of some common structures in this directory. + +## Performance + +The performance change after converting the models in this directory are listed as the following. +| Method | Style | Lr schd | V1.x box AP | V1.x mask AP | V2.0 box AP | V2.0 mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------:| :-----: |:------:| :-----: | :-------: |:------------------------------------------------------------------------------------------------------------------------------: | +| Mask R-CNN R-50-FPN | pytorch | 1x | 37.3 | 34.2 | 36.8 | 33.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/mask_rcnn_r50_fpn_1x_coco_v1.py) | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth)| +| RetinaNet R-50-FPN | caffe | 1x | 35.8 | - | 35.4 | - | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/retinanet_r50_caffe_1x_coco_v1.py) | +| RetinaNet R-50-FPN | pytorch | 1x | 35.6 |-|35.2| -| [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/retinanet_r50_fpn_1x_coco_v1.py) | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r50_fpn_1x_20181125-7b0c2548.pth) | +| Cascade Mask R-CNN R-50-FPN | pytorch | 1x | 41.2 | 35.7 |40.8| 35.6| [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/cascade_mask_rcnn_r50_fpn_1x_coco_v1.py) | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r50_fpn_1x_20181123-88b170c9.pth) | +| SSD300-VGG16 | caffe | 120e | 25.7 |-|25.4|-| [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/ssd300_coco_v1.py) | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd300_coco_vgg16_caffe_120e_20181221-84d7110b.pth) | diff --git a/configs/legacy_1.x/cascade_mask_rcnn_r50_fpn_1x_coco_v1.py b/configs/legacy_1.x/cascade_mask_rcnn_r50_fpn_1x_coco_v1.py new file mode 100644 index 0000000..5899444 --- /dev/null +++ b/configs/legacy_1.x/cascade_mask_rcnn_r50_fpn_1x_coco_v1.py @@ -0,0 +1,79 @@ +_base_ = [ + '../_base_/models/cascade_mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + type='CascadeRCNN', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + anchor_generator=dict(type='LegacyAnchorGenerator', center_offset=0.5), + bbox_coder=dict( + type='LegacyDeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0])), + roi_head=dict( + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict( + type='RoIAlign', + output_size=7, + sampling_ratio=2, + aligned=False)), + bbox_head=[ + dict( + type='Shared2FCBBoxHead', + reg_class_agnostic=True, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='LegacyDeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2])), + dict( + type='Shared2FCBBoxHead', + reg_class_agnostic=True, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='LegacyDeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.05, 0.05, 0.1, 0.1])), + dict( + type='Shared2FCBBoxHead', + reg_class_agnostic=True, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='LegacyDeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.033, 0.033, 0.067, 0.067])), + ], + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict( + type='RoIAlign', + output_size=14, + sampling_ratio=2, + aligned=False)))) +dist_params = dict(backend='nccl', port=29515) diff --git a/configs/legacy_1.x/faster_rcnn_r50_fpn_1x_coco_v1.py b/configs/legacy_1.x/faster_rcnn_r50_fpn_1x_coco_v1.py new file mode 100644 index 0000000..fb2f2d1 --- /dev/null +++ b/configs/legacy_1.x/faster_rcnn_r50_fpn_1x_coco_v1.py @@ -0,0 +1,37 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict( + type='FasterRCNN', + pretrained='torchvision://resnet50', + rpn_head=dict( + type='RPNHead', + anchor_generator=dict( + type='LegacyAnchorGenerator', + center_offset=0.5, + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict(type='LegacyDeltaXYWHBBoxCoder'), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), + roi_head=dict( + type='StandardRoIHead', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict( + type='RoIAlign', + output_size=7, + sampling_ratio=2, + aligned=False), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + bbox_coder=dict(type='LegacyDeltaXYWHBBoxCoder'), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn_proposal=dict(max_per_img=2000), + rcnn=dict(assigner=dict(match_low_quality=True)))) diff --git a/configs/legacy_1.x/mask_rcnn_r50_fpn_1x_coco_v1.py b/configs/legacy_1.x/mask_rcnn_r50_fpn_1x_coco_v1.py new file mode 100644 index 0000000..04581bb --- /dev/null +++ b/configs/legacy_1.x/mask_rcnn_r50_fpn_1x_coco_v1.py @@ -0,0 +1,34 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict( + rpn_head=dict( + anchor_generator=dict(type='LegacyAnchorGenerator', center_offset=0.5), + bbox_coder=dict(type='LegacyDeltaXYWHBBoxCoder'), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), + roi_head=dict( + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict( + type='RoIAlign', + output_size=7, + sampling_ratio=2, + aligned=False)), + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict( + type='RoIAlign', + output_size=14, + sampling_ratio=2, + aligned=False)), + bbox_head=dict( + bbox_coder=dict(type='LegacyDeltaXYWHBBoxCoder'), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))), + + # model training and testing settings + train_cfg=dict( + rpn_proposal=dict(max_per_img=2000), + rcnn=dict(assigner=dict(match_low_quality=True)))) diff --git a/configs/legacy_1.x/retinanet_r50_caffe_fpn_1x_coco_v1.py b/configs/legacy_1.x/retinanet_r50_caffe_fpn_1x_coco_v1.py new file mode 100644 index 0000000..ef9392f --- /dev/null +++ b/configs/legacy_1.x/retinanet_r50_caffe_fpn_1x_coco_v1.py @@ -0,0 +1,37 @@ +_base_ = './retinanet_r50_fpn_1x_coco_v1.py' +model = dict( + pretrained='open-mmlab://detectron/resnet50_caffe', + backbone=dict( + norm_cfg=dict(requires_grad=False), norm_eval=True, style='caffe')) +# use caffe img_norm +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/legacy_1.x/retinanet_r50_fpn_1x_coco_v1.py b/configs/legacy_1.x/retinanet_r50_fpn_1x_coco_v1.py new file mode 100644 index 0000000..6198b97 --- /dev/null +++ b/configs/legacy_1.x/retinanet_r50_fpn_1x_coco_v1.py @@ -0,0 +1,17 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + bbox_head=dict( + type='RetinaHead', + anchor_generator=dict( + type='LegacyAnchorGenerator', + center_offset=0.5, + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict(type='LegacyDeltaXYWHBBoxCoder'), + loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) diff --git a/configs/legacy_1.x/ssd300_coco_v1.py b/configs/legacy_1.x/ssd300_coco_v1.py new file mode 100644 index 0000000..b194e76 --- /dev/null +++ b/configs/legacy_1.x/ssd300_coco_v1.py @@ -0,0 +1,79 @@ +_base_ = [ + '../_base_/models/ssd300.py', '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py' +] +# model settings +input_size = 300 +model = dict( + bbox_head=dict( + type='SSDHead', + anchor_generator=dict( + type='LegacySSDAnchorGenerator', + scale_major=False, + input_size=input_size, + basesize_ratio_range=(0.15, 0.9), + strides=[8, 16, 32, 64, 100, 300], + ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]), + bbox_coder=dict( + type='LegacyDeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2]))) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(300, 300), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(300, 300), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=3, + train=dict( + _delete_=True, + type='RepeatDataset', + times=5, + dataset=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4) +optimizer_config = dict(_delete_=True) +dist_params = dict(backend='nccl', port=29555) diff --git a/configs/libra_rcnn/README.md b/configs/libra_rcnn/README.md new file mode 100644 index 0000000..1f28087 --- /dev/null +++ b/configs/libra_rcnn/README.md @@ -0,0 +1,28 @@ +# Libra R-CNN: Towards Balanced Learning for Object Detection + +## Introduction + +[ALGORITHM] + +We provide config files to reproduce the results in the CVPR 2019 paper [Libra R-CNN](https://arxiv.org/pdf/1904.02701.pdf). + +``` +@inproceedings{pang2019libra, + title={Libra R-CNN: Towards Balanced Learning for Object Detection}, + author={Pang, Jiangmiao and Chen, Kai and Shi, Jianping and Feng, Huajun and Ouyang, Wanli and Dahua Lin}, + booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, + year={2019} +} +``` + +## Results and models + +The results on COCO 2017val are shown in the below table. (results on test-dev are usually slightly higher than val) + +| Architecture | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:------------:|:---------------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| Faster R-CNN | R-50-FPN | pytorch | 1x | 4.6 | 19.0 | 38.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco/libra_faster_rcnn_r50_fpn_1x_coco_20200130-3afee3a9.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco/libra_faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | +| Fast R-CNN | R-50-FPN | pytorch | 1x | | | | | +| Faster R-CNN | R-101-FPN | pytorch | 1x | 6.5 | 14.4 | 40.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco/libra_faster_rcnn_r101_fpn_1x_coco_20200203-8dba6a5a.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco/libra_faster_rcnn_r101_fpn_1x_coco_20200203_001405.log.json) | +| Faster R-CNN | X-101-64x4d-FPN | pytorch | 1x | 10.8 | 8.5 | 42.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco/libra_faster_rcnn_x101_64x4d_fpn_1x_coco_20200315-3a7d0488.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco/libra_faster_rcnn_x101_64x4d_fpn_1x_coco_20200315_231625.log.json) | +| RetinaNet | R-50-FPN | pytorch | 1x | 4.2 | 17.7 | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/libra_rcnn/libra_retinanet_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_retinanet_r50_fpn_1x_coco/libra_retinanet_r50_fpn_1x_coco_20200205-804d94ce.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_retinanet_r50_fpn_1x_coco/libra_retinanet_r50_fpn_1x_coco_20200205_112757.log.json) | diff --git a/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_coco.py b/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..efbedc8 --- /dev/null +++ b/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,50 @@ +_base_ = '../fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py' +# model settings +model = dict( + neck=[ + dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + dict( + type='BFP', + in_channels=256, + num_levels=5, + refine_level=2, + refine_type='non_local') + ], + roi_head=dict( + bbox_head=dict( + loss_bbox=dict( + _delete_=True, + type='BalancedL1Loss', + alpha=0.5, + gamma=1.5, + beta=1.0, + loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rcnn=dict( + sampler=dict( + _delete_=True, + type='CombinedSampler', + num=512, + pos_fraction=0.25, + add_gt_as_proposals=True, + pos_sampler=dict(type='InstanceBalancedPosSampler'), + neg_sampler=dict( + type='IoUBalancedNegSampler', + floor_thr=-1, + floor_fraction=0, + num_bins=3))))) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +data = dict( + train=dict(proposal_file=data_root + + 'libra_proposals/rpn_r50_fpn_1x_train2017.pkl'), + val=dict(proposal_file=data_root + + 'libra_proposals/rpn_r50_fpn_1x_val2017.pkl'), + test=dict(proposal_file=data_root + + 'libra_proposals/rpn_r50_fpn_1x_val2017.pkl')) diff --git a/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco.py b/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco.py new file mode 100644 index 0000000..8e36c9b --- /dev/null +++ b/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './libra_faster_rcnn_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py b/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..89a0d7b --- /dev/null +++ b/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,41 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +# model settings +model = dict( + neck=[ + dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + dict( + type='BFP', + in_channels=256, + num_levels=5, + refine_level=2, + refine_type='non_local') + ], + roi_head=dict( + bbox_head=dict( + loss_bbox=dict( + _delete_=True, + type='BalancedL1Loss', + alpha=0.5, + gamma=1.5, + beta=1.0, + loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=dict(sampler=dict(neg_pos_ub=5), allowed_border=-1), + rcnn=dict( + sampler=dict( + _delete_=True, + type='CombinedSampler', + num=512, + pos_fraction=0.25, + add_gt_as_proposals=True, + pos_sampler=dict(type='InstanceBalancedPosSampler'), + neg_sampler=dict( + type='IoUBalancedNegSampler', + floor_thr=-1, + floor_fraction=0, + num_bins=3))))) diff --git a/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco.py b/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..e945532 --- /dev/null +++ b/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './libra_faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/libra_rcnn/libra_retinanet_r50_fpn_1x_coco.py b/configs/libra_rcnn/libra_retinanet_r50_fpn_1x_coco.py new file mode 100644 index 0000000..be27420 --- /dev/null +++ b/configs/libra_rcnn/libra_retinanet_r50_fpn_1x_coco.py @@ -0,0 +1,26 @@ +_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py' +# model settings +model = dict( + neck=[ + dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_input', + num_outs=5), + dict( + type='BFP', + in_channels=256, + num_levels=5, + refine_level=1, + refine_type='non_local') + ], + bbox_head=dict( + loss_bbox=dict( + _delete_=True, + type='BalancedL1Loss', + alpha=0.5, + gamma=1.5, + beta=0.11, + loss_weight=1.0))) diff --git a/configs/lvis/README.md b/configs/lvis/README.md new file mode 100644 index 0000000..3276803 --- /dev/null +++ b/configs/lvis/README.md @@ -0,0 +1,51 @@ +# LVIS dataset + +## Introduction + +[DATASET] + +```latex +@inproceedings{gupta2019lvis, + title={{LVIS}: A Dataset for Large Vocabulary Instance Segmentation}, + author={Gupta, Agrim and Dollar, Piotr and Girshick, Ross}, + booktitle={Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition}, + year={2019} +} +``` + +## Common Setting + +* Please follow [install guide](../../docs/install.md#install-mmdetection) to install open-mmlab forked cocoapi first. +* Run following scripts to install our forked lvis-api. + + ```shell + # mmlvis is fully compatible with official lvis + pip install mmlvis + ``` + + or + + ```shell + pip install -r requirements/optional.txt + ``` + +* All experiments use oversample strategy [here](../../docs/tutorials/new_dataset.md#class-balanced-dataset) with oversample threshold `1e-3`. +* The size of LVIS v0.5 is half of COCO, so schedule `2x` in LVIS is roughly the same iterations as `1x` in COCO. + +## Results and models of LVIS v0.5 + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: |:--------: | +| R-50-FPN | pytorch | 2x | - | - | 26.1 | 25.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis-dbd06831.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_20200531_160435.log.json) | +| R-101-FPN | pytorch | 2x | - | - | 27.1 | 27.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis-54582ee2.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis_20200601_134748.log.json) | +| X-101-32x4d-FPN | pytorch | 2x | - | - | 26.7 | 26.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis-3cf55ea2.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis_20200531_221749.log.json) | +| X-101-64x4d-FPN | pytorch | 2x | - | - | 26.4 | 26.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis-1c99a5ad.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis_20200601_194651.log.json) | + +## Results and models of LVIS v1 + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +| R-50-FPN | pytorch | 1x | 9.1 | - | 22.5 | 21.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1-aa78ac3d.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1-20200829_061305.log.json) | +| R-101-FPN | pytorch | 1x | 10.8 | - | 24.6 | 23.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1-ec55ce32.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1-20200829_070959.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 11.8 | - | 26.7 | 25.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1-ebbc5c81.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1-20200829_071317.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 14.6 | - | 27.2 | 25.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1-43d9edfe.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1-20200830_060206.log.json) | diff --git a/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1.py b/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1.py new file mode 100644 index 0000000..1881865 --- /dev/null +++ b/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1.py @@ -0,0 +1,2 @@ +_base_ = './mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py b/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py new file mode 100644 index 0000000..2d2816c --- /dev/null +++ b/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py @@ -0,0 +1,2 @@ +_base_ = './mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py b/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py new file mode 100644 index 0000000..92ddb52 --- /dev/null +++ b/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py @@ -0,0 +1,31 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/lvis_v1_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + roi_head=dict( + bbox_head=dict(num_classes=1203), mask_head=dict(num_classes=1203)), + test_cfg=dict( + rcnn=dict( + score_thr=0.0001, + # LVIS allows up to 300 + max_per_img=300))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +data = dict(train=dict(dataset=dict(pipeline=train_pipeline))) diff --git a/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py b/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py new file mode 100644 index 0000000..d53c5dc --- /dev/null +++ b/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py @@ -0,0 +1,31 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/lvis_v0.5_instance.py', + '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py' +] +model = dict( + roi_head=dict( + bbox_head=dict(num_classes=1230), mask_head=dict(num_classes=1230)), + test_cfg=dict( + rcnn=dict( + score_thr=0.0001, + # LVIS allows up to 300 + max_per_img=300))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +data = dict(train=dict(dataset=dict(pipeline=train_pipeline))) diff --git a/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py b/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py new file mode 100644 index 0000000..5abcc2e --- /dev/null +++ b/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py @@ -0,0 +1,13 @@ +_base_ = './mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py b/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py new file mode 100644 index 0000000..439c39a --- /dev/null +++ b/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py @@ -0,0 +1,13 @@ +_base_ = './mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py b/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py new file mode 100644 index 0000000..f77adba --- /dev/null +++ b/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py @@ -0,0 +1,13 @@ +_base_ = './mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py b/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py new file mode 100644 index 0000000..2136255 --- /dev/null +++ b/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py @@ -0,0 +1,13 @@ +_base_ = './mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/mask_rcnn/README.md b/configs/mask_rcnn/README.md new file mode 100644 index 0000000..fd1dc5b --- /dev/null +++ b/configs/mask_rcnn/README.md @@ -0,0 +1,43 @@ +# Mask R-CNN + +## Introduction + +[ALGORITHM] + +```latex +@article{He_2017, + title={Mask R-CNN}, + journal={2017 IEEE International Conference on Computer Vision (ICCV)}, + publisher={IEEE}, + author={He, Kaiming and Gkioxari, Georgia and Dollar, Piotr and Girshick, Ross}, + year={2017}, + month={Oct} +} +``` + +## Results and models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +| R-50-FPN | caffe | 1x | 4.3 | | 38.0 | 34.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco/mask_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.38__segm_mAP-0.344_20200504_231812-0ebd1859.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco/mask_rcnn_r50_caffe_fpn_1x_coco_20200504_231812.log.json) | +| R-50-FPN | pytorch | 1x | 4.4 | 16.1 | 38.2 | 34.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205_050542.log.json) | +| R-50-FPN | pytorch | 2x | - | - | 39.2 | 35.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_2x_coco/mask_rcnn_r50_fpn_2x_coco_bbox_mAP-0.392__segm_mAP-0.354_20200505_003907-3e542a40.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_2x_coco/mask_rcnn_r50_fpn_2x_coco_20200505_003907.log.json) | +| R-101-FPN | caffe | 1x | | | 40.4 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco/mask_rcnn_r101_caffe_fpn_1x_coco_20200601_095758-805e06c1.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco/mask_rcnn_r101_caffe_fpn_1x_coco_20200601_095758.log.json)| +| R-101-FPN | pytorch | 1x | 6.4 | 13.5 | 40.0 | 36.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_1x_coco/mask_rcnn_r101_fpn_1x_coco_20200204-1efe0ed5.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_1x_coco/mask_rcnn_r101_fpn_1x_coco_20200204_144809.log.json) | +| R-101-FPN | pytorch | 2x | - | - | 40.8 | 36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_2x_coco/mask_rcnn_r101_fpn_2x_coco_bbox_mAP-0.408__segm_mAP-0.366_20200505_071027-14b391c7.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_2x_coco/mask_rcnn_r101_fpn_2x_coco_20200505_071027.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 7.6 | 11.3 | 41.9 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205-478d0b67.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205_034906.log.json) | +| X-101-32x4d-FPN | pytorch | 2x | - | - | 42.2 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco/mask_rcnn_x101_32x4d_fpn_2x_coco_bbox_mAP-0.422__segm_mAP-0.378_20200506_004702-faef898c.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco/mask_rcnn_x101_32x4d_fpn_2x_coco_20200506_004702.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 10.7 | 8.0 | 42.8 | 38.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco/mask_rcnn_x101_64x4d_fpn_1x_coco_20200201-9352eb0d.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco/mask_rcnn_x101_64x4d_fpn_1x_coco_20200201_124310.log.json) | +| X-101-64x4d-FPN | pytorch | 2x | - | - | 42.7 | 38.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco/mask_rcnn_x101_64x4d_fpn_2x_coco_20200509_224208-39d6f70c.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco/mask_rcnn_x101_64x4d_fpn_2x_coco_20200509_224208.log.json)| +| X-101-32x8d-FPN | pytorch | 1x | - | - | 42.8 | 38.3 | | + +## Pre-trained Models + +We also train some models with longer schedules and multi-scale training. The users could finetune them for downstream tasks. + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +| [R-50-FPN](./mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py) | caffe | 2x | 4.3 | | 40.3 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco_bbox_mAP-0.403__segm_mAP-0.365_20200504_231822-a75c98ce.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco_20200504_231822.log.json) +| [R-50-FPN](./mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py) | caffe | 3x | 4.3 | | 40.8 | 37.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_20200504_163245.log.json) +| [X-101-32x8d-FPN](./mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py) | pytorch | 1x | - | | 43.6 | 39.0 | +| [X-101-32x8d-FPN](./mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py) | pytorch | 3x | - | | 44.0 | 39.3 | diff --git a/configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco.py b/configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..230181c --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = './mask_rcnn_r50_caffe_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet101_caffe', + backbone=dict(depth=101)) diff --git a/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py b/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py new file mode 100644 index 0000000..db02d9b --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './mask_rcnn_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/mask_rcnn/mask_rcnn_r101_fpn_2x_coco.py b/configs/mask_rcnn/mask_rcnn_r101_fpn_2x_coco.py new file mode 100644 index 0000000..c8cb2d8 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r101_fpn_2x_coco.py @@ -0,0 +1,2 @@ +_base_ = './mask_rcnn_r50_fpn_2x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/mask_rcnn/mask_rcnn_r50_caffe_c4_1x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_caffe_c4_1x_coco.py new file mode 100644 index 0000000..a44c018 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_caffe_c4_1x_coco.py @@ -0,0 +1,39 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_caffe_c4.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..0471fe8 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,36 @@ +_base_ = './mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict(norm_cfg=dict(requires_grad=False), style='caffe')) +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py new file mode 100644 index 0000000..5d6215d --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py @@ -0,0 +1,45 @@ +_base_ = './mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict(norm_cfg=dict(requires_grad=False), style='caffe')) +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py new file mode 100644 index 0000000..4f7150c --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 23]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py new file mode 100644 index 0000000..1b48a21 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py @@ -0,0 +1,4 @@ +_base_ = './mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py' +# learning policy +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain_1x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain_1x_coco.py new file mode 100644 index 0000000..86c5b13 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain_1x_coco.py @@ -0,0 +1,41 @@ +_base_ = './mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict(norm_cfg=dict(requires_grad=False), style='caffe')) +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_poly_1x_coco_v1.py b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_poly_1x_coco_v1.py new file mode 100644 index 0000000..431e5ab --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_poly_1x_coco_v1.py @@ -0,0 +1,57 @@ +_base_ = './mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnet50_caffe_bgr', + backbone=dict(norm_cfg=dict(requires_grad=False), style='caffe'), + rpn_head=dict( + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), + roi_head=dict( + bbox_roi_extractor=dict( + roi_layer=dict( + type='RoIAlign', + output_size=7, + sampling_ratio=2, + aligned=False)), + bbox_head=dict( + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), + mask_roi_extractor=dict( + roi_layer=dict( + type='RoIAlign', + output_size=14, + sampling_ratio=2, + aligned=False)))) +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..6a6c924 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] diff --git a/configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py new file mode 100644 index 0000000..932b1f9 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py' +] diff --git a/configs/mask_rcnn/mask_rcnn_r50_fpn_poly_1x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_fpn_poly_1x_coco.py new file mode 100644 index 0000000..9eb6d57 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_fpn_poly_1x_coco.py @@ -0,0 +1,23 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +data = dict(train=dict(pipeline=train_pipeline)) diff --git a/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py b/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..d0016d1 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco.py b/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco.py new file mode 100644 index 0000000..d4189c6 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco.py @@ -0,0 +1,13 @@ +_base_ = './mask_rcnn_r101_fpn_2x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_1x_coco.py b/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_1x_coco.py new file mode 100644 index 0000000..ee034b7 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_1x_coco.py @@ -0,0 +1,63 @@ +_base_ = './mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnext101_32x8d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=8, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + style='pytorch')) + +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], + std=[57.375, 57.120, 58.395], + to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) diff --git a/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_1x_coco.py b/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_1x_coco.py new file mode 100644 index 0000000..1c12432 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_1x_coco.py @@ -0,0 +1,58 @@ +_base_ = './mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnext101_32x8d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=8, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + style='pytorch')) + +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], + std=[57.375, 57.120, 58.395], + to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py b/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py new file mode 100644 index 0000000..93b7d51 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py @@ -0,0 +1,61 @@ +_base_ = './mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnext101_32x8d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=8, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + style='pytorch')) + +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], + std=[57.375, 57.120, 58.395], + to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) + +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py b/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..31e5943 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './mask_rcnn_x101_32x4d_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco.py b/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco.py new file mode 100644 index 0000000..9ba92c5 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco.py @@ -0,0 +1,13 @@ +_base_ = './mask_rcnn_x101_32x4d_fpn_2x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/ms_rcnn/README.md b/configs/ms_rcnn/README.md new file mode 100644 index 0000000..c19dee3 --- /dev/null +++ b/configs/ms_rcnn/README.md @@ -0,0 +1,26 @@ +# Mask Scoring R-CNN + +## Introduction + +[ALGORITHM] + +``` +@inproceedings{huang2019msrcnn, + title={Mask Scoring R-CNN}, + author={Zhaojin Huang and Lichao Huang and Yongchao Gong and Chang Huang and Xinggang Wang}, + booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, + year={2019}, +} +``` + +## Results and Models + +| Backbone | style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:-------------:|:----------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| +| R-50-FPN | caffe | 1x | 4.5 | | 38.2 | 36.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco/ms_rcnn_r50_caffe_fpn_1x_coco_20200702_180848-61c9355e.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco/ms_rcnn_r50_caffe_fpn_1x_coco_20200702_180848.log.json) | +| R-50-FPN | caffe | 2x | - | - | 38.8 | 36.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco/ms_rcnn_r50_caffe_fpn_2x_coco_bbox_mAP-0.388__segm_mAP-0.363_20200506_004738-ee87b137.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco/ms_rcnn_r50_caffe_fpn_2x_coco_20200506_004738.log.json) | +| R-101-FPN | caffe | 1x | 6.5 | | 40.4 | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco/ms_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.404__segm_mAP-0.376_20200506_004755-b9b12a37.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco/ms_rcnn_r101_caffe_fpn_1x_coco_20200506_004755.log.json) | +| R-101-FPN | caffe | 2x | - | - | 41.1 | 38.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco/ms_rcnn_r101_caffe_fpn_2x_coco_bbox_mAP-0.411__segm_mAP-0.381_20200506_011134-5f3cc74f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco/ms_rcnn_r101_caffe_fpn_2x_coco_20200506_011134.log.json) | +| R-X101-32x4d | pytorch | 2x | 7.9 | 11.0 | 41.8 | 38.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco/ms_rcnn_x101_32x4d_fpn_1x_coco_20200206-81fd1740.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco/ms_rcnn_x101_32x4d_fpn_1x_coco_20200206_100113.log.json) | +| R-X101-64x4d | pytorch | 1x | 11.0 | 8.0 | 43.0 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco/ms_rcnn_x101_64x4d_fpn_1x_coco_20200206-86ba88d2.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco/ms_rcnn_x101_64x4d_fpn_1x_coco_20200206_091744.log.json) | +| R-X101-64x4d | pytorch | 2x | 11.0 | 8.0 | 42.6 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco/ms_rcnn_x101_64x4d_fpn_2x_coco_20200308-02a445e2.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco/ms_rcnn_x101_64x4d_fpn_2x_coco_20200308_012247.log.json) | diff --git a/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco.py b/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..3bd33c4 --- /dev/null +++ b/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = './ms_rcnn_r50_caffe_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet101_caffe', + backbone=dict(depth=101)) diff --git a/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco.py b/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco.py new file mode 100644 index 0000000..202bcce --- /dev/null +++ b/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './ms_rcnn_r101_caffe_fpn_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py b/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..5845125 --- /dev/null +++ b/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py' +model = dict( + type='MaskScoringRCNN', + roi_head=dict( + type='MaskScoringRoIHead', + mask_iou_head=dict( + type='MaskIoUHead', + num_convs=4, + num_fcs=2, + roi_feat_size=14, + in_channels=256, + conv_out_channels=256, + fc_out_channels=1024, + num_classes=80)), + # model training and testing settings + train_cfg=dict(rcnn=dict(mask_thr_binary=0.5))) diff --git a/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco.py b/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco.py new file mode 100644 index 0000000..008a70a --- /dev/null +++ b/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './ms_rcnn_r50_caffe_fpn_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/ms_rcnn/ms_rcnn_r50_fpn_1x_coco.py b/configs/ms_rcnn/ms_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..0a163ce --- /dev/null +++ b/configs/ms_rcnn/ms_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + type='MaskScoringRCNN', + roi_head=dict( + type='MaskScoringRoIHead', + mask_iou_head=dict( + type='MaskIoUHead', + num_convs=4, + num_fcs=2, + roi_feat_size=14, + in_channels=256, + conv_out_channels=256, + fc_out_channels=1024, + num_classes=80)), + # model training and testing settings + train_cfg=dict(rcnn=dict(mask_thr_binary=0.5))) diff --git a/configs/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco.py b/configs/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..4a78a25 --- /dev/null +++ b/configs/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './ms_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py b/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..61a0cef --- /dev/null +++ b/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './ms_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco.py b/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco.py new file mode 100644 index 0000000..54c605b --- /dev/null +++ b/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './ms_rcnn_x101_64x4d_fpn_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/nas_fcos/README.md b/configs/nas_fcos/README.md new file mode 100644 index 0000000..05ac996 --- /dev/null +++ b/configs/nas_fcos/README.md @@ -0,0 +1,25 @@ +# NAS-FCOS: Fast Neural Architecture Search for Object Detection + +## Introduction + +[ALGORITHM] + +```latex +@article{wang2019fcos, + title={Nas-fcos: Fast neural architecture search for object detection}, + author={Wang, Ning and Gao, Yang and Chen, Hao and Wang, Peng and Tian, Zhi and Shen, Chunhua}, + journal={arXiv preprint arXiv:1906.04423}, + year={2019} +} +``` + +## Results and Models + +| Head | Backbone | Style | GN-head | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:---------:|:---------:|:-------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| NAS-FCOSHead | R-50 | caffe | Y | 1x | | | 39.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200520-1bdba3ce.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200520.log.json) | +| FCOSHead | R-50 | caffe | Y | 1x | | | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200521-7fdcbce0.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200521.log.json) | + +**Notes:** + +- To be consistent with the author's implementation, we use 4 GPUs with 4 images/GPU. diff --git a/configs/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco.py b/configs/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco.py new file mode 100644 index 0000000..1910312 --- /dev/null +++ b/configs/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco.py @@ -0,0 +1,98 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict( + type='NASFCOS', + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False, eps=0), + style='caffe'), + neck=dict( + type='NASFCOS_FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs=True, + num_outs=5, + norm_cfg=dict(type='BN'), + conv_cfg=dict(type='DCNv2', deform_groups=2)), + bbox_head=dict( + type='FCOSHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + strides=[8, 16, 32, 64, 128], + norm_cfg=dict(type='GN', num_groups=32), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='IoULoss', loss_weight=1.0), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) + +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) + +optimizer = dict( + lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.)) diff --git a/configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py b/configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py new file mode 100644 index 0000000..ef81123 --- /dev/null +++ b/configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py @@ -0,0 +1,97 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict( + type='NASFCOS', + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False, eps=0), + style='caffe'), + neck=dict( + type='NASFCOS_FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs=True, + num_outs=5, + norm_cfg=dict(type='BN'), + conv_cfg=dict(type='DCNv2', deform_groups=2)), + bbox_head=dict( + type='NASFCOSHead', + num_classes=80, + in_channels=256, + feat_channels=256, + strides=[8, 16, 32, 64, 128], + norm_cfg=dict(type='GN', num_groups=32), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='IoULoss', loss_weight=1.0), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) + +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) + +optimizer = dict( + lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.)) diff --git a/configs/nas_fpn/README.md b/configs/nas_fpn/README.md new file mode 100644 index 0000000..81f25b2 --- /dev/null +++ b/configs/nas_fpn/README.md @@ -0,0 +1,26 @@ +# NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{ghiasi2019fpn, + title={Nas-fpn: Learning scalable feature pyramid architecture for object detection}, + author={Ghiasi, Golnaz and Lin, Tsung-Yi and Le, Quoc V}, + booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, + pages={7036--7045}, + year={2019} +} +``` + +## Results and Models + +We benchmark the new training schedule (crop training, large batch, unfrozen BN, 50 epochs) introduced in NAS-FPN. RetinaNet is used in the paper. + +| Backbone | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:-----------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50-FPN | 50e | 12.9 | 22.9 | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_fpn_crop640_50e_coco/retinanet_r50_fpn_crop640_50e_coco-9b953d76.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_fpn_crop640_50e_coco/retinanet_r50_fpn_crop640_50e_coco_20200529_095329.log.json) | +| R-50-NASFPN | 50e | 13.2 | 23.0 | 40.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco/retinanet_r50_nasfpn_crop640_50e_coco-0ad1f644.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco/retinanet_r50_nasfpn_crop640_50e_coco_20200528_230008.log.json) | + +**Note**: We find that it is unstable to train NAS-FPN and there is a small chance that results can be 3% mAP lower. diff --git a/configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py b/configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py new file mode 100644 index 0000000..d4c7c98 --- /dev/null +++ b/configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py @@ -0,0 +1,80 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py' +] +cudnn_benchmark = True +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch'), + neck=dict( + relu_before_extra_convs=True, + no_norm_on_lateral=True, + norm_cfg=norm_cfg), + bbox_head=dict(type='RetinaSepBNHead', num_ins=5, norm_cfg=norm_cfg), + # training and testing settings + train_cfg=dict(assigner=dict(neg_iou_thr=0.5))) +# dataset settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=(640, 640), + ratio_range=(0.8, 1.2), + keep_ratio=True), + dict(type='RandomCrop', crop_size=(640, 640)), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=(640, 640)), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(640, 640), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=64), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', + lr=0.08, + momentum=0.9, + weight_decay=0.0001, + paramwise_cfg=dict(norm_decay_mult=0, bypass_duplicate=True)) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.1, + step=[30, 40]) +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=50) diff --git a/configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py b/configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py new file mode 100644 index 0000000..8a2ef26 --- /dev/null +++ b/configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py @@ -0,0 +1,79 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py' +] +cudnn_benchmark = True +# model settings +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + type='RetinaNet', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch'), + neck=dict(type='NASFPN', stack_times=7, norm_cfg=norm_cfg), + bbox_head=dict(type='RetinaSepBNHead', num_ins=5, norm_cfg=norm_cfg), + # training and testing settings + train_cfg=dict(assigner=dict(neg_iou_thr=0.5))) +# dataset settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=(640, 640), + ratio_range=(0.8, 1.2), + keep_ratio=True), + dict(type='RandomCrop', crop_size=(640, 640)), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=(640, 640)), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(640, 640), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=128), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', + lr=0.08, + momentum=0.9, + weight_decay=0.0001, + paramwise_cfg=dict(norm_decay_mult=0, bypass_duplicate=True)) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.1, + step=[30, 40]) +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=50) diff --git a/configs/paa/README.md b/configs/paa/README.md new file mode 100644 index 0000000..9960dcf --- /dev/null +++ b/configs/paa/README.md @@ -0,0 +1,35 @@ +# Probabilistic Anchor Assignment with IoU Prediction for Object Detection + +[ALGORITHM] + +```latex +@inproceedings{paa-eccv2020, + title={Probabilistic Anchor Assignment with IoU Prediction for Object Detection}, + author={Kim, Kang and Lee, Hee Seok}, + booktitle = {ECCV}, + year={2020} +} +``` + +## Results and Models + +We provide config files to reproduce the object detection results in the +ECCV 2020 paper for Probabilistic Anchor Assignment with IoU +Prediction for Object Detection. + +| Backbone | Lr schd | Mem (GB) | Score voting | box AP | Config | Download | +|:-----------:|:-------:|:--------:|:------------:|:------:|:------:|:--------:| +| R-50-FPN | 12e | 3.7 | True | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1x_coco/paa_r50_fpn_1x_coco_20200821-936edec3.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1x_coco/paa_r50_fpn_1x_coco_20200821-936edec3.log.json) | +| R-50-FPN | 12e | 3.7 | False | 40.2 | - | +| R-50-FPN | 18e | 3.7 | True | 41.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r50_fpn_1.5x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1.5x_coco/paa_r50_fpn_1.5x_coco_20200823-805d6078.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1.5x_coco/paa_r50_fpn_1.5x_coco_20200823-805d6078.log.json) | +| R-50-FPN | 18e | 3.7 | False | 41.2 | - | +| R-50-FPN | 24e | 3.7 | True | 41.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r50_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_2x_coco/paa_r50_fpn_2x_coco_20200821-c98bfc4e.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_2x_coco/paa_r50_fpn_2x_coco_20200821-c98bfc4e.log.json) | +| R-50-FPN | 36e | 3.7 | True | 43.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r50_fpn_mstrain_3x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_mstrain_3x_coco/paa_r50_fpn_mstrain_3x_coco_20210121_145722-06a6880b.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_mstrain_3x_coco/paa_r50_fpn_mstrain_3x_coco_20210121_145722.log.json) | +| R-101-FPN | 12e | 6.2 | True | 42.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r101_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_1x_coco/paa_r101_fpn_1x_coco_20200821-0a1825a4.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_1x_coco/paa_r101_fpn_1x_coco_20200821-0a1825a4.log.json) | +| R-101-FPN | 12e | 6.2 | False | 42.4 | - | +| R-101-FPN | 24e | 6.2 | True | 43.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r101_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_2x_coco/paa_r101_fpn_2x_coco_20200821-6829f96b.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_2x_coco/paa_r101_fpn_2x_coco_20200821-6829f96b.log.json) | +| R-101-FPN | 36e | 6.2 | True | 45.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r101_fpn_mstrain_3x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_mstrain_3x_coco/paa_r101_fpn_mstrain_3x_coco_20210122_084202-83250d22.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_mstrain_3x_coco/paa_r101_fpn_mstrain_3x_coco_20210122_084202.log.json) | + +**Note**: + +1. We find that the performance is unstable with 1x setting and may fluctuate by about 0.2 mAP. We report the best results. diff --git a/configs/paa/paa_r101_fpn_1x_coco.py b/configs/paa/paa_r101_fpn_1x_coco.py new file mode 100644 index 0000000..9d2b1a6 --- /dev/null +++ b/configs/paa/paa_r101_fpn_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './paa_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/paa/paa_r101_fpn_2x_coco.py b/configs/paa/paa_r101_fpn_2x_coco.py new file mode 100644 index 0000000..641ef76 --- /dev/null +++ b/configs/paa/paa_r101_fpn_2x_coco.py @@ -0,0 +1,3 @@ +_base_ = './paa_r101_fpn_1x_coco.py' +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/paa/paa_r101_fpn_mstrain_3x_coco.py b/configs/paa/paa_r101_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..6f23df7 --- /dev/null +++ b/configs/paa/paa_r101_fpn_mstrain_3x_coco.py @@ -0,0 +1,2 @@ +_base_ = './paa_r50_fpn_mstrain_3x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/paa/paa_r50_fpn_1.5x_coco.py b/configs/paa/paa_r50_fpn_1.5x_coco.py new file mode 100644 index 0000000..aabce4a --- /dev/null +++ b/configs/paa/paa_r50_fpn_1.5x_coco.py @@ -0,0 +1,3 @@ +_base_ = './paa_r50_fpn_1x_coco.py' +lr_config = dict(step=[12, 16]) +runner = dict(type='EpochBasedRunner', max_epochs=18) diff --git a/configs/paa/paa_r50_fpn_1x_coco.py b/configs/paa/paa_r50_fpn_1x_coco.py new file mode 100644 index 0000000..cd84410 --- /dev/null +++ b/configs/paa/paa_r50_fpn_1x_coco.py @@ -0,0 +1,70 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + type='PAA', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5), + bbox_head=dict( + type='PAAHead', + reg_decoded_bbox=True, + score_voting=True, + topk=9, + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + octave_base_scale=8, + scales_per_octave=1, + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=1.3), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.1, + neg_iou_thr=0.1, + min_pos_iou=0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/paa/paa_r50_fpn_2x_coco.py b/configs/paa/paa_r50_fpn_2x_coco.py new file mode 100644 index 0000000..663d2c0 --- /dev/null +++ b/configs/paa/paa_r50_fpn_2x_coco.py @@ -0,0 +1,3 @@ +_base_ = './paa_r50_fpn_1x_coco.py' +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/paa/paa_r50_fpn_mstrain_3x_coco.py b/configs/paa/paa_r50_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..91fa28c --- /dev/null +++ b/configs/paa/paa_r50_fpn_mstrain_3x_coco.py @@ -0,0 +1,20 @@ +_base_ = './paa_r50_fpn_1x_coco.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +data = dict(train=dict(pipeline=train_pipeline)) +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/pafpn/README.md b/configs/pafpn/README.md new file mode 100644 index 0000000..03227e2 --- /dev/null +++ b/configs/pafpn/README.md @@ -0,0 +1,26 @@ +# Path Aggregation Network for Instance Segmentation + +## Introduction + +[ALGORITHM] + +``` +@inproceedings{liu2018path, + author = {Shu Liu and + Lu Qi and + Haifang Qin and + Jianping Shi and + Jiaya Jia}, + title = {Path Aggregation Network for Instance Segmentation}, + booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2018} +} +``` + +## Results and Models + +## Results and Models + +| Backbone | style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:-------------:|:----------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| +| R-50-FPN | pytorch | 1x | 4.0 | 17.2 | 37.5 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/pafpn/faster_rcnn_r50_pafpn_1x_coco/faster_rcnn_r50_pafpn_1x_coco_bbox_mAP-0.375_20200503_105836-b7b4b9bd.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/pafpn/faster_rcnn_r50_pafpn_1x_coco/faster_rcnn_r50_pafpn_1x_coco_20200503_105836.log.json) | diff --git a/configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py b/configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py new file mode 100644 index 0000000..b2fdef9 --- /dev/null +++ b/configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' + +model = dict( + neck=dict( + type='PAFPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5)) diff --git a/configs/pascal_voc/README.md b/configs/pascal_voc/README.md new file mode 100644 index 0000000..f730242 --- /dev/null +++ b/configs/pascal_voc/README.md @@ -0,0 +1,23 @@ +# PASCAL VOC Dataset + +[DATASET] + +``` +@Article{Everingham10, + author = "Everingham, M. and Van~Gool, L. and Williams, C. K. I. and Winn, J. and Zisserman, A.", + title = "The Pascal Visual Object Classes (VOC) Challenge", + journal = "International Journal of Computer Vision", + volume = "88", + year = "2010", + number = "2", + month = jun, + pages = "303--338", +} +``` + +## Results and Models + +| Architecture | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:------------:|:---------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| Faster R-CNN | R-50 | pytorch | 1x | 2.6 | - | 79.5 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712/faster_rcnn_r50_fpn_1x_voc0712_20200624-c9895d40.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712/20200623_015208.log.json) | +| Retinanet | R-50 | pytorch | 1x | 2.1 | - | 77.3 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pascal_voc/retinanet_r50_fpn_1x_voc0712.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/pascal_voc/retinanet_r50_fpn_1x_voc0712/retinanet_r50_fpn_1x_voc0712_20200617-47cbdd0e.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/pascal_voc/retinanet_r50_fpn_1x_voc0712/retinanet_r50_fpn_1x_voc0712_20200616_014642.log.json) | diff --git a/configs/pascal_voc/faster_rcnn_r18_fpn_1x_voc0712.py b/configs/pascal_voc/faster_rcnn_r18_fpn_1x_voc0712.py new file mode 100644 index 0000000..02973e8 --- /dev/null +++ b/configs/pascal_voc/faster_rcnn_r18_fpn_1x_voc0712.py @@ -0,0 +1,14 @@ +_base_ = [ + "../_base_/models/faster_rcnn_r50_fpn.py", + "../_base_/datasets/voc0712.py", + "../_base_/default_runtime.py", +] +model = dict(roi_head=dict(bbox_head=dict(num_classes=20))) +# optimizer +optimizer = dict(type="SGD", lr=0.01, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +# actual epoch = 3 * 3 = 9 +lr_config = dict(policy="step", step=[3]) +# runtime settings +total_epochs = 4 # actual epoch = 4 * 3 = 12 diff --git a/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py b/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py new file mode 100644 index 0000000..7866ace --- /dev/null +++ b/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py @@ -0,0 +1,14 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', '../_base_/datasets/voc0712.py', + '../_base_/default_runtime.py' +] +model = dict(roi_head=dict(bbox_head=dict(num_classes=20))) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +# actual epoch = 3 * 3 = 9 +lr_config = dict(policy='step', step=[3]) +# runtime settings +runner = dict( + type='EpochBasedRunner', max_epochs=4) # actual epoch = 4 * 3 = 12 diff --git a/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712_cocofmt.py b/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712_cocofmt.py new file mode 100644 index 0000000..12eee2c --- /dev/null +++ b/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712_cocofmt.py @@ -0,0 +1,75 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', '../_base_/datasets/voc0712.py', + '../_base_/default_runtime.py' +] +model = dict(roi_head=dict(bbox_head=dict(num_classes=20))) + +CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', + 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', + 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor') + +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/VOCdevkit/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1000, 600), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1000, 600), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=3, + dataset=dict( + type=dataset_type, + ann_file='data/voc0712_trainval.json', + img_prefix='data/VOCdevkit', + pipeline=train_pipeline, + classes=CLASSES)), + val=dict( + type=dataset_type, + ann_file='data/voc07_test.json', + img_prefix='data/VOCdevkit', + pipeline=test_pipeline, + classes=CLASSES), + test=dict( + type=dataset_type, + ann_file='data/voc07_test.json', + img_prefix='data/VOCdevkit', + pipeline=test_pipeline, + classes=CLASSES)) +evaluation = dict(interval=1, metric='bbox') + +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +# actual epoch = 3 * 3 = 9 +lr_config = dict(policy='step', step=[3]) +# runtime settings +runner = dict( + type='EpochBasedRunner', max_epochs=4) # actual epoch = 4 * 3 = 12 diff --git a/configs/pascal_voc/retinanet_r50_fpn_1x_voc0712.py b/configs/pascal_voc/retinanet_r50_fpn_1x_voc0712.py new file mode 100644 index 0000000..b4b050d --- /dev/null +++ b/configs/pascal_voc/retinanet_r50_fpn_1x_voc0712.py @@ -0,0 +1,14 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', '../_base_/datasets/voc0712.py', + '../_base_/default_runtime.py' +] +model = dict(bbox_head=dict(num_classes=20)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +# actual epoch = 3 * 3 = 9 +lr_config = dict(policy='step', step=[3]) +# runtime settings +runner = dict( + type='EpochBasedRunner', max_epochs=4) # actual epoch = 4 * 3 = 12 diff --git a/configs/pascal_voc/ssd300_voc0712.py b/configs/pascal_voc/ssd300_voc0712.py new file mode 100644 index 0000000..271ebe3 --- /dev/null +++ b/configs/pascal_voc/ssd300_voc0712.py @@ -0,0 +1,69 @@ +_base_ = [ + '../_base_/models/ssd300.py', '../_base_/datasets/voc0712.py', + '../_base_/default_runtime.py' +] +model = dict( + bbox_head=dict( + num_classes=20, anchor_generator=dict(basesize_ratio_range=(0.2, + 0.9)))) +# dataset settings +dataset_type = 'VOCDataset' +data_root = 'data/VOCdevkit/' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(300, 300), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(300, 300), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=3, + train=dict( + type='RepeatDataset', times=10, dataset=dict(pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4) +optimizer_config = dict() +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[16, 20]) +checkpoint_config = dict(interval=1) +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/pascal_voc/ssd512_voc0712.py b/configs/pascal_voc/ssd512_voc0712.py new file mode 100644 index 0000000..365a65f --- /dev/null +++ b/configs/pascal_voc/ssd512_voc0712.py @@ -0,0 +1,53 @@ +_base_ = 'ssd300_voc0712.py' +input_size = 512 +model = dict( + backbone=dict(input_size=input_size), + bbox_head=dict( + in_channels=(512, 1024, 512, 256, 256, 256, 256), + anchor_generator=dict( + input_size=input_size, + strides=[8, 16, 32, 64, 128, 256, 512], + basesize_ratio_range=(0.15, 0.9), + ratios=([2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2])))) +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(512, 512), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(dataset=dict(pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/pisa/README.md b/configs/pisa/README.md new file mode 100644 index 0000000..2ab689e --- /dev/null +++ b/configs/pisa/README.md @@ -0,0 +1,40 @@ +# Prime Sample Attention in Object Detection + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{cao2019prime, + title={Prime sample attention in object detection}, + author={Cao, Yuhang and Chen, Kai and Loy, Chen Change and Lin, Dahua}, + booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, + year={2020} +} +``` + +## Results and models + +| PISA | Network | Backbone | Lr schd | box AP | mask AP | Config | Download | +|:----:|:-------:|:-------------------:|:-------:|:------:|:-------:|:------:|:--------:| +| × | Faster R-CNN | R-50-FPN | 1x | 36.4 | | - | +| √ | Faster R-CNN | R-50-FPN | 1x | 38.4 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_faster_rcnn_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_r50_fpn_1x_coco/pisa_faster_rcnn_r50_fpn_1x_coco-dea93523.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_r50_fpn_1x_coco/pisa_faster_rcnn_r50_fpn_1x_coco_20200506_185619.log.json) | +| × | Faster R-CNN | X101-32x4d-FPN | 1x | 40.1 | | - | +| √ | Faster R-CNN | X101-32x4d-FPN | 1x | 41.9 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco-e4accec4.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco_20200505_181503.log.json) | +| × | Mask R-CNN | R-50-FPN | 1x | 37.3 | 34.2 | - | +| √ | Mask R-CNN | R-50-FPN | 1x | 39.1 | 35.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_mask_rcnn_r50_fpn_1x_coco/pisa_mask_rcnn_r50_fpn_1x_coco-dfcedba6.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_mask_rcnn_r50_fpn_1x_coco/pisa_mask_rcnn_r50_fpn_1x_coco_20200508_150500.log.json) | +| × | Mask R-CNN | X101-32x4d-FPN | 1x | 41.1 | 37.1 | - | +| √ | Mask R-CNN | X101-32x4d-FPN | 1x | | | | +| × | RetinaNet | R-50-FPN | 1x | 35.6 | | - | +| √ | RetinaNet | R-50-FPN | 1x | 36.9 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_retinanet_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_r50_fpn_1x_coco/pisa_retinanet_r50_fpn_1x_coco-76409952.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_r50_fpn_1x_coco/pisa_retinanet_r50_fpn_1x_coco_20200504_014311.log.json) | +| × | RetinaNet | X101-32x4d-FPN | 1x | 39.0 | | - | +| √ | RetinaNet | X101-32x4d-FPN | 1x | 40.7 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco/pisa_retinanet_x101_32x4d_fpn_1x_coco-a0c13c73.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco/pisa_retinanet_x101_32x4d_fpn_1x_coco_20200505_001404.log.json) | +| × | SSD300 | VGG16 | 1x | 25.6 | | - | +| √ | SSD300 | VGG16 | 1x | 27.6 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_ssd300_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd300_coco/pisa_ssd300_coco-710e3ac9.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd300_coco/pisa_ssd300_coco_20200504_144325.log.json) | +| × | SSD300 | VGG16 | 1x | 29.3 | | - | +| √ | SSD300 | VGG16 | 1x | 31.8 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_ssd512_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd512_coco/pisa_ssd512_coco-247addee.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd512_coco/pisa_ssd512_coco_20200508_131030.log.json) | + +**Notes:** + +- In the original paper, all models are trained and tested on mmdet v1.x, thus results may not be exactly the same with this release on v2.0. +- It is noted PISA only modifies the training pipeline so the inference time remains the same with the baseline. diff --git a/configs/pisa/pisa_faster_rcnn_r50_fpn_1x_coco.py b/configs/pisa/pisa_faster_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..71e65b0 --- /dev/null +++ b/configs/pisa/pisa_faster_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,30 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' + +model = dict( + roi_head=dict( + type='PISARoIHead', + bbox_head=dict( + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))), + train_cfg=dict( + rpn_proposal=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + sampler=dict( + type='ScoreHLRSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + k=0.5, + bias=0.), + isr=dict(k=2, bias=0), + carl=dict(k=1, bias=0.2))), + test_cfg=dict( + rpn=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0))) diff --git a/configs/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco.py b/configs/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..16edd99 --- /dev/null +++ b/configs/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,30 @@ +_base_ = '../faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py' + +model = dict( + roi_head=dict( + type='PISARoIHead', + bbox_head=dict( + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))), + train_cfg=dict( + rpn_proposal=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + sampler=dict( + type='ScoreHLRSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + k=0.5, + bias=0.), + isr=dict(k=2, bias=0), + carl=dict(k=1, bias=0.2))), + test_cfg=dict( + rpn=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0))) diff --git a/configs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py b/configs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..047a293 --- /dev/null +++ b/configs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,30 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' + +model = dict( + roi_head=dict( + type='PISARoIHead', + bbox_head=dict( + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))), + train_cfg=dict( + rpn_proposal=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + sampler=dict( + type='ScoreHLRSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + k=0.5, + bias=0.), + isr=dict(k=2, bias=0), + carl=dict(k=1, bias=0.2))), + test_cfg=dict( + rpn=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0))) diff --git a/configs/pisa/pisa_mask_rcnn_x101_32x4d_fpn_1x_coco.py b/configs/pisa/pisa_mask_rcnn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..2186a8f --- /dev/null +++ b/configs/pisa/pisa_mask_rcnn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,30 @@ +_base_ = '../mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py' + +model = dict( + roi_head=dict( + type='PISARoIHead', + bbox_head=dict( + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))), + train_cfg=dict( + rpn_proposal=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + sampler=dict( + type='ScoreHLRSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + k=0.5, + bias=0.), + isr=dict(k=2, bias=0), + carl=dict(k=1, bias=0.2))), + test_cfg=dict( + rpn=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0))) diff --git a/configs/pisa/pisa_retinanet_r50_fpn_1x_coco.py b/configs/pisa/pisa_retinanet_r50_fpn_1x_coco.py new file mode 100644 index 0000000..70f89e2 --- /dev/null +++ b/configs/pisa/pisa_retinanet_r50_fpn_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py' + +model = dict( + bbox_head=dict( + type='PISARetinaHead', + loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)), + train_cfg=dict(isr=dict(k=2., bias=0.), carl=dict(k=1., bias=0.2))) diff --git a/configs/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco.py b/configs/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..b97b672 --- /dev/null +++ b/configs/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = '../retinanet/retinanet_x101_32x4d_fpn_1x_coco.py' + +model = dict( + bbox_head=dict( + type='PISARetinaHead', + loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)), + train_cfg=dict(isr=dict(k=2., bias=0.), carl=dict(k=1., bias=0.2))) diff --git a/configs/pisa/pisa_ssd300_coco.py b/configs/pisa/pisa_ssd300_coco.py new file mode 100644 index 0000000..b5cc006 --- /dev/null +++ b/configs/pisa/pisa_ssd300_coco.py @@ -0,0 +1,8 @@ +_base_ = '../ssd/ssd300_coco.py' + +model = dict( + bbox_head=dict(type='PISASSDHead'), + train_cfg=dict(isr=dict(k=2., bias=0.), carl=dict(k=1., bias=0.2))) + +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/pisa/pisa_ssd512_coco.py b/configs/pisa/pisa_ssd512_coco.py new file mode 100644 index 0000000..3219d6d --- /dev/null +++ b/configs/pisa/pisa_ssd512_coco.py @@ -0,0 +1,8 @@ +_base_ = '../ssd/ssd512_coco.py' + +model = dict( + bbox_head=dict(type='PISASSDHead'), + train_cfg=dict(isr=dict(k=2., bias=0.), carl=dict(k=1., bias=0.2))) + +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/point_rend/README.md b/configs/point_rend/README.md new file mode 100644 index 0000000..af5ded1 --- /dev/null +++ b/configs/point_rend/README.md @@ -0,0 +1,23 @@ +# PointRend + +## Introduction + +[ALGORITHM] + +```latex +@InProceedings{kirillov2019pointrend, + title={{PointRend}: Image Segmentation as Rendering}, + author={Alexander Kirillov and Yuxin Wu and Kaiming He and Ross Girshick}, + journal={ArXiv:1912.08193}, + year={2019} +} +``` + +## Results and models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +| R-50-FPN | caffe | 1x | 4.6 | | 38.4 | 36.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco/point_rend_r50_caffe_fpn_mstrain_1x_coco-1bcb5fb4.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco/point_rend_r50_caffe_fpn_mstrain_1x_coco_20200612_161407.log.json) | +| R-50-FPN | caffe | 3x | 4.6 | | 41.0 | 38.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco/point_rend_r50_caffe_fpn_mstrain_3x_coco-e0ebb6b7.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco/point_rend_r50_caffe_fpn_mstrain_3x_coco_20200614_002632.log.json) | + +Note: All models are trained with multi-scale, the input image shorter side is randomly scaled to one of (640, 672, 704, 736, 768, 800). diff --git a/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py b/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py new file mode 100644 index 0000000..0c0e563 --- /dev/null +++ b/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py @@ -0,0 +1,44 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain_1x_coco.py' +# model settings +model = dict( + type='PointRend', + roi_head=dict( + type='PointRendRoIHead', + mask_roi_extractor=dict( + type='GenericRoIExtractor', + aggregation='concat', + roi_layer=dict( + _delete_=True, type='SimpleRoIAlign', output_size=14), + out_channels=256, + featmap_strides=[4]), + mask_head=dict( + _delete_=True, + type='CoarseMaskHead', + num_fcs=2, + in_channels=256, + conv_out_channels=256, + fc_out_channels=1024, + num_classes=80, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), + point_head=dict( + type='MaskPointHead', + num_fcs=3, + in_channels=256, + fc_channels=256, + num_classes=80, + coarse_pred_each_layer=True, + loss_point=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rcnn=dict( + mask_size=7, + num_points=14 * 14, + oversample_ratio=3, + importance_sample_ratio=0.75)), + test_cfg=dict( + rcnn=dict( + subdivision_steps=5, + subdivision_num_points=28 * 28, + scale_factor=2))) diff --git a/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco.py b/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..169278e --- /dev/null +++ b/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco.py @@ -0,0 +1,4 @@ +_base_ = './point_rend_r50_caffe_fpn_mstrain_1x_coco.py' +# learning policy +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/regnet/README.md b/configs/regnet/README.md new file mode 100644 index 0000000..0ccd407 --- /dev/null +++ b/configs/regnet/README.md @@ -0,0 +1,96 @@ +# Designing Network Design Spaces + +## Introduction + +[BACKBONE] + +We implement RegNetX and RegNetY models in detection systems and provide their first results on Mask R-CNN, Faster R-CNN and RetinaNet. + +The pre-trained modles are converted from [model zoo of pycls](https://github.com/facebookresearch/pycls/blob/master/MODEL_ZOO.md). + +```latex +@article{radosavovic2020designing, + title={Designing Network Design Spaces}, + author={Ilija Radosavovic and Raj Prateek Kosaraju and Ross Girshick and Kaiming He and Piotr Dollár}, + year={2020}, + eprint={2003.13678}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +## Usage + +To use a regnet model, there are two steps to do: + +1. Convert the model to ResNet-style supported by MMDetection +2. Modify backbone and neck in config accordingly + +### Convert model + +We already prepare models of FLOPs from 400M to 12G in our model zoo. + +For more general usage, we also provide script `regnet2mmdet.py` in the tools directory to convert the key of models pretrained by [pycls](https://github.com/facebookresearch/pycls/) to +ResNet-style checkpoints used in MMDetection. + +```bash +python -u tools/model_converters/regnet2mmdet.py ${PRETRAIN_PATH} ${STORE_PATH} +``` + +This script convert model from `PRETRAIN_PATH` and store the converted model in `STORE_PATH`. + +### Modify config + +The users can modify the config's `depth` of backbone and corresponding keys in `arch` according to the configs in the [pycls model zoo](https://github.com/facebookresearch/pycls/blob/master/MODEL_ZOO.md). +The parameter `in_channels` in FPN can be found in the Figure 15 & 16 of the paper (`wi` in the legend). +This directory already provides some configs with their performance, using RegNetX from 800MF to 12GF level. +For other pre-trained models or self-implemented regnet models, the users are responsible to check these parameters by themselves. + +**Note**: Although Fig. 15 & 16 also provide `w0`, `wa`, `wm`, `group_w`, and `bot_mul` for `arch`, they are quantized thus inaccurate, using them sometimes produces different backbone that does not match the key in the pre-trained model. + +## Results + +### Mask R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :---------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +| [R-50-FPN](../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py)| pytorch | 1x | 4.4 | 12.0 | 38.2 | 34.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205_050542.log.json) | +|[RegNetX-3.2GF-FPN](./mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py)| pytorch | 1x |5.0 ||40.3|36.6|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_1x_coco_20200520_163141-2a9d1814.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_1x_coco_20200520_163141.log.json) | +|[RegNetX-4.0GF-FPN](./mask_rcnn_regnetx-4GF_fpn_1x_coco.py)| pytorch | 1x |5.5||41.5|37.4|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco/mask_rcnn_regnetx-4GF_fpn_1x_coco_20200517_180217-32e9c92d.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco/mask_rcnn_regnetx-4GF_fpn_1x_coco_20200517_180217.log.json) | +| [R-101-FPN](../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py)| pytorch | 1x | 6.4 | 10.3 | 40.0 | 36.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_1x_coco/mask_rcnn_r101_fpn_1x_coco_20200204-1efe0ed5.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_1x_coco/mask_rcnn_r101_fpn_1x_coco_20200204_144809.log.json) | +|[RegNetX-6.4GF-FPN](./mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py)| pytorch | 1x |6.1 ||41.0|37.1|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco/mask_rcnn_regnetx-6.4GF_fpn_1x_coco_20200517_180439-3a7aae83.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco/mask_rcnn_regnetx-6.4GF_fpn_1x_coco_20200517_180439.log.json) | +| [X-101-32x4d-FPN](../mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py) | pytorch | 1x | 7.6 | 9.4 | 41.9 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205-478d0b67.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205_034906.log.json) | +|[RegNetX-8.0GF-FPN](./mask_rcnn_regnetx-8GF_fpn_1x_coco.py)| pytorch | 1x |6.4 ||41.7|37.5|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco/mask_rcnn_regnetx-8GF_fpn_1x_coco_20200517_180515-09daa87e.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco/mask_rcnn_regnetx-8GF_fpn_1x_coco_20200517_180515.log.json) | +|[RegNetX-12GF-FPN](./mask_rcnn_regnetx-12GF_fpn_1x_coco.py)| pytorch | 1x |7.4 ||42.2|38|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco/mask_rcnn_regnetx-12GF_fpn_1x_coco_20200517_180552-b538bd8b.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco/mask_rcnn_regnetx-12GF_fpn_1x_coco_20200517_180552.log.json) | +|[RegNetX-3.2GF-FPN-DCN-C3-C5](./mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py)| pytorch | 1x |5.0 ||40.3|36.6|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco_20200520_172726-75f40794.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco_20200520_172726.log.json) | + +### Faster R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :---------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +| [R-50-FPN](../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py)| pytorch | 1x | 4.0 | 18.2 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | +|[RegNetX-3.2GF-FPN](./faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py)| pytorch | 1x | 4.5||39.9|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco/faster_rcnn_regnetx-3.2GF_fpn_1x_coco_20200517_175927-126fd9bf.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco/faster_rcnn_regnetx-3.2GF_fpn_1x_coco_20200517_175927.log.json) | +|[RegNetX-3.2GF-FPN](./faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py)| pytorch | 2x | 4.5||41.1|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco/faster_rcnn_regnetx-3.2GF_fpn_2x_coco_20200520_223955-e2081918.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco/faster_rcnn_regnetx-3.2GF_fpn_2x_coco_20200520_223955.log.json) | + +### RetinaNet + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :---------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +| [R-50-FPN](../retinanet/retinanet_r50_fpn_1x_coco.py) | pytorch | 1x | 3.8 | 16.6 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_1x_coco/retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_1x_coco/retinanet_r50_fpn_1x_coco_20200130_002941.log.json) | +|[RegNetX-800MF-FPN](./retinanet_regnetx-800MF_fpn_1x_coco.py)| pytorch | 1x |2.5||35.6|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/retinanet_regnetx-800MF_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-800MF_fpn_1x_coco/retinanet_regnetx-800MF_fpn_1x_coco_20200517_191403-f6f91d10.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-800MF_fpn_1x_coco/retinanet_regnetx-800MF_fpn_1x_coco_20200517_191403.log.json) | +|[RegNetX-1.6GF-FPN](./retinanet_regnetx-1.6GF_fpn_1x_coco.py)| pytorch | 1x |3.3||37.3|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco/retinanet_regnetx-1.6GF_fpn_1x_coco_20200517_191403-37009a9d.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco/retinanet_regnetx-1.6GF_fpn_1x_coco_20200517_191403.log.json) | +|[RegNetX-3.2GF-FPN](./retinanet_regnetx-3.2GF_fpn_1x_coco.py)| pytorch | 1x |4.2 ||39.1|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco/retinanet_regnetx-3.2GF_fpn_1x_coco_20200520_163141-cb1509e8.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco/retinanet_regnetx-3.2GF_fpn_1x_coco_20200520_163141.log.json) | + +### Pre-trained models + +We also train some models with longer schedules and multi-scale training. The users could finetune them for downstream tasks. + +| Method | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-----: | :-----: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +|Faster RCNN |[RegNetX-3.2GF-FPN](./faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py)| pytorch | 3x |5.0 ||42.2|-|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200520_224253-bf85ae3e.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200520_224253.log.json) | +|Mask RCNN |[RegNetX-3.2GF-FPN](./mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py)| pytorch | 3x |5.0 ||43.1|38.7|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200521_202221-99879813.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200521_202221.log.json) | + +### Notice + +1. The models are trained using a different weight decay, i.e., `weight_decay=5e-5` according to the setting in ImageNet training. This brings improvement of at least 0.7 AP absolute but does not improve the model using ResNet-50. +2. RetinaNets using RegNets are trained with learning rate 0.02 with gradient clip. We find that using learning rate 0.02 could improve the results by at least 0.7 AP absolute and gradient clip is necessary to stabilize the training. However, this does not improve the performance of ResNet-50-FPN RetinaNet. diff --git a/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py b/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py new file mode 100644 index 0000000..4fc61a3 --- /dev/null +++ b/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py @@ -0,0 +1,56 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + pretrained='open-mmlab://regnetx_3.2gf', + backbone=dict( + _delete_=True, + type='RegNet', + arch='regnetx_3.2gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[96, 192, 432, 1008], + out_channels=256, + num_outs=5)) +img_norm_cfg = dict( + # The mean and std are used in PyCls when training RegNets + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005) diff --git a/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py b/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py new file mode 100644 index 0000000..612490b --- /dev/null +++ b/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py @@ -0,0 +1,3 @@ +_base_ = './faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py' +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py b/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..e73a098 --- /dev/null +++ b/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py @@ -0,0 +1,63 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + pretrained='open-mmlab://regnetx_3.2gf', + backbone=dict( + _delete_=True, + type='RegNet', + arch='regnetx_3.2gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[96, 192, 432, 1008], + out_channels=256, + num_outs=5)) +img_norm_cfg = dict( + # The mean and std are used in PyCls when training RegNets + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005) +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco.py b/configs/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco.py new file mode 100644 index 0000000..104d6d4 --- /dev/null +++ b/configs/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = './mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://regnetx_12gf', + backbone=dict( + type='RegNet', + arch='regnetx_12gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[224, 448, 896, 2240], + out_channels=256, + num_outs=5)) diff --git a/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py b/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py new file mode 100644 index 0000000..19168b5 --- /dev/null +++ b/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py @@ -0,0 +1,57 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + pretrained='open-mmlab://regnetx_3.2gf', + backbone=dict( + _delete_=True, + type='RegNet', + arch='regnetx_3.2gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[96, 192, 432, 1008], + out_channels=256, + num_outs=5)) +img_norm_cfg = dict( + # The mean and std are used in PyCls when training RegNets + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + to_rgb=False) +train_pipeline = [ + # Images are converted to float32 directly after loading in PyCls + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005) diff --git a/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py b/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..dd5153e --- /dev/null +++ b/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = 'mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://regnetx_3.2gf', + backbone=dict( + dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py b/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..e4107e7 --- /dev/null +++ b/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py @@ -0,0 +1,65 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + pretrained='open-mmlab://regnetx_3.2gf', + backbone=dict( + _delete_=True, + type='RegNet', + arch='regnetx_3.2gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[96, 192, 432, 1008], + out_channels=256, + num_outs=5)) +img_norm_cfg = dict( + # The mean and std are used in PyCls when training RegNets + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005) +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco.py b/configs/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco.py new file mode 100644 index 0000000..8830ef0 --- /dev/null +++ b/configs/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = './mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://regnetx_4.0gf', + backbone=dict( + type='RegNet', + arch='regnetx_4.0gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[80, 240, 560, 1360], + out_channels=256, + num_outs=5)) diff --git a/configs/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py b/configs/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py new file mode 100644 index 0000000..7569ef3 --- /dev/null +++ b/configs/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = './mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://regnetx_6.4gf', + backbone=dict( + type='RegNet', + arch='regnetx_6.4gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[168, 392, 784, 1624], + out_channels=256, + num_outs=5)) diff --git a/configs/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco.py b/configs/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco.py new file mode 100644 index 0000000..b589026 --- /dev/null +++ b/configs/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = './mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://regnetx_8.0gf', + backbone=dict( + type='RegNet', + arch='regnetx_8.0gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[80, 240, 720, 1920], + out_channels=256, + num_outs=5)) diff --git a/configs/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco.py b/configs/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco.py new file mode 100644 index 0000000..4f2beb8 --- /dev/null +++ b/configs/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = './retinanet_regnetx-3.2GF_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://regnetx_1.6gf', + backbone=dict( + type='RegNet', + arch='regnetx_1.6gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[72, 168, 408, 912], + out_channels=256, + num_outs=5)) diff --git a/configs/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco.py b/configs/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco.py new file mode 100644 index 0000000..8f483a1 --- /dev/null +++ b/configs/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco.py @@ -0,0 +1,58 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + pretrained='open-mmlab://regnetx_3.2gf', + backbone=dict( + _delete_=True, + type='RegNet', + arch='regnetx_3.2gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[96, 192, 432, 1008], + out_channels=256, + num_outs=5)) +img_norm_cfg = dict( + # The mean and std are used in PyCls when training RegNets + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/regnet/retinanet_regnetx-800MF_fpn_1x_coco.py b/configs/regnet/retinanet_regnetx-800MF_fpn_1x_coco.py new file mode 100644 index 0000000..fe1d659 --- /dev/null +++ b/configs/regnet/retinanet_regnetx-800MF_fpn_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = './retinanet_regnetx-3.2GF_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://regnetx_800mf', + backbone=dict( + type='RegNet', + arch='regnetx_800mf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[64, 128, 288, 672], + out_channels=256, + num_outs=5)) diff --git a/configs/reppoints/README.md b/configs/reppoints/README.md new file mode 100644 index 0000000..2ab22cd --- /dev/null +++ b/configs/reppoints/README.md @@ -0,0 +1,54 @@ +# RepPoints: Point Set Representation for Object Detection + +By [Ze Yang](https://yangze.tech/), [Shaohui Liu](http://b1ueber2y.me/), and [Han Hu](https://ancientmooner.github.io/). + +We provide code support and configuration files to reproduce the results in the paper for +["RepPoints: Point Set Representation for Object Detection"](https://arxiv.org/abs/1904.11490) on COCO object detection. + +## Introduction + +[ALGORITHM] + +**RepPoints**, initially described in [arXiv](https://arxiv.org/abs/1904.11490), is a new representation method for visual objects, on which visual understanding tasks are typically centered. Visual object representation, aiming at both geometric description and appearance feature extraction, is conventionally achieved by `bounding box + RoIPool (RoIAlign)`. The bounding box representation is convenient to use; however, it provides only a rectangular localization of objects that lacks geometric precision and may consequently degrade feature quality. Our new representation, RepPoints, models objects by a `point set` instead of a `bounding box`, which learns to adaptively position themselves over an object in a manner that circumscribes the object’s `spatial extent` and enables `semantically aligned feature extraction`. This richer and more flexible representation maintains the convenience of bounding boxes while facilitating various visual understanding applications. This repo demonstrated the effectiveness of RepPoints for COCO object detection. + +Another feature of this repo is the demonstration of an `anchor-free detector`, which can be as effective as state-of-the-art anchor-based detection methods. The anchor-free detector can utilize either `bounding box` or `RepPoints` as the basic object representation. + +
+ +

Learning RepPoints in Object Detection.

+
+ +## Citing RepPoints + +``` +@inproceedings{yang2019reppoints, + title={RepPoints: Point Set Representation for Object Detection}, + author={Yang, Ze and Liu, Shaohui and Hu, Han and Wang, Liwei and Lin, Stephen}, + booktitle={The IEEE International Conference on Computer Vision (ICCV)}, + month={Oct}, + year={2019} +} +``` + +## Results and models + +The results on COCO 2017val are shown in the table below. + +| Method | Backbone | GN | Anchor | convert func | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:---------:|:-------------:|:---:|:------:|:------------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| BBox | R-50-FPN | Y | single | - | 1x | 3.9 | 15.9 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/bbox_r50_grid_fpn_gn-neck+head_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/reppoints/bbox_r50_grid_fpn_gn-neck%2Bhead_1x_coco/bbox_r50_grid_fpn_gn-neck%2Bhead_1x_coco_20200329-c98bfa96.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/reppoints/bbox_r50_grid_fpn_gn-neck%2Bhead_1x_coco/bbox_r50_grid_fpn_gn-neck%2Bhead_1x_coco_20200329_145916.log.json) | +| BBox | R-50-FPN | Y | none | - | 1x | 3.9 | 15.4 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/bbox_r50_grid_center_fpn_gn-neck+Bhead_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/reppoints/bbox_r50_grid_center_fpn_gn-neck%2Bhead_1x_coco/bbox_r50_grid_center_fpn_gn-neck%2Bhead_1x_coco_20200330-00f73d58.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/reppoints/bbox_r50_grid_center_fpn_gn-neck%2Bhead_1x_coco/bbox_r50_grid_center_fpn_gn-neck%2Bhead_1x_coco_20200330_233609.log.json) | +| RepPoints | R-50-FPN | N | none | moment | 1x | 3.3 | 18.5 | 37.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_1x_coco/reppoints_moment_r50_fpn_1x_coco_20200330-b73db8d1.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_1x_coco/reppoints_moment_r50_fpn_1x_coco_20200330_233609.log.json) | +| RepPoints | R-50-FPN | Y | none | moment | 1x | 3.9 | 17.5 | 38.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/reppoints_moment_r50_fpn_gn-neck%2Bhead_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_gn-neck%2Bhead_1x_coco/reppoints_moment_r50_fpn_gn-neck%2Bhead_1x_coco_20200329-4b38409a.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_gn-neck%2Bhead_1x_coco/reppoints_moment_r50_fpn_gn-neck%2Bhead_1x_coco_20200329_145952.log.json) | +| RepPoints | R-50-FPN | Y | none | moment | 2x | 3.9 | - | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_gn-neck%2Bhead_2x_coco/reppoints_moment_r50_fpn_gn-neck%2Bhead_2x_coco_20200329-91babaa2.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_gn-neck%2Bhead_2x_coco/reppoints_moment_r50_fpn_gn-neck%2Bhead_2x_coco_20200329_150020.log.json) | +| RepPoints | R-101-FPN | Y | none | moment | 2x | 5.8 | 13.7 | 40.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/reppoints_moment_r101_fpn_gn-neck+head_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r101_fpn_gn-neck%2Bhead_2x_coco/reppoints_moment_r101_fpn_gn-neck%2Bhead_2x_coco_20200329-4fbc7310.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r101_fpn_gn-neck%2Bhead_2x_coco/reppoints_moment_r101_fpn_gn-neck%2Bhead_2x_coco_20200329_132205.log.json) | +| RepPoints | R-101-FPN-DCN | Y | none | moment | 2x | 5.9 | 12.1 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco_20200329-3309fbf2.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco_20200329_132134.log.json) | +| RepPoints | X-101-FPN-DCN | Y | none | moment | 2x | 7.1 | 9.3 | 44.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco_20200329-f87da1ea.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco_20200329_132201.log.json) | + +**Notes:** + +- `R-xx`, `X-xx` denote the ResNet and ResNeXt architectures, respectively. +- `DCN` denotes replacing 3x3 conv with the 3x3 deformable convolution in `c3-c5` stages of backbone. +- `none` in the `anchor` column means 2-d `center point` (x,y) is used to represent the initial object hypothesis. `single` denotes one 4-d anchor box (x,y,w,h) with IoU based label assign criterion is adopted. +- `moment`, `partial MinMax`, `MinMax` in the `convert func` column are three functions to convert a point set to a pseudo box. +- Note the results here are slightly different from those reported in the paper, due to framework change. While the original paper uses an [MXNet](https://mxnet.apache.org/) implementation, we re-implement the method in [PyTorch](https://pytorch.org/) based on mmdetection. diff --git a/configs/reppoints/bbox_r50_grid_center_fpn_gn-neck+head_1x_coco.py b/configs/reppoints/bbox_r50_grid_center_fpn_gn-neck+head_1x_coco.py new file mode 100644 index 0000000..b24c8db --- /dev/null +++ b/configs/reppoints/bbox_r50_grid_center_fpn_gn-neck+head_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py' +model = dict(bbox_head=dict(transform_method='minmax', use_grid_points=True)) diff --git a/configs/reppoints/bbox_r50_grid_fpn_gn-neck+head_1x_coco.py b/configs/reppoints/bbox_r50_grid_fpn_gn-neck+head_1x_coco.py new file mode 100644 index 0000000..8d5013d --- /dev/null +++ b/configs/reppoints/bbox_r50_grid_fpn_gn-neck+head_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py' +model = dict( + bbox_head=dict(transform_method='minmax', use_grid_points=True), + # training and testing settings + train_cfg=dict( + init=dict( + assigner=dict( + _delete_=True, + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0, + ignore_iof_thr=-1)))) diff --git a/configs/reppoints/reppoints.png b/configs/reppoints/reppoints.png new file mode 100644 index 0000000..a9306d9 Binary files /dev/null and b/configs/reppoints/reppoints.png differ diff --git a/configs/reppoints/reppoints_minmax_r50_fpn_gn-neck+head_1x_coco.py b/configs/reppoints/reppoints_minmax_r50_fpn_gn-neck+head_1x_coco.py new file mode 100644 index 0000000..0f56a46 --- /dev/null +++ b/configs/reppoints/reppoints_minmax_r50_fpn_gn-neck+head_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py' +model = dict(bbox_head=dict(transform_method='minmax')) diff --git a/configs/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py b/configs/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py new file mode 100644 index 0000000..241754c --- /dev/null +++ b/configs/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py @@ -0,0 +1,7 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict( + depth=101, + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/reppoints/reppoints_moment_r101_fpn_gn-neck+head_2x_coco.py b/configs/reppoints/reppoints_moment_r101_fpn_gn-neck+head_2x_coco.py new file mode 100644 index 0000000..19efa0d --- /dev/null +++ b/configs/reppoints/reppoints_moment_r101_fpn_gn-neck+head_2x_coco.py @@ -0,0 +1,2 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py b/configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py new file mode 100644 index 0000000..8df2a8f --- /dev/null +++ b/configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py @@ -0,0 +1,67 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + type='RepPointsDetector', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_input', + num_outs=5), + bbox_head=dict( + type='RepPointsHead', + num_classes=80, + in_channels=256, + feat_channels=256, + point_feat_channels=256, + stacked_convs=3, + num_points=9, + gradient_mul=0.1, + point_strides=[8, 16, 32, 64, 128], + point_base_scale=4, + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox_init=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.5), + loss_bbox_refine=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0), + transform_method='moment'), + # training and testing settings + train_cfg=dict( + init=dict( + assigner=dict(type='PointAssigner', scale=4, pos_num=1), + allowed_border=-1, + pos_weight=-1, + debug=False), + refine=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False)), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100)) +optimizer = dict(lr=0.01) diff --git a/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py b/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py new file mode 100644 index 0000000..337f167 --- /dev/null +++ b/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = './reppoints_moment_r50_fpn_1x_coco.py' +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict(neck=dict(norm_cfg=norm_cfg), bbox_head=dict(norm_cfg=norm_cfg)) +optimizer = dict(lr=0.01) diff --git a/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py b/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py new file mode 100644 index 0000000..feca44a --- /dev/null +++ b/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py @@ -0,0 +1,3 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py' +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py b/configs/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py new file mode 100644 index 0000000..c33019d --- /dev/null +++ b/configs/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py @@ -0,0 +1,15 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/reppoints/reppoints_partial_minmax_r50_fpn_gn-neck+head_1x_coco.py b/configs/reppoints/reppoints_partial_minmax_r50_fpn_gn-neck+head_1x_coco.py new file mode 100644 index 0000000..9a63bd0 --- /dev/null +++ b/configs/reppoints/reppoints_partial_minmax_r50_fpn_gn-neck+head_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py' +model = dict(bbox_head=dict(transform_method='partial_minmax')) diff --git a/configs/res2net/README.md b/configs/res2net/README.md new file mode 100644 index 0000000..ca9f934 --- /dev/null +++ b/configs/res2net/README.md @@ -0,0 +1,65 @@ +# Res2Net for object detection and instance segmentation + +## Introduction + +[ALGORITHM] + +We propose a novel building block for CNNs, namely Res2Net, by constructing hierarchical residual-like connections within one single residual block. The Res2Net represents multi-scale features at a granular level and increases the range of receptive fields for each network layer. + +| Backbone |Params. | GFLOPs | top-1 err. | top-5 err. | +| :-------------: |:----: | :-----: | :--------: | :--------: | +| ResNet-101 |44.6 M | 7.8 | 22.63 | 6.44 | +| ResNeXt-101-64x4d |83.5M | 15.5 | 20.40 | - | +| HRNetV2p-W48 | 77.5M | 16.1 | 20.70 | 5.50 | +| Res2Net-101 | 45.2M | 8.3 | 18.77 | 4.64 | + +Compared with other backbone networks, Res2Net requires fewer parameters and FLOPs. + +**Note:** + +- GFLOPs for classification are calculated with image size (224x224). + +```latex +@article{gao2019res2net, + title={Res2Net: A New Multi-scale Backbone Architecture}, + author={Gao, Shang-Hua and Cheng, Ming-Ming and Zhao, Kai and Zhang, Xin-Yu and Yang, Ming-Hsuan and Torr, Philip}, + journal={IEEE TPAMI}, + year={2020}, + doi={10.1109/TPAMI.2019.2938758}, +} +``` + +## Results and Models + +### Faster R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +|R2-101-FPN | pytorch | 2x | 7.4 | - | 43.0 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/res2net/faster_rcnn_r2_101_fpn_2x_coco/faster_rcnn_r2_101_fpn_2x_coco-175f1da6.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/res2net/faster_rcnn_r2_101_fpn_2x_coco/faster_rcnn_r2_101_fpn_2x_coco_20200514_231734.log.json) | + +### Mask R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +|R2-101-FPN | pytorch | 2x | 7.9 | - | 43.6 | 38.7 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/res2net/mask_rcnn_r2_101_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/res2net/mask_rcnn_r2_101_fpn_2x_coco/mask_rcnn_r2_101_fpn_2x_coco-17f061e8.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/res2net/mask_rcnn_r2_101_fpn_2x_coco/mask_rcnn_r2_101_fpn_2x_coco_20200515_002413.log.json) | + +### Cascade R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +|R2-101-FPN | pytorch | 20e | 7.8 | - | 45.7 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/res2net/cascade_rcnn_r2_101_fpn_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/res2net/cascade_rcnn_r2_101_fpn_20e_coco/cascade_rcnn_r2_101_fpn_20e_coco-f4b7b7db.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/res2net/cascade_rcnn_r2_101_fpn_20e_coco/cascade_rcnn_r2_101_fpn_20e_coco_20200515_091644.log.json) | + +### Cascade Mask R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +R2-101-FPN | pytorch | 20e | 9.5 | - | 46.4 | 40.0 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/res2net/cascade_mask_rcnn_r2_101_fpn_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/res2net/cascade_mask_rcnn_r2_101_fpn_20e_coco/cascade_mask_rcnn_r2_101_fpn_20e_coco-8a7b41e1.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/res2net/cascade_mask_rcnn_r2_101_fpn_20e_coco/cascade_mask_rcnn_r2_101_fpn_20e_coco_20200515_091645.log.json) | + +### Hybrid Task Cascade (HTC) + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +| R2-101-FPN | pytorch | 20e | - | - | 47.5 | 41.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/res2net/htc_r2_101_fpn_20e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/res2net/htc_r2_101_fpn_20e_coco/htc_r2_101_fpn_20e_coco-3a8d2112.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/res2net/htc_r2_101_fpn_20e_coco/htc_r2_101_fpn_20e_coco_20200515_150029.log.json) | + +- Res2Net ImageNet pretrained models are in [Res2Net-PretrainedModels](https://github.com/Res2Net/Res2Net-PretrainedModels). +- More applications of Res2Net are in [Res2Net-Github](https://github.com/Res2Net/). diff --git a/configs/res2net/cascade_mask_rcnn_r2_101_fpn_20e_coco.py b/configs/res2net/cascade_mask_rcnn_r2_101_fpn_20e_coco.py new file mode 100644 index 0000000..50df4e2 --- /dev/null +++ b/configs/res2net/cascade_mask_rcnn_r2_101_fpn_20e_coco.py @@ -0,0 +1,4 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco.py' +model = dict( + pretrained='open-mmlab://res2net101_v1d_26w_4s', + backbone=dict(type='Res2Net', depth=101, scales=4, base_width=26)) diff --git a/configs/res2net/cascade_rcnn_r2_101_fpn_20e_coco.py b/configs/res2net/cascade_rcnn_r2_101_fpn_20e_coco.py new file mode 100644 index 0000000..1cac759 --- /dev/null +++ b/configs/res2net/cascade_rcnn_r2_101_fpn_20e_coco.py @@ -0,0 +1,4 @@ +_base_ = '../cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco.py' +model = dict( + pretrained='open-mmlab://res2net101_v1d_26w_4s', + backbone=dict(type='Res2Net', depth=101, scales=4, base_width=26)) diff --git a/configs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py b/configs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py new file mode 100644 index 0000000..85004e0 --- /dev/null +++ b/configs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_2x_coco.py' +model = dict( + pretrained='open-mmlab://res2net101_v1d_26w_4s', + backbone=dict(type='Res2Net', depth=101, scales=4, base_width=26)) diff --git a/configs/res2net/htc_r2_101_fpn_20e_coco.py b/configs/res2net/htc_r2_101_fpn_20e_coco.py new file mode 100644 index 0000000..3c4cc75 --- /dev/null +++ b/configs/res2net/htc_r2_101_fpn_20e_coco.py @@ -0,0 +1,7 @@ +_base_ = '../htc/htc_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://res2net101_v1d_26w_4s', + backbone=dict(type='Res2Net', depth=101, scales=4, base_width=26)) +# learning policy +lr_config = dict(step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/res2net/mask_rcnn_r2_101_fpn_2x_coco.py b/configs/res2net/mask_rcnn_r2_101_fpn_2x_coco.py new file mode 100644 index 0000000..a620188 --- /dev/null +++ b/configs/res2net/mask_rcnn_r2_101_fpn_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py' +model = dict( + pretrained='open-mmlab://res2net101_v1d_26w_4s', + backbone=dict(type='Res2Net', depth=101, scales=4, base_width=26)) diff --git a/configs/resnest/README.md b/configs/resnest/README.md new file mode 100644 index 0000000..d34d1c2 --- /dev/null +++ b/configs/resnest/README.md @@ -0,0 +1,44 @@ +# ResNeSt: Split-Attention Networks + +## Introduction + +[BACKBONE] + +```latex +@article{zhang2020resnest, +title={ResNeSt: Split-Attention Networks}, +author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander}, +journal={arXiv preprint arXiv:2004.08955}, +year={2020} +} +``` + +## Results and Models + +### Faster R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +|S-50-FPN | pytorch | 1x | 4.8 | - | 42.0 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/resnest/faster_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/faster_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20200926_125502-20289c16.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/resnest/faster_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/faster_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco-20200926_125502.log.json) | +|S-101-FPN | pytorch | 1x | 7.1 | - | 44.5 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/faster_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/resnest/faster_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/faster_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20201006_021058-421517f1.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/resnest/faster_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/faster_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco-20201006_021058.log.json) | + +### Mask R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +|S-50-FPN | pytorch | 1x | 5.5 | - | 42.6 | 38.1 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/resnest/mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20200926_125503-8a2c3d47.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/resnest/mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco-20200926_125503.log.json) | +|S-101-FPN | pytorch | 1x | 7.8 | - | 45.2 | 40.2 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/resnest/mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20201005_215831-af60cdf9.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/resnest/mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco-20201005_215831.log.json) | + +### Cascade R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +|S-50-FPN | pytorch | 1x | - | - | 44.5 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/cascade_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/cascade_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20201122_213640-763cc7b5.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/cascade_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco-20201005_113242.log.json) | +|S-101-FPN | pytorch | 1x | 8.4 | - | 46.8 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/cascade_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/cascade_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20201005_113242-b9459f8f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/cascade_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco-20201122_213640.log.json) | + +### Cascade Mask R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +|S-50-FPN | pytorch | 1x | - | - | 45.4 | 39.5 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/cascade_mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/cascade_mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20201122_104428-99eca4c7.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/cascade_mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco-20201122_104428.log.json) | +|S-101-FPN | pytorch | 1x | 10.5 | - | 47.7 | 41.4 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/resnest/cascade_mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/cascade_mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20201005_113243-42607475.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/cascade_mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco-20201005_113243.log.json) | diff --git a/configs/resnest/cascade_mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py b/configs/resnest/cascade_mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py new file mode 100644 index 0000000..3995603 --- /dev/null +++ b/configs/resnest/cascade_mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = './cascade_mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict(stem_channels=128, depth=101)) diff --git a/configs/resnest/cascade_mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py b/configs/resnest/cascade_mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py new file mode 100644 index 0000000..f2cf444 --- /dev/null +++ b/configs/resnest/cascade_mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py @@ -0,0 +1,118 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://resnest50', + backbone=dict( + type='ResNeSt', + stem_channels=64, + depth=50, + radix=2, + reduction_factor=4, + avg_down_stride=True, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch'), + roi_head=dict( + bbox_head=[ + dict( + type='Shared4Conv1FCBBoxHead', + in_channels=256, + conv_out_channels=256, + fc_out_channels=1024, + norm_cfg=norm_cfg, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='Shared4Conv1FCBBoxHead', + in_channels=256, + conv_out_channels=256, + fc_out_channels=1024, + norm_cfg=norm_cfg, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.05, 0.05, 0.1, 0.1]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='Shared4Conv1FCBBoxHead', + in_channels=256, + conv_out_channels=256, + fc_out_channels=1024, + norm_cfg=norm_cfg, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.033, 0.033, 0.067, 0.067]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) + ], + mask_head=dict(norm_cfg=norm_cfg))) +# # use ResNeSt img_norm +img_norm_cfg = dict( + mean=[123.68, 116.779, 103.939], std=[58.393, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/resnest/cascade_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py b/configs/resnest/cascade_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py new file mode 100644 index 0000000..53964a3 --- /dev/null +++ b/configs/resnest/cascade_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = './cascade_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict(stem_channels=128, depth=101)) diff --git a/configs/resnest/cascade_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py b/configs/resnest/cascade_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py new file mode 100644 index 0000000..78a154b --- /dev/null +++ b/configs/resnest/cascade_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py @@ -0,0 +1,116 @@ +_base_ = '../cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://resnest50', + backbone=dict( + type='ResNeSt', + stem_channels=64, + depth=50, + radix=2, + reduction_factor=4, + avg_down_stride=True, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch'), + roi_head=dict( + bbox_head=[ + dict( + type='Shared4Conv1FCBBoxHead', + in_channels=256, + conv_out_channels=256, + fc_out_channels=1024, + norm_cfg=norm_cfg, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='Shared4Conv1FCBBoxHead', + in_channels=256, + conv_out_channels=256, + fc_out_channels=1024, + norm_cfg=norm_cfg, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.05, 0.05, 0.1, 0.1]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='Shared4Conv1FCBBoxHead', + in_channels=256, + conv_out_channels=256, + fc_out_channels=1024, + norm_cfg=norm_cfg, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.033, 0.033, 0.067, 0.067]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) + ], )) +# # use ResNeSt img_norm +img_norm_cfg = dict( + mean=[123.68, 116.779, 103.939], std=[58.393, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', + with_bbox=True, + with_mask=False, + poly2mask=False), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/resnest/faster_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py b/configs/resnest/faster_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py new file mode 100644 index 0000000..1915ab1 --- /dev/null +++ b/configs/resnest/faster_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = './faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict(stem_channels=128, depth=101)) diff --git a/configs/resnest/faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py b/configs/resnest/faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py new file mode 100644 index 0000000..422fbca --- /dev/null +++ b/configs/resnest/faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py @@ -0,0 +1,62 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://resnest50', + backbone=dict( + type='ResNeSt', + stem_channels=64, + depth=50, + radix=2, + reduction_factor=4, + avg_down_stride=True, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch'), + roi_head=dict( + bbox_head=dict( + type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + norm_cfg=norm_cfg))) +# # use ResNeSt img_norm +img_norm_cfg = dict( + mean=[123.68, 116.779, 103.939], std=[58.393, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', + with_bbox=True, + with_mask=False, + poly2mask=False), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/resnest/mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py b/configs/resnest/mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py new file mode 100644 index 0000000..89e077d --- /dev/null +++ b/configs/resnest/mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = './mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict(stem_channels=128, depth=101)) diff --git a/configs/resnest/mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py b/configs/resnest/mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py new file mode 100644 index 0000000..29f21fd --- /dev/null +++ b/configs/resnest/mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py @@ -0,0 +1,64 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://resnest50', + backbone=dict( + type='ResNeSt', + stem_channels=64, + depth=50, + radix=2, + reduction_factor=4, + avg_down_stride=True, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch'), + roi_head=dict( + bbox_head=dict( + type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + norm_cfg=norm_cfg), + mask_head=dict(norm_cfg=norm_cfg))) +# # use ResNeSt img_norm +img_norm_cfg = dict( + mean=[123.68, 116.779, 103.939], std=[58.393, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/retinanet/README.md b/configs/retinanet/README.md new file mode 100644 index 0000000..6b66534 --- /dev/null +++ b/configs/retinanet/README.md @@ -0,0 +1,29 @@ +# Focal Loss for Dense Object Detection + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{lin2017focal, + title={Focal loss for dense object detection}, + author={Lin, Tsung-Yi and Goyal, Priya and Girshick, Ross and He, Kaiming and Doll{\'a}r, Piotr}, + booktitle={Proceedings of the IEEE international conference on computer vision}, + year={2017} +} +``` + +## Results and models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +| R-50-FPN | caffe | 1x | 3.5 | 18.6 | 36.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r50_caffe_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_caffe_fpn_1x_coco/retinanet_r50_caffe_fpn_1x_coco_20200531-f11027c5.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_caffe_fpn_1x_coco/retinanet_r50_caffe_fpn_1x_coco_20200531_012518.log.json) | +| R-50-FPN | pytorch | 1x | 3.8 | 19.0 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_1x_coco/retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_1x_coco/retinanet_r50_fpn_1x_coco_20200130_002941.log.json) | +| R-50-FPN | pytorch | 2x | - | - | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r50_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_2x_coco/retinanet_r50_fpn_2x_coco_20200131-fdb43119.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_2x_coco/retinanet_r50_fpn_2x_coco_20200131_114738.log.json) | +| R-101-FPN | caffe | 1x | 5.5 | 14.7 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r101_caffe_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_caffe_fpn_1x_coco/retinanet_r101_caffe_fpn_1x_coco_20200531-b428fa0f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_caffe_fpn_1x_coco/retinanet_r101_caffe_fpn_1x_coco_20200531_012536.log.json) | +| R-101-FPN | pytorch | 1x | 5.7 | 15.0 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r101_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_fpn_1x_coco/retinanet_r101_fpn_1x_coco_20200130-7a93545f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_fpn_1x_coco/retinanet_r101_fpn_1x_coco_20200130_003055.log.json) | +| R-101-FPN | pytorch | 2x | - | - | 38.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r101_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_fpn_2x_coco/retinanet_r101_fpn_2x_coco_20200131-5560aee8.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_fpn_2x_coco/retinanet_r101_fpn_2x_coco_20200131_114859.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 7.0 | 12.1 | 39.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_x101_32x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_32x4d_fpn_1x_coco/retinanet_x101_32x4d_fpn_1x_coco_20200130-5c8b7ec4.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_32x4d_fpn_1x_coco/retinanet_x101_32x4d_fpn_1x_coco_20200130_003004.log.json) | +| X-101-32x4d-FPN | pytorch | 2x | - | - | 40.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_x101_32x4d_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_32x4d_fpn_2x_coco/retinanet_x101_32x4d_fpn_2x_coco_20200131-237fc5e1.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_32x4d_fpn_2x_coco/retinanet_x101_32x4d_fpn_2x_coco_20200131_114812.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 10.0 | 8.7 | 41.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_x101_64x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_64x4d_fpn_1x_coco/retinanet_x101_64x4d_fpn_1x_coco_20200130-366f5af1.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_64x4d_fpn_1x_coco/retinanet_x101_64x4d_fpn_1x_coco_20200130_003008.log.json) | +| X-101-64x4d-FPN | pytorch | 2x | - | - | 40.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_x101_64x4d_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_64x4d_fpn_2x_coco/retinanet_x101_64x4d_fpn_2x_coco_20200131-bca068ab.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_64x4d_fpn_2x_coco/retinanet_x101_64x4d_fpn_2x_coco_20200131_114833.log.json) | diff --git a/configs/retinanet/retinanet_r101_caffe_fpn_1x_coco.py b/configs/retinanet/retinanet_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..21d227b --- /dev/null +++ b/configs/retinanet/retinanet_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = './retinanet_r50_caffe_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet101_caffe', + backbone=dict(depth=101)) diff --git a/configs/retinanet/retinanet_r101_fpn_1x_coco.py b/configs/retinanet/retinanet_r101_fpn_1x_coco.py new file mode 100644 index 0000000..1e6f463 --- /dev/null +++ b/configs/retinanet/retinanet_r101_fpn_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './retinanet_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/retinanet/retinanet_r101_fpn_2x_coco.py b/configs/retinanet/retinanet_r101_fpn_2x_coco.py new file mode 100644 index 0000000..c12088a --- /dev/null +++ b/configs/retinanet/retinanet_r101_fpn_2x_coco.py @@ -0,0 +1,2 @@ +_base_ = './retinanet_r50_fpn_2x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/retinanet/retinanet_r50_caffe_fpn_1x_coco.py b/configs/retinanet/retinanet_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..028c1a3 --- /dev/null +++ b/configs/retinanet/retinanet_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,37 @@ +_base_ = './retinanet_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict( + norm_cfg=dict(requires_grad=False), norm_eval=True, style='caffe')) +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/retinanet/retinanet_r50_caffe_fpn_mstrain_1x_coco.py b/configs/retinanet/retinanet_r50_caffe_fpn_mstrain_1x_coco.py new file mode 100644 index 0000000..f2a0dec --- /dev/null +++ b/configs/retinanet/retinanet_r50_caffe_fpn_mstrain_1x_coco.py @@ -0,0 +1,42 @@ +_base_ = './retinanet_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict( + norm_cfg=dict(requires_grad=False), norm_eval=True, style='caffe')) +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/retinanet/retinanet_r50_caffe_fpn_mstrain_2x_coco.py b/configs/retinanet/retinanet_r50_caffe_fpn_mstrain_2x_coco.py new file mode 100644 index 0000000..eea9690 --- /dev/null +++ b/configs/retinanet/retinanet_r50_caffe_fpn_mstrain_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './retinanet_r50_caffe_fpn_mstrain_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 23]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/retinanet/retinanet_r50_caffe_fpn_mstrain_3x_coco.py b/configs/retinanet/retinanet_r50_caffe_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..8057650 --- /dev/null +++ b/configs/retinanet/retinanet_r50_caffe_fpn_mstrain_3x_coco.py @@ -0,0 +1,4 @@ +_base_ = './retinanet_r50_caffe_fpn_mstrain_1x_coco.py' +# learning policy +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/retinanet/retinanet_r50_fpn_1x_coco.py b/configs/retinanet/retinanet_r50_fpn_1x_coco.py new file mode 100644 index 0000000..04bd696 --- /dev/null +++ b/configs/retinanet/retinanet_r50_fpn_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/retinanet/retinanet_r50_fpn_2x_coco.py b/configs/retinanet/retinanet_r50_fpn_2x_coco.py new file mode 100644 index 0000000..927915f --- /dev/null +++ b/configs/retinanet/retinanet_r50_fpn_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './retinanet_r50_fpn_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/retinanet/retinanet_x101_32x4d_fpn_1x_coco.py b/configs/retinanet/retinanet_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..9927f8f --- /dev/null +++ b/configs/retinanet/retinanet_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './retinanet_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/retinanet/retinanet_x101_32x4d_fpn_2x_coco.py b/configs/retinanet/retinanet_x101_32x4d_fpn_2x_coco.py new file mode 100644 index 0000000..cd78b6d --- /dev/null +++ b/configs/retinanet/retinanet_x101_32x4d_fpn_2x_coco.py @@ -0,0 +1,13 @@ +_base_ = './retinanet_r50_fpn_2x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/retinanet/retinanet_x101_64x4d_fpn_1x_coco.py b/configs/retinanet/retinanet_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..cc40f26 --- /dev/null +++ b/configs/retinanet/retinanet_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './retinanet_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/retinanet/retinanet_x101_64x4d_fpn_2x_coco.py b/configs/retinanet/retinanet_x101_64x4d_fpn_2x_coco.py new file mode 100644 index 0000000..eac05a6 --- /dev/null +++ b/configs/retinanet/retinanet_x101_64x4d_fpn_2x_coco.py @@ -0,0 +1,13 @@ +_base_ = './retinanet_r50_fpn_2x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/rpn/README.md b/configs/rpn/README.md new file mode 100644 index 0000000..4f6f712 --- /dev/null +++ b/configs/rpn/README.md @@ -0,0 +1,29 @@ +# Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{ren2015faster, + title={Faster r-cnn: Towards real-time object detection with region proposal networks}, + author={Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian}, + booktitle={Advances in neural information processing systems}, + year={2015} +} +``` + +## Results and models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | AR1000 | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +| R-50-FPN | caffe | 1x | 3.5 | 22.6 | 58.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_r50_caffe_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_caffe_fpn_1x_coco/rpn_r50_caffe_fpn_1x_coco_20200531-5b903a37.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_caffe_fpn_1x_coco/rpn_r50_caffe_fpn_1x_coco_20200531_012334.log.json) | +| R-50-FPN | pytorch | 1x | 3.8 | 22.3 | 58.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_fpn_1x_coco/rpn_r50_fpn_1x_coco_20200218-5525fa2e.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_fpn_1x_coco/rpn_r50_fpn_1x_coco_20200218_151240.log.json) | +| R-50-FPN | pytorch | 2x | - | - | 58.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_r50_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_fpn_2x_coco/rpn_r50_fpn_2x_coco_20200131-0728c9b3.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_fpn_2x_coco/rpn_r50_fpn_2x_coco_20200131_190631.log.json) | +| R-101-FPN | caffe | 1x | 5.4 | 17.3 | 60.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_r101_caffe_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r101_caffe_fpn_1x_coco/rpn_r101_caffe_fpn_1x_coco_20200531-0629a2e2.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r101_caffe_fpn_1x_coco/rpn_r101_caffe_fpn_1x_coco_20200531_012345.log.json) | +| R-101-FPN | pytorch | 1x | 5.8 | 16.5 | 59.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_r101_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r101_fpn_1x_coco/rpn_r101_fpn_1x_coco_20200131-2ace2249.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r101_fpn_1x_coco/rpn_r101_fpn_1x_coco_20200131_191000.log.json) | +| R-101-FPN | pytorch | 2x | - | - | 60.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_r101_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r101_fpn_2x_coco/rpn_r101_fpn_2x_coco_20200131-24e3db1a.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r101_fpn_2x_coco/rpn_r101_fpn_2x_coco_20200131_191106.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 7.0 | 13.0 | 60.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_x101_32x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_32x4d_fpn_1x_coco/rpn_x101_32x4d_fpn_1x_coco_20200219-b02646c6.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_32x4d_fpn_1x_coco/rpn_x101_32x4d_fpn_1x_coco_20200219_012037.log.json) | +| X-101-32x4d-FPN | pytorch | 2x | - | - | 61.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_x101_32x4d_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_32x4d_fpn_2x_coco/rpn_x101_32x4d_fpn_2x_coco_20200208-d22bd0bb.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_32x4d_fpn_2x_coco/rpn_x101_32x4d_fpn_2x_coco_20200208_200752.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 10.1 | 9.1 | 61.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_x101_64x4d_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_64x4d_fpn_1x_coco/rpn_x101_64x4d_fpn_1x_coco_20200208-cde6f7dd.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_64x4d_fpn_1x_coco/rpn_x101_64x4d_fpn_1x_coco_20200208_200752.log.json) | +| X-101-64x4d-FPN | pytorch | 2x | - | - | 61.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/rpn/rpn_x101_64x4d_fpn_2x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_64x4d_fpn_2x_coco/rpn_x101_64x4d_fpn_2x_coco_20200208-c65f524f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_64x4d_fpn_2x_coco/rpn_x101_64x4d_fpn_2x_coco_20200208_200752.log.json) | diff --git a/configs/rpn/rpn_r101_caffe_fpn_1x_coco.py b/configs/rpn/rpn_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..e616fdf --- /dev/null +++ b/configs/rpn/rpn_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = './rpn_r50_caffe_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet101_caffe', + backbone=dict(depth=101)) diff --git a/configs/rpn/rpn_r101_fpn_1x_coco.py b/configs/rpn/rpn_r101_fpn_1x_coco.py new file mode 100644 index 0000000..b2af611 --- /dev/null +++ b/configs/rpn/rpn_r101_fpn_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './rpn_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/rpn/rpn_r101_fpn_2x_coco.py b/configs/rpn/rpn_r101_fpn_2x_coco.py new file mode 100644 index 0000000..6908d30 --- /dev/null +++ b/configs/rpn/rpn_r101_fpn_2x_coco.py @@ -0,0 +1,2 @@ +_base_ = './rpn_r50_fpn_2x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/rpn/rpn_r50_caffe_c4_1x_coco.py b/configs/rpn/rpn_r50_caffe_c4_1x_coco.py new file mode 100644 index 0000000..6da0ee9 --- /dev/null +++ b/configs/rpn/rpn_r50_caffe_c4_1x_coco.py @@ -0,0 +1,38 @@ +_base_ = [ + '../_base_/models/rpn_r50_caffe_c4.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# dataset settings +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_label=False), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +evaluation = dict(interval=1, metric='proposal_fast') diff --git a/configs/rpn/rpn_r50_caffe_fpn_1x_coco.py b/configs/rpn/rpn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..398f3c1 --- /dev/null +++ b/configs/rpn/rpn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,37 @@ +_base_ = './rpn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict( + norm_cfg=dict(requires_grad=False), norm_eval=True, style='caffe')) +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_label=False), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/rpn/rpn_r50_fpn_1x_coco.py b/configs/rpn/rpn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..26f95a3 --- /dev/null +++ b/configs/rpn/rpn_r50_fpn_1x_coco.py @@ -0,0 +1,18 @@ +_base_ = [ + '../_base_/models/rpn_r50_fpn.py', '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_label=False), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes']), +] +data = dict(train=dict(pipeline=train_pipeline)) +evaluation = dict(interval=1, metric='proposal_fast') diff --git a/configs/rpn/rpn_r50_fpn_2x_coco.py b/configs/rpn/rpn_r50_fpn_2x_coco.py new file mode 100644 index 0000000..2f264bf --- /dev/null +++ b/configs/rpn/rpn_r50_fpn_2x_coco.py @@ -0,0 +1,5 @@ +_base_ = './rpn_r50_fpn_1x_coco.py' + +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/rpn/rpn_x101_32x4d_fpn_1x_coco.py b/configs/rpn/rpn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..83bd700 --- /dev/null +++ b/configs/rpn/rpn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './rpn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/rpn/rpn_x101_32x4d_fpn_2x_coco.py b/configs/rpn/rpn_x101_32x4d_fpn_2x_coco.py new file mode 100644 index 0000000..979afb9 --- /dev/null +++ b/configs/rpn/rpn_x101_32x4d_fpn_2x_coco.py @@ -0,0 +1,13 @@ +_base_ = './rpn_r50_fpn_2x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/rpn/rpn_x101_64x4d_fpn_1x_coco.py b/configs/rpn/rpn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..bb7f0a6 --- /dev/null +++ b/configs/rpn/rpn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './rpn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/rpn/rpn_x101_64x4d_fpn_2x_coco.py b/configs/rpn/rpn_x101_64x4d_fpn_2x_coco.py new file mode 100644 index 0000000..8c766f0 --- /dev/null +++ b/configs/rpn/rpn_x101_64x4d_fpn_2x_coco.py @@ -0,0 +1,13 @@ +_base_ = './rpn_r50_fpn_2x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) diff --git a/configs/sabl/README.md b/configs/sabl/README.md new file mode 100644 index 0000000..34b8367 --- /dev/null +++ b/configs/sabl/README.md @@ -0,0 +1,37 @@ +# Side-Aware Boundary Localization for More Precise Object Detection + +## Introduction + +[ALGORITHM] + +We provide config files to reproduce the object detection results in the ECCV 2020 Spotlight paper for [Side-Aware Boundary Localization for More Precise Object Detection](https://arxiv.org/abs/1912.04260). + +```latex +@inproceedings{Wang_2020_ECCV, + title = {Side-Aware Boundary Localization for More Precise Object Detection}, + author = {Jiaqi Wang and Wenwei Zhang and Yuhang Cao and Kai Chen and Jiangmiao Pang and Tao Gong and Jianping Shi and Chen Change Loy and Dahua Lin}, + booktitle = {ECCV}, + year = {2020} +} +``` + +## Results and Models + +The results on COCO 2017 val is shown in the below table. (results on test-dev are usually slightly higher than val). +Single-scale testing (1333x800) is adopted in all results. + +| Method | Backbone | Lr schd | ms-train | box AP | Config | Download | +| :----------------: | :-------: | :-----: | :------: | :----: | :----------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| SABL Faster R-CNN | R-50-FPN | 1x | N | 39.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_faster_rcnn_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_faster_rcnn_r50_fpn_1x_coco/sabl_faster_rcnn_r50_fpn_1x_coco-e867595b.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_faster_rcnn_r50_fpn_1x_coco/20200830_130324.log.json) | +| SABL Faster R-CNN | R-101-FPN | 1x | N | 41.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_faster_rcnn_r101_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_faster_rcnn_r101_fpn_1x_coco/sabl_faster_rcnn_r101_fpn_1x_coco-f804c6c1.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_faster_rcnn_r101_fpn_1x_coco/20200830_183949.log.json) | +| SABL Cascade R-CNN | R-50-FPN | 1x | N | 41.6 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_cascade_rcnn_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_cascade_rcnn_r50_fpn_1x_coco/sabl_cascade_rcnn_r50_fpn_1x_coco-e1748e5e.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_cascade_rcnn_r50_fpn_1x_coco/20200831_033726.log.json) | +| SABL Cascade R-CNN | R-101-FPN | 1x | N | 43.0 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_cascade_rcnn_r101_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_cascade_rcnn_r101_fpn_1x_coco/sabl_cascade_rcnn_r101_fpn_1x_coco-2b83e87c.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_cascade_rcnn_r101_fpn_1x_coco/20200831_141745.log.json) | + +| Method | Backbone | GN | Lr schd | ms-train | box AP | Config | Download | +| :------------: | :-------: | :---: | :-----: | :---------: | :----: | :---------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| SABL RetinaNet | R-50-FPN | N | 1x | N | 37.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_retinanet_r50_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r50_fpn_1x_coco/sabl_retinanet_r50_fpn_1x_coco-6c54fd4f.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r50_fpn_1x_coco/20200830_053451.log.json) | +| SABL RetinaNet | R-50-FPN | Y | 1x | N | 38.8 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_retinanet_r50_fpn_gn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r50_fpn_gn_1x_coco/sabl_retinanet_r50_fpn_gn_1x_coco-e16dfcf1.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r50_fpn_gn_1x_coco/20200831_141955.log.json) | +| SABL RetinaNet | R-101-FPN | N | 1x | N | 39.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_retinanet_r101_fpn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_1x_coco/sabl_retinanet_r101_fpn_1x_coco-42026904.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_1x_coco/20200831_034256.log.json) | +| SABL RetinaNet | R-101-FPN | Y | 1x | N | 40.5 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_retinanet_r101_fpn_gn_1x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_1x_coco/sabl_retinanet_r101_fpn_gn_1x_coco-40a893e8.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_1x_coco/20200830_201422.log.json) | +| SABL RetinaNet | R-101-FPN | Y | 2x | Y (640~800) | 42.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco-1e63382c.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco/20200830_144807.log.json) | +| SABL RetinaNet | R-101-FPN | Y | 2x | Y (480~960) | 43.6 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco-5342f857.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco/20200830_164537.log.json) | diff --git a/configs/sabl/sabl_cascade_rcnn_r101_fpn_1x_coco.py b/configs/sabl/sabl_cascade_rcnn_r101_fpn_1x_coco.py new file mode 100644 index 0000000..0322006 --- /dev/null +++ b/configs/sabl/sabl_cascade_rcnn_r101_fpn_1x_coco.py @@ -0,0 +1,88 @@ +_base_ = [ + '../_base_/models/cascade_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# model settings +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(depth=101), + roi_head=dict(bbox_head=[ + dict( + type='SABLHead', + num_classes=80, + cls_in_channels=256, + reg_in_channels=256, + roi_feat_size=7, + reg_feat_up_ratio=2, + reg_pre_kernel=3, + reg_post_kernel=3, + reg_pre_num=2, + reg_post_num=1, + cls_out_channels=1024, + reg_offset_out_channels=256, + reg_cls_out_channels=256, + num_cls_fcs=1, + num_reg_fcs=0, + reg_class_agnostic=True, + norm_cfg=None, + bbox_coder=dict( + type='BucketingBBoxCoder', num_buckets=14, scale_factor=1.7), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox_reg=dict(type='SmoothL1Loss', beta=0.1, + loss_weight=1.0)), + dict( + type='SABLHead', + num_classes=80, + cls_in_channels=256, + reg_in_channels=256, + roi_feat_size=7, + reg_feat_up_ratio=2, + reg_pre_kernel=3, + reg_post_kernel=3, + reg_pre_num=2, + reg_post_num=1, + cls_out_channels=1024, + reg_offset_out_channels=256, + reg_cls_out_channels=256, + num_cls_fcs=1, + num_reg_fcs=0, + reg_class_agnostic=True, + norm_cfg=None, + bbox_coder=dict( + type='BucketingBBoxCoder', num_buckets=14, scale_factor=1.5), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox_reg=dict(type='SmoothL1Loss', beta=0.1, + loss_weight=1.0)), + dict( + type='SABLHead', + num_classes=80, + cls_in_channels=256, + reg_in_channels=256, + roi_feat_size=7, + reg_feat_up_ratio=2, + reg_pre_kernel=3, + reg_post_kernel=3, + reg_pre_num=2, + reg_post_num=1, + cls_out_channels=1024, + reg_offset_out_channels=256, + reg_cls_out_channels=256, + num_cls_fcs=1, + num_reg_fcs=0, + reg_class_agnostic=True, + norm_cfg=None, + bbox_coder=dict( + type='BucketingBBoxCoder', num_buckets=14, scale_factor=1.3), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox_reg=dict(type='SmoothL1Loss', beta=0.1, loss_weight=1.0)) + ])) diff --git a/configs/sabl/sabl_cascade_rcnn_r50_fpn_1x_coco.py b/configs/sabl/sabl_cascade_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..4b28a59 --- /dev/null +++ b/configs/sabl/sabl_cascade_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,86 @@ +_base_ = [ + '../_base_/models/cascade_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# model settings +model = dict( + roi_head=dict(bbox_head=[ + dict( + type='SABLHead', + num_classes=80, + cls_in_channels=256, + reg_in_channels=256, + roi_feat_size=7, + reg_feat_up_ratio=2, + reg_pre_kernel=3, + reg_post_kernel=3, + reg_pre_num=2, + reg_post_num=1, + cls_out_channels=1024, + reg_offset_out_channels=256, + reg_cls_out_channels=256, + num_cls_fcs=1, + num_reg_fcs=0, + reg_class_agnostic=True, + norm_cfg=None, + bbox_coder=dict( + type='BucketingBBoxCoder', num_buckets=14, scale_factor=1.7), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox_reg=dict(type='SmoothL1Loss', beta=0.1, + loss_weight=1.0)), + dict( + type='SABLHead', + num_classes=80, + cls_in_channels=256, + reg_in_channels=256, + roi_feat_size=7, + reg_feat_up_ratio=2, + reg_pre_kernel=3, + reg_post_kernel=3, + reg_pre_num=2, + reg_post_num=1, + cls_out_channels=1024, + reg_offset_out_channels=256, + reg_cls_out_channels=256, + num_cls_fcs=1, + num_reg_fcs=0, + reg_class_agnostic=True, + norm_cfg=None, + bbox_coder=dict( + type='BucketingBBoxCoder', num_buckets=14, scale_factor=1.5), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox_reg=dict(type='SmoothL1Loss', beta=0.1, + loss_weight=1.0)), + dict( + type='SABLHead', + num_classes=80, + cls_in_channels=256, + reg_in_channels=256, + roi_feat_size=7, + reg_feat_up_ratio=2, + reg_pre_kernel=3, + reg_post_kernel=3, + reg_pre_num=2, + reg_post_num=1, + cls_out_channels=1024, + reg_offset_out_channels=256, + reg_cls_out_channels=256, + num_cls_fcs=1, + num_reg_fcs=0, + reg_class_agnostic=True, + norm_cfg=None, + bbox_coder=dict( + type='BucketingBBoxCoder', num_buckets=14, scale_factor=1.3), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox_reg=dict(type='SmoothL1Loss', beta=0.1, loss_weight=1.0)) + ])) diff --git a/configs/sabl/sabl_faster_rcnn_r101_fpn_1x_coco.py b/configs/sabl/sabl_faster_rcnn_r101_fpn_1x_coco.py new file mode 100644 index 0000000..4c797ca --- /dev/null +++ b/configs/sabl/sabl_faster_rcnn_r101_fpn_1x_coco.py @@ -0,0 +1,36 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(depth=101), + roi_head=dict( + bbox_head=dict( + _delete_=True, + type='SABLHead', + num_classes=80, + cls_in_channels=256, + reg_in_channels=256, + roi_feat_size=7, + reg_feat_up_ratio=2, + reg_pre_kernel=3, + reg_post_kernel=3, + reg_pre_num=2, + reg_post_num=1, + cls_out_channels=1024, + reg_offset_out_channels=256, + reg_cls_out_channels=256, + num_cls_fcs=1, + num_reg_fcs=0, + reg_class_agnostic=True, + norm_cfg=None, + bbox_coder=dict( + type='BucketingBBoxCoder', num_buckets=14, scale_factor=1.7), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox_reg=dict(type='SmoothL1Loss', beta=0.1, + loss_weight=1.0)))) diff --git a/configs/sabl/sabl_faster_rcnn_r50_fpn_1x_coco.py b/configs/sabl/sabl_faster_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..732c7ba --- /dev/null +++ b/configs/sabl/sabl_faster_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,34 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + roi_head=dict( + bbox_head=dict( + _delete_=True, + type='SABLHead', + num_classes=80, + cls_in_channels=256, + reg_in_channels=256, + roi_feat_size=7, + reg_feat_up_ratio=2, + reg_pre_kernel=3, + reg_post_kernel=3, + reg_pre_num=2, + reg_post_num=1, + cls_out_channels=1024, + reg_offset_out_channels=256, + reg_cls_out_channels=256, + num_cls_fcs=1, + num_reg_fcs=0, + reg_class_agnostic=True, + norm_cfg=None, + bbox_coder=dict( + type='BucketingBBoxCoder', num_buckets=14, scale_factor=1.7), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox_reg=dict(type='SmoothL1Loss', beta=0.1, + loss_weight=1.0)))) diff --git a/configs/sabl/sabl_retinanet_r101_fpn_1x_coco.py b/configs/sabl/sabl_retinanet_r101_fpn_1x_coco.py new file mode 100644 index 0000000..ed3a96c --- /dev/null +++ b/configs/sabl/sabl_retinanet_r101_fpn_1x_coco.py @@ -0,0 +1,52 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# model settings +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(depth=101), + bbox_head=dict( + _delete_=True, + type='SABLRetinaHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + square_anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + scales=[4], + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='BucketingBBoxCoder', num_buckets=14, scale_factor=3.0), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.5), + loss_bbox_reg=dict( + type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.5)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0.0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/sabl/sabl_retinanet_r101_fpn_gn_1x_coco.py b/configs/sabl/sabl_retinanet_r101_fpn_gn_1x_coco.py new file mode 100644 index 0000000..ec78263 --- /dev/null +++ b/configs/sabl/sabl_retinanet_r101_fpn_gn_1x_coco.py @@ -0,0 +1,54 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# model settings +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(depth=101), + bbox_head=dict( + _delete_=True, + type='SABLRetinaHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + square_anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + scales=[4], + strides=[8, 16, 32, 64, 128]), + norm_cfg=norm_cfg, + bbox_coder=dict( + type='BucketingBBoxCoder', num_buckets=14, scale_factor=3.0), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.5), + loss_bbox_reg=dict( + type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.5)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0.0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco.py b/configs/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco.py new file mode 100644 index 0000000..2a47c60 --- /dev/null +++ b/configs/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco.py @@ -0,0 +1,71 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py' +] +# model settings +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(depth=101), + bbox_head=dict( + _delete_=True, + type='SABLRetinaHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + square_anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + scales=[4], + strides=[8, 16, 32, 64, 128]), + norm_cfg=norm_cfg, + bbox_coder=dict( + type='BucketingBBoxCoder', num_buckets=14, scale_factor=3.0), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.5), + loss_bbox_reg=dict( + type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.5)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0.0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False)) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 480), (1333, 960)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +data = dict(train=dict(pipeline=train_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco.py b/configs/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco.py new file mode 100644 index 0000000..f26062f --- /dev/null +++ b/configs/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco.py @@ -0,0 +1,71 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py' +] +# model settings +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(depth=101), + bbox_head=dict( + _delete_=True, + type='SABLRetinaHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + square_anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + scales=[4], + strides=[8, 16, 32, 64, 128]), + norm_cfg=norm_cfg, + bbox_coder=dict( + type='BucketingBBoxCoder', num_buckets=14, scale_factor=3.0), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.5), + loss_bbox_reg=dict( + type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.5)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0.0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False)) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +data = dict(train=dict(pipeline=train_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/sabl/sabl_retinanet_r50_fpn_1x_coco.py b/configs/sabl/sabl_retinanet_r50_fpn_1x_coco.py new file mode 100644 index 0000000..6fe6bd6 --- /dev/null +++ b/configs/sabl/sabl_retinanet_r50_fpn_1x_coco.py @@ -0,0 +1,50 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# model settings +model = dict( + bbox_head=dict( + _delete_=True, + type='SABLRetinaHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + square_anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + scales=[4], + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='BucketingBBoxCoder', num_buckets=14, scale_factor=3.0), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.5), + loss_bbox_reg=dict( + type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.5)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0.0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/sabl/sabl_retinanet_r50_fpn_gn_1x_coco.py b/configs/sabl/sabl_retinanet_r50_fpn_gn_1x_coco.py new file mode 100644 index 0000000..6acf080 --- /dev/null +++ b/configs/sabl/sabl_retinanet_r50_fpn_gn_1x_coco.py @@ -0,0 +1,52 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# model settings +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + bbox_head=dict( + _delete_=True, + type='SABLRetinaHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + square_anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + scales=[4], + strides=[8, 16, 32, 64, 128]), + norm_cfg=norm_cfg, + bbox_coder=dict( + type='BucketingBBoxCoder', num_buckets=14, scale_factor=3.0), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.5), + loss_bbox_reg=dict( + type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.5)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0.0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/scnet/README.md b/configs/scnet/README.md new file mode 100644 index 0000000..1749df0 --- /dev/null +++ b/configs/scnet/README.md @@ -0,0 +1,51 @@ +# SCNet + +## Introduction + +[ALGORITHM] + +We provide the code for reproducing experiment results of [SCNet](https://arxiv.org/abs/2012.10150). + +``` +@inproceedings{vu2019cascade, + title={SCNet: Training Inference Sample Consistency for Instance Segmentation}, + author={Vu, Thang and Haeyong, Kang and Yoo, Chang D}, + booktitle={AAAI}, + year={2021} +} +``` + +## Dataset + +SCNet requires COCO and [COCO-stuff](http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/stuffthingmaps_trainval2017.zip) dataset for training. You need to download and extract it in the COCO dataset path. +The directory should be like this. + +```none +mmdetection +├── mmdet +├── tools +├── configs +├── data +│ ├── coco +│ │ ├── annotations +│ │ ├── train2017 +│ │ ├── val2017 +│ │ ├── test2017 +| | ├── stuffthingmaps +``` + +## Results and Models + +The results on COCO 2017val are shown in the below table. (results on test-dev are usually slightly higher than val) + +| Backbone | Style | Lr schd | Mem (GB) | Inf speed (fps) | box AP | mask AP | TTA box AP | TTA mask AP | Config | Download | +|:---------------:|:-------:|:-------:|:--------:|:---------------:|:------:|:-------:|:----------:|:-----------:|:------:|:------------:| +| R-50-FPN | pytorch | 1x | 7.0 | 6.2 | 43.5 | 39.2 | 44.8 | 40.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scnet/scnet_r50_fpn_1x_coco.py) | [model](https://drive.google.com/file/d/1K5_8-P0EC43WZFtoO3q9_JE-df8pEc7J/view?usp=sharing) \| [log](https://drive.google.com/file/d/1ZFS6QhFfxlOnDYPiGpSDP_Fzgb7iDGN3/view?usp=sharing) | +| R-50-FPN | pytorch | 20e | 7.0 | 6.2 | 44.5 | 40.0 | 45.8 | 41.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scnet/scnet_r50_fpn_20e_coco.py) | [model](https://drive.google.com/file/d/15VGLCt5-IO5TbzB4Kw6ZyoF6QH0Q511A/view?usp=sharing) \| [log](https://drive.google.com/file/d/1-LnkOXN8n5ojQW34H0qZ625cgrnWpqSX/view?usp=sharing) | +| R-101-FPN | pytorch | 20e | 8.9 | 5.8 | 45.8 | 40.9 | 47.3 | 42.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scnet/scnet_r101_fpn_20e_coco.py) | [model](https://drive.google.com/file/d/1aeCGHsOBdfIqVBnBPp0JUE_RSIau3583/view?usp=sharing) \| [log](https://drive.google.com/file/d/1iRx-9GRgTaIDsz-we3DGwFVH22nbvCLa/view?usp=sharing) | +| X-101-64x4d-FPN | pytorch | 20e | 13.2 | 4.9 | 47.5 | 42.3 | 48.9 | 44.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scnet/scnet_x101_64x4d_fpn_20e_coco.py) | [model](https://drive.google.com/file/d/1YjgutUKz4TTPpqSWGKUTkZJ8_X-kyCfY/view?usp=sharing) \| [log](https://drive.google.com/file/d/1OsfQJ8gwtqIQ61k358yxY21sCvbUcRjs/view?usp=sharing) | + +### Notes + +- Training hyper-parameters are identical to those of [HTC](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc). +- TTA means Test Time Augmentation, which applies horizonal flip and multi-scale testing. Refer to [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scnet/scnet_r50_fpn_1x_coco.py). diff --git a/configs/scnet/scnet_r101_fpn_20e_coco.py b/configs/scnet/scnet_r101_fpn_20e_coco.py new file mode 100644 index 0000000..cef0668 --- /dev/null +++ b/configs/scnet/scnet_r101_fpn_20e_coco.py @@ -0,0 +1,2 @@ +_base_ = './scnet_r50_fpn_20e_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/scnet/scnet_r50_fpn_1x_coco.py b/configs/scnet/scnet_r50_fpn_1x_coco.py new file mode 100644 index 0000000..e4215a6 --- /dev/null +++ b/configs/scnet/scnet_r50_fpn_1x_coco.py @@ -0,0 +1,136 @@ +_base_ = '../htc/htc_r50_fpn_1x_coco.py' +# model settings +model = dict( + type='SCNet', + roi_head=dict( + _delete_=True, + type='SCNetRoIHead', + num_stages=3, + stage_loss_weights=[1, 0.5, 0.25], + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=[ + dict( + type='SCNetBBoxHead', + num_shared_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='SCNetBBoxHead', + num_shared_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.05, 0.05, 0.1, 0.1]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='SCNetBBoxHead', + num_shared_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.033, 0.033, 0.067, 0.067]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) + ], + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=dict( + type='SCNetMaskHead', + num_convs=12, + in_channels=256, + conv_out_channels=256, + num_classes=80, + conv_to_res=True, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), + semantic_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[8]), + semantic_head=dict( + type='SCNetSemanticHead', + num_ins=5, + fusion_level=1, + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=183, + ignore_label=255, + loss_weight=0.2, + conv_to_res=True), + glbctx_head=dict( + type='GlobalContextHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_weight=3.0, + conv_to_res=True), + feat_relay_head=dict( + type='FeatureRelayHead', + in_channels=1024, + out_conv_channels=256, + roi_feat_size=7, + scale_factor=2))) + +# uncomment below code to enable test time augmentations +# img_norm_cfg = dict( +# mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +# test_pipeline = [ +# dict(type='LoadImageFromFile'), +# dict( +# type='MultiScaleFlipAug', +# img_scale=[(600, 900), (800, 1200), (1000, 1500), (1200, 1800), +# (1400, 2100)], +# flip=True, +# transforms=[ +# dict(type='Resize', keep_ratio=True), +# dict(type='RandomFlip', flip_ratio=0.5), +# dict(type='Normalize', **img_norm_cfg), +# dict(type='Pad', size_divisor=32), +# dict(type='ImageToTensor', keys=['img']), +# dict(type='Collect', keys=['img']), +# ]) +# ] +# data = dict( +# val=dict(pipeline=test_pipeline), +# test=dict(pipeline=test_pipeline)) diff --git a/configs/scnet/scnet_r50_fpn_20e_coco.py b/configs/scnet/scnet_r50_fpn_20e_coco.py new file mode 100644 index 0000000..3b121a6 --- /dev/null +++ b/configs/scnet/scnet_r50_fpn_20e_coco.py @@ -0,0 +1,4 @@ +_base_ = './scnet_r50_fpn_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/scnet/scnet_x101_64x4d_fpn_20e_coco.py b/configs/scnet/scnet_x101_64x4d_fpn_20e_coco.py new file mode 100644 index 0000000..a0ff32b --- /dev/null +++ b/configs/scnet/scnet_x101_64x4d_fpn_20e_coco.py @@ -0,0 +1,14 @@ +_base_ = './scnet_r50_fpn_20e_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch')) diff --git a/configs/scnet/scnet_x101_64x4d_fpn_8x1_20e_coco.py b/configs/scnet/scnet_x101_64x4d_fpn_8x1_20e_coco.py new file mode 100644 index 0000000..9f3ce6d --- /dev/null +++ b/configs/scnet/scnet_x101_64x4d_fpn_8x1_20e_coco.py @@ -0,0 +1,3 @@ +_base_ = './scnet_x101_64x4d_fpn_20e_coco.py' +data = dict(samples_per_gpu=1, workers_per_gpu=1) +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/scratch/README.md b/configs/scratch/README.md new file mode 100644 index 0000000..a338dc5 --- /dev/null +++ b/configs/scratch/README.md @@ -0,0 +1,25 @@ +# Rethinking ImageNet Pre-training + +## Introduction + +[ALGORITHM] + +```latex +@article{he2018rethinking, + title={Rethinking imagenet pre-training}, + author={He, Kaiming and Girshick, Ross and Doll{\'a}r, Piotr}, + journal={arXiv preprint arXiv:1811.08883}, + year={2018} +} +``` + +## Results and Models + +| Model | Backbone | Style | Lr schd | box AP | mask AP | Config | Download | +|:------------:|:---------:|:-------:|:-------:|:------:|:-------:|:------:|:--------:| +| Faster R-CNN | R-50-FPN | pytorch | 6x | 40.7 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco/scratch_faster_rcnn_r50_fpn_gn_6x_bbox_mAP-0.407_20200201_193013-90813d01.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco/scratch_faster_rcnn_r50_fpn_gn_6x_20200201_193013.log.json) | +| Mask R-CNN | R-50-FPN | pytorch | 6x | 41.2 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/scratch/mask_rcnn_r50_fpn_gn-all_scratch_6x_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/scratch/mask_rcnn_r50_fpn_gn-all_scratch_6x_coco/scratch_mask_rcnn_r50_fpn_gn_6x_bbox_mAP-0.412__segm_mAP-0.374_20200201_193051-1e190a40.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/scratch/mask_rcnn_r50_fpn_gn-all_scratch_6x_coco/scratch_mask_rcnn_r50_fpn_gn_6x_20200201_193051.log.json) | + +Note: + +- The above models are trained with 16 GPUs. diff --git a/configs/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco.py b/configs/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco.py new file mode 100644 index 0000000..636f3f6 --- /dev/null +++ b/configs/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco.py @@ -0,0 +1,22 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + pretrained=None, + backbone=dict( + frozen_stages=-1, zero_init_residual=False, norm_cfg=norm_cfg), + neck=dict(norm_cfg=norm_cfg), + roi_head=dict( + bbox_head=dict( + type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + norm_cfg=norm_cfg))) +# optimizer +optimizer = dict(paramwise_cfg=dict(norm_decay_mult=0)) +optimizer_config = dict(_delete_=True, grad_clip=None) +# learning policy +lr_config = dict(warmup_ratio=0.1, step=[65, 71]) +runner = dict(type='EpochBasedRunner', max_epochs=73) diff --git a/configs/scratch/mask_rcnn_r50_fpn_gn-all_scratch_6x_coco.py b/configs/scratch/mask_rcnn_r50_fpn_gn-all_scratch_6x_coco.py new file mode 100644 index 0000000..6277a97 --- /dev/null +++ b/configs/scratch/mask_rcnn_r50_fpn_gn-all_scratch_6x_coco.py @@ -0,0 +1,23 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + pretrained=None, + backbone=dict( + frozen_stages=-1, zero_init_residual=False, norm_cfg=norm_cfg), + neck=dict(norm_cfg=norm_cfg), + roi_head=dict( + bbox_head=dict( + type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + norm_cfg=norm_cfg), + mask_head=dict(norm_cfg=norm_cfg))) +# optimizer +optimizer = dict(paramwise_cfg=dict(norm_decay_mult=0)) +optimizer_config = dict(_delete_=True, grad_clip=None) +# learning policy +lr_config = dict(warmup_ratio=0.1, step=[65, 71]) +runner = dict(type='EpochBasedRunner', max_epochs=73) diff --git a/configs/sparse_rcnn/README.md b/configs/sparse_rcnn/README.md new file mode 100644 index 0000000..60cc8a9 --- /dev/null +++ b/configs/sparse_rcnn/README.md @@ -0,0 +1,28 @@ +# Sparse R-CNN: End-to-End Object Detection with Learnable Proposals + +## Introduction + +[ALGORITHM] + +``` +@article{peize2020sparse, + title = {{SparseR-CNN}: End-to-End Object Detection with Learnable Proposals}, + author = {Peize Sun and Rufeng Zhang and Yi Jiang and Tao Kong and Chenfeng Xu and Wei Zhan and Masayoshi Tomizuka and Lei Li and Zehuan Yuan and Changhu Wang and Ping Luo}, + journal = {arXiv preprint arXiv:2011.12450}, + year = {2020} +} +``` + +## Results and Models + +| Model | Backbone | Style | Lr schd | Number of Proposals |Multi-Scale| RandomCrop | box AP | Config | Download | +|:------------:|:---------:|:-------:|:-------:|:-------: |:-------: |:---------:|:------:|:------:|:--------:| +| Sparse R-CNN | R-50-FPN | pytorch | 1x | 100 | False | False | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco/sparse_rcnn_r50_fpn_1x_coco_20201222_214453-dc79b137.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco/sparse_rcnn_r50_fpn_1x_coco_20201222_214453-dc79b137.log.json) | +| Sparse R-CNN | R-50-FPN | pytorch | 3x | 100 | True | False | 42.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/sparse_rcnn/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco_20201218_154234-7bc5c054.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco_20201218_154234-7bc5c054.log.json) | +| Sparse R-CNN | R-50-FPN | pytorch | 3x | 300 | True | True | 45.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/sparse_rcnn/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20201223_024605-9fe92701.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20201223_024605-9fe92701.log.json) | +| Sparse R-CNN | R-101-FPN | pytorch | 3x | 100 | True | False | 44.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/sparse_rcnn/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco_20201223_121552-6c46c9d6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco_20201223_121552-6c46c9d6.log.json) | +| Sparse R-CNN | R-101-FPN | pytorch | 3x | 300 | True | True | 46.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/sparse_rcnn/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20201223_023452-c23c3564.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20201223_023452-c23c3564.log.json) | + +### Notes + +We observe about 0.3 AP noise especially when using ResNet-101 as the backbone. diff --git a/configs/sparse_rcnn/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py b/configs/sparse_rcnn/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py new file mode 100644 index 0000000..e7a94db --- /dev/null +++ b/configs/sparse_rcnn/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py @@ -0,0 +1,3 @@ +_base_ = './sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py' + +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/sparse_rcnn/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco.py b/configs/sparse_rcnn/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco.py new file mode 100644 index 0000000..0439fc1 --- /dev/null +++ b/configs/sparse_rcnn/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco.py @@ -0,0 +1,3 @@ +_base_ = './sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco.py' + +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py b/configs/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..512eca6 --- /dev/null +++ b/configs/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,95 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +num_stages = 6 +num_proposals = 100 +model = dict( + type='SparseRCNN', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=0, + add_extra_convs='on_input', + num_outs=4), + rpn_head=dict( + type='EmbeddingRPNHead', + num_proposals=num_proposals, + proposal_feature_channel=256), + roi_head=dict( + type='SparseRoIHead', + num_stages=num_stages, + stage_loss_weights=[1] * num_stages, + proposal_feature_channel=256, + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=[ + dict( + type='DIIHead', + num_classes=80, + num_ffn_fcs=2, + num_heads=8, + num_cls_fcs=1, + num_reg_fcs=3, + feedforward_channels=2048, + in_channels=256, + dropout=0.0, + ffn_act_cfg=dict(type='ReLU', inplace=True), + dynamic_conv_cfg=dict( + type='DynamicConv', + in_channels=256, + feat_channels=64, + out_channels=256, + input_feat_shape=7, + act_cfg=dict(type='ReLU', inplace=True), + norm_cfg=dict(type='LN')), + loss_bbox=dict(type='L1Loss', loss_weight=5.0), + loss_iou=dict(type='GIoULoss', loss_weight=2.0), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=2.0), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + clip_border=False, + target_means=[0., 0., 0., 0.], + target_stds=[0.5, 0.5, 1., 1.])) for _ in range(num_stages) + ]), + # training and testing settings + train_cfg=dict( + rpn=None, + rcnn=[ + dict( + assigner=dict( + type='HungarianAssigner', + cls_cost=dict(type='FocalLossCost', weight=2.0), + reg_cost=dict(type='BBoxL1Cost', weight=5.0), + iou_cost=dict(type='IoUCost', iou_mode='giou', + weight=2.0)), + sampler=dict(type='PseudoSampler'), + pos_weight=1) for _ in range(num_stages) + ]), + test_cfg=dict(rpn=None, rcnn=dict(max_per_img=num_proposals))) + +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.000025, weight_decay=0.0001) +optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=1, norm_type=2)) +# learning policy +lr_config = dict(policy='step', step=[8, 11]) +runner = dict(type='EpochBasedRunner', max_epochs=12) diff --git a/configs/sparse_rcnn/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py b/configs/sparse_rcnn/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py new file mode 100644 index 0000000..36f1d62 --- /dev/null +++ b/configs/sparse_rcnn/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py @@ -0,0 +1,52 @@ +_base_ = './sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco.py' +num_proposals = 300 +model = dict( + rpn_head=dict(num_proposals=num_proposals), + test_cfg=dict( + _delete_=True, rpn=None, rcnn=dict(max_per_img=num_proposals))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +# augmentation strategy originates from DETR. +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict( + type='AutoAugment', + policies=[[ + dict( + type='Resize', + img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), + (608, 1333), (640, 1333), (672, 1333), (704, 1333), + (736, 1333), (768, 1333), (800, 1333)], + multiscale_mode='value', + keep_ratio=True) + ], + [ + dict( + type='Resize', + img_scale=[(400, 1333), (500, 1333), (600, 1333)], + multiscale_mode='value', + keep_ratio=True), + dict( + type='RandomCrop', + crop_type='absolute_range', + crop_size=(384, 600), + allow_negative_crop=True), + dict( + type='Resize', + img_scale=[(480, 1333), (512, 1333), (544, 1333), + (576, 1333), (608, 1333), (640, 1333), + (672, 1333), (704, 1333), (736, 1333), + (768, 1333), (800, 1333)], + multiscale_mode='value', + override=True, + keep_ratio=True) + ]]), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] +data = dict(train=dict(pipeline=train_pipeline)) diff --git a/configs/sparse_rcnn/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco.py b/configs/sparse_rcnn/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco.py new file mode 100644 index 0000000..2fa2a80 --- /dev/null +++ b/configs/sparse_rcnn/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco.py @@ -0,0 +1,23 @@ +_base_ = './sparse_rcnn_r50_fpn_1x_coco.py' + +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +min_values = (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, value) for value in min_values], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] + +data = dict(train=dict(pipeline=train_pipeline)) +lr_config = dict(policy='step', step=[27, 33]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/ssd/README.md b/configs/ssd/README.md new file mode 100644 index 0000000..51262d6 --- /dev/null +++ b/configs/ssd/README.md @@ -0,0 +1,21 @@ +# SSD: Single Shot MultiBox Detector + +## Introduction + +[ALGORITHM] + +```latex +@article{Liu_2016, + title={SSD: Single Shot MultiBox Detector}, + journal={ECCV}, + author={Liu, Wei and Anguelov, Dragomir and Erhan, Dumitru and Szegedy, Christian and Reed, Scott and Fu, Cheng-Yang and Berg, Alexander C.}, + year={2016}, +} +``` + +## Results and models + +| Backbone | Size | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :------: | :---: | :---: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +| VGG16 | 300 | caffe | 120e | 10.2 | 43.7 | 25.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd/ssd300_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/ssd/ssd300_coco/ssd300_coco_20200307-a92d2092.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/ssd/ssd300_coco/ssd300_coco_20200307_174216.log.json) | +| VGG16 | 512 | caffe | 120e | 9.3 | 30.7 | 29.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd/ssd512_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/ssd/ssd512_coco/ssd512_coco_20200308-038c5591.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/ssd/ssd512_coco/ssd512_coco_20200308_134447.log.json) | diff --git a/configs/ssd/ssd300_coco.py b/configs/ssd/ssd300_coco.py new file mode 100644 index 0000000..75c5e4e --- /dev/null +++ b/configs/ssd/ssd300_coco.py @@ -0,0 +1,62 @@ +_base_ = [ + '../_base_/models/ssd300.py', '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py' +] +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(300, 300), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(300, 300), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=3, + train=dict( + _delete_=True, + type='RepeatDataset', + times=5, + dataset=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4) +optimizer_config = dict(_delete_=True) diff --git a/configs/ssd/ssd512_coco.py b/configs/ssd/ssd512_coco.py new file mode 100644 index 0000000..44d2920 --- /dev/null +++ b/configs/ssd/ssd512_coco.py @@ -0,0 +1,71 @@ +_base_ = 'ssd300_coco.py' +input_size = 512 +model = dict( + backbone=dict(input_size=input_size), + bbox_head=dict( + in_channels=(512, 1024, 512, 256, 256, 256, 256), + anchor_generator=dict( + type='SSDAnchorGenerator', + scale_major=False, + input_size=input_size, + basesize_ratio_range=(0.1, 0.9), + strides=[8, 16, 32, 64, 128, 256, 512], + ratios=[[2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]]))) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(512, 512), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=3, + train=dict( + _delete_=True, + type='RepeatDataset', + times=5, + dataset=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4) +optimizer_config = dict(_delete_=True) diff --git a/configs/tridentnet/README.md b/configs/tridentnet/README.md new file mode 100644 index 0000000..8ab7c28 --- /dev/null +++ b/configs/tridentnet/README.md @@ -0,0 +1,28 @@ +# Scale-Aware Trident Networks for Object Detection + +## Introduction + +[ALGORITHM] + +``` +@InProceedings{li2019scale, + title={Scale-Aware Trident Networks for Object Detection}, + author={Li, Yanghao and Chen, Yuntao and Wang, Naiyan and Zhang, Zhaoxiang}, + journal={The International Conference on Computer Vision (ICCV)}, + year={2019} +} +``` + +## Results and models + +We reports the test results using only one branch for inference. + +| Backbone | Style | mstrain | Lr schd | Mem (GB) | Inf time (fps) | box AP | Download | +| :-------------: | :-----: | :-----: | :-----: | :------: | :------------: | :----: | :------: | +| R-50 | caffe | N | 1x | | | 37.7 |[model](https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_1x_coco/tridentnet_r50_caffe_1x_coco_20201230_141838-2ec0b530.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_1x_coco/tridentnet_r50_caffe_1x_coco_20201230_141838.log.json) | +| R-50 | caffe | Y | 1x | | | 37.6 |[model](https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_mstrain_1x_coco/tridentnet_r50_caffe_mstrain_1x_coco_20201230_141839-6ce55ccb.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_mstrain_1x_coco/tridentnet_r50_caffe_mstrain_1x_coco_20201230_141839.log.json) | +| R-50 | caffe | Y | 3x | | | 40.3 |[model](https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_mstrain_3x_coco/tridentnet_r50_caffe_mstrain_3x_coco_20201130_100539-46d227ba.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_mstrain_3x_coco/tridentnet_r50_caffe_mstrain_3x_coco_20201130_100539.log.json) | + +**Note** + +Similar to [Detectron2](https://github.com/facebookresearch/detectron2/tree/master/projects/TridentNet), we haven't implemented the Scale-aware Training Scheme in section 4.2 of the paper. diff --git a/configs/tridentnet/tridentnet_r50_caffe_1x_coco.py b/configs/tridentnet/tridentnet_r50_caffe_1x_coco.py new file mode 100644 index 0000000..a6a668c --- /dev/null +++ b/configs/tridentnet/tridentnet_r50_caffe_1x_coco.py @@ -0,0 +1,53 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_caffe_c4.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict( + type='TridentFasterRCNN', + pretrained='open-mmlab://detectron2/resnet50_caffe', + backbone=dict( + type='TridentResNet', + trident_dilations=(1, 2, 3), + num_branch=3, + test_branch_idx=1), + roi_head=dict(type='TridentRoIHead', num_branch=3, test_branch_idx=1), + train_cfg=dict( + rpn_proposal=dict(max_per_img=500), + rcnn=dict( + sampler=dict(num=128, pos_fraction=0.5, + add_gt_as_proposals=False)))) + +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/tridentnet/tridentnet_r50_caffe_mstrain_1x_coco.py b/configs/tridentnet/tridentnet_r50_caffe_mstrain_1x_coco.py new file mode 100644 index 0000000..c73d9ea --- /dev/null +++ b/configs/tridentnet/tridentnet_r50_caffe_mstrain_1x_coco.py @@ -0,0 +1,22 @@ +_base_ = 'tridentnet_r50_caffe_1x_coco.py' + +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] + +data = dict(train=dict(pipeline=train_pipeline)) diff --git a/configs/tridentnet/tridentnet_r50_caffe_mstrain_3x_coco.py b/configs/tridentnet/tridentnet_r50_caffe_mstrain_3x_coco.py new file mode 100644 index 0000000..0f40282 --- /dev/null +++ b/configs/tridentnet/tridentnet_r50_caffe_mstrain_3x_coco.py @@ -0,0 +1,4 @@ +_base_ = 'tridentnet_r50_caffe_mstrain_1x_coco.py' + +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/vfnet/README.md b/configs/vfnet/README.md new file mode 100644 index 0000000..d1a94d1 --- /dev/null +++ b/configs/vfnet/README.md @@ -0,0 +1,43 @@ +# VarifocalNet: An IoU-aware Dense Object Detector + +## Introduction + +[ALGORITHM] + +**VarifocalNet (VFNet)** learns to predict the IoU-aware classification score which mixes the object presence confidence and localization accuracy together as the detection score for a bounding box. The learning is supervised by the proposed Varifocal Loss (VFL), based on a new star-shaped bounding box feature representation (the features at nine yellow sampling points). Given the new representation, the object localization accuracy is further improved by refining the initially regressed bounding box. The full paper is available at: [https://arxiv.org/abs/2008.13367](https://arxiv.org/abs/2008.13367). + +
+ +

Learning to Predict the IoU-aware Classification Score.

+
+ +## Citing VarifocalNet + +```latex +@article{zhang2020varifocalnet, + title={VarifocalNet: An IoU-aware Dense Object Detector}, + author={Zhang, Haoyang and Wang, Ying and Dayoub, Feras and S{\"u}nderhauf, Niko}, + journal={arXiv preprint arXiv:2008.13367}, + year={2020} +} +``` + +## Results and Models + +| Backbone | Style | DCN | MS train | Lr schd |Inf time (fps) | box AP (val) | box AP (test-dev) | Config | Download | +|:------------:|:---------:|:-------:|:--------:|:-------:|:-------------:|:------------:|:-----------------:|:------:|:--------:| +| R-50 | pytorch | N | N | 1x | - | 41.6 | 41.6 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet_r50_fpn_1x_coco.py) | [model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_1x_coco/vfnet_r50_fpn_1x_coco_20201027-38db6f58.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_1x_coco/vfnet_r50_fpn_1x_coco.json)| +| R-50 | pytorch | N | Y | 2x | - | 44.5 | 44.8 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet_r50_fpn_mstrain_2x_coco.py) | [model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_mstrain_2x_coco/vfnet_r50_fpn_mstrain_2x_coco_20201027-7cc75bd2.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_mstrain_2x_coco/vfnet_r50_fpn_mstrain_2x_coco.json)| +| R-50 | pytorch | Y | Y | 2x | - | 47.8 | 48.0 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.py) | [model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-6879c318.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.json)| +| R-101 | pytorch | N | N | 1x | - | 43.0 | 43.6 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet_r101_fpn_1x_coco.py) | [model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_1x_coco/vfnet_r101_fpn_1x_coco_20201027pth-c831ece7.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_1x_coco/vfnet_r101_fpn_1x_coco.json)| +| R-101 | pytorch | N | Y | 2x | - | 46.2 | 46.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet_r101_fpn_mstrain_2x_coco.py) | [model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_mstrain_2x_coco/vfnet_r101_fpn_mstrain_2x_coco_20201027pth-4a5d53f1.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_mstrain_2x_coco/vfnet_r101_fpn_mstrain_2x_coco.json)| +| R-101 | pytorch | Y | Y | 2x | - | 49.0 | 49.2 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco.py) | [model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-7729adb5.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco.json)| +| X-101-32x4d | pytorch | Y | Y | 2x | - | 49.7 | 50.0 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py) | [model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/vfnet/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-d300a6fc.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/vfnet/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.json)| +| X-101-64x4d | pytorch | Y | Y | 2x | - | 50.4 | 50.8 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py) | [model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/vfnet/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-b5f6da5e.pth) | [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/vfnet/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.json)| + +**Notes:** + +- The MS-train scale range is 1333x[480:960] (`range` mode) and the inference scale keeps 1333x800. +- DCN means using `DCNv2` in both backbone and head. +- Inference time will be updated soon. +- More results and pre-trained models can be found in [VarifocalNet-Github](https://github.com/hyz-xmaster/VarifocalNet) diff --git a/configs/vfnet/vfnet_r101_fpn_1x_coco.py b/configs/vfnet/vfnet_r101_fpn_1x_coco.py new file mode 100644 index 0000000..0952131 --- /dev/null +++ b/configs/vfnet/vfnet_r101_fpn_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './vfnet_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/vfnet/vfnet_r101_fpn_2x_coco.py b/configs/vfnet/vfnet_r101_fpn_2x_coco.py new file mode 100644 index 0000000..334657d --- /dev/null +++ b/configs/vfnet/vfnet_r101_fpn_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './vfnet_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco.py b/configs/vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco.py new file mode 100644 index 0000000..f8ef6ec --- /dev/null +++ b/configs/vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco.py @@ -0,0 +1,14 @@ +_base_ = './vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict( + type='ResNet', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/vfnet/vfnet_r101_fpn_mstrain_2x_coco.py b/configs/vfnet/vfnet_r101_fpn_mstrain_2x_coco.py new file mode 100644 index 0000000..be7f075 --- /dev/null +++ b/configs/vfnet/vfnet_r101_fpn_mstrain_2x_coco.py @@ -0,0 +1,2 @@ +_base_ = './vfnet_r50_fpn_mstrain_2x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/vfnet/vfnet_r2_101_fpn_mdconv_c3-c5_mstrain_2x_coco.py b/configs/vfnet/vfnet_r2_101_fpn_mdconv_c3-c5_mstrain_2x_coco.py new file mode 100644 index 0000000..8da3122 --- /dev/null +++ b/configs/vfnet/vfnet_r2_101_fpn_mdconv_c3-c5_mstrain_2x_coco.py @@ -0,0 +1,16 @@ +_base_ = './vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.py' +model = dict( + pretrained='open-mmlab://res2net101_v1d_26w_4s', + backbone=dict( + type='Res2Net', + depth=101, + scales=4, + base_width=26, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/vfnet/vfnet_r2_101_fpn_mstrain_2x_coco.py b/configs/vfnet/vfnet_r2_101_fpn_mstrain_2x_coco.py new file mode 100644 index 0000000..2bcf779 --- /dev/null +++ b/configs/vfnet/vfnet_r2_101_fpn_mstrain_2x_coco.py @@ -0,0 +1,14 @@ +_base_ = './vfnet_r50_fpn_mstrain_2x_coco.py' +model = dict( + pretrained='open-mmlab://res2net101_v1d_26w_4s', + backbone=dict( + type='Res2Net', + depth=101, + scales=4, + base_width=26, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch')) diff --git a/configs/vfnet/vfnet_r50_fpn_1x_coco.py b/configs/vfnet/vfnet_r50_fpn_1x_coco.py new file mode 100644 index 0000000..76566bd --- /dev/null +++ b/configs/vfnet/vfnet_r50_fpn_1x_coco.py @@ -0,0 +1,108 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# model settings +model = dict( + type='VFNet', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs=True, + extra_convs_on_inputs=False, # use P5 + num_outs=5, + relu_before_extra_convs=True), + bbox_head=dict( + type='VFNetHead', + num_classes=80, + in_channels=256, + stacked_convs=3, + feat_channels=256, + strides=[8, 16, 32, 64, 128], + center_sampling=False, + dcn_on_last_conv=False, + use_atss=True, + use_vfl=True, + loss_cls=dict( + type='VarifocalLoss', + use_sigmoid=True, + alpha=0.75, + gamma=2.0, + iou_weighted=True, + loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=1.5), + loss_bbox_refine=dict(type='GIoULoss', loss_weight=2.0)), + # training and testing settings + train_cfg=dict( + assigner=dict(type='ATSSAssigner', topk=9), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) + +# data setting +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) + +# optimizer +optimizer = dict( + lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.)) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.1, + step=[8, 11]) +runner = dict(type='EpochBasedRunner', max_epochs=12) diff --git a/configs/vfnet/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.py b/configs/vfnet/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.py new file mode 100644 index 0000000..24d2093 --- /dev/null +++ b/configs/vfnet/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.py @@ -0,0 +1,6 @@ +_base_ = './vfnet_r50_fpn_mstrain_2x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True)), + bbox_head=dict(dcn_on_last_conv=True)) diff --git a/configs/vfnet/vfnet_r50_fpn_mstrain_2x_coco.py b/configs/vfnet/vfnet_r50_fpn_mstrain_2x_coco.py new file mode 100644 index 0000000..6078bb9 --- /dev/null +++ b/configs/vfnet/vfnet_r50_fpn_mstrain_2x_coco.py @@ -0,0 +1,39 @@ +_base_ = './vfnet_r50_fpn_1x_coco.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 480), (1333, 960)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/vfnet/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py b/configs/vfnet/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py new file mode 100644 index 0000000..ebeef6f --- /dev/null +++ b/configs/vfnet/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py @@ -0,0 +1,16 @@ +_base_ = './vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/vfnet/vfnet_x101_32x4d_fpn_mstrain_2x_coco.py b/configs/vfnet/vfnet_x101_32x4d_fpn_mstrain_2x_coco.py new file mode 100644 index 0000000..5ed2650 --- /dev/null +++ b/configs/vfnet/vfnet_x101_32x4d_fpn_mstrain_2x_coco.py @@ -0,0 +1,14 @@ +_base_ = './vfnet_r50_fpn_mstrain_2x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch')) diff --git a/configs/vfnet/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py b/configs/vfnet/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py new file mode 100644 index 0000000..2e19078 --- /dev/null +++ b/configs/vfnet/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py @@ -0,0 +1,16 @@ +_base_ = './vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/vfnet/vfnet_x101_64x4d_fpn_mstrain_2x_coco.py b/configs/vfnet/vfnet_x101_64x4d_fpn_mstrain_2x_coco.py new file mode 100644 index 0000000..4329b34 --- /dev/null +++ b/configs/vfnet/vfnet_x101_64x4d_fpn_mstrain_2x_coco.py @@ -0,0 +1,14 @@ +_base_ = './vfnet_r50_fpn_mstrain_2x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch')) diff --git a/configs/wider_face/README.md b/configs/wider_face/README.md new file mode 100644 index 0000000..c62e10d --- /dev/null +++ b/configs/wider_face/README.md @@ -0,0 +1,43 @@ +# WIDER Face Dataset + +[DATASET] + +To use the WIDER Face dataset you need to download it +and extract to the `data/WIDERFace` folder. Annotation in the VOC format +can be found in this [repo](https://github.com/sovrasov/wider-face-pascal-voc-annotations.git). +You should move the annotation files from `WIDER_train_annotations` and `WIDER_val_annotations` folders +to the `Annotation` folders inside the corresponding directories `WIDER_train` and `WIDER_val`. +Also annotation lists `val.txt` and `train.txt` should be copied to `data/WIDERFace` from `WIDER_train_annotations` and `WIDER_val_annotations`. +The directory should be like this: + +``` +mmdetection +├── mmdet +├── tools +├── configs +├── data +│ ├── WIDERFace +│ │ ├── WIDER_train +│ | │ ├──0--Parade +│ | │ ├── ... +│ | │ ├── Annotations +│ │ ├── WIDER_val +│ | │ ├──0--Parade +│ | │ ├── ... +│ | │ ├── Annotations +│ │ ├── val.txt +│ │ ├── train.txt + +``` + +After that you can train the SSD300 on WIDER by launching training with the `ssd300_wider_face.py` config or +create your own config based on the presented one. + +``` +@inproceedings{yang2016wider, + Author = {Yang, Shuo and Luo, Ping and Loy, Chen Change and Tang, Xiaoou}, + Booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + Title = {WIDER FACE: A Face Detection Benchmark}, + Year = {2016} +} +``` diff --git a/configs/wider_face/ssd300_wider_face.py b/configs/wider_face/ssd300_wider_face.py new file mode 100644 index 0000000..5a3eb38 --- /dev/null +++ b/configs/wider_face/ssd300_wider_face.py @@ -0,0 +1,18 @@ +_base_ = [ + '../_base_/models/ssd300.py', '../_base_/datasets/wider_face.py', + '../_base_/default_runtime.py' +] +model = dict(bbox_head=dict(num_classes=1)) +# optimizer +optimizer = dict(type='SGD', lr=0.012, momentum=0.9, weight_decay=5e-4) +optimizer_config = dict() +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.001, + step=[16, 20]) +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=24) +log_config = dict(interval=1) diff --git a/configs/yolact/README.md b/configs/yolact/README.md new file mode 100644 index 0000000..92b1377 --- /dev/null +++ b/configs/yolact/README.md @@ -0,0 +1,71 @@ +# **Y**ou **O**nly **L**ook **A**t **C**oefficien**T**s + +[ALGORITHM] + +``` + ██╗ ██╗ ██████╗ ██╗ █████╗ ██████╗████████╗ + ╚██╗ ██╔╝██╔═══██╗██║ ██╔══██╗██╔════╝╚══██╔══╝ + ╚████╔╝ ██║ ██║██║ ███████║██║ ██║ + ╚██╔╝ ██║ ██║██║ ██╔══██║██║ ██║ + ██║ ╚██████╔╝███████╗██║ ██║╚██████╗ ██║ + ╚═╝ ╚═════╝ ╚══════╝╚═╝ ╚═╝ ╚═════╝ ╚═╝ +``` + +A simple, fully convolutional model for real-time instance segmentation. This is the code for our paper: + +- [YOLACT: Real-time Instance Segmentation](https://arxiv.org/abs/1904.02689) + + +For a real-time demo, check out our ICCV video: +[![IMAGE ALT TEXT HERE](https://img.youtube.com/vi/0pMfmo8qfpQ/0.jpg)](https://www.youtube.com/watch?v=0pMfmo8qfpQ) + +## Evaluation + +Here are our YOLACT models along with their FPS on a Titan Xp and mAP on COCO's `val`: + +| Image Size | GPU x BS | Backbone | *FPS | mAP | Weights | Configs | Download | +|:----------:|:--------:|:-------------:|:-----:|:----:|:-------:|:------:|:--------:| +| 550 | 1x8 | Resnet50-FPN | 42.5 | 29.0 | | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/yolact_r50_1x8_coco.py) |[model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/yolact/yolact_r50_1x8_coco_20200908-f38d58df.pth) | +| 550 | 8x8 | Resnet50-FPN | 42.5 | 28.4 | | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/yolact_r50_8x8_coco.py) | [model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/yolact/yolact_r50_8x8_coco_20200908-ca34f5db.pth) | +| 550 | 1x8 | Resnet101-FPN | 33.5 | 30.4 | | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/yolact_r101_1x8_coco.py) | [model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/yolact/yolact_r101_1x8_coco_20200908-4cbe9101.pth) | + +*Note: The FPS is evaluated by the [original implementation](https://github.com/dbolya/yolact). When calculating FPS, only the model inference time is taken into account. Data loading and post-processing operations such as converting masks to RLE code, generating COCO JSON results, image rendering are not included. + +## Training + +All the aforementioned models are trained with a single GPU. It typically takes ~12GB VRAM when using resnet-101 as the backbone. If you want to try multiple GPUs training, you may have to modify the configuration files accordingly, such as adjusting the training schedule and freezing batch norm. + +```Shell +# Trains using the resnet-101 backbone with a batch size of 8 on a single GPU. +./tools/dist_train.sh configs/yolact/yolact_r101.py 1 +``` + +## Testing + +Please refer to [mmdetection/docs/getting_started.md](https://github.com/open-mmlab/mmdetection/blob/master/docs/getting_started.md#inference-with-pretrained-models). + +## Citation + +If you use YOLACT or this code base in your work, please cite + +```latex +@inproceedings{yolact-iccv2019, + author = {Daniel Bolya and Chong Zhou and Fanyi Xiao and Yong Jae Lee}, + title = {YOLACT: {Real-time} Instance Segmentation}, + booktitle = {ICCV}, + year = {2019}, +} +``` + + diff --git a/configs/yolact/yolact_r101_1x8_coco.py b/configs/yolact/yolact_r101_1x8_coco.py new file mode 100644 index 0000000..2864b59 --- /dev/null +++ b/configs/yolact/yolact_r101_1x8_coco.py @@ -0,0 +1,3 @@ +_base_ = './yolact_r50_1x8_coco.py' + +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) diff --git a/configs/yolact/yolact_r50_1x8_coco.py b/configs/yolact/yolact_r50_1x8_coco.py new file mode 100644 index 0000000..d0e5ace --- /dev/null +++ b/configs/yolact/yolact_r50_1x8_coco.py @@ -0,0 +1,160 @@ +_base_ = '../_base_/default_runtime.py' + +# model settings +img_size = 550 +model = dict( + type='YOLACT', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=-1, # do not freeze stem + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=False, # update the statistics of bn + zero_init_residual=False, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_input', + num_outs=5, + upsample_cfg=dict(mode='bilinear')), + bbox_head=dict( + type='YOLACTHead', + num_classes=80, + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=3, + scales_per_octave=1, + base_sizes=[8, 16, 32, 64, 128], + ratios=[0.5, 1.0, 2.0], + strides=[550.0 / x for x in [69, 35, 18, 9, 5]], + centers=[(550 * 0.5 / x, 550 * 0.5 / x) + for x in [69, 35, 18, 9, 5]]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2]), + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + reduction='none', + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.5), + num_head_convs=1, + num_protos=32, + use_ohem=True), + mask_head=dict( + type='YOLACTProtonet', + in_channels=256, + num_protos=32, + num_classes=80, + max_masks_to_train=100, + loss_mask_weight=6.125), + segm_head=dict( + type='YOLACTSegmHead', + num_classes=80, + in_channels=256, + loss_segm=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0., + ignore_iof_thr=-1, + gt_max_assign_all=False), + # smoothl1_beta=1., + allowed_border=-1, + pos_weight=-1, + neg_pos_ratio=3, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + iou_thr=0.5, + top_k=200, + max_per_img=100)) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.68, 116.78, 103.94], std=[58.40, 57.12, 57.38], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='FilterAnnotations', min_gt_bbox_wh=(4.0, 4.0)), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(img_size, img_size), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(img_size, img_size), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4) +optimizer_config = dict() +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.1, + step=[20, 42, 49, 52]) +runner = dict(type='EpochBasedRunner', max_epochs=55) +cudnn_benchmark = True +evaluation = dict(metric=['bbox', 'segm']) diff --git a/configs/yolact/yolact_r50_8x8_coco.py b/configs/yolact/yolact_r50_8x8_coco.py new file mode 100644 index 0000000..b3adcb7 --- /dev/null +++ b/configs/yolact/yolact_r50_8x8_coco.py @@ -0,0 +1,11 @@ +_base_ = 'yolact_r50_1x8_coco.py' + +optimizer = dict(type='SGD', lr=8e-3, momentum=0.9, weight_decay=5e-4) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.1, + step=[20, 42, 49, 52]) diff --git a/configs/yolo/README.md b/configs/yolo/README.md new file mode 100644 index 0000000..1f539c6 --- /dev/null +++ b/configs/yolo/README.md @@ -0,0 +1,28 @@ +# YOLOv3 + +## Introduction + +[ALGORITHM] + +```latex +@misc{redmon2018yolov3, + title={YOLOv3: An Incremental Improvement}, + author={Joseph Redmon and Ali Farhadi}, + year={2018}, + eprint={1804.02767}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +## Results and Models + +| Backbone | Scale | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +| DarkNet-53 | 320 | 273e | 2.7 | 63.9 | 27.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo/yolov3_d53_320_273e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_320_273e_coco/yolov3_d53_320_273e_coco-421362b6.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_320_273e_coco/yolov3_d53_320_273e_coco-20200819_172101.log.json) | +| DarkNet-53 | 416 | 273e | 3.8 | 61.2 | 30.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo/yolov3_d53_mstrain-416_273e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_mstrain-416_273e_coco/yolov3_d53_mstrain-416_273e_coco-2b60fcd9.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_mstrain-416_273e_coco/yolov3_d53_mstrain-416_273e_coco-20200819_173424.log.json) | +| DarkNet-53 | 608 | 273e | 7.1 | 48.1 | 33.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo/yolov3_d53_mstrain-608_273e_coco.py) | [model](http://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_mstrain-608_273e_coco/yolov3_d53_mstrain-608_273e_coco-139f5633.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_mstrain-608_273e_coco/yolov3_d53_mstrain-608_273e_coco-20200819_170820.log.json) | + +## Credit + +This implementation originates from the project of Haoyu Wu(@wuhy08) at Western Digital. diff --git a/configs/yolo/yolov3_d53_320_273e_coco.py b/configs/yolo/yolov3_d53_320_273e_coco.py new file mode 100644 index 0000000..87359f6 --- /dev/null +++ b/configs/yolo/yolov3_d53_320_273e_coco.py @@ -0,0 +1,42 @@ +_base_ = './yolov3_d53_mstrain-608_273e_coco.py' +# dataset settings +img_norm_cfg = dict(mean=[0, 0, 0], std=[255., 255., 255.], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='PhotoMetricDistortion'), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 2)), + dict( + type='MinIoURandomCrop', + min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(320, 320), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(320, 320), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/yolo/yolov3_d53_mstrain-416_273e_coco.py b/configs/yolo/yolov3_d53_mstrain-416_273e_coco.py new file mode 100644 index 0000000..d029b5c --- /dev/null +++ b/configs/yolo/yolov3_d53_mstrain-416_273e_coco.py @@ -0,0 +1,42 @@ +_base_ = './yolov3_d53_mstrain-608_273e_coco.py' +# dataset settings +img_norm_cfg = dict(mean=[0, 0, 0], std=[255., 255., 255.], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='PhotoMetricDistortion'), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 2)), + dict( + type='MinIoURandomCrop', + min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=[(320, 320), (416, 416)], keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(416, 416), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/yolo/yolov3_d53_mstrain-608_273e_coco.py b/configs/yolo/yolov3_d53_mstrain-608_273e_coco.py new file mode 100644 index 0000000..9c65305 --- /dev/null +++ b/configs/yolo/yolov3_d53_mstrain-608_273e_coco.py @@ -0,0 +1,124 @@ +_base_ = '../_base_/default_runtime.py' +# model settings +model = dict( + type='YOLOV3', + pretrained='open-mmlab://darknet53', + backbone=dict(type='Darknet', depth=53, out_indices=(3, 4, 5)), + neck=dict( + type='YOLOV3Neck', + num_scales=3, + in_channels=[1024, 512, 256], + out_channels=[512, 256, 128]), + bbox_head=dict( + type='YOLOV3Head', + num_classes=80, + in_channels=[512, 256, 128], + out_channels=[1024, 512, 256], + anchor_generator=dict( + type='YOLOAnchorGenerator', + base_sizes=[[(116, 90), (156, 198), (373, 326)], + [(30, 61), (62, 45), (59, 119)], + [(10, 13), (16, 30), (33, 23)]], + strides=[32, 16, 8]), + bbox_coder=dict(type='YOLOBBoxCoder'), + featmap_strides=[32, 16, 8], + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + loss_weight=1.0, + reduction='sum'), + loss_conf=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + loss_weight=1.0, + reduction='sum'), + loss_xy=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + loss_weight=2.0, + reduction='sum'), + loss_wh=dict(type='MSELoss', loss_weight=2.0, reduction='sum')), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='GridAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0)), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + conf_thr=0.005, + nms=dict(type='nms', iou_threshold=0.45), + max_per_img=100)) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict(mean=[0, 0, 0], std=[255., 255., 255.], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='PhotoMetricDistortion'), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 2)), + dict( + type='MinIoURandomCrop', + min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=[(320, 320), (608, 608)], keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(608, 608), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=2000, # same as burn-in in darknet + warmup_ratio=0.1, + step=[218, 246]) +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=273) +evaluation = dict(interval=1, metric=['bbox'])