update detr test

yangxue0827 · Jun 13, 2024 · f8e54cf · f8e54cf
1 parent 2b768aa
commit f8e54cf
Show file tree

Hide file tree

Showing 10 changed files with 1,131 additions and 62 deletions.
diff --git a/README.md b/README.md
@@ -24,9 +24,12 @@ For instructions on installation, pretrained models, training and evaluation, pl
 
 ### Oriented Object Detection
 
-|  Detector  | mAP | Configs | Download | None |
+|  Detector  | mAP | Configs | Download | Note |
 | :--------: |:---:|:-------:|:--------:|:----:|
+| Deformable DETR | 17.1 | [deformable_detr_r50_1x_rsg](configs/ars_detr/deformable_detr_r50_1x_rsg.py) | [log](https://huggingface.co/yangxue/RSG-MMRotate/raw/main/deformable_detr_r50_1x_rsg.log) \| [ckpt](https://huggingface.co/yangxue/RSG-MMRotate/resolve/main/deformable_detr_r50_1x_rsg-fe862bb3.pth?download=true) |
+| ARS-DETR | 28.1 | [dn_arw_arm_arcsl_rdetr_r50_1x_rsg](configs/ars_detr/dn_arw_arm_arcsl_rdetr_r50_1x_rsg.py) | [log](https://huggingface.co/yangxue/RSG-MMRotate/raw/main/dn_arw_arm_arcsl_rdetr_r50_1x_rsg.log) \| [ckpt](https://huggingface.co/yangxue/RSG-MMRotate/resolve/main/dn_arw_arm_arcsl_rdetr_r50_1x_rsg-cbb34897.pth?download=true) |
 | RetinaNet | 21.8 | [rotated_retinanet_hbb_r50_fpn_1x_rsg_oc](configs/rotated_retinanet/rotated_retinanet_hbb_r50_fpn_1x_rsg_oc.py) | [log](https://huggingface.co/yangxue/RSG-MMRotate/raw/main/rotated_retinanet_hbb_r50_fpn_1x_rsg_oc.log) \| [ckpt](https://huggingface.co/yangxue/RSG-MMRotate/resolve/main/rotated_retinanet_hbb_r50_fpn_1x_rsg_oc-3ec35d77.pth?download=true) |
+| ATSS | 20.4 | [rotated_atss_hbb_r50_fpn_1x_rsg_oc](configs/rotated_atss/rotated_atss_hbb_r50_fpn_1x_rsg_oc.py) | [log](https://huggingface.co/yangxue/RSG-MMRotate/raw/main/rotated_atss_hbb_r50_fpn_1x_rsg_oc.log) \| [ckpt](https://huggingface.co/yangxue/RSG-MMRotate/resolve/main/rotated_atss_hbb_r50_fpn_1x_rsg_oc-f65f07c2.pth?download=true) | 
 |  KLD  |  25.0  |   [rotated_retinanet_hbb_kld_r50_fpn_1x_rsg_oc](configs/kld/rotated_retinanet_hbb_kld_r50_fpn_1x_rsg_oc.py)  |  [log](https://huggingface.co/yangxue/RSG-MMRotate/raw/main/rotated_retinanet_hbb_kld_r50_fpn_1x_rsg_oc.log) \| [ckpt](https://huggingface.co/yangxue/RSG-MMRotate/resolve/main/rotated_retinanet_hbb_kld_r50_fpn_1x_rsg_oc-343a0b83.pth?download=true) |
 |  GWD  |  25.3  |   [rotated_retinanet_hbb_gwd_r50_fpn_1x_rsg_oc](configs/gwd/rotated_retinanet_hbb_gwd_r50_fpn_1x_rsg_oc.py)  |  [log](https://huggingface.co/yangxue/RSG-MMRotate/raw/main/rotated_retinanet_hbb_gwd_r50_fpn_1x_rsg_oc.log) \| [ckpt](https://huggingface.co/yangxue/RSG-MMRotate/resolve/main/rotated_retinanet_hbb_gwd_r50_fpn_1x_rsg_oc-566d2398.pth?download=true) |
 | KFIoU |  25.5  |   [rotated_retinanet_hbb_kfiou_r50_fpn_1x_rsg_oc](configs/kfiou/rotated_retinanet_hbb_kfiou_r50_fpn_1x_rsg_oc.py)  |  [log](https://huggingface.co/yangxue/RSG-MMRotate/raw/main/rotated_retinanet_hbb_kfiou_r50_fpn_1x_rsg_oc.log) \| [ckpt](https://huggingface.co/yangxue/RSG-MMRotate/resolve/main/rotated_retinanet_hbb_kfiou_r50_fpn_1x_rsg_oc-198081a6.pth?download=true) |
@@ -41,7 +44,8 @@ For instructions on installation, pretrained models, training and evaluation, pl
 | Gliding Vertex | 30.7 | [gliding_vertex_r50_fpn_1x_rsg_le90](configs/gliding_vertex/gliding_vertex_r50_fpn_1x_rsg_le90.py) | [log](https://huggingface.co/yangxue/RSG-MMRotate/raw/main/gliding_vertex_r50_fpn_1x_rsg_le90.log) \| [ckpt](https://huggingface.co/yangxue/RSG-MMRotate/resolve/main/gliding_vertex_r50_fpn_1x_rsg_le90-5c0bc879.pth?download=true) |
 | Oriented RCNN | 33.2 | [oriented_rcnn_r50_fpn_1x_rsg_le90](configs/oriented_rcnn/oriented_rcnn_r50_fpn_1x_rsg_le90.py) | [log](https://huggingface.co/yangxue/RSG-MMRotate/raw/main/oriented_rcnn_r50_fpn_1x_rsg_le90.log) \| [ckpt](https://huggingface.co/yangxue/RSG-MMRotate/resolve/main/oriented_rcnn_r50_fpn_1x_rsg_le90-0b66f6a4.pth?download=true) |
 | RoI Transformer | 35.7 | [roi_trans_r50_fpn_1x_rsg_le90](configs/roi_trans/roi_trans_r50_fpn_1x_rsg_le90.py) | [log](https://huggingface.co/yangxue/RSG-MMRotate/raw/main/roi_trans_r50_fpn_1x_rsg_le90.log) \| [ckpt](https://huggingface.co/yangxue/RSG-MMRotate/resolve/main/roi_trans_r50_fpn_1x_rsg_le90-e42f64d6.pth?download=true) |
-| ReDet | 39.1 | [redet_re50_refpn_1x_rsg_le90](configs/redet/redet_re50_refpn_1x_rsg_le90.py) | [log](https://huggingface.co/yangxue/RSG-MMRotate/raw/main/redet_re50_refpn_1x_rsg_le90.log) \| [ckpt](https://huggingface.co/yangxue/RSG-MMRotate/resolve/main/redet_re50_refpn_1x_rsg_le90-d163f450.pth?download=true) |
+| ReDet | 39.1 | [redet_re50_refpn_1x_rsg_le90](configs/redet/redet_re50_refpn_1x_rsg_le90.py) | [log](https://huggingface.co/yangxue/RSG-MMRotate/raw/main/redet_re50_refpn_1x_rsg_le90.log) \| [ckpt](https://huggingface.co/yangxue/RSG-MMRotate/resolve/main/redet_re50_refpn_1x_rsg_le90-d163f450.pth?download=true) | [ReResNet50](https://huggingface.co/yangxue/RSG-MMRotate/resolve/main/re_resnet50_c8_batch256-25b16846.pth?download=true) |
+| Oriented RCNN | 40.7 | [oriented_rcnn_swin-l_fpn_1x_rsg_le90](configs/oriented_rcnn/oriented_rcnn_swin-l_fpn_1x_rsg_le90.py) | [log](https://huggingface.co/yangxue/RSG-MMRotate/raw/main/oriented_rcnn_swin-l_fpn_1x_rsg_le90.log) \| [ckpt](https://huggingface.co/yangxue/RSG-MMRotate/resolve/main/oriented_rcnn_swin-l_fpn_1x_rsg_le90-fe6f9e2d.pth?download=true) | [Swin-L](https://huggingface.co/yangxue/RSG-MMRotate/resolve/main/swin_large_patch4_window7_224_22k_20220412-aeecf2aa.pth?download=true) |
 
 ## 🖊️ Citation
 
@@ -50,7 +54,7 @@ If you find this work helpful for your research, please consider giving this rep
 ```bibtex
 @article{li2024scene,
   title={Scene Graph Generation in Large-Size VHR Satellite Imagery: A Large-Scale Dataset and A Context-Aware Approach},
-  author={L1, Yansheng and Wang, Linlin and Wang, Tingzhu and Wang, Qi and Sun, Xian and Yang, Xue and Wang, Wenbin and Luo, Junwei and Deng, Youming and Li, Haifeng and Dang, Bo and Zhang, Yongjun and Yan Junchi},
+  author={Li, Yansheng and Wang, Linlin and Wang, Tingzhu and Yang, Xue and Wang, Qi and Sun, Xian and Wang, Wenbin and Luo, Junwei and Deng, Youming and Li, Haifeng and Dang, Bo and Zhang, Yongjun and Yan, Junchi},
   journal={arXiv preprint arXiv:},
   year={2024}
 }

diff --git a/configs/h2rbox/h2rbox_r50_fpn_1x_rsg_le90_adamw5e-5.py b/configs/h2rbox/h2rbox_r50_fpn_1x_rsg_le90_adamw5e-5.py
@@ -0,0 +1,135 @@
+_base_ = [  # inherited base configs: RSG dataset, 1x schedule, default runtime
+    '../_base_/datasets/rsg.py',
+    '../_base_/schedules/schedule_1x.py',
+    '../_base_/default_runtime.py'
+]
+angle_version = 'le90'  # reused below by the bbox coder, RRandomFlip and every dataset split
+
+# model settings: H2RBox detector with a ResNet-50 backbone, FPN neck and H2RBoxHead
+model = dict(
+    type='H2RBox',
+    crop_size=(1024, 1024),
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        zero_init_residual=False,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=True,
+        style='pytorch',
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        start_level=1,
+        add_extra_convs='on_output',  # use P5
+        num_outs=5,
+        relu_before_extra_convs=True),
+    bbox_head=dict(
+        type='H2RBoxHead',
+        num_classes=48,
+        in_channels=256,
+        stacked_convs=4,
+        feat_channels=256,
+        strides=[8, 16, 32, 64, 128],
+        center_sampling=True,
+        center_sample_radius=1.5,
+        norm_on_bbox=True,
+        centerness_on_reg=True,
+        separate_angle=False,
+        scale_angle=True,
+        reassigner='one2one',
+        rect_classes=[4, 44],  # NOTE(review): presumably class indices given special (rectangle) handling by H2RBoxHead - confirm
+        bbox_coder=dict(
+            type='DistanceAnglePointCoder', angle_version=angle_version),
+        loss_cls=dict(
+            type='FocalLoss',
+            use_sigmoid=True,
+            gamma=2.0,
+            alpha=0.25,
+            loss_weight=1.0),
+        loss_bbox=dict(type='IoULoss', loss_weight=1.0),
+        loss_bbox_aug=dict(
+            type='H2RBoxLoss',
+            loss_weight=0.4,
+            center_loss_cfg=dict(type='L1Loss', loss_weight=0.0),  # weight is 0.0, so this term is presumably disabled - confirm in H2RBoxLoss
+            shape_loss_cfg=dict(type='IoULoss', loss_weight=1.0),
+            angle_loss_cfg=dict(type='L1Loss', loss_weight=1.0)),
+        loss_centerness=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
+    # training and testing settings
+    train_cfg=None,
+    test_cfg=dict(
+        nms_pre=2000,
+        min_bbox_size=0,
+        score_thr=0.05,
+        nms=dict(iou_thr=0.1),
+        max_per_img=2000))
+
+img_norm_cfg = dict(  # normalization constants shared by the train and test pipelines
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', with_bbox=True),
+    dict(type='FilterNoCenterObject', img_scale=(1024, 1024), crop_size=(1024, 1024)),
+    dict(type='RResize', img_scale=(1024, 1024)),
+    dict(
+        type='RRandomFlip',
+        flip_ratio=[0.25, 0.25, 0.25],
+        direction=['horizontal', 'vertical', 'diagonal'],
+        version=angle_version),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=1),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+]
+
+test_pipeline = [  # no resize or flip at test time: scale_factor=1.0, flip=False
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        scale_factor=1.0,
+        flip=False,
+        transforms=[
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=64),
+            dict(type='DefaultFormatBundle'),
+            dict(type='Collect', keys=['img'])
+        ])
+]
+
+data_root = 'data/RSG/'
+data = dict(
+    train=dict(type='RSGWSOODDataset', pipeline=train_pipeline,
+               ann_file=data_root + 'train/annfiles/',
+               img_prefix=data_root + 'train/images/',
+               version=angle_version),
+    val=dict(type='RSGWSOODDataset', pipeline=test_pipeline,  # val uses the same test/ paths as the test split below
+             ann_file=data_root + 'test/annfiles/',
+             img_prefix=data_root + 'test/images/',
+             version=angle_version),
+    test=dict(type='RSGWSOODDataset', pipeline=test_pipeline,
+              ann_file=data_root + 'test/annfiles/',
+              img_prefix=data_root + 'test/images/',
+              version=angle_version))
+
+log_config = dict(interval=50)
+
+optimizer = dict(
+    _delete_=True,  # MMCV config convention: drop the optimizer inherited from the base schedule instead of merging into it
+    type='AdamW',
+    lr=0.00005,  # 5e-5, matching the "adamw5e-5" suffix in this config's filename
+    betas=(0.9, 0.999),
+    weight_decay=0.05,
+    paramwise_cfg=dict(
+        custom_keys={  # NOTE(review): the two *_pos_* keys look like transformer-backbone parameter names; likely unmatched with the ResNet backbone above - confirm
+            'absolute_pos_embed': dict(decay_mult=0.),
+            'relative_position_bias_table': dict(decay_mult=0.),
+            'norm': dict(decay_mult=0.)
+        }))
+
+checkpoint_config = dict(interval=1, max_keep_ckpts=1)
+evaluation = dict(interval=6, metric='mAP')
diff --git a/configs/h2rbox/h2rbox_r50_fpn_3x_rsg_le90.py b/configs/h2rbox/h2rbox_r50_fpn_3x_rsg_le90.py
@@ -0,0 +1,135 @@
+_base_ = [  # inherited base configs: RSG dataset, 3x schedule, default runtime
+    '../_base_/datasets/rsg.py',
+    '../_base_/schedules/schedule_3x.py',
+    '../_base_/default_runtime.py'
+]
+angle_version = 'le90'  # reused below by the bbox coder, RRandomFlip and every dataset split
+
+# model settings: H2RBox detector with a ResNet-50 backbone, FPN neck and H2RBoxHead
+model = dict(
+    type='H2RBox',
+    crop_size=(1024, 1024),
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        zero_init_residual=False,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=True,
+        style='pytorch',
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        start_level=1,
+        add_extra_convs='on_output',  # use P5
+        num_outs=5,
+        relu_before_extra_convs=True),
+    bbox_head=dict(
+        type='H2RBoxHead',
+        num_classes=48,
+        in_channels=256,
+        stacked_convs=4,
+        feat_channels=256,
+        strides=[8, 16, 32, 64, 128],
+        center_sampling=True,
+        center_sample_radius=1.5,
+        norm_on_bbox=True,
+        centerness_on_reg=True,
+        separate_angle=False,
+        scale_angle=True,
+        reassigner='one2one',
+        rect_classes=[4, 44],  # NOTE(review): presumably class indices given special (rectangle) handling by H2RBoxHead - confirm
+        bbox_coder=dict(
+            type='DistanceAnglePointCoder', angle_version=angle_version),
+        loss_cls=dict(
+            type='FocalLoss',
+            use_sigmoid=True,
+            gamma=2.0,
+            alpha=0.25,
+            loss_weight=1.0),
+        loss_bbox=dict(type='IoULoss', loss_weight=1.0),
+        loss_bbox_aug=dict(
+            type='H2RBoxLoss',
+            loss_weight=0.4,
+            center_loss_cfg=dict(type='L1Loss', loss_weight=0.0),  # weight is 0.0, so this term is presumably disabled - confirm in H2RBoxLoss
+            shape_loss_cfg=dict(type='IoULoss', loss_weight=1.0),
+            angle_loss_cfg=dict(type='L1Loss', loss_weight=1.0)),
+        loss_centerness=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
+    # training and testing settings
+    train_cfg=None,
+    test_cfg=dict(
+        nms_pre=2000,
+        min_bbox_size=0,
+        score_thr=0.05,
+        nms=dict(iou_thr=0.1),
+        max_per_img=2000))
+
+img_norm_cfg = dict(  # normalization constants shared by the train and test pipelines
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', with_bbox=True),
+    dict(type='FilterNoCenterObject', img_scale=(1024, 1024), crop_size=(1024, 1024)),
+    dict(type='RResize', img_scale=(1024, 1024)),
+    dict(
+        type='RRandomFlip',
+        flip_ratio=[0.25, 0.25, 0.25],
+        direction=['horizontal', 'vertical', 'diagonal'],
+        version=angle_version),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=1),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+]
+
+test_pipeline = [  # no resize or flip at test time: scale_factor=1.0, flip=False
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        scale_factor=1.0,
+        flip=False,
+        transforms=[
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=64),
+            dict(type='DefaultFormatBundle'),
+            dict(type='Collect', keys=['img'])
+        ])
+]
+
+data_root = 'data/RSG/'
+data = dict(
+    train=dict(type='RSGWSOODDataset', pipeline=train_pipeline,
+               ann_file=data_root + 'train/annfiles/',
+               img_prefix=data_root + 'train/images/',
+               version=angle_version),
+    val=dict(type='RSGWSOODDataset', pipeline=test_pipeline,  # val uses the same test/ paths as the test split below
+             ann_file=data_root + 'test/annfiles/',
+             img_prefix=data_root + 'test/images/',
+             version=angle_version),
+    test=dict(type='RSGWSOODDataset', pipeline=test_pipeline,
+              ann_file=data_root + 'test/annfiles/',
+              img_prefix=data_root + 'test/images/',
+              version=angle_version))
+
+log_config = dict(interval=50)
+
+optimizer = dict(
+    _delete_=True,  # MMCV config convention: drop the optimizer inherited from the base schedule instead of merging into it
+    type='AdamW',
+    lr=0.0001,  # 1e-4 for this 3x-schedule config
+    betas=(0.9, 0.999),
+    weight_decay=0.05,
+    paramwise_cfg=dict(
+        custom_keys={  # NOTE(review): the two *_pos_* keys look like transformer-backbone parameter names; likely unmatched with the ResNet backbone above - confirm
+            'absolute_pos_embed': dict(decay_mult=0.),
+            'relative_position_bias_table': dict(decay_mult=0.),
+            'norm': dict(decay_mult=0.)
+        }))
+
+checkpoint_config = dict(interval=1, max_keep_ckpts=1)
+evaluation = dict(interval=6, metric='mAP')
diff --git a/mmrotate/models/detectors/__init__.py b/mmrotate/models/detectors/__init__.py
@@ -15,6 +15,7 @@
 from .two_stage import RotatedTwoStageDetector
 from .two_stage_crop import RotatedTwoStageDetectorCrop
 from .rotated_detr import RotatedDETR
+from .rotated_detr_crop import RotatedDETRCrop
 from .rotated_deformable_detr import RotatedDeformableDETR
 from .ars_detr import ARSDETR
 from .h2rbox import H2RBox
@@ -26,5 +27,6 @@
     'GlidingVertex', 'ReDet', 'R3Det', 'S2ANet', 'RotatedRepPoints',
     'RotatedBaseDetector', 'RotatedSingleStageDetectorCrop', 'RotatedTwoStageDetector',
     'RotatedSingleStageDetector', 'RotatedFCOS', 'RotatedTwoStageDetectorCrop',
-    'RotatedDETR', 'RotatedDeformableDETR', 'ARSDETR', 'H2RBox','R3DetCrop','S2ANetCrop'
+    'RotatedDETR', 'RotatedDeformableDETR', 'ARSDETR', 'H2RBox','R3DetCrop','S2ANetCrop',
+    'RotatedDETRCrop'
 ]
diff --git a/mmrotate/models/detectors/ars_detr.py b/mmrotate/models/detectors/ars_detr.py
@@ -2,11 +2,11 @@
 # from ..builder import DETECTORS
 # from mmdet.models.builder import DETECTORS
 from ..builder import ROTATED_DETECTORS
-from .rotated_detr import RotatedDETR
+from .rotated_detr_crop import RotatedDETRCrop
 
 
 @ROTATED_DETECTORS.register_module()
-class ARSDETR(RotatedDETR):
+class ARSDETR(RotatedDETRCrop):  # detector registered in ROTATED_DETECTORS; base class switched to RotatedDETRCrop
 
     def __init__(self, *args, **kwargs):
-        super(RotatedDETR, self).__init__(*args, **kwargs)
+        super(RotatedDETRCrop, self).__init__(*args, **kwargs)  # NOTE(review): two-arg super() starts the MRO lookup after RotatedDETRCrop, so RotatedDETRCrop.__init__ itself is bypassed - confirm this is intended