From 129566e0dc03d0ba625e736c7a5371fd7c11f8f9 Mon Sep 17 00:00:00 2001
From: dnth <dickson.neoh@gmail.com>
Date: Wed, 26 Jan 2022 13:55:11 +0800
Subject: [PATCH] add custom configs with swin backbone for retinanet and vfnet

---
 .../retinanet_swin-b-p4-w7_fpn_1x_coco.py     |  16 +++
 .../retinanet_swin-s-p4-w7_fpn_1x_coco.py     |  10 ++
 .../retinanet_swin-t-p4-w7_fpn_1x_coco.py     |  30 +++++
 .../vfnet/vfnet_swin-b-p4-w7_fpn_1x_coco.py   |  16 +++
 .../vfnet/vfnet_swin-s-p4-w7_fpn_1x_coco.py   |  10 ++
 .../vfnet/vfnet_swin-t-p4-w7_fpn_1x_coco.py   | 118 ++++++++++++++++++
 6 files changed, 200 insertions(+)
 create mode 100644 custom_configs/retinanet/retinanet_swin-b-p4-w7_fpn_1x_coco.py
 create mode 100644 custom_configs/retinanet/retinanet_swin-s-p4-w7_fpn_1x_coco.py
 create mode 100644 custom_configs/retinanet/retinanet_swin-t-p4-w7_fpn_1x_coco.py
 create mode 100644 custom_configs/vfnet/vfnet_swin-b-p4-w7_fpn_1x_coco.py
 create mode 100644 custom_configs/vfnet/vfnet_swin-s-p4-w7_fpn_1x_coco.py
 create mode 100644 custom_configs/vfnet/vfnet_swin-t-p4-w7_fpn_1x_coco.py

diff --git a/custom_configs/retinanet/retinanet_swin-b-p4-w7_fpn_1x_coco.py b/custom_configs/retinanet/retinanet_swin-b-p4-w7_fpn_1x_coco.py
new file mode 100644
index 0000000..0835db6
--- /dev/null
+++ b/custom_configs/retinanet/retinanet_swin-b-p4-w7_fpn_1x_coco.py
@@ -0,0 +1,16 @@
+_base_ = ["./retinanet_swin-t-p4-w7_fpn_1x_coco.py"]
+
+pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224.pth'  # noqa
+
+model = dict(
+    backbone=dict(
+        embed_dims=128,
+        depths=[2, 2, 18, 2],
+        num_heads=[4, 8, 16, 32],
+        out_indices=(0, 1, 2, 3),
+        init_cfg=dict(type='Pretrained', checkpoint=pretrained),
+    ),
+    neck=dict(
+        in_channels=[128, 256, 512, 1024],
+    ),
+)
diff --git a/custom_configs/retinanet/retinanet_swin-s-p4-w7_fpn_1x_coco.py b/custom_configs/retinanet/retinanet_swin-s-p4-w7_fpn_1x_coco.py
new file mode 100644
index 0000000..80fa394
--- /dev/null
+++ b/custom_configs/retinanet/retinanet_swin-s-p4-w7_fpn_1x_coco.py
@@ -0,0 +1,10 @@
+_base_ = ["./retinanet_swin-t-p4-w7_fpn_1x_coco.py"]
+
+pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth'  # noqa
+
+model = dict(
+    backbone=dict(
+        depths=[2, 2, 18, 2],
+        init_cfg=dict(type='Pretrained', checkpoint=pretrained),
+    ),
+)
diff --git a/custom_configs/retinanet/retinanet_swin-t-p4-w7_fpn_1x_coco.py b/custom_configs/retinanet/retinanet_swin-t-p4-w7_fpn_1x_coco.py
new file mode 100644
index 0000000..3315093
--- /dev/null
+++ b/custom_configs/retinanet/retinanet_swin-t-p4-w7_fpn_1x_coco.py
@@ -0,0 +1,30 @@
+_base_ = [
+    '../_base_/models/retinanet_r50_fpn.py',
+    '../_base_/datasets/coco_detection.py',
+    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
+]
+pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth'  # noqa
+model = dict(
+    backbone=dict(
+        _delete_=True,
+        type='SwinTransformer',
+        embed_dims=96,
+        depths=[2, 2, 6, 2],
+        num_heads=[3, 6, 12, 24],
+        window_size=7,
+        mlp_ratio=4,
+        qkv_bias=True,
+        qk_scale=None,
+        drop_rate=0.,
+        attn_drop_rate=0.,
+        drop_path_rate=0.2,
+        patch_norm=True,
+        out_indices=(1, 2, 3),
+        # Please only add indices that would be used
+        # in FPN, otherwise some parameter will not be used
+        with_cp=False,
+        convert_weights=True,
+        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
+    neck=dict(in_channels=[192, 384, 768], start_level=0, num_outs=5))
+
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
diff --git a/custom_configs/vfnet/vfnet_swin-b-p4-w7_fpn_1x_coco.py b/custom_configs/vfnet/vfnet_swin-b-p4-w7_fpn_1x_coco.py
new file mode 100644
index 0000000..f8405fe
--- /dev/null
+++ b/custom_configs/vfnet/vfnet_swin-b-p4-w7_fpn_1x_coco.py
@@ -0,0 +1,16 @@
+_base_ = ["./vfnet_swin-t-p4-w7_fpn_1x_coco.py"]
+
+pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224.pth'  # noqa
+
+model = dict(
+    backbone=dict(
+        embed_dims=128,
+        depths=[2, 2, 18, 2],
+        num_heads=[4, 8, 16, 32],
+        out_indices=(0, 1, 2, 3),
+        init_cfg=dict(type='Pretrained', checkpoint=pretrained),
+    ),
+    neck=dict(
+        in_channels=[128, 256, 512, 1024],
+    ),
+)
diff --git a/custom_configs/vfnet/vfnet_swin-s-p4-w7_fpn_1x_coco.py b/custom_configs/vfnet/vfnet_swin-s-p4-w7_fpn_1x_coco.py
new file mode 100644
index 0000000..8b0011f
--- /dev/null
+++ b/custom_configs/vfnet/vfnet_swin-s-p4-w7_fpn_1x_coco.py
@@ -0,0 +1,10 @@
+_base_ = ["./vfnet_swin-t-p4-w7_fpn_1x_coco.py"]
+
+pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth'  # noqa
+
+model = dict(
+    backbone=dict(
+        depths=[2, 2, 18, 2],
+        init_cfg=dict(type='Pretrained', checkpoint=pretrained),
+    ),
+)
diff --git a/custom_configs/vfnet/vfnet_swin-t-p4-w7_fpn_1x_coco.py b/custom_configs/vfnet/vfnet_swin-t-p4-w7_fpn_1x_coco.py
new file mode 100644
index 0000000..4d4796d
--- /dev/null
+++ b/custom_configs/vfnet/vfnet_swin-t-p4-w7_fpn_1x_coco.py
@@ -0,0 +1,118 @@
+_base_ = [
+    '../_base_/datasets/coco_detection.py',
+    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
+]
+pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth'  # noqa
+# model settings
+model = dict(
+    type='VFNet',
+    backbone=dict(
+        #_delete_=True,
+        type='SwinTransformer',
+        embed_dims=96,
+        depths=[2, 2, 6, 2],
+        num_heads=[3, 6, 12, 24],
+        window_size=7,
+        mlp_ratio=4,
+        qkv_bias=True,
+        qk_scale=None,
+        drop_rate=0.,
+        attn_drop_rate=0.,
+        drop_path_rate=0.2,
+        patch_norm=True,
+        out_indices=(1, 2, 3),
+        # Please only add indices that would be used
+        # in FPN, otherwise some parameter will not be used
+        with_cp=False,
+        convert_weights=True,
+        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
+    neck=dict(
+        type='FPN',
+        in_channels=[192, 384, 768],
+        out_channels=256,
+        start_level=0,
+        add_extra_convs='on_output',  # use P5
+        num_outs=5,
+        relu_before_extra_convs=True),
+    bbox_head=dict(
+        type='VFNetHead',
+        num_classes=80,
+        in_channels=256,
+        stacked_convs=3,
+        feat_channels=256,
+        strides=[8, 16, 32, 64, 128],
+        center_sampling=False,
+        dcn_on_last_conv=False,
+        use_atss=True,
+        use_vfl=True,
+        loss_cls=dict(
+            type='VarifocalLoss',
+            use_sigmoid=True,
+            alpha=0.75,
+            gamma=2.0,
+            iou_weighted=True,
+            loss_weight=1.0),
+        loss_bbox=dict(type='GIoULoss', loss_weight=1.5),
+        loss_bbox_refine=dict(type='GIoULoss', loss_weight=2.0)),
+    # training and testing settings
+    train_cfg=dict(
+        assigner=dict(type='ATSSAssigner', topk=9),
+        allowed_border=-1,
+        pos_weight=-1,
+        debug=False),
+    test_cfg=dict(
+        nms_pre=1000,
+        min_bbox_size=0,
+        score_thr=0.05,
+        nms=dict(type='nms', iou_threshold=0.6),
+        max_per_img=100))
+
+# data setting
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', with_bbox=True),
+    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=32),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1333, 800),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='DefaultFormatBundle'),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    samples_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(pipeline=train_pipeline),
+    val=dict(pipeline=test_pipeline),
+    test=dict(pipeline=test_pipeline))
+
+# optimizer
+optimizer = dict(
+    lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.))
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=0.1,
+    step=[8, 11])
+runner = dict(type='EpochBasedRunner', max_epochs=12)