Skip to content

Commit

Permalink
Support XDecoder inference and eval (#10505)
Browse files Browse the repository at this point in the history
Co-authored-by: 谢昕辰 <[email protected]>
  • Loading branch information
hhaAndroid and xiexinch committed Jun 16, 2023
1 parent f2f8925 commit fbdae96
Show file tree
Hide file tree
Showing 83 changed files with 7,357 additions and 703 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ English | [简体中文](README_zh-CN.md)
<img src="https://user-images.githubusercontent.com/25839884/219026120-ba71e48b-6e94-4bd4-b4e9-b7d175b5e362.png" width="3%" alt="" /></a>
</div>

<div align="center">
<img src="https://github.com/open-mmlab/mmdetection/assets/17425982/6c29886f-ae7a-4a55-8be4-352ee85b7d3e"/>
</div>

## Introduction

MMDetection is an open source object detection toolbox based on PyTorch. It is
Expand Down
4 changes: 4 additions & 0 deletions README_zh-CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@
<img src="https://user-images.githubusercontent.com/25839884/219026120-ba71e48b-6e94-4bd4-b4e9-b7d175b5e362.png" width="3%" alt="" /></a>
</div>

<div align="center">
<img src="https://github.com/open-mmlab/mmdetection/assets/17425982/6c29886f-ae7a-4a55-8be4-352ee85b7d3e"/>
</div>

## 简介

MMDetection 是一个基于 PyTorch 的目标检测开源工具箱。它是 [OpenMMLab](https://openmmlab.com/) 项目的一部分。
Expand Down
53 changes: 53 additions & 0 deletions configs/_base_/datasets/ade20k_instance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# dataset settings
# Evaluation-only config for instance segmentation on ADE20K
# (ADEChallengeData2016). Only val/test loaders are defined here;
# no training pipeline or dataloader is provided.
dataset_type = 'ADE20KInstanceDataset'
data_root = 'data/ADEChallengeData2016/'

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/ADEChallengeData2016/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None  # None -> read from the local filesystem

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    # Keep aspect ratio while fitting inside a 2560x640 canvas.
    dict(type='Resize', scale=(2560, 640), keep_ratio=True),
    # If you don't have a gt annotation, delete the pipeline
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(
        type='PackDetInputs',
        # Meta info forwarded to the model/evaluator for each image.
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]

val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,  # keep worker processes alive between epochs
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='ade20k_instance_val.json',
        data_prefix=dict(img='images/validation'),
        test_mode=True,  # evaluation mode: no GT filtering
        pipeline=test_pipeline,
        backend_args=backend_args))
# Test set is identical to the validation set for this dataset.
test_dataloader = val_dataloader

val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'ade20k_instance_val.json',
    # Report both box AP and mask AP in COCO style.
    metric=['bbox', 'segm'],
    format_only=False,  # compute metrics, not just dump result files
    backend_args=backend_args)
test_evaluator = val_evaluator
29 changes: 3 additions & 26 deletions configs/_base_/datasets/ade20k_panoptic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,43 +4,20 @@

backend_args = None

train_pipeline = [
dict(type='LoadImageFromFile', backend_args=backend_args),
dict(type='LoadPanopticAnnotations', backend_args=backend_args),
# TODO: the performance of `FixScaleResize` need to check.
dict(type='FixScaleResize', scale=(2560, 640), backend_args=backend_args),
dict(type='RandomCrop', crop_size=(640, 640), crop_type='absolute'),
dict(type='RandomFlip', prob=0.5),
dict(type='PackDetInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile', backend_args=backend_args),
dict(type='Resize', scale=(640, 640), keep_ratio=True),
dict(type='Resize', scale=(2560, 640), keep_ratio=True),
dict(type='LoadPanopticAnnotations', backend_args=backend_args),
dict(
type='PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor'))
]

train_dataloader = dict(
batch_size=4,
num_workers=2,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
batch_sampler=dict(type='AspectRatioBatchSampler'),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='ade20k_panoptic_train.json',
data_prefix=dict(img='images/training/', seg='ade20k_panoptic_train/'),
filter_cfg=dict(filter_empty_gt=True, min_size=32),
pipeline=train_pipeline,
backend_args=backend_args))
val_dataloader = dict(
batch_size=1,
num_workers=2,
persistent_workers=True,
num_workers=0,
persistent_workers=False,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
Expand Down
48 changes: 48 additions & 0 deletions configs/_base_/datasets/ade20k_semantic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Evaluation-only config for semantic segmentation on ADE20K
# (ADEChallengeData2016), scored with mIoU.
dataset_type = 'ADE20KSegDataset'
data_root = 'data/ADEChallengeData2016/'

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/ADEChallengeData2016/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None  # None -> read from the local filesystem

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    # Keep aspect ratio while fitting inside a 2048x512 canvas.
    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
    dict(
        type='LoadAnnotations',
        with_bbox=False,
        with_mask=False,
        with_seg=True,  # only the semantic segmentation map is needed
        # ADE20K labels pixel value 0 as "ignore"; shift labels down by one.
        reduce_zero_label=True),
    dict(
        type='PackDetInputs', meta_keys=('img_path', 'ori_shape', 'img_shape'))
]

val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,  # keep worker processes alive between epochs
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/validation',
            seg_map_path='annotations/validation'),
        pipeline=test_pipeline))
# Test set is identical to the validation set for this dataset.
test_dataloader = val_dataloader

# Semantic segmentation metric: mean Intersection-over-Union.
val_evaluator = dict(type='SemSegMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator
2 changes: 1 addition & 1 deletion configs/_base_/datasets/coco_caption.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# data settings

dataset_type = 'COCOCaptionDataset'
dataset_type = 'CocoCaptionDataset'
data_root = 'data/coco/'

# Example to use different file client
Expand Down
78 changes: 78 additions & 0 deletions configs/_base_/datasets/coco_semantic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# dataset settings
# Semantic segmentation on COCO using stuff+thing maps
# (`stuffthingmaps_semseg`), with train/val loaders and mIoU evaluation.
dataset_type = 'CocoSegDataset'
data_root = 'data/coco/'

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None  # None -> read from the local filesystem

train_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(
        type='LoadAnnotations',
        with_bbox=False,
        with_label=False,
        with_seg=True),  # only the semantic segmentation map is needed
    # Keep aspect ratio while fitting inside a 1333x800 canvas.
    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),  # horizontal flip augmentation
    dict(type='PackDetInputs')
]

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
    dict(
        type='LoadAnnotations',
        with_bbox=False,
        with_label=False,
        with_seg=True),
    dict(
        type='PackDetInputs',
        # Meta info forwarded to the model/evaluator for each image.
        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]

# For stuffthingmaps_semseg, please refer to
# `docs/en/user_guides/dataset_prepare.md`
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,  # keep worker processes alive between epochs
    sampler=dict(type='DefaultSampler', shuffle=True),
    # Group images of similar aspect ratio into the same batch.
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='train2017/',
            seg_map_path='stuffthingmaps_semseg/train2017/'),
        pipeline=train_pipeline))

val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='val2017/',
            seg_map_path='stuffthingmaps_semseg/val2017/'),
        pipeline=test_pipeline))

# Test set is identical to the validation set for this dataset.
test_dataloader = val_dataloader

# Semantic segmentation metric: mean Intersection-over-Union.
val_evaluator = dict(type='SemSegMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator
55 changes: 18 additions & 37 deletions configs/_base_/datasets/refcoco+.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,24 @@
# dataset settings
dataset_type = 'RefCOCODataset'
data_root = 'data/refcoco/'
dataset_type = 'RefCocoDataset'
data_root = 'data/coco/'

backend_args = None

train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', prob=0.5),
dict(
type='PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'text', 'image_id'))
]

test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadImageFromFile', backend_args=backend_args),
dict(type='Resize', scale=(1333, 800), keep_ratio=True),
dict(
type='LoadAnnotations',
with_mask=True,
with_bbox=False,
with_seg=False,
with_label=False),
dict(
type='PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'text', 'image_id'))
'scale_factor', 'gt_masks', 'text'))
]

train_dataloader = dict(
batch_size=2,
num_workers=2,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
batch_sampler=dict(type='AspectRatioBatchSampler'),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(img='train2014/'),
ann_file='refcoco+/instances.json',
split_file='refcoco+/refs(unc).p',
split='train',
pipeline=train_pipeline,
backend_args=backend_args))

val_dataloader = dict(
batch_size=1,
num_workers=2,
Expand All @@ -48,12 +28,12 @@
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(img='train2014/'),
data_prefix=dict(img_path='train2014/'),
ann_file='refcoco+/instances.json',
split_file='refcoco+/refs(unc).p',
split='val',
pipeline=test_pipeline,
backend_args=backend_args))
text_mode='select_first',
pipeline=test_pipeline))

test_dataloader = dict(
batch_size=1,
Expand All @@ -64,11 +44,12 @@
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(img='train2014/'),
data_prefix=dict(img_path='train2014/'),
ann_file='refcoco+/instances.json',
split_file='refcoco+/refs(unc).p',
split='testA', # or 'testB'
pipeline=test_pipeline,
backend_args=backend_args))
text_mode='select_first',
pipeline=test_pipeline))

# TODO: set the metrics
val_evaluator = dict(type='RefSegMetric', metric=['cIoU', 'mIoU'])
test_evaluator = val_evaluator
Loading

0 comments on commit fbdae96

Please sign in to comment.