From f741c354a25d1c209e6e537e4d1478fd717cacf2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= <1286304229@qq.com>
Date: Thu, 15 Jun 2023 15:40:11 +0800
Subject: [PATCH] Format code (#10512)

---
 configs/_base_/datasets/ade20k_panoptic.py    |  2 +-
 configs/_base_/datasets/ade20k_semantic.py    |  8 +-
 configs/_base_/datasets/refcoco+.py           |  2 +-
 configs/_base_/datasets/refcoco.py            |  2 +-
 configs/_base_/datasets/refcocog.py           |  2 +-
 docs/en/user_guides/dataset_prepare.md        |  1 -
 mmdet/datasets/ade20k.py                      |  8 +-
 mmdet/datasets/coco_semantic.py               | 90 +++++++++----
 mmdet/datasets/transforms/formatting.py       |  2 +-
 mmdet/datasets/transforms/loading.py          |  6 +-
 mmdet/datasets/transforms/transforms.py       | 32 +++----
 projects/XDecoder/README.md                   |  6 +-
 ...iny_zeroshot_open-vocab-instance_ade20k.py |  7 +-
 ...-tiny_zeroshot_open-vocab-instance_coco.py |  1 +
 ...iny_zeroshot_open-vocab-panoptic_ade20k.py | 71 +++++++--------
 ...-tiny_zeroshot_open-vocab-panoptic_coco.py |  1 +
 tools/dataset_converters/ade20k2coco.py       | 34 +++----
 tools/dataset_converters/coco_stuff164k.py    |  2 +-
 ...coco_semantic_annos_from_panoptic_annos.py |  2 +-
 tools/misc/download_dataset.py                |  3 +-
 20 files changed, 136 insertions(+), 146 deletions(-)

diff --git a/configs/_base_/datasets/ade20k_panoptic.py b/configs/_base_/datasets/ade20k_panoptic.py
index 48b570fe6ae..2ae21b007a3 100644
--- a/configs/_base_/datasets/ade20k_panoptic.py
+++ b/configs/_base_/datasets/ade20k_panoptic.py
@@ -6,7 +6,7 @@
 
 test_pipeline = [
     dict(type='LoadImageFromFile', backend_args=backend_args),
-    dict(type='Resize', scale=(640, 640), keep_ratio=True),
+    dict(type='Resize', scale=(2560, 640), keep_ratio=True),
     dict(type='LoadPanopticAnnotations', backend_args=backend_args),
     dict(
         type='PackDetInputs',
diff --git a/configs/_base_/datasets/ade20k_semantic.py b/configs/_base_/datasets/ade20k_semantic.py
index 68899e4b6b0..522a7757041 100644
--- a/configs/_base_/datasets/ade20k_semantic.py
+++ b/configs/_base_/datasets/ade20k_semantic.py
@@ -1,5 +1,5 @@
 dataset_type = 'ADE20KSegDataset'
-data_root = 'data/ade/ADEChallengeData2016'
+data_root = 'data/ADEChallengeData2016/'
 
 # Example to use different file client
 # Method 1: simply set the data root and let the file I/O module
@@ -26,14 +26,14 @@
         with_seg=True,
         reduce_zero_label=True),
     dict(
-        type='PackDetInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'text'))
+        type='PackDetInputs', meta_keys=('img_path', 'ori_shape', 'img_shape'))
 ]
 
 val_dataloader = dict(
     batch_size=1,
-    num_workers=4,
+    num_workers=2,
     persistent_workers=True,
+    drop_last=False,
     sampler=dict(type='DefaultSampler', shuffle=False),
     dataset=dict(
         type=dataset_type,
diff --git a/configs/_base_/datasets/refcoco+.py b/configs/_base_/datasets/refcoco+.py
index 9d7ce8adee1..602f07b03e1 100644
--- a/configs/_base_/datasets/refcoco+.py
+++ b/configs/_base_/datasets/refcoco+.py
@@ -5,7 +5,7 @@
 backend_args = None
 
 test_pipeline = [
-    dict(type='LoadImageFromFile'),
+    dict(type='LoadImageFromFile', backend_args=backend_args),
     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
     dict(
         type='LoadAnnotations',
diff --git a/configs/_base_/datasets/refcoco.py b/configs/_base_/datasets/refcoco.py
index fdea1132ae1..21f58c41e4b 100644
--- a/configs/_base_/datasets/refcoco.py
+++ b/configs/_base_/datasets/refcoco.py
@@ -5,7 +5,7 @@
 backend_args = None
 
 test_pipeline = [
-    dict(type='LoadImageFromFile'),
+    dict(type='LoadImageFromFile', backend_args=backend_args),
     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
     dict(
         type='LoadAnnotations',
diff --git a/configs/_base_/datasets/refcocog.py b/configs/_base_/datasets/refcocog.py
index 21d42b3f7c5..8c544280b3b 100644
--- a/configs/_base_/datasets/refcocog.py
+++ b/configs/_base_/datasets/refcocog.py
@@ -5,7 +5,7 @@
 backend_args = None
 
 test_pipeline = [
-    dict(type='LoadImageFromFile'),
+    dict(type='LoadImageFromFile', backend_args=backend_args),
     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
     dict(
         type='LoadAnnotations',
diff --git a/docs/en/user_guides/dataset_prepare.md b/docs/en/user_guides/dataset_prepare.md
index 6c7ec0d8eb4..7ca6629c823 100644
--- a/docs/en/user_guides/dataset_prepare.md
+++ b/docs/en/user_guides/dataset_prepare.md
@@ -111,7 +111,6 @@ Then move the annotations to the `data/ADEChallengeData2016` directory and run t
 ```shell
 mv data/annotations_instance data/ADEChallengeData2016/
 mv data/categoryMapping.txt data/ADEChallengeData2016/
-mv data/objectInfo150.txt data/ADEChallengeData2016/
 mv data/imgCatIds.json data/ADEChallengeData2016/
 python tools/dataset_converters/ade20k2coco.py data/ADEChallengeData2016 --task panoptic
 python tools/dataset_converters/ade20k2coco.py data/ADEChallengeData2016 --task instance
diff --git a/mmdet/datasets/ade20k.py b/mmdet/datasets/ade20k.py
index ed4c2548239..573271cb5d0 100644
--- a/mmdet/datasets/ade20k.py
+++ b/mmdet/datasets/ade20k.py
@@ -66,8 +66,8 @@ class ADE20KPanopticDataset(CocoPanopticDataset):
             'book', 'bench', 'countertop', 'stove', 'palm, palm tree',
             'kitchen island', 'computer', 'swivel chair', 'boat',
             'arcade machine', 'bus', 'towel', 'light', 'truck', 'chandelier',
-            'awning, sunshade, sunblind', 'street lamp', 'booth', 'tv', 'airplane',
-            'clothes', 'pole',
+            'awning, sunshade, sunblind', 'street lamp', 'booth', 'tv',
+            'airplane', 'clothes', 'pole',
             'bannister, banister, balustrade, balusters, handrail',
             'ottoman, pouf, pouffe, puff, hassock', 'bottle', 'van', 'ship',
             'fountain', 'washer, automatic washer, washing machine',
@@ -105,8 +105,8 @@ class ADE20KPanopticDataset(CocoPanopticDataset):
             'book', 'bench', 'countertop', 'stove', 'palm, palm tree',
             'kitchen island', 'computer', 'swivel chair', 'boat',
             'arcade machine', 'bus', 'towel', 'light', 'truck', 'chandelier',
-            'awning, sunshade, sunblind', 'street lamp', 'booth', 'tv', 'airplane',
-            'clothes', 'pole',
+            'awning, sunshade, sunblind', 'street lamp', 'booth', 'tv',
+            'airplane', 'clothes', 'pole',
             'bannister, banister, balustrade, balusters, handrail',
             'ottoman, pouf, pouffe, puff, hassock', 'bottle', 'van', 'ship',
             'fountain', 'washer, automatic washer, washing machine',
diff --git a/mmdet/datasets/coco_semantic.py b/mmdet/datasets/coco_semantic.py
index 8fc26ce8aa0..75256845445 100644
--- a/mmdet/datasets/coco_semantic.py
+++ b/mmdet/datasets/coco_semantic.py
@@ -1,4 +1,4 @@
-# Copyright [c] OpenMMLab. All rights reserved.
+# Copyright (c) OpenMMLab. All rights reserved.
 from mmdet.registry import DATASETS
 from .ade20k import ADE20KSegDataset
 
@@ -44,47 +44,47 @@ class CocoSegDataset(ADE20KSegDataset):
             'wall-concrete', 'wall-other', 'wall-panel', 'wall-stone',
             'wall-tile', 'wall-wood', 'water-other', 'waterdrops',
             'window-blind', 'window-other', 'wood'),
-        palette=[[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50],
-                 [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255],
-                 [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7],
-                 [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82],
-                 [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3],
-                 [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255],
-                 [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220],
-                 [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224],
-                 [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255],
-                 [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7],
-                 [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153],
-                 [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255],
-                 [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0],
-                 [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255],
-                 [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255],
-                 [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255],
-                 [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0],
-                 [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0],
-                 [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255],
-                 [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255],
-                 [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20],
-                 [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255],
-                 [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255],
-                 [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255],
-                 [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0],
-                 [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0],
-                 [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255],
-                 [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112],
-                 [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160],
-                 [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163],
-                 [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0],
-                 [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0],
-                 [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255],
-                 [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204],
-                 [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255],
-                 [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255],
-                 [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194],
-                 [102, 255, 0], [92, 0, 255], [107, 255, 200], [58, 41, 149],
-                 [183, 121, 142], [255, 73, 97], [107, 142, 35],
-                 [190, 153, 153], [146, 139, 141], [70, 130, 180],
-                 [134, 199, 156], [209, 226, 140], [96, 36, 108], [96, 96, 96],
-                 [64, 170, 64], [152, 251, 152], [208, 229, 228],
-                 [206, 186, 171], [152, 161, 64], [116, 112, 0], [0, 114, 143],
-                 [102, 102, 156], [250, 141, 255]])
+        palette=[(120, 120, 120), (180, 120, 120), (6, 230, 230), (80, 50, 50),
+                 (4, 200, 3), (120, 120, 80), (140, 140, 140), (204, 5, 255),
+                 (230, 230, 230), (4, 250, 7), (224, 5, 255), (235, 255, 7),
+                 (150, 5, 61), (120, 120, 70), (8, 255, 51), (255, 6, 82),
+                 (143, 255, 140), (204, 255, 4), (255, 51, 7), (204, 70, 3),
+                 (0, 102, 200), (61, 230, 250), (255, 6, 51), (11, 102, 255),
+                 (255, 7, 71), (255, 9, 224), (9, 7, 230), (220, 220, 220),
+                 (255, 9, 92), (112, 9, 255), (8, 255, 214), (7, 255, 224),
+                 (255, 184, 6), (10, 255, 71), (255, 41, 10), (7, 255, 255),
+                 (224, 255, 8), (102, 8, 255), (255, 61, 6), (255, 194, 7),
+                 (255, 122, 8), (0, 255, 20), (255, 8, 41), (255, 5, 153),
+                 (6, 51, 255), (235, 12, 255), (160, 150, 20), (0, 163, 255),
+                 (140, 140, 140), (250, 10, 15), (20, 255, 0), (31, 255, 0),
+                 (255, 31, 0), (255, 224, 0), (153, 255, 0), (0, 0, 255),
+                 (255, 71, 0), (0, 235, 255), (0, 173, 255), (31, 0, 255),
+                 (11, 200, 200), (255, 82, 0), (0, 255, 245), (0, 61, 255),
+                 (0, 255, 112), (0, 255, 133), (255, 0, 0), (255, 163, 0),
+                 (255, 102, 0), (194, 255, 0), (0, 143, 255), (51, 255, 0),
+                 (0, 82, 255), (0, 255, 41), (0, 255, 173), (10, 0, 255),
+                 (173, 255, 0), (0, 255, 153), (255, 92, 0), (255, 0, 255),
+                 (255, 0, 245), (255, 0, 102), (255, 173, 0), (255, 0, 20),
+                 (255, 184, 184), (0, 31, 255), (0, 255, 61), (0, 71, 255),
+                 (255, 0, 204), (0, 255, 194), (0, 255, 82), (0, 10, 255),
+                 (0, 112, 255), (51, 0, 255), (0, 194, 255), (0, 122, 255),
+                 (0, 255, 163), (255, 153, 0), (0, 255, 10), (255, 112, 0),
+                 (143, 255, 0), (82, 0, 255), (163, 255, 0), (255, 235, 0),
+                 (8, 184, 170), (133, 0, 255), (0, 255, 92), (184, 0, 255),
+                 (255, 0, 31), (0, 184, 255), (0, 214, 255), (255, 0, 112),
+                 (92, 255, 0), (0, 224, 255), (112, 224, 255), (70, 184, 160),
+                 (163, 0, 255), (153, 0, 255), (71, 255, 0), (255, 0, 163),
+                 (255, 204, 0), (255, 0, 143), (0, 255, 235), (133, 255, 0),
+                 (255, 0, 235), (245, 0, 255), (255, 0, 122), (255, 245, 0),
+                 (10, 190, 212), (214, 255, 0), (0, 204, 255), (20, 0, 255),
+                 (255, 255, 0), (0, 153, 255), (0, 41, 255), (0, 255, 204),
+                 (41, 0, 255), (41, 255, 0), (173, 0, 255), (0, 245, 255),
+                 (71, 0, 255), (122, 0, 255), (0, 255, 184), (0, 92, 255),
+                 (184, 255, 0), (0, 133, 255), (255, 214, 0), (25, 194, 194),
+                 (102, 255, 0), (92, 0, 255), (107, 255, 200), (58, 41, 149),
+                 (183, 121, 142), (255, 73, 97), (107, 142, 35),
+                 (190, 153, 153), (146, 139, 141), (70, 130, 180),
+                 (134, 199, 156), (209, 226, 140), (96, 36, 108), (96, 96, 96),
+                 (64, 170, 64), (152, 251, 152), (208, 229, 228),
+                 (206, 186, 171), (152, 161, 64), (116, 112, 0), (0, 114, 143),
+                 (102, 102, 156), (250, 141, 255)])
diff --git a/mmdet/datasets/transforms/formatting.py b/mmdet/datasets/transforms/formatting.py
index 35be3eaf01a..83fada30b1f 100644
--- a/mmdet/datasets/transforms/formatting.py
+++ b/mmdet/datasets/transforms/formatting.py
@@ -134,7 +134,7 @@ def transform(self, results: dict) -> dict:
         img_meta = {}
         for key in self.meta_keys:
             assert key in results, f'`{key}` is not found in `results`, ' \
-                f'the valid keys are {list(results)}.'
+                                   f'the valid keys are {list(results)}.'
             img_meta[key] = results[key]
         data_sample.set_metainfo(img_meta)
 
diff --git a/mmdet/datasets/transforms/loading.py b/mmdet/datasets/transforms/loading.py
index fd3c05ce2cd..95945a82d88 100644
--- a/mmdet/datasets/transforms/loading.py
+++ b/mmdet/datasets/transforms/loading.py
@@ -706,7 +706,7 @@ def transform(self, results: dict) -> dict:
 
     def __repr__(self):
         return self.__class__.__name__ + \
-            f'(num_max_proposals={self.num_max_proposals})'
+               f'(num_max_proposals={self.num_max_proposals})'
 
 
 @TRANSFORMS.register_module()
@@ -796,8 +796,8 @@ def transform(self, results: dict) -> Union[dict, None]:
 
     def __repr__(self):
         return self.__class__.__name__ + \
-            f'(min_gt_bbox_wh={self.min_gt_bbox_wh}, ' \
-            f'keep_empty={self.keep_empty})'
+               f'(min_gt_bbox_wh={self.min_gt_bbox_wh}, ' \
+               f'keep_empty={self.keep_empty})'
 
 
 @TRANSFORMS.register_module()
diff --git a/mmdet/datasets/transforms/transforms.py b/mmdet/datasets/transforms/transforms.py
index 9768d71eaed..c2a155868f9 100644
--- a/mmdet/datasets/transforms/transforms.py
+++ b/mmdet/datasets/transforms/transforms.py
@@ -2458,40 +2458,40 @@ def _mosaic_combine(
         if loc == 'top_left':
             # index0 to top left part of image
             x1, y1, x2, y2 = max(center_position_xy[0] - img_shape_wh[0], 0), \
-                max(center_position_xy[1] - img_shape_wh[1], 0), \
-                center_position_xy[0], \
-                center_position_xy[1]
+                             max(center_position_xy[1] - img_shape_wh[1], 0), \
+                             center_position_xy[0], \
+                             center_position_xy[1]
             crop_coord = img_shape_wh[0] - (x2 - x1), img_shape_wh[1] - (
                 y2 - y1), img_shape_wh[0], img_shape_wh[1]
 
         elif loc == 'top_right':
             # index1 to top right part of image
             x1, y1, x2, y2 = center_position_xy[0], \
-                max(center_position_xy[1] - img_shape_wh[1], 0), \
-                min(center_position_xy[0] + img_shape_wh[0],
-                    self.img_scale[0] * 2), \
-                center_position_xy[1]
+                             max(center_position_xy[1] - img_shape_wh[1], 0), \
+                             min(center_position_xy[0] + img_shape_wh[0],
+                                 self.img_scale[0] * 2), \
+                             center_position_xy[1]
             crop_coord = 0, img_shape_wh[1] - (y2 - y1), min(
                 img_shape_wh[0], x2 - x1), img_shape_wh[1]
 
         elif loc == 'bottom_left':
             # index2 to bottom left part of image
             x1, y1, x2, y2 = max(center_position_xy[0] - img_shape_wh[0], 0), \
-                center_position_xy[1], \
-                center_position_xy[0], \
-                min(self.img_scale[1] * 2, center_position_xy[1] +
-                    img_shape_wh[1])
+                             center_position_xy[1], \
+                             center_position_xy[0], \
+                             min(self.img_scale[1] * 2, center_position_xy[1] +
+                                 img_shape_wh[1])
             crop_coord = img_shape_wh[0] - (x2 - x1), 0, img_shape_wh[0], min(
                 y2 - y1, img_shape_wh[1])
 
         else:
             # index3 to bottom right part of image
             x1, y1, x2, y2 = center_position_xy[0], \
-                center_position_xy[1], \
-                min(center_position_xy[0] + img_shape_wh[0],
-                    self.img_scale[0] * 2), \
-                min(self.img_scale[1] * 2, center_position_xy[1] +
-                    img_shape_wh[1])
+                             center_position_xy[1], \
+                             min(center_position_xy[0] + img_shape_wh[0],
+                                 self.img_scale[0] * 2), \
+                             min(self.img_scale[1] * 2, center_position_xy[1] +
+                                 img_shape_wh[1])
             crop_coord = 0, 0, min(img_shape_wh[0],
                                    x2 - x1), min(y2 - y1, img_shape_wh[1])
 
diff --git a/projects/XDecoder/README.md b/projects/XDecoder/README.md
index 10ffe230d99..b5325cbacb5 100644
--- a/projects/XDecoder/README.md
+++ b/projects/XDecoder/README.md
@@ -147,7 +147,7 @@ Since semantic segmentation is a pixel-level task, we don't need to use a thresh
 ```
 
 | Model                               | mIoU  | mIOU(official) |                                 Config                                 |
-| :---------------------------------- |:-----:| :------------: | :--------------------------------------------------------------------: |
+| :---------------------------------- | :---: | :------------: | :--------------------------------------------------------------------: |
 | `xdecoder_focalt_best_openseg.pt`\* | 19.06 |     18.97      | [config](configs/xdecoder-tiny_zeroshot_open-vocab-panoptic_ade20k.py) |
 
 ### Semantic segmentation on COCO2017
@@ -175,8 +175,8 @@ Prepare your dataset according to the [docs](https://mmdetection.readthedocs.io/
 ```
 
 | Model                                                | Mask mAP | Mask mAP(official) |                                Config                                 |
-| :-------------------------------------------------- | :------: |-------------------:| :------------------------------------------------------------------: |
-| `xdecoder-tiny_zeroshot_open-vocab-instance_coco`\* | 39.8 | 39.7 | [config](configs/xdecoder-tiny_zeroshot_open-vocab-instance_coco.py) |
+| :-------------------------------------------------- | :------: | -----------------: | :------------------------------------------------------------------: |
+| `xdecoder-tiny_zeroshot_open-vocab-instance_coco`\*  |   39.8   |               39.7 | [config](configs/xdecoder-tiny_zeroshot_open-vocab-instance_coco.py) |
 
 ### Panoptic segmentation on COCO2017
 
diff --git a/projects/XDecoder/configs/xdecoder-tiny_zeroshot_open-vocab-instance_ade20k.py b/projects/XDecoder/configs/xdecoder-tiny_zeroshot_open-vocab-instance_ade20k.py
index 58923599cba..f0b2e484775 100644
--- a/projects/XDecoder/configs/xdecoder-tiny_zeroshot_open-vocab-instance_ade20k.py
+++ b/projects/XDecoder/configs/xdecoder-tiny_zeroshot_open-vocab-instance_ade20k.py
@@ -10,12 +10,7 @@
         backend_args=_base_.backend_args),
     dict(
         type='ResizeShortestEdge', scale=640, max_size=2560, backend='pillow'),
-    dict(
-        type='LoadAnnotations',
-        with_bbox=False,
-        with_mask=False,
-        with_seg=True,
-        reduce_zero_label=True),
+    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
     dict(
         type='PackDetInputs',
         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
diff --git a/projects/XDecoder/configs/xdecoder-tiny_zeroshot_open-vocab-instance_coco.py b/projects/XDecoder/configs/xdecoder-tiny_zeroshot_open-vocab-instance_coco.py
index 512a70824c8..d978cf2fa8e 100644
--- a/projects/XDecoder/configs/xdecoder-tiny_zeroshot_open-vocab-instance_coco.py
+++ b/projects/XDecoder/configs/xdecoder-tiny_zeroshot_open-vocab-instance_coco.py
@@ -10,6 +10,7 @@
         backend_args=_base_.backend_args),
     dict(
         type='ResizeShortestEdge', scale=800, max_size=1333, backend='pillow'),
+    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
     dict(
         type='PackDetInputs',
         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
diff --git a/projects/XDecoder/configs/xdecoder-tiny_zeroshot_open-vocab-panoptic_ade20k.py b/projects/XDecoder/configs/xdecoder-tiny_zeroshot_open-vocab-panoptic_ade20k.py
index 7a9cd4ad502..4f4deb459d7 100644
--- a/projects/XDecoder/configs/xdecoder-tiny_zeroshot_open-vocab-panoptic_ade20k.py
+++ b/projects/XDecoder/configs/xdecoder-tiny_zeroshot_open-vocab-panoptic_ade20k.py
@@ -19,48 +19,37 @@
                    'scale_factor', 'text', 'stuff_text'))
 ]
 
-x_decoder_ade20k_thing_classes = ('bed', 'window', 'cabinet', 'person', 'door', 'table', 'curtain',
-                                  'chair', 'car', 'painting', 'sofa', 'shelf', 'mirror',
-                                  'armchair', 'seat', 'fence', 'desk', 'wardrobe',
-                                  'lamp', 'tub', 'rail', 'cushion', 'box', 'column',
-                                  'signboard', 'chest of drawers',
-                                  'counter', 'sink', 'fireplace', 'refrigerator', 'stairs',
-                                  'case',
-                                  'pool table', 'pillow',
-                                  'screen door', 'bookcase', 'coffee table',
-                                  'toilet', 'flower',
-                                  'book', 'bench', 'countertop', 'stove', 'palm',
-                                  'kitchen island', 'computer', 'swivel chair', 'boat',
-                                  'arcade machine', 'bus', 'towel', 'light', 'truck', 'chandelier',
-                                  'awning', 'street lamp', 'booth', 'tv', 'airplane',
-                                  'clothes', 'pole',
-                                  'bannister',
-                                  'ottoman', 'bottle', 'van', 'ship',
-                                  'fountain', 'washer',
-                                  'plaything', 'stool', 'barrel', 'basket',
-                                  'bag', 'minibike', 'oven', 'ball', 'food',
-                                  'step', 'trade name', 'microwave', 'pot', 'animal', 'bicycle',
-                                  'dishwasher', 'screen', 'sculpture', 'hood', 'sconce',
-                                  'vase', 'traffic light', 'tray', 'trash can', 'fan', 'plate',
-                                  'monitor', 'bulletin board', 'radiator', 'glass',
-                                  'clock', 'flag')
+x_decoder_ade20k_thing_classes = (
+    'bed', 'window', 'cabinet', 'person', 'door', 'table', 'curtain', 'chair',
+    'car', 'painting', 'sofa', 'shelf', 'mirror', 'armchair', 'seat', 'fence',
+    'desk', 'wardrobe', 'lamp', 'tub', 'rail', 'cushion', 'box', 'column',
+    'signboard', 'chest of drawers', 'counter', 'sink', 'fireplace',
+    'refrigerator', 'stairs', 'case', 'pool table', 'pillow', 'screen door',
+    'bookcase', 'coffee table', 'toilet', 'flower', 'book', 'bench',
+    'countertop', 'stove', 'palm', 'kitchen island', 'computer',
+    'swivel chair', 'boat', 'arcade machine', 'bus', 'towel', 'light', 'truck',
+    'chandelier', 'awning', 'street lamp', 'booth', 'tv', 'airplane',
+    'clothes', 'pole', 'bannister', 'ottoman', 'bottle', 'van', 'ship',
+    'fountain', 'washer', 'plaything', 'stool', 'barrel', 'basket', 'bag',
+    'minibike', 'oven', 'ball', 'food', 'step', 'trade name', 'microwave',
+    'pot', 'animal', 'bicycle', 'dishwasher', 'screen', 'sculpture', 'hood',
+    'sconce', 'vase', 'traffic light', 'tray', 'trash can', 'fan', 'plate',
+    'monitor', 'bulletin board', 'radiator', 'glass', 'clock', 'flag')
 
-x_decoder_ade20k_stuff_classes = ('wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road',
-                                  'grass', 'sidewalk', 'earth', 'mountain',
-                                  'plant', 'water', 'house', 'sea', 'rug', 'field', 'rock',
-                                  'base', 'sand', 'skyscraper',
-                                  'grandstand', 'path', 'runway', 'stairway',
-                                  'river', 'bridge', 'blind', 'hill', 'bar',
-                                  'hovel', 'tower', 'dirt track',
-                                  'land', 'escalator',
-                                  'buffet',
-                                  'poster', 'stage',
-                                  'conveyer belt',
-                                  'canopy', 'pool', 'falls', 'tent', 'cradle', 'tank',
-                                  'lake', 'blanket', 'pier', 'crt screen', 'shower')
+x_decoder_ade20k_stuff_classes = (
+    'wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road', 'grass',
+    'sidewalk', 'earth', 'mountain', 'plant', 'water', 'house', 'sea', 'rug',
+    'field', 'rock', 'base', 'sand', 'skyscraper', 'grandstand', 'path',
+    'runway', 'stairway', 'river', 'bridge', 'blind', 'hill', 'bar', 'hovel',
+    'tower', 'dirt track', 'land', 'escalator', 'buffet', 'poster', 'stage',
+    'conveyer belt', 'canopy', 'pool', 'falls', 'tent', 'cradle', 'tank',
+    'lake', 'blanket', 'pier', 'crt screen', 'shower')
 
 val_dataloader = dict(
-    dataset=dict(metainfo=dict(thing_classes=x_decoder_ade20k_thing_classes,
-                               stuff_classes=x_decoder_ade20k_stuff_classes),
-                 return_classes=True, pipeline=test_pipeline))
+    dataset=dict(
+        metainfo=dict(
+            thing_classes=x_decoder_ade20k_thing_classes,
+            stuff_classes=x_decoder_ade20k_stuff_classes),
+        return_classes=True,
+        pipeline=test_pipeline))
 test_dataloader = val_dataloader
diff --git a/projects/XDecoder/configs/xdecoder-tiny_zeroshot_open-vocab-panoptic_coco.py b/projects/XDecoder/configs/xdecoder-tiny_zeroshot_open-vocab-panoptic_coco.py
index b0b2712a4ed..5547e9ed121 100644
--- a/projects/XDecoder/configs/xdecoder-tiny_zeroshot_open-vocab-panoptic_coco.py
+++ b/projects/XDecoder/configs/xdecoder-tiny_zeroshot_open-vocab-panoptic_coco.py
@@ -10,6 +10,7 @@
         backend_args=_base_.backend_args),
     dict(
         type='ResizeShortestEdge', scale=800, max_size=1333, backend='pillow'),
+    dict(type='LoadPanopticAnnotations', backend_args=_base_.backend_args),
     dict(
         type='PackDetInputs',
         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
diff --git a/tools/dataset_converters/ade20k2coco.py b/tools/dataset_converters/ade20k2coco.py
index 3bd748f620b..5da332ba504 100644
--- a/tools/dataset_converters/ade20k2coco.py
+++ b/tools/dataset_converters/ade20k2coco.py
@@ -201,14 +201,12 @@ def prepare_panoptic_annotations(dataset_dir: str):
             new_2_old_mapping[i + len(thing_classes)] = j
 
     for old, new in old_2_new_mapping.items():
-        neworder_categories.append(
-            {
-                'id': new,
-                'name': all_classes[old],
-                'isthing': int(new < len(thing_classes)),
-                'color': palette[new]
-            }
-        )
+        neworder_categories.append({
+            'id': new,
+            'name': all_classes[old],
+            'isthing': int(new < len(thing_classes)),
+            'color': palette[new]
+        })
     categories_dict = {cat['id']: cat for cat in neworder_categories}
 
     panoptic_json_categories = neworder_categories[:]
@@ -255,7 +253,8 @@ def prepare_panoptic_annotations(dataset_dir: str):
         for semantic_cat_id in np.unique(semantic_cat_ids):
             if semantic_cat_id == 255:
                 continue
-            if categories_dict[old_2_new_mapping[int(semantic_cat_id)]]['isthing'] == 1:
+            if categories_dict[old_2_new_mapping[int(
+                    semantic_cat_id)]]['isthing'] == 1:
                 continue
             mask = semantic_cat_ids == semantic_cat_id
             # should not have any overlap
@@ -278,11 +277,16 @@ def prepare_panoptic_annotations(dataset_dir: str):
                 bbox = [int(x), int(y), int(width), int(height)]
 
                 segm_info.append({
-                    'id': int(segment_id),
-                    'category_id': old_2_new_mapping[int(semantic_cat_id)],
-                    'area': int(area),
-                    'bbox': bbox,
-                    'iscrowd': 0
+                    'id':
+                    int(segment_id),
+                    'category_id':
+                    old_2_new_mapping[int(semantic_cat_id)],
+                    'area':
+                    int(area),
+                    'bbox':
+                    bbox,
+                    'iscrowd':
+                    0
                 })
 
             # process things
@@ -377,4 +381,4 @@ def main():
 
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()
diff --git a/tools/dataset_converters/coco_stuff164k.py b/tools/dataset_converters/coco_stuff164k.py
index ce25d999c9c..fe1ff9f6b43 100644
--- a/tools/dataset_converters/coco_stuff164k.py
+++ b/tools/dataset_converters/coco_stuff164k.py
@@ -202,7 +202,7 @@ def convert_to_trainID(maskpath, out_mask_dir, is_train):
 def parse_args():
     parser = argparse.ArgumentParser(
         description=\
-        'Convert COCO Stuff 164k annotations to mmsegmentation format')  # noqa
+        'Convert COCO Stuff 164k annotations to mmdet format')  # noqa
     parser.add_argument('coco_path', help='coco stuff path')
     parser.add_argument(
         '--out-dir-name',
diff --git a/tools/dataset_converters/prepare_coco_semantic_annos_from_panoptic_annos.py b/tools/dataset_converters/prepare_coco_semantic_annos_from_panoptic_annos.py
index ac1f2dc4ae3..2b9ee592cb3 100644
--- a/tools/dataset_converters/prepare_coco_semantic_annos_from_panoptic_annos.py
+++ b/tools/dataset_converters/prepare_coco_semantic_annos_from_panoptic_annos.py
@@ -879,7 +879,7 @@ def iter_annotations():
 def parse_args():
     parser = argparse.ArgumentParser(
         description=\
-        'Convert COCO Stuff 164k annotations to mmsegmentation format')  # noqa
+        'Convert COCO Stuff 164k annotations to mmdet format')  # noqa
     parser.add_argument('coco_path', help='coco stuff path')
     args = parser.parse_args()
     return args
diff --git a/tools/misc/download_dataset.py b/tools/misc/download_dataset.py
index dd0b49eaf20..5d801d208c4 100644
--- a/tools/misc/download_dataset.py
+++ b/tools/misc/download_dataset.py
@@ -206,7 +206,8 @@ def main():
     ])
     url = data2url.get(args.dataset_name, None)
     if url is None:
-        print('Only support COCO, VOC, LVIS, balloon, and Objects365v2 now!')
+        print('Only support ADE20K, COCO, RefCOCO, VOC, LVIS, '
+              'balloon, and Objects365v2 now!')
         return
     if args.dataset_name == 'objects365v2':
         download_objects365v2(