-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Feature] add CID-res50 config and model (#2600)
- Loading branch information
Showing
4 changed files
with
225 additions
and
0 deletions.
There are no files selected for viewing
41 changes: 41 additions & 0 deletions
41
configs/body/2d_kpt_sview_rgb_img/cid/coco/res50_fpn_coco.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
<!-- [ALGORITHM] --> | ||
|
||
<details> | ||
<summary align="right"><a href="https://openaccess.thecvf.com/content/CVPR2022/html/Wang_Contextual_Instance_Decoupling_for_Robust_Multi-Person_Pose_Estimation_CVPR_2022_paper.html">CID (CVPR'2022)</a></summary> | ||
|
||
```bibtex | ||
@InProceedings{Wang_2022_CVPR, | ||
author = {Wang, Dongkai and Zhang, Shiliang}, | ||
title = {Contextual Instance Decoupling for Robust Multi-Person Pose Estimation}, | ||
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, | ||
month = {June}, | ||
year = {2022}, | ||
pages = {11060-11068} | ||
} | ||
``` | ||
|
||
</details> | ||
|
||
<!-- [DATASET] --> | ||
|
||
<details> | ||
<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> | ||
|
||
```bibtex | ||
@inproceedings{lin2014microsoft, | ||
title={Microsoft coco: Common objects in context}, | ||
author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, | ||
booktitle={European conference on computer vision}, | ||
pages={740--755}, | ||
year={2014}, | ||
organization={Springer} | ||
} | ||
``` | ||
|
||
</details> | ||
|
||
Results on COCO val2017 without multi-scale test | ||
|
||
| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | | ||
| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | | ||
| [CID_resnet_50](/configs/body/2d_kpt_sview_rgb_img/cid/coco/res50_fpn_coco_512x512.py) | 512x512 | 0.520 | 0.793 | 0.544 | 0.614 | 0.860 | [ckpt](https://download.openmmlab.com/mmpose/bottom_up/cid/res50_fpn_coco_512x512-494aea6c_20230807.pth) | [log](https://download.openmmlab.com/mmpose/bottom_up/cid/res50_fpn_coco_512x512-20230807.log.json) | |
24 changes: 24 additions & 0 deletions
24
configs/body/2d_kpt_sview_rgb_img/cid/coco/res50_fpn_coco.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
Collections:
- Name: CID
  Paper:
    Title: Contextual Instance Decoupling for Robust Multi-Person Pose Estimation
    URL: https://openaccess.thecvf.com/content/CVPR2022/html/Wang_Contextual_Instance_Decoupling_for_Robust_Multi-Person_Pose_Estimation_CVPR_2022_paper.html
  README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/cid.md
Models:
- Config: configs/body/2d_kpt_sview_rgb_img/cid/coco/res50_fpn_coco_512x512.py
  In Collection: CID
  Metadata:
    Architecture:
    - CID
    Training Data: COCO
  Name: cid_res50_fpn_coco_512x512
  Results:
  - Dataset: COCO
    Metrics:
      AP: 0.52
      AP@0.5: 0.793
      AP@0.75: 0.544
      AR: 0.614
      AR@0.5: 0.86
    Task: Body 2D Keypoint
  Weights: https://download.openmmlab.com/mmpose/bottom_up/cid/res50_fpn_coco_512x512-494aea6c_20230807.pth
159 changes: 159 additions & 0 deletions
159
configs/body/2d_kpt_sview_rgb_img/cid/coco/res50_fpn_coco_512x512.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
# Runtime, optimizer and learning-rate schedule for CID with a ResNet-50 + FPN
# backbone on COCO at 512x512 input.

# Base configs this file inherits from: the shared runtime defaults and the
# COCO dataset meta information.
_base_ = [
    '../../../../_base_/default_runtime.py',
    '../../../../_base_/datasets/coco.py'
]
# Checkpointing and evaluation share the same interval (20); evaluation
# reports 'mAP' and `save_best` is keyed on 'AP'.
# NOTE(review): the interval unit is presumably epochs - confirm against the
# runner configured in default_runtime.py.
checkpoint_config = dict(interval=20)
evaluation = dict(interval=20, metric='mAP', save_best='AP')

optimizer = dict(
    type='Adam',
    lr=0.001,
)
# Gradient clipping: norm_type=2 with max_norm=35.
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[90, 120])
total_epochs = 140
# Keypoint channel layout: 17 joints, with the dataset channels (a single
# group) and the inference channels both listing indices 0..16.
channel_cfg = dict(
    dataset_joints=17,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    ],
    inference_channel=[
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
    ])

# Data settings shared by the train/val/test datasets. The joint count and
# channel lists are read from `channel_cfg` so they cannot drift apart.
data_cfg = dict(
    image_size=512,
    base_size=256,
    base_sigma=2,
    # One entry per scale; `num_scales=1`, so a single 128 heatmap size.
    heatmap_size=[128],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    num_scales=1,
    scale_aware_sigma=False,
    with_bbox=True,
    use_nms=True,
    soft_nms=False,
    oks_thr=0.8,
)
|
||
# model settings
# CID model: ResNet-50 backbone -> FPN neck -> CIDHead.
model = dict(
    type='CID',
    pretrained='torchvision://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        # All four stage outputs are exposed; the neck below declares four
        # matching input widths.
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=4),
    keypoint_head=dict(
        type='CIDHead',
        # 1024 == 4 (num_outs) * 256 (out_channels).
        # NOTE(review): presumably the FPN levels are concatenated before
        # the head - confirm against the CID detector implementation.
        in_channels=1024,
        gfd_channels=32,
        # Same source as test_cfg.num_joints below (value: 17).
        num_joints=channel_cfg['dataset_joints'],
        multi_hm_loss_factor=1.0,
        single_hm_loss_factor=4.0,
        contrastive_loss_factor=1.0,
        max_train_instances=200,
        prior_prob=0.01),
    train_cfg=dict(),
    test_cfg=dict(
        num_joints=channel_cfg['dataset_joints'],
        flip_test=True,
        max_num_people=30,
        detection_threshold=0.01,
        center_pool_kernel=3))
|
||
# Training pipeline: load the image, apply random affine and flip
# augmentation, convert to a normalized tensor, generate the CID targets and
# collect the tensors listed in `keys`.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='BottomUpRandomAffine',
        rot_factor=30,
        scale_factor=[0.75, 1.5],
        scale_type='short',
        trans_factor=40),
    dict(type='BottomUpRandomFlip', flip_prob=0.5),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(
        type='CIDGenerateTarget',
        max_num_people=30,
    ),
    dict(
        type='Collect',
        keys=[
            'img', 'multi_heatmap', 'multi_mask', 'instance_coord',
            'instance_heatmap', 'instance_mask', 'instance_valid'
        ],
        # No meta information is collected for training samples.
        meta_keys=[]),
]
|
||
# Validation pipeline: single test scale (`test_scale_factor=[1]`), resize
# and align with tensor conversion + normalization applied inside the
# BottomUpResizeAlign step, then collect the image and the listed meta keys.
val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='BottomUpGetImgSize', test_scale_factor=[1]),
    dict(
        type='BottomUpResizeAlign',
        transforms=[
            dict(type='ToTensor'),
            dict(
                type='NormalizeTensor',
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]),
        ]),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'aug_data', 'test_scale_factor', 'base_size',
            'center', 'scale', 'flip_index'
        ]),
]

# Testing reuses the validation pipeline object unchanged.
test_pipeline = val_pipeline
|
||
# Dataset root, relative to the directory the tools are launched from.
# Keep it without a trailing slash: the f-strings below add the separator
# themselves, so a trailing slash would produce '//' in every path.
data_root = 'data/coco'
# Dataloader and dataset settings. Training uses the train2017 split;
# validation and testing both use val2017.
# NOTE: `{{_base_.dataset_info}}` is a config-loader placeholder that is
# substituted from the `_base_` files before this file is evaluated; it is
# not meant to run as plain Python.
data = dict(
    workers_per_gpu=2,
    train_dataloader=dict(samples_per_gpu=20),
    val_dataloader=dict(samples_per_gpu=1),
    test_dataloader=dict(samples_per_gpu=1),
    train=dict(
        type='BottomUpCocoDataset',
        ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
        img_prefix=f'{data_root}/train2017/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        dataset_info={{_base_.dataset_info}}),
    val=dict(
        type='BottomUpCocoDataset',
        ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
        img_prefix=f'{data_root}/val2017/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    test=dict(
        type='BottomUpCocoDataset',
        ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
        img_prefix=f'{data_root}/val2017/',
        data_cfg=data_cfg,
        pipeline=test_pipeline,
        dataset_info={{_base_.dataset_info}}),
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters