From dbd8c23e1767ee7a314161d5cec456db766bb2cd Mon Sep 17 00:00:00 2001
From: liuhengyue
Date: Mon, 2 May 2022 20:53:56 -0700
Subject: [PATCH] done

---
 .gitignore                                    |   2 +
 configs/jede/best.yaml                        |  74 ++++++++++
 configs/jede/best_train_all.yaml              |  68 +++++++++
 ...ix_swapdigit_less_anchors_unfreeze_b8.yaml |  27 ++--
 .../model_size/R101-FPN_test_0_gn.yaml        |   2 +-
 configs/pg_rcnn/tests/baseline.yaml           |  10 +-
 demo/demo.py                                  |   7 +-
 demo/predictor.py                             |   1 +
 experiments/ablations/player_feature_size.sh  |  37 +++++
 experiments/jede_R_50_FPN_best.sh             |  78 +++++++++++
 pgrcnn/config/config.py                       |   3 +-
 pgrcnn/data/build.py                          |   8 +-
 pgrcnn/data/dataset_mapper.py                 |   2 +-
 pgrcnn/modeling/roi_heads/pg_head_base.py     |  30 +++-
 pgrcnn/tests/test_dataset_loader.py           |  16 ++-
 pgrcnn/utils/custom_visualizer.py             |   9 +-
 tools/__init__.py                             |   0
 tools/analyze_model.py                        | 129 ++++++++++++++++++
 18 files changed, 466 insertions(+), 37 deletions(-)
 create mode 100644 configs/jede/best.yaml
 create mode 100644 configs/jede/best_train_all.yaml
 create mode 100644 experiments/ablations/player_feature_size.sh
 create mode 100644 experiments/jede_R_50_FPN_best.sh
 create mode 100644 tools/__init__.py
 create mode 100755 tools/analyze_model.py

diff --git a/.gitignore b/.gitignore
index 605c4e1..bc30919 100644
--- a/.gitignore
+++ b/.gitignore
@@ -62,3 +62,5 @@ datasets
 # pycharm plugins
 node_modules/
 package-lock.json
+
+demo_videos
\ No newline at end of file
diff --git a/configs/jede/best.yaml b/configs/jede/best.yaml
new file mode 100644
index 0000000..90322d0
--- /dev/null
+++ b/configs/jede/best.yaml
@@ -0,0 +1,74 @@
+_BASE_: "../pg_rcnn/pg_rcnn_base.yaml"
+MODEL:
+#  BACKBONE:
+#    FREEZE_AT: 0
+#  RESNETS:
+#    NORM: "SyncBN"
+  WEIGHTS: "output/jede_best/train_with_all/model_final.pth"
+  ANCHOR_GENERATOR:
+    SIZES: [ [ 64 ], [ 128 ], [ 256 ], [ 512 ] ]
+  FPN:
+    IN_FEATURES: [ "res2", "res3", "res4", "res5" ]
+  RPN:
+    IN_FEATURES: [ "p3", "p4", "p5", "p6" ]
+    PRE_NMS_TOPK_TRAIN: 1000  # Per FPN level
+    POST_NMS_TOPK_TRAIN: 500
+  ROI_HEADS:
+    NAME: PGROIHeads
+    PROPOSAL_APPEND_GT: True
+    ENABLE_POSE_GUIDE: True
+    BATCH_SIZE_PER_IMAGE: 64
+  ROI_DIGIT_BOX_HEAD:
+    POOLER_RESOLUTION: 28
+  ROI_DIGIT_NECK_OUTPUT:
+    NORM: "GN"
+    MIN_OVERLAP: 0.1
+    OUTPUT_HEAD_CHANNELS: (2, 2, 2)
+    CONV_DIM: 256
+    NUM_DIGITS_CLASSIFIER_ON: 2
+  ROI_NECK_BASE_BRANCHES:
+    NORM: "GN"
+    PERSON_BRANCH:
+      NAME: "PersonROIBranch"
+      POOLER_RESOLUTION: 28
+      UP_SCALE: 1
+      DECONV_KERNEL: 1
+    KEYPOINTS_BRANCH:
+      NAME: "KptsROIBranch"
+      DECONV_KERNEL: 1
+      UP_SCALE: 2
+      CONV_SPECS: [3,2,1]
+      PE: True
+  ROI_NECK_BASE:
+    BATCH_DIGIT_SIZE_PER_IMAGE: 256
+    NUM_PROPOSAL_TRAIN: 100
+DATASETS:
+  TRAIN: ("jerseynumbers_train", "keypoints_coco_2017_train")
+  TRAIN_VIDEO_IDS: [0,1,2,3,4]
+  TEST_VIDEO_IDS: [4]
+
+DATALOADER:
+  SAMPLER_TRAIN: WeightedTrainingSampler
+INPUT:
+  MAX_SIZE_TRAIN: 1589  # 1333 + 256
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800, 832, 864, 896)
+#  MAX_SIZE_TEST: 1333
+#  MIN_SIZE_TEST: 800
+  MAX_SIZE_TEST: 2333
+  MIN_SIZE_TEST: 896
+  AUG:
+    HELPER_DATASET_NAME: svhn_train
+    COPY_PASTE_MIX: 10
+SOLVER:
+  IMS_PER_BATCH: 2
+  BASE_LR: 0.0002
+  STEPS: (120000,)
+  MAX_ITER: 150000
+OUTPUT_DIR: ./output/jede_best/best
+
+#python train_net.py \
+#    --num-gpus 2 \
+#    --config-file configs/jede/best.yaml \
+#    DATASETS.TRAIN_VIDEO_IDS [0,1,3] \
+#    DATASETS.TEST_VIDEO_IDS [2] \
+#    OUTPUT_DIR ./output/jede_best/test_2
\ No newline at end of file
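
Relative to detectron2's default five-level FPN recipe (32 to 512 pixel anchors over p2 to p6), the ANCHOR_GENERATOR/RPN pairing above drops the p2 level and the 32-pixel anchors; this is the "less_anchors" variant referenced by other config names in this patch. detectron2's anchor generator requires one SIZES group per RPN input feature (or a single group broadcast to all levels). A minimal sanity-check sketch with the values hard-coded from best.yaml:

    # Sanity check: one anchor-size group per RPN feature level (or a single
    # group broadcast to all levels), per detectron2's anchor generator.
    rpn_in_features = ["p3", "p4", "p5", "p6"]
    anchor_sizes = [[64], [128], [256], [512]]
    assert len(anchor_sizes) in (1, len(rpn_in_features))
    for level, sizes in zip(rpn_in_features, anchor_sizes):
        print(level, sizes)  # p3 [64], p4 [128], p5 [256], p6 [512]
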
diff --git a/configs/jede/best_train_all.yaml b/configs/jede/best_train_all.yaml
new file mode 100644
index 0000000..c9d105a
--- /dev/null
+++ b/configs/jede/best_train_all.yaml
@@ -0,0 +1,68 @@
+_BASE_: "../pg_rcnn/pg_rcnn_base.yaml"
+MODEL:
+#  BACKBONE:
+#    FREEZE_AT: 0
+#  RESNETS:
+#    NORM: "SyncBN"
+  WEIGHTS: "output/pg_rcnn/aug/datasets_mix/pretrain_coco_svhn_3x/model_final.pth"
+  ANCHOR_GENERATOR:
+    SIZES: [ [ 64 ], [ 128 ], [ 256 ], [ 512 ] ]
+  FPN:
+    IN_FEATURES: [ "res2", "res3", "res4", "res5" ]
+  RPN:
+    IN_FEATURES: [ "p3", "p4", "p5", "p6" ]
+    PRE_NMS_TOPK_TRAIN: 1000  # Per FPN level
+    POST_NMS_TOPK_TRAIN: 500
+  ROI_HEADS:
+    NAME: PGROIHeads
+    PROPOSAL_APPEND_GT: True
+    ENABLE_POSE_GUIDE: True
+    BATCH_SIZE_PER_IMAGE: 64
+  ROI_DIGIT_BOX_HEAD:
+    POOLER_RESOLUTION: 28
+  ROI_DIGIT_NECK_OUTPUT:
+    NORM: "GN"
+    MIN_OVERLAP: 0.1
+    OUTPUT_HEAD_CHANNELS: (2, 2, 2)
+    CONV_DIM: 256
+    NUM_DIGITS_CLASSIFIER_ON: 2
+  ROI_NECK_BASE_BRANCHES:
+    NORM: "GN"
+    PERSON_BRANCH:
+      NAME: "PersonROIBranch"
+      UP_SCALE: 2
+      DECONV_KERNEL: 4
+    KEYPOINTS_BRANCH:
+      NAME: "KptsROIBranch"
+      DECONV_KERNEL: 4
+      UP_SCALE: 2
+      PE: True
+  ROI_NECK_BASE:
+    BATCH_DIGIT_SIZE_PER_IMAGE: 64
+    NUM_PROPOSAL_TRAIN: 50
+DATASETS:
+  TRAIN_VIDEO_IDS: [0,1,2,3,4]
+  TEST_VIDEO_IDS: [4]
+INPUT:
+  MAX_SIZE_TRAIN: 1333
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+#  MAX_SIZE_TEST: 1333
+#  MIN_SIZE_TEST: 800
+  MAX_SIZE_TEST: 2666
+  MIN_SIZE_TEST: 1600
+  AUG:
+    HELPER_DATASET_NAME: svhn_train
+    COPY_PASTE_MIX: 10
+SOLVER:
+  IMS_PER_BATCH: 2
+  BASE_LR: 0.0002
+  STEPS: (120000,)
+  MAX_ITER: 150000
+OUTPUT_DIR: ./output/jede_best/train_with_all
+
+#python train_net.py \
+#    --num-gpus 2 \
+#    --config-file configs/jede/best_train_all.yaml \
+#    DATASETS.TRAIN_VIDEO_IDS [0,1,3] \
+#    DATASETS.TEST_VIDEO_IDS [2] \
+#    OUTPUT_DIR ./output/jede_best/test_2
\ No newline at end of file
diff --git a/configs/pg_rcnn/digit_twochannels/test_0_parallel_gn_pe_pretrain_copypastemix_swapdigit_less_anchors_unfreeze_b8.yaml b/configs/pg_rcnn/digit_twochannels/test_0_parallel_gn_pe_pretrain_copypastemix_swapdigit_less_anchors_unfreeze_b8.yaml
index 787b5ba..c56a1f7 100644
--- a/configs/pg_rcnn/digit_twochannels/test_0_parallel_gn_pe_pretrain_copypastemix_swapdigit_less_anchors_unfreeze_b8.yaml
+++ b/configs/pg_rcnn/digit_twochannels/test_0_parallel_gn_pe_pretrain_copypastemix_swapdigit_less_anchors_unfreeze_b8.yaml
@@ -1,9 +1,9 @@
 _BASE_: "../pg_rcnn_base.yaml"
 MODEL:
-  BACKBONE:
-    FREEZE_AT: 0
-  RESNETS:
-    NORM: "SyncBN"
+#  BACKBONE:
+#    FREEZE_AT: 0
+#  RESNETS:
+#    NORM: "SyncBN"
   WEIGHTS: "output/pg_rcnn/aug/datasets_mix/pretrain_coco_svhn_3x/model_final.pth"
   ANCHOR_GENERATOR:
     SIZES: [ [ 64 ], [ 128 ], [ 256 ], [ 512 ] ]
@@ -30,13 +30,20 @@ MODEL:
 DATASETS:
   TRAIN_VIDEO_IDS: [1,2,3]
   TEST_VIDEO_IDS: [0]
+#SOLVER:
+#  IMS_PER_BATCH: 8
+#  BASE_LR: 0.0008
+#  STEPS: (10000,)
+#  MAX_ITER: 12500
+#  WARMUP_FACTOR: 0.004
+#  WARMUP_ITERS: 250
 SOLVER:
-  IMS_PER_BATCH: 8
-  BASE_LR: 0.0008
-  STEPS: (10000,)
-  MAX_ITER: 12500
-  WARMUP_FACTOR: 0.004
-  WARMUP_ITERS: 250
+  IMS_PER_BATCH: 4
+  BASE_LR: 0.0004
+  STEPS: (20000,)
+  MAX_ITER: 25000
+  WARMUP_FACTOR: 0.002
+  WARMUP_ITERS: 500
 INPUT:
   AUG:
     HELPER_DATASET_NAME: svhn_train
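
The SOLVER block above is the batch-size-4 variant of the commented batch-size-8 schedule, and the numbers follow the linear scaling rule: halving IMS_PER_BATCH halves BASE_LR and doubles STEPS, MAX_ITER, and WARMUP_ITERS, so the total number of training epochs stays roughly constant. A minimal sketch of the arithmetic (the helper name is illustrative, not part of this repo):

    # Illustrative helper: rescale a detectron2-style solver config when the
    # total batch size changes, per the linear scaling rule.
    def rescale_solver(base_lr, steps, max_iter, warmup_iters, old_bs, new_bs):
        k = new_bs / old_bs
        return {
            "BASE_LR": base_lr * k,                      # LR scales with batch size
            "STEPS": tuple(int(s / k) for s in steps),   # iterations scale inversely
            "MAX_ITER": int(max_iter / k),
            "WARMUP_ITERS": int(warmup_iters / k),
        }

    # Reproduces the values in the diff above: 8 -> 4 images per batch.
    print(rescale_solver(0.0008, (10000,), 12500, 250, old_bs=8, new_bs=4))
    # {'BASE_LR': 0.0004, 'STEPS': (20000,), 'MAX_ITER': 25000, 'WARMUP_ITERS': 500}
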
diff --git a/configs/pg_rcnn/model_size/R101-FPN_test_0_gn.yaml b/configs/pg_rcnn/model_size/R101-FPN_test_0_gn.yaml
index dd45067..e655a38 100644
--- a/configs/pg_rcnn/model_size/R101-FPN_test_0_gn.yaml
+++ b/configs/pg_rcnn/model_size/R101-FPN_test_0_gn.yaml
@@ -12,7 +12,7 @@ MODEL:
     NAME: ""
   ROI_DIGIT_NECK_OUTPUT:
     OUTPUT_HEAD_CHANNELS: (2, 2, 2)
-    NUM_DIGITS_CLASSIFIER_ON: 2
+    NUM_DIGITS_CLASSIFIER_ON: 1
 DATASETS:
   TRAIN_VIDEO_IDS: [1,2,3]
   TEST_VIDEO_IDS: [0]
diff --git a/configs/pg_rcnn/tests/baseline.yaml b/configs/pg_rcnn/tests/baseline.yaml
index e8bbf9a..9f45b8f 100644
--- a/configs/pg_rcnn/tests/baseline.yaml
+++ b/configs/pg_rcnn/tests/baseline.yaml
@@ -31,15 +31,15 @@ MODEL:
 DATASETS:
   TRAIN: ("jerseynumbers_train", )  # ("svhn_train", "keypoints_coco_2017_val_100") # jerseynumbers_train
   NUM_IMAGES: -1  # change the number of images to load
-  TRAIN_VIDEO_IDS: [0,1,2,3,4]
+  TRAIN_VIDEO_IDS: [0,1,2,3]
   TEST_VIDEO_IDS: [3]
-#INPUT:
+INPUT:
 #  MIN_SIZE_TRAIN: (256,)
 #  MAX_SIZE_TRAIN: 320
 #  MAX_SIZE_TEST: 320
-#  AUG:
-#    COPY_PASTE_MIX: 5
-#    HELPER_DATASET_NAME: svhn_train
+  AUG:
+    COPY_PASTE_MIX: 5
+    HELPER_DATASET_NAME: svhn_train
 SOLVER:
   IMS_PER_BATCH: 1
   BASE_LR: 0.001  # when IMS_PER_BATCH = 16, LR is 0.02
diff --git a/demo/demo.py b/demo/demo.py
index ab7e57b..6e1a530 100755
--- a/demo/demo.py
+++ b/demo/demo.py
@@ -36,7 +36,7 @@ def get_parser():
     parser = argparse.ArgumentParser(description="JEDE demo")
     parser.add_argument(
         "--config-file",
-        default="configs/jede/best_model.yaml",
+        default="configs/jede/best.yaml",
         metavar="FILE",
         help="path to config file",
     )
@@ -61,7 +61,7 @@ def get_parser():
     parser.add_argument(
         "--opts",
         help="Modify config options using the command-line 'KEY VALUE' pairs",
-        default=["MODEL.WEIGHTS", "output/jede_best/test_0/model_final.pth"],
+        default=["MODEL.WEIGHTS", "output/jede_best/best/model_final.pth"],
         nargs=argparse.REMAINDER,
     )
     return parser
@@ -98,9 +98,10 @@ def get_parser():
            )
            if args.output:
+                ext = ".pdf"
                if os.path.isdir(args.output):
                    assert os.path.isdir(args.output), args.output
-                    out_filename = os.path.join(args.output, os.path.basename(path))
+                    out_filename = os.path.join(args.output, os.path.splitext(os.path.basename(path))[0] + ext)
                else:
                    assert len(args.input) == 1, "Please specify a directory with args.output"
                    out_filename = args.output
diff --git a/demo/predictor.py b/demo/predictor.py
index 636d925..b7c9b81 100644
--- a/demo/predictor.py
+++ b/demo/predictor.py
@@ -52,6 +52,7 @@ def run_on_image(self, image):
        image = image[:, :, ::-1]
        visualizer = JerseyNumberVisualizer(image,
                                            self.metadata,
+                                            scale=2.0,
                                            instance_mode=self.instance_mode,
                                            digit_only=self.digit_only)
        if "panoptic_seg" in predictions:
diff --git a/experiments/ablations/player_feature_size.sh b/experiments/ablations/player_feature_size.sh
new file mode 100644
index 0000000..4025c3d
--- /dev/null
+++ b/experiments/ablations/player_feature_size.sh
@@ -0,0 +1,37 @@
+python train_net.py \
+    --num-gpus 2 \
+    --config-file configs/pg_rcnn/digit_twochannels/test_0_parallel_gn.yaml \
+    MODEL.ROI_NECK_BASE_BRANCHES.NORM "" \
+    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.UP_SCALE 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.DECONV_KERNEL 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.UP_SCALE 2 \
+    MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.DECONV_KERNEL 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.POOLER_RESOLUTION 28 \
+    MODEL.ROI_DIGIT_NECK_OUTPUT.NORM "" \
+    OUTPUT_DIR "./output/ablations/player_feature_size/28x28"
+
+
+#python train_net.py \
+#    --num-gpus 2 \
+#    --config-file configs/pg_rcnn/digit_twochannels/test_0_parallel_gn.yaml \
+#    MODEL.ROI_NECK_BASE_BRANCHES.NORM "" \
+#    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.UP_SCALE 1 \
+#    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.DECONV_KERNEL 1 \
+#    MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.UP_SCALE 1 \
+#    MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.DECONV_KERNEL 1 \
+#    MODEL.ROI_DIGIT_NECK_OUTPUT.NORM "" \
+#    OUTPUT_DIR "./output/ablations/player_feature_size/14x14"
+
+
+python train_net.py \
+    --num-gpus 2 \
+    --config-file configs/pg_rcnn/digit_twochannels/test_0_parallel_gn.yaml \
+    MODEL.ROI_NECK_BASE_BRANCHES.NORM "" \
+    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.UP_SCALE 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.DECONV_KERNEL 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.UP_SCALE 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.DECONV_KERNEL 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.POOLER_RESOLUTION 56 \
+    MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.CONV_SPECS 3,1,1 \
+    MODEL.ROI_DIGIT_NECK_OUTPUT.NORM "" \
+    OUTPUT_DIR "./output/ablations/player_feature_size/56x56"
\ No newline at end of file
diff --git a/experiments/jede_R_50_FPN_best.sh b/experiments/jede_R_50_FPN_best.sh
new file mode 100644
index 0000000..23441d5
--- /dev/null
+++ b/experiments/jede_R_50_FPN_best.sh
@@ -0,0 +1,78 @@
+python train_net.py \
+    --num-gpus 2 \
+    --config-file configs/pg_rcnn/digit_twochannels/test_0_parallel_gn_pe_pretrain_copypastemix_swapdigit_less_anchors_unfreeze_b8.yaml \
+    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.UP_SCALE 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.DECONV_KERNEL 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.UP_SCALE 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.DECONV_KERNEL 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.POOLER_RESOLUTION 56 \
+    MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.CONV_SPECS 3,1,1 \
+    DATASETS.TRAIN_VIDEO_IDS [1,2,3] \
+    DATASETS.TEST_VIDEO_IDS [0] \
+    OUTPUT_DIR "./output/jede_R_50_FPN_best/test_0"
+
+python train_net.py \
+    --num-gpus 2 \
+    --config-file configs/pg_rcnn/digit_twochannels/test_0_parallel_gn_pe_pretrain_copypastemix_swapdigit_less_anchors_unfreeze_b8.yaml \
+    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.UP_SCALE 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.DECONV_KERNEL 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.UP_SCALE 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.DECONV_KERNEL 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.POOLER_RESOLUTION 56 \
+    MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.CONV_SPECS 3,1,1 \
+    DATASETS.TRAIN_VIDEO_IDS [0,2,3] \
+    DATASETS.TEST_VIDEO_IDS [1] \
+    OUTPUT_DIR "./output/jede_R_50_FPN_best/test_1"
+
+python train_net.py \
+    --num-gpus 2 \
+    --config-file configs/pg_rcnn/digit_twochannels/test_0_parallel_gn_pe_pretrain_copypastemix_swapdigit_less_anchors_unfreeze_b8.yaml \
+    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.UP_SCALE 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.DECONV_KERNEL 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.UP_SCALE 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.DECONV_KERNEL 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.POOLER_RESOLUTION 56 \
+    MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.CONV_SPECS 3,1,1 \
+    DATASETS.TRAIN_VIDEO_IDS [0,1,3] \
+    DATASETS.TEST_VIDEO_IDS [2] \
+    OUTPUT_DIR "./output/jede_R_50_FPN_best/test_2"
+
+python train_net.py \
+    --num-gpus 2 \
+    --config-file configs/pg_rcnn/digit_twochannels/test_0_parallel_gn_pe_pretrain_copypastemix_swapdigit_less_anchors_unfreeze_b8.yaml \
+    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.UP_SCALE 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.DECONV_KERNEL 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.UP_SCALE 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.DECONV_KERNEL 1 \
+    MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.POOLER_RESOLUTION 56 \
+    MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.CONV_SPECS 3,1,1 \
+    DATASETS.TRAIN_VIDEO_IDS [0,1,2] \
+    DATASETS.TEST_VIDEO_IDS [3] \
"./output/jede_R_50_FPN_best/test_3" + +python train_net.py \ + --num-gpus 2 \ + --config-file configs/pg_rcnn/digit_twochannels/test_0_parallel_gn_pe_pretrain_copypastemix_swapdigit_less_anchors_unfreeze_b8.yaml \ + MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.UP_SCALE 1 \ + MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.DECONV_KERNEL 1 \ + MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.UP_SCALE 1 \ + MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.DECONV_KERNEL 1 \ + MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.POOLER_RESOLUTION 56 \ + MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.CONV_SPECS 3,1,1 \ + DATASETS.TRAIN_VIDEO_IDS [0,1,2,3] \ + DATASETS.TEST_VIDEO_IDS [4] \ + OUTPUT_DIR "./output/jede_R_50_FPN_best/test_4" + +python train_net.py \ + --num-gpus 2 \ + --config-file configs/pg_rcnn/digit_twochannels/test_0_parallel_gn_pe_pretrain_copypastemix_swapdigit_less_anchors_unfreeze_b8.yaml \ + MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.UP_SCALE 1 \ + MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.DECONV_KERNEL 1 \ + MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.UP_SCALE 1 \ + MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.DECONV_KERNEL 1 \ + MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.POOLER_RESOLUTION 56 \ + MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.CONV_SPECS 3,1,1 \ + DATASETS.TRAIN_VIDEO_IDS [4] \ + DATASETS.TEST_VIDEO_IDS [0,1,2,3] \ + INPUT.AUG.COPY_PASTE_MIX 0 \ + OUTPUT_DIR "./output/jede_R_50_FPN_best/test_5" diff --git a/pgrcnn/config/config.py b/pgrcnn/config/config.py index 28c6ae9..0e7cdc3 100644 --- a/pgrcnn/config/config.py +++ b/pgrcnn/config/config.py @@ -6,7 +6,7 @@ def get_cfg() -> CN: """ Get a copy of the default config. - Then add extra fields. + Then add modeling fields. We @@ -201,6 +201,7 @@ def add_poseguide_config(cfg): _C.MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH = CN() _C.MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.NAME = "PersonROIBranch" + _C.MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.POOLER_RESOLUTION = 14 _C.MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.UP_SCALE = 1 _C.MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.DECONV_KERNEL = 4 _C.MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.CONV_DIMS = [64, 64] diff --git a/pgrcnn/data/build.py b/pgrcnn/data/build.py index 2dc0391..2ab0f3e 100644 --- a/pgrcnn/data/build.py +++ b/pgrcnn/data/build.py @@ -339,13 +339,15 @@ def get_detection_dataset_dicts( pass # get the mapping from data index to the weight probability, and dataset source # record if the data can be applied with CopyPasteMix - has_jerseynumber = 'digit_bboxes' in dataset_dicts[i][0]['annotations'][0] + from_svhn = 'digit_bboxes' in dataset_dicts[i][0]['annotations'][0] and ('video_id' not in dataset_dicts[i][0]) for j, dataset_dict in enumerate(dataset_dicts[i]): data_id = start_ind + j # video 4 (basketball should not be used for copypastemix) - applicables[data_id] = (has_jerseynumber and dataset_dict['video_id']!= 4, i) + from_cropped_player = 'video_id' in dataset_dict and dataset_dict['video_id'] != 4 + valid = from_svhn or from_cropped_player + applicables[data_id] = (valid, i) weights.append( 1 / (len(dataset_dicts) * len(dataset_dicts[i])) ) - if has_jerseynumber: + if valid: jerseynumber_inds[i].append(data_id) start_ind += len(dataset_dicts[i]) diff --git a/pgrcnn/data/dataset_mapper.py b/pgrcnn/data/dataset_mapper.py index 8761bb8..564c7fb 100644 --- a/pgrcnn/data/dataset_mapper.py +++ b/pgrcnn/data/dataset_mapper.py @@ -120,7 +120,7 @@ def __call__(self, dataset_dict): def apply_helper_annos(self, img, dataset_dict): # we randomly apply - if np.random.rand(1) 
diff --git a/pgrcnn/data/dataset_mapper.py b/pgrcnn/data/dataset_mapper.py
index 8761bb8..564c7fb 100644
--- a/pgrcnn/data/dataset_mapper.py
+++ b/pgrcnn/data/dataset_mapper.py
@@ -120,7 +120,7 @@ def __call__(self, dataset_dict):
 
     def apply_helper_annos(self, img, dataset_dict):
        # we randomly apply
-        if np.random.rand(1) > 0.5:
+        if ('digit_bboxes' not in dataset_dict['annotations'][0]) or np.random.rand(1) > 0.5:
            return img, dataset_dict
        img = img.copy()
        annos = dataset_dict["annotations"]
diff --git a/pgrcnn/modeling/roi_heads/pg_head_base.py b/pgrcnn/modeling/roi_heads/pg_head_base.py
index b925c3d..b616913 100644
--- a/pgrcnn/modeling/roi_heads/pg_head_base.py
+++ b/pgrcnn/modeling/roi_heads/pg_head_base.py
@@ -37,6 +37,7 @@ class BasePGROIHeads(BaseROIHeads):
     def __init__(self,
                  *,
                  neck_base: Optional[nn.Module] = None,
+                 player_box_pooler: Optional[ROIPooler] = None,
                  neck_digit_output: Optional[nn.Module] = None,
                  digit_box_pooler: Optional[ROIPooler] = None,
                  digit_box_head: Optional[nn.Module] = None,
@@ -53,6 +54,7 @@ def __init__(self,
        if self.neck_base is not None:
            self.use_person_features = self.neck_base.use_person_features
            self.use_kpts_features = self.neck_base.use_kpts_features
+        self.player_box_pooler = player_box_pooler
        self.neck_digit_output = neck_digit_output
        self.digit_box_pooler = digit_box_pooler
        self.digit_box_head = digit_box_head
@@ -106,8 +108,22 @@ def _init_neck_base(cls, cfg, input_shape):
            ret["neck_base"] = None
            return ret
        in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
-        # default is 14x14
-        pooler_resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION
+        # add a player pooler, default is 14x14
+        pooler_resolution = cfg.MODEL.ROI_NECK_BASE_BRANCHES.PERSON_BRANCH.POOLER_RESOLUTION
+        pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features)
+        sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
+        pooler_type = cfg.MODEL.ROI_DIGIT_BOX_HEAD.POOLER_TYPE
+        player_box_pooler = (
+            ROIPooler(
+                output_size=pooler_resolution,
+                scales=pooler_scales,
+                sampling_ratio=sampling_ratio,
+                pooler_type=pooler_type,
+            )
+            if pooler_type
+            else None
+        )
+        ret["player_box_pooler"] = player_box_pooler
        in_channels = [input_shape[f].channels for f in in_features]
        # Check all channel counts are equal
        assert len(set(in_channels)) == 1, in_channels
@@ -118,7 +134,11 @@ def _init_neck_base(cls, cfg, input_shape):
        # and keypoint heatmaps of K x 56 x 56,
        # where K could be 4 or 17 depending on
        K = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS if cfg.DATASETS.PAD_TO_FULL else cfg.DATASETS.NUM_KEYPOINTS
-        out_size = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION * 4
+        kernel_size, stride, padding = cfg.MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.CONV_SPECS
+        kpts_size = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION * 4
+        conv_out_size = (kpts_size - kernel_size + 2 * padding) // stride + 1
+        up_scale = max(1, cfg.MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.DECONV_KERNEL // 2) * cfg.MODEL.ROI_NECK_BASE_BRANCHES.KEYPOINTS_BRANCH.UP_SCALE
+        out_size = conv_out_size * up_scale
        # construct the input shapes, in the order of keypoint heatmap, then person box features
        input_shapes = {
            "keypoint_heatmap_shape": ShapeSpec(channels=K, height=out_size, width=out_size),
@@ -258,10 +278,10 @@ def _forward_neck_base(self, features, instances):
            # we pool the features again for convenience
            # 14 x 14 pooler
            if self.training:
-                person_features = self.keypoint_pooler(features, [x.proposal_boxes if x.has("proposal_boxes")
+                person_features = self.player_box_pooler(features, [x.proposal_boxes if x.has("proposal_boxes")
                                                        else Boxes([]).to(features[0].device) for x in instances])
            else:
-                person_features = self.keypoint_pooler(features, [x.pred_boxes for x in instances])
+                person_features = self.player_box_pooler(features, [x.pred_boxes for x in instances])
        else:
            person_features = None
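
The new out_size arithmetic in _init_neck_base above replaces the hard-coded POOLER_RESOLUTION * 4: the keypoint branch output now passes through a conv described by CONV_SPECS = (kernel, stride, padding), so its spatial size follows the standard formula (n - k + 2p) // s + 1 before being multiplied by the deconv up-scale. A worked check (standalone sketch; assumes detectron2's default ROI_KEYPOINT_HEAD.POOLER_RESOLUTION of 14, so the keypoint heatmap is 56 x 56):

    # Standalone check of the out_size arithmetic in _init_neck_base.
    def neck_out_size(pooler_resolution, conv_specs, deconv_kernel, up_scale):
        kernel_size, stride, padding = conv_specs
        kpts_size = pooler_resolution * 4                      # 14 -> 56 heatmap
        conv_out = (kpts_size - kernel_size + 2 * padding) // stride + 1
        scale = max(1, deconv_kernel // 2) * up_scale
        return conv_out * scale

    # 56x56 ablation (CONV_SPECS 3,1,1): 56 -> 56, matching PERSON_BRANCH.POOLER_RESOLUTION 56.
    assert neck_out_size(14, (3, 1, 1), deconv_kernel=1, up_scale=1) == 56
    # best.yaml (CONV_SPECS [3,2,1], UP_SCALE 2): 56 -> 28, then scaled back up to 56.
    assert neck_out_size(14, (3, 2, 1), deconv_kernel=1, up_scale=2) == 56
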
diff --git a/pgrcnn/tests/test_dataset_loader.py b/pgrcnn/tests/test_dataset_loader.py
index 9e2105b..637848a 100644
--- a/pgrcnn/tests/test_dataset_loader.py
+++ b/pgrcnn/tests/test_dataset_loader.py
@@ -1,3 +1,4 @@
+import os
 import logging
 import cv2
 from detectron2.engine import default_argument_parser
@@ -27,16 +28,22 @@ def visualize_training(batched_inputs, cfg):
     jnw_metadata = MetadataCatalog.get("jerseynumbers_train")
     batched_inputs = [batched_inputs[0]] if len(batched_inputs) > 1 else batched_inputs
     # assert len(batched_inputs) == 1, "visualize_training() needs batch size of 1"
+    out_dir = "output/test_dataset_loader"
+    if not os.path.exists(out_dir):
+        os.makedirs(out_dir)
     for input in batched_inputs:
        img = input["image"].cpu().numpy()
+        basename = os.path.basename(input["file_name"])
+        basename_wo_extension = os.path.splitext(basename)[0]
        assert img.shape[0] == 3, "Images should have 3 channels."
-        if cfg.INPUT.FORMAT == "RGB":
-            img = img[::-1, :, :]
+        img = img[::-1, :, :]
        img = img.transpose(1, 2, 0)
        v_gt = JerseyNumberVisualizer(img, metadata=jnw_metadata)
        # v_gt = v_gt.overlay_instances(boxes=input["instances"].gt_boxes)
        v_gt = v_gt.draw_dataloader_instances(input)
        vis_img = v_gt.get_image()
+        out_path = os.path.join(out_dir, basename_wo_extension)
+        v_gt.save("{}.pdf".format(out_path))
     return input['file_name'], vis_img
 
 def test_base_dataloader(cfg, show_data=False):
@@ -45,8 +52,8 @@
     for data in dataloader:
        logger.info(f"{data[0]['file_name']}")
        logger.info(f"{data[0]}")
+        file_name, vis_img = visualize_training(data, cfg)
        if show_data:
-            file_name, vis_img = visualize_training(data, cfg)
            cv2.imshow(file_name, vis_img)
            cv2.waitKey(0)
            cv2.destroyAllWindows()
@@ -58,5 +65,6 @@
     # lazy add config file if you want
     if not args.config_file:
        args.config_file = "configs/pg_rcnn/tests/baseline.yaml"
+        # args.config_file = "configs/jede/best.yaml"
     cfg = setup(args)
-    test_base_dataloader(cfg, show_data=True)
+    test_base_dataloader(cfg, show_data=False)
diff --git a/pgrcnn/utils/custom_visualizer.py b/pgrcnn/utils/custom_visualizer.py
index 3fe2cd4..aca5a42 100644
--- a/pgrcnn/utils/custom_visualizer.py
+++ b/pgrcnn/utils/custom_visualizer.py
@@ -213,9 +213,10 @@
        self.cpu_device = torch.device("cpu")
        # too small texts are useless, therefore clamp to 9
-        self._default_font_size = max(
-            np.sqrt(self.output.height * self.output.width) / (nrows * ncols) // 90, 10 // scale
-        )
+        # self._default_font_size = max(
+        #     np.sqrt(self.output.height * self.output.width) / (nrows * ncols) // 90, 10 // scale
+        # )
+        self._default_font_size = 5
        self._instance_mode = instance_mode
        self.digit_only = digit_only
@@ -548,7 +549,7 @@ def draw_and_connect_keypoints(self, keypoints, color=_RED, draw_mid=False):
            # draw keypoint
            x, y, prob = keypoint
            if prob > _KEYPOINT_THRESHOLD:
-                self.draw_circle((x, y), color=color)
+                self.draw_circle((x, y), radius=self._default_font_size // 2 + 1, color=color)
            if keypoint_names:
                keypoint_name = keypoint_names[idx]
                visible[keypoint_name] = (x, y)
diff --git a/tools/__init__.py b/tools/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tools/analyze_model.py b/tools/analyze_model.py
new file mode 100755
index 0000000..3666ddd
--- /dev/null
+++ b/tools/analyze_model.py
@@ -0,0 +1,129 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import logging
+import numpy as np
+from collections import Counter
+import tqdm
+from fvcore.nn import flop_count_table  # can also try flop_count_str
+
+from detectron2.checkpoint import DetectionCheckpointer
+from detectron2.config import get_cfg
+from detectron2.data import build_detection_test_loader
+from detectron2.engine import default_argument_parser
+from detectron2.modeling import build_model
+from detectron2.utils.analysis import (
+    FlopCountAnalysis,
+    activation_count_operators,
+    parameter_count_table,
+)
+from detectron2.utils.logger import setup_logger
+
+from pgrcnn.utils.launch_utils import setup, Trainer
+
+logger = logging.getLogger("detectron2")
+
+
+def do_flop(cfg):
+    data_loader = Trainer.build_test_loader(cfg, cfg.DATASETS.TEST[0])
+    model = Trainer.build_model(cfg)
+    DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)
+    model.eval()
+
+    counts = Counter()
+    total_flops = []
+    for idx, data in zip(tqdm.trange(args.num_inputs), data_loader):  # noqa
+        flops = FlopCountAnalysis(model, data)
+        if idx > 0:
+            flops.unsupported_ops_warnings(False).uncalled_modules_warnings(False)
+        counts += flops.by_operator()
+        total_flops.append(flops.total())
+
+    logger.info("Flops table computed from only one input sample:\n" + flop_count_table(flops))
+    logger.info(
+        "Average GFlops for each type of operators:\n"
+        + str([(k, v / (idx + 1) / 1e9) for k, v in counts.items()])
+    )
+    logger.info(
+        "Total GFlops: {:.1f}±{:.1f}".format(np.mean(total_flops) / 1e9, np.std(total_flops) / 1e9)
+    )
+
+
+def do_activation(cfg):
+    data_loader = Trainer.build_test_loader(cfg, cfg.DATASETS.TEST[0])
+    model = Trainer.build_model(cfg)
+    DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)
+    model.eval()
+
+    counts = Counter()
+    total_activations = []
+    for idx, data in zip(tqdm.trange(args.num_inputs), data_loader):  # noqa
+        count = activation_count_operators(model, data)
+        counts += count
+        total_activations.append(sum(count.values()))
+    logger.info(
+        "(Million) Activations for Each Type of Operators:\n"
+        + str([(k, v / (idx + 1)) for k, v in counts.items()])
+    )
+    logger.info(
+        "Total (Million) Activations: {}±{}".format(
+            np.mean(total_activations), np.std(total_activations)
+        )
+    )
+
+
+def do_parameter(cfg):
+    model = Trainer.build_model(cfg)
+    logger.info("Parameter Count:\n" + parameter_count_table(model, max_depth=5))
+
+
+def do_structure(cfg):
+    model = Trainer.build_model(cfg)
+    logger.info("Model Structure:\n" + str(model))
+
+
+if __name__ == "__main__":
+    parser = default_argument_parser(
+        epilog="""
+Examples:
+
+To show parameters of a model:
+$ ./analyze_model.py --tasks parameter \\
+    --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
+
+Flops and activations are data-dependent, therefore inputs and model weights
+are needed to count them:
+
+$ ./analyze_model.py --num-inputs 100 --tasks flop \\
+    --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \\
+    MODEL.WEIGHTS /path/to/model.pkl
+"""
+    )
+    parser.add_argument(
+        "--tasks",
+        choices=["flop", "activation", "parameter", "structure"],
+        required=True,
+        nargs="+",
+    )
+    parser.add_argument(
+        "-n",
+        "--num-inputs",
+        default=100,
+        type=int,
+        help="number of inputs used to compute statistics for flops/activations, "
+        "both are data dependent.",
+    )
+    args = parser.parse_args()
+    assert not args.eval_only
+    assert args.num_gpus == 1
+
+    cfg = setup(args)
+
+    for task in args.tasks:
+        {
+            "flop": do_flop,
+            "activation": do_activation,
+            "parameter": do_parameter,
"structure": do_structure, + }[task](cfg)