Commit c04028f: fix inference accuracy test
Summary:
Pull Request resolved: #5348

Some accuracy tests started to fail between Jun 11 and Jun 17:
- ❌ mask_rcnn_R_50_FPN_inference_acc_test
- ✅ keypoint_rcnn_R_50_FPN_inference_acc_test
- ✅ fast_rcnn_R_50_FPN_inference_acc_test
- ❌ panoptic_fpn_R_50_inference_acc_test
- ✅ retinanet_R_50_FPN_inference_acc_test
- ❌ rpn_R_50_FPN_inference_acc_test
- ✅ semantic_R_50_FPN_inference_acc_test
- ❌ cascade_mask_rcnn_R_50_FPN_inference_acc_test

V1: update the YAML files to reflect the new scores.
V5: it turns out we can match the old scores by disabling TF32.

Differential Revision: D61301698
Yanghan Wang authored and facebook-github-bot committed Aug 20, 2024
1 parent bcfd464 commit c04028f
Showing 10 changed files with 44 additions and 0 deletions.
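The failures are consistent with TF32: on Ampere and newer GPUs, PyTorch may execute float32 matmuls and convolutions in TF32, which trades mantissa bits for speed and can move AP by more than the tests' 0.02 tolerance. A minimal sketch of the effect, assuming a TF32-capable CUDA GPU (the tensor names are illustrative only):

import torch

a = torch.randn(1024, 1024, device="cuda")
b = torch.randn(1024, 1024, device="cuda")

torch.set_float32_matmul_precision("high")     # TF32 allowed for fp32 matmul
fast = a @ b
torch.set_float32_matmul_precision("highest")  # full fp32; TF32 disabled
exact = a @ b

# Typically non-zero on TF32-capable hardware: the same matmul gives
# slightly different results, enough to shift AP past a tight tolerance.
print((fast - exact).abs().max())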
@@ -5,3 +5,4 @@ DATASETS:
   TEST: ("coco_2017_val_100",)
 TEST:
   EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP", 43.87, 0.02]]
+FLOAT32_PRECISION: "highest"
@@ -5,3 +5,4 @@ DATASETS:
   TEST: ("coco_2017_val_100",)
 TEST:
   EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]]
+FLOAT32_PRECISION: "highest"
@@ -5,3 +5,4 @@ DATASETS:
   TEST: ("keypoints_coco_2017_val_100",)
 TEST:
   EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]]
+FLOAT32_PRECISION: "highest"
@@ -5,3 +5,4 @@ DATASETS:
   TEST: ("coco_2017_val_100",)
 TEST:
   EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]]
+FLOAT32_PRECISION: "highest"
@@ -5,3 +5,4 @@ DATASETS:
   TEST: ("coco_2017_val_100",)
 TEST:
   EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]]
+FLOAT32_PRECISION: "highest"
@@ -8,3 +8,4 @@ TEST:
   AUG:
     ENABLED: True
     MIN_SIZES: (700, 800) # to save some time
+FLOAT32_PRECISION: "highest"
@@ -5,3 +5,4 @@ DATASETS:
   TEST: ("coco_2017_val_100_panoptic_separated",)
 TEST:
   EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]]
+FLOAT32_PRECISION: "highest"
@@ -5,3 +5,4 @@ DATASETS:
   TEST: ("coco_2017_val_100",)
 TEST:
   EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]]
+FLOAT32_PRECISION: "highest"
detectron2/config/defaults.py (4 additions, 0 deletions)
@@ -636,6 +636,10 @@
 # for about 10k iterations. It usually hurts total time, but can benefit for certain models.
 # If input images have the same or similar sizes, benchmark is often helpful.
 _C.CUDNN_BENCHMARK = False
+# Option to set PyTorch matmul and CuDNN's float32 precision. When set to non-empty string,
+# the corresponding precision ("highest", "high" or "medium") will be used. The highest
+# precision will effectively disable tf32.
+_C.FLOAT32_PRECISION = ""
 # The period (in terms of steps) for minibatch visualization at train time.
 # Set to 0 to disable.
 _C.VIS_PERIOD = 0
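For illustration, a hedged sketch of how the new key behaves once this commit is applied (get_cfg is the existing detectron2 config entry point; only FLOAT32_PRECISION is new here):

from detectron2.config import get_cfg

cfg = get_cfg()
print(cfg.FLOAT32_PRECISION)       # "" — empty by default, so nothing changes
cfg.FLOAT32_PRECISION = "highest"  # request full fp32; applied by default_setup()

Because the default is the empty string, existing configs keep PyTorch's stock TF32 behavior; only the quick-schedule test configs above opt into "highest".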
detectron2/engine/defaults.py (32 additions, 0 deletions)
@@ -171,6 +171,30 @@ def _highlight(code, filename):
     return code


+# adapted from:
+# https://github.com/pytorch/tnt/blob/ebda066f8f55af6a906807d35bc829686618074d/torchtnt/utils/device.py#L328-L346
+def _set_float32_precision(precision: str = "high") -> None:
+    """Sets the precision of float32 matrix multiplications and convolution operations.
+
+    For more information, see the PyTorch docs:
+    - https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html
+    - https://pytorch.org/docs/stable/backends.html#torch.backends.cudnn.allow_tf32
+
+    Args:
+        precision: The setting to determine which datatypes to use for matrix
+            multiplication and convolution operations.
+    """
+    if not (torch.cuda.is_available()):  # Not relevant for non-CUDA devices
+        return
+    # set precision for matrix multiplications
+    torch.set_float32_matmul_precision(precision)
+    # set precision for convolution operations
+    if precision == "highest":
+        torch.backends.cudnn.allow_tf32 = False
+    else:
+        torch.backends.cudnn.allow_tf32 = True
+
+
 def default_setup(cfg, args):
     """
     Perform some basic common setups at the beginning of a job, including:
@@ -226,6 +250,14 @@ def default_setup(cfg, args):
         cfg, "CUDNN_BENCHMARK", "train.cudnn_benchmark", default=False
     )

+    fp32_precision = _try_get_key(cfg, "FLOAT32_PRECISION", "train.float32_precision", default="")
+    if fp32_precision != "":
+        logger.info(f"Set fp32 precision to {fp32_precision}")
+        _set_float32_precision(fp32_precision)
+        logger.info(f"{torch.get_float32_matmul_precision()=}")
+        logger.info(f"{torch.backends.cuda.matmul.allow_tf32=}")
+        logger.info(f"{torch.backends.cudnn.allow_tf32=}")
+

 def default_writers(output_dir: str, max_iter: Optional[int] = None):
     """
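As a quick sanity check, the same knobs can be flipped by hand; this mirrors what _set_float32_precision("highest") does on a CUDA machine and uses only standard PyTorch APIs:

import torch

torch.set_float32_matmul_precision("highest")  # fp32 matmuls stay in full fp32
torch.backends.cudnn.allow_tf32 = False        # no TF32 in cuDNN convolutions

print(torch.get_float32_matmul_precision())   # "highest"
print(torch.backends.cuda.matmul.allow_tf32)  # False
print(torch.backends.cudnn.allow_tf32)        # False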
