Skip to content

Commit 28425c8

Browse files
Yanghan Wang authored and facebook-github-bot committed
fix inference accuracy test
Summary: Pull Request resolved: #5348 Some accuracy tests started to fail in between Jun 11 and Jun 17: - ❌ mask_rcnn_R_50_FPN_inference_acc_test - ✅ keypoint_rcnn_R_50_FPN_inference_acc_test - ✅ fast_rcnn_R_50_FPN_inference_acc_test - ❌ panoptic_fpn_R_50_inference_acc_test - ✅ retinanet_R_50_FPN_inference_acc_test - ❌ rpn_R_50_FPN_inference_acc_test - ✅ semantic_R_50_FPN_inference_acc_test - ❌ cascade_mask_rcnn_R_50_FPN_inference_acc_test V1: update the yaml to reflect the new scores. V5: it turns out that we can match the old scores by disabling tf32. Differential Revision: D61301698
1 parent bcfd464 commit 28425c8

10 files changed

+44
-0
lines changed

configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ DATASETS:
55
TEST: ("coco_2017_val_100",)
66
TEST:
77
EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP", 43.87, 0.02]]
8+
FLOAT32_PRECISION: "highest"

configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ DATASETS:
55
TEST: ("coco_2017_val_100",)
66
TEST:
77
EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]]
8+
FLOAT32_PRECISION: "highest"

configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ DATASETS:
55
TEST: ("keypoints_coco_2017_val_100",)
66
TEST:
77
EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]]
8+
FLOAT32_PRECISION: "highest"

configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ DATASETS:
55
TEST: ("coco_2017_val_100",)
66
TEST:
77
EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]]
8+
FLOAT32_PRECISION: "highest"

configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ DATASETS:
55
TEST: ("coco_2017_val_100",)
66
TEST:
77
EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]]
8+
FLOAT32_PRECISION: "highest"

configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ TEST:
88
AUG:
99
ENABLED: True
1010
MIN_SIZES: (700, 800) # to save some time
11+
FLOAT32_PRECISION: "highest"

configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ DATASETS:
55
TEST: ("coco_2017_val_100_panoptic_separated",)
66
TEST:
77
EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]]
8+
FLOAT32_PRECISION: "highest"

configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ DATASETS:
55
TEST: ("coco_2017_val_100",)
66
TEST:
77
EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]]
8+
FLOAT32_PRECISION: "highest"

detectron2/config/defaults.py

+4
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,10 @@
636636
# for about 10k iterations. It usually hurts total time, but can benefit for certain models.
637637
# If input images have the same or similar sizes, benchmark is often helpful.
638638
_C.CUDNN_BENCHMARK = False
639+
# Option to set PyTorch matmul and CuDNN's float32 precision. When set to non-empty string,
640+
# the corresponding precision ("highest", "high" or "medium") will be used. The highest
641+
# precision will effectively disable tf32.
642+
_C.FLOAT32_PRECISION = ""
639643
# The period (in terms of steps) for minibatch visualization at train time.
640644
# Set to 0 to disable.
641645
_C.VIS_PERIOD = 0

detectron2/engine/defaults.py

+32
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,30 @@ def _highlight(code, filename):
171171
return code
172172

173173

174+
# adapted from:
175+
# https://github.com/pytorch/tnt/blob/ebda066f8f55af6a906807d35bc829686618074d/torchtnt/utils/device.py#L328-L346
176+
def _set_float32_precision(precision: str = "high") -> None:
177+
"""Sets the precision of float32 matrix multiplications and convolution operations.
178+
179+
For more information, see the PyTorch docs:
180+
- https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html
181+
- https://pytorch.org/docs/stable/backends.html#torch.backends.cudnn.allow_tf32
182+
183+
Args:
184+
precision: The setting to determine which datatypes to use for matrix
185+
multiplication and convolution operations.
186+
"""
187+
if not (torch.cuda.is_available()): # Not relevant for non-CUDA devices
188+
return
189+
# set precision for matrix multiplications
190+
torch.set_float32_matmul_precision(precision)
191+
# set precision for convolution operations
192+
if precision == "highest":
193+
torch.backends.cudnn.allow_tf32 = False
194+
else:
195+
torch.backends.cudnn.allow_tf32 = True
196+
197+
174198
def default_setup(cfg, args):
175199
"""
176200
Perform some basic common setups at the beginning of a job, including:
@@ -226,6 +250,14 @@ def default_setup(cfg, args):
226250
cfg, "CUDNN_BENCHMARK", "train.cudnn_benchmark", default=False
227251
)
228252

253+
fp32_precision = _try_get_key(cfg, "FLOAT32_PRECISION", "train.float32_precision", default="")
254+
if fp32_precision != "":
255+
logger.info(f"Set fp32 precision to {fp32_precision}")
256+
_set_float32_precision(fp32_precision)
257+
logger.info(f"{torch.get_float32_matmul_precision()=}")
258+
logger.info(f"{torch.backends.cuda.matmul.allow_tf32=}")
259+
logger.info(f"{torch.backends.cudnn.allow_tf32=}")
260+
229261

230262
def default_writers(output_dir: str, max_iter: Optional[int] = None):
231263
"""

0 commit comments

Comments (0)