@@ -63,7 +63,7 @@ class BenchmarkOperatorBackend:
 REGISTERED_METRICS: Dict[str, List[str]] = {}
 REGISTERED_X_VALS: Dict[str, str] = {}
 BASELINE_BENCHMARKS: Dict[str, str] = {}
-BASELINE_SKIP_METRICS = {"speedup", "accuracy", "mem_footprint"}
+BASELINE_SKIP_METRICS = {"speedup", "accuracy", "mem_footprint_compression_ratio"}
 X_ONLY_METRICS = set(["hw_roofline"])
 PRECISION_DTYPE_MAPPING = {
     "fp32": torch.float32,
@@ -227,7 +227,7 @@ class BenchmarkOperatorMetrics:
     # extra metrics
     extra_metrics: Optional[Dict[str, float]] = None
     # mem footprint
-    mem_footprint: Optional[float] = None
+    mem_footprint_compression_ratio: Optional[float] = None
 
 
 BUILTIN_METRICS = {x.name for x in fields(BenchmarkOperatorMetrics)} - {"extra_metrics"}
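
Because BUILTIN_METRICS is derived from the dataclass fields, renaming the field is enough to expose the new metric name; no separate registration step is needed. A minimal, self-contained sketch of that mechanism (a cut-down stand-in for the real BenchmarkOperatorMetrics, which defines many more fields):

    from dataclasses import dataclass, fields
    from typing import Dict, Optional

    @dataclass
    class BenchmarkOperatorMetrics:
        # cut-down stand-in: the real class has many more metric fields
        extra_metrics: Optional[Dict[str, float]] = None
        mem_footprint_compression_ratio: Optional[float] = None

    # every dataclass field except extra_metrics becomes a built-in metric name
    BUILTIN_METRICS = {x.name for x in fields(BenchmarkOperatorMetrics)} - {"extra_metrics"}
    print(BUILTIN_METRICS)  # {'mem_footprint_compression_ratio'}
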
@@ -953,29 +953,31 @@ def _init_extra_metrics() -> Dict[str, Any]:
                 if not self.tb_args.bypass_fail:
                     raise e
                 metrics.latency = None
-            if {"gpu_peak_mem", "gpu_mem_footprint", "cpu_peak_mem"} & set(
-                self.required_metrics
-            ):
+            if {
+                "gpu_peak_mem",
+                "gpu_mem_footprint_compression_ratio",
+                "cpu_peak_mem",
+            } & set(self.required_metrics):
                 metrics.cpu_peak_mem, metrics.gpu_peak_mem = self.get_peak_mem(
                     fn,
                     grad_to_none=self.get_grad_to_none(self.example_inputs),
                     required_metrics=self.required_metrics,
                     use_cuda_graphs=self.use_cuda_graphs,
                 )
             if (
-                "mem_footprint" in self.required_metrics
+                "mem_footprint_compression_ratio" in self.required_metrics
                 and "gpu_peak_mem" in self.required_metrics
                 and self.baseline_metrics
             ):
                 if (
                     self.baseline_metrics.gpu_peak_mem is not None
                     and metrics.gpu_peak_mem is not None
                 ):
-                    metrics.mem_footprint = (
+                    metrics.mem_footprint_compression_ratio = (
                         self.baseline_metrics.gpu_peak_mem / metrics.gpu_peak_mem
                     )
                 else:
-                    metrics.mem_footprint = None
+                    metrics.mem_footprint_compression_ratio = None
             if "walltime" in self.required_metrics:
                 metrics.walltime = do_bench_walltime(
                     fn,
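
The renamed metric is computed above as a ratio: the baseline backend's GPU peak memory divided by the measured backend's GPU peak memory, so values above 1.0 mean the measured backend needs less peak GPU memory than the baseline. A minimal sketch of that calculation with hypothetical standalone names (compression_ratio, baseline_gpu_peak_mem, and candidate_gpu_peak_mem are illustrative, not identifiers from the diff):

    from typing import Optional

    def compression_ratio(
        baseline_gpu_peak_mem: Optional[float],   # GB, from the baseline backend
        candidate_gpu_peak_mem: Optional[float],  # GB, from the backend under test
    ) -> Optional[float]:
        # mirrors the None-guard above: the ratio is only defined when both
        # peak-memory measurements are available
        if baseline_gpu_peak_mem is None or candidate_gpu_peak_mem is None:
            return None
        return baseline_gpu_peak_mem / candidate_gpu_peak_mem

    print(compression_ratio(2.0, 1.0))  # 2.0 -> candidate uses half the baseline's peak memory
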
@@ -1180,7 +1182,7 @@ def get_peak_mem(
         grad_to_none (Optional[List[torch.Tensor]], optional): List of tensors whose gradients
             should be set to None between iterations. Defaults to None.
         required_metrics (Optional[List[str]], optional): List of metrics to measure.
-            Supported values: ["gpu_peak_mem", "mem_footprint", "cpu_peak_mem"].
+            Supported values: ["gpu_peak_mem", "mem_footprint_compression_ratio", "cpu_peak_mem"].
             Defaults to None.
         use_cuda_graphs (bool, optional): Whether to use CUDA graphs for measurement.
             Defaults to False.
@@ -1206,7 +1208,7 @@ def get_peak_mem(
             fn, n_repeat=2, grad_to_none=grad_to_none, device_type=device_type
         )
         if device_type == "cuda" and (
-            {"gpu_peak_mem", "mem_footprint"} & set(required_metrics)
+            {"gpu_peak_mem", "mem_footprint_compression_ratio"} & set(required_metrics)
         ):
             gpu_peak_mem = torch.cuda.max_memory_allocated() / 10**9
         if "cpu_peak_mem" in required_metrics: