diff --git a/corelib/dynamicemb/benchmark/README.md b/corelib/dynamicemb/benchmark/README.md index 1ef265e0c..d235ff7d8 100644 --- a/corelib/dynamicemb/benchmark/README.md +++ b/corelib/dynamicemb/benchmark/README.md @@ -56,7 +56,7 @@ bash ./benchmark/benchmark_batched_dynamicemb_tables.sh We test the `BatchedDynamicEmbeddingTablesV2` under `capacity=128x1024x1024`. -The overhead(ms) on H100 80GB HBM3, used pow-law(alpha=1.05) as input. +The overhead (ms) is measured with power-law (alpha=1.05) distributed input. - embedding_dtype: float32 - embedding_dim: 128 - cache_algorithm: lru @@ -64,4 +64,10 @@ The overhead(ms) on H100 80GB HBM3, used pow-law(alpha=1.05) as input. - capacity: 24M when cache_ratio=1.0, 256M when cache_ratio=0.1 - num_iterations: 100 +**Overhead on H100 80GB HBM3** + ![benchmark result of BatchedDynamicEmbeddingTables with torchrec](./benchmark_bdet_results.png) + +**Overhead on NVIDIA H200** + +![benchmark result of BatchedDynamicEmbeddingTables with torchrec on H200](./benchmark_bdet_results_h200.png) diff --git a/corelib/dynamicemb/benchmark/benchmark_batched_dynamicemb_tables.py b/corelib/dynamicemb/benchmark/benchmark_batched_dynamicemb_tables.py index 9074c916a..d7a53e350 100644 --- a/corelib/dynamicemb/benchmark/benchmark_batched_dynamicemb_tables.py +++ b/corelib/dynamicemb/benchmark/benchmark_batched_dynamicemb_tables.py @@ -717,7 +717,9 @@ def main(): features_file = f"{args.num_iterations}-{args.feature_distribution}-{num_embs}-{args.batch_size}-{args.alpha}.pt" try: with open(features_file, "rb") as f: - sparse_features = torch.load(f, map_location=f"cuda:{local_rank}") + sparse_features = torch.load( + f, map_location=f"cuda:{local_rank}", weights_only=False + ) except FileNotFoundError: sparse_features = [] for i in range(args.num_iterations): diff --git a/corelib/dynamicemb/benchmark/benchmark_bdet_results_h200.png b/corelib/dynamicemb/benchmark/benchmark_bdet_results_h200.png new file mode 100644 index 000000000..69ee42933 Binary files 
/dev/null and b/corelib/dynamicemb/benchmark/benchmark_bdet_results_h200.png differ