Merge pull request mlcommons#677 from mnaumovfb/master

DLRM: Adjusting README with more precise benchmark commands.
nv-rborkar · Aug 13, 2020 · cf15214 · cf15214
2 parents 2244cff + 5691281
commit cf15214
Show file tree

Hide file tree

Showing 2 changed files with 34 additions and 10 deletions.
diff --git a/v0.5/recommendation/README.md b/v0.5/recommendation/README.md
@@ -6,7 +6,7 @@ This is the reference implementation for MLPerf Inference benchmarks.
 
 | name | framework | acc. | AUC | dataset | weights  | size | prec. | notes |
 | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- |
-| dlrm (debugging) | PyTorch | 78.9% | N/A | [Criteo KaggleDAC](https://labs.criteo.com/2014/02/kaggle-display-advertising-challenge-dataset/)       | N/A                                                                     | ~1GB | fp32 |                          |
+| dlrm (debugging) | PyTorch | 78.82% | N/A | [Criteo KaggleDAC](https://labs.criteo.com/2014/02/kaggle-display-advertising-challenge-dataset/)       | N/A                                                                     | ~1GB | fp32 |                          |
 | dlrm (debugging) | PyTorch | 81.07% | N/A | [Criteo Terabyte](https://labs.criteo.com/2013/12/download-terabyte-click-logs/) | [pytorch](https://dlrm.s3-us-west-1.amazonaws.com/models/tb0875_10M.pt), [onnx](https://dlrm.s3-us-west-1.amazonaws.com/models/tb0875_10M.onnx.tar) | ~10GB | fp32 | --max-ind-range=10000000 --data-sub-sample-rate=0.875 |
 | dlrm (official) | PyTorch | N/A | 80.25% | [Criteo Terabyte](https://labs.criteo.com/2013/12/download-terabyte-click-logs/) | [pytorch](https://dlrm.s3-us-west-1.amazonaws.com/models/tb00_40M.pt), [onnx](https://dlrm.s3-us-west-1.amazonaws.com/models/tb00_40M.onnx.tar) | ~100GB | fp32 | --max-ind-range=40000000 |
 
@@ -180,22 +180,45 @@ options are extra arguments that are passed along
 
 For example, to run on CPU you may choose to use:
 
-1. Criteo Kaggle DAC
+1. Criteo Kaggle DAC (debugging)
+
+Offline scenario perf and accuracy modes
+```
+./run_local.sh pytorch dlrm kaggle cpu --scenario Offline --samples-to-aggregate-fix=2048 --max-batchsize=2048
+./run_local.sh pytorch dlrm kaggle cpu --scenario Offline --samples-to-aggregate-fix=2048 --max-batchsize=2048 --samples-per-query-offline=1 --accuracy
+```
+Server scenario perf and accuracy modes
 ```
-./run_local.sh pytorch dlrm kaggle cpu --scenario Offline --samples-per-query-offline=1 --samples-to-aggregate-fix=2048 --max-batchsize=2048 --accuracy
-./run_local.sh pytorch dlrm kaggle cpu --scenario Server --samples-to-aggregate-quantile-file=./tools/dist_quantile.txt --max-batchsize=2048
+./run_local.sh pytorch dlrm kaggle cpu --scenario Server --samples-to-aggregate-fix=2048 --max-batchsize=2048
+./run_local.sh pytorch dlrm kaggle cpu --scenario Server --samples-to-aggregate-fix=2048 --max-batchsize=2048 --accuracy
 ```
 
-2. Criteo Terabyte (0.875)
+2. Criteo Terabyte with 0.875 sub-sampling (debugging)
+
+Offline scenario perf and accuracy modes
 ```
-./run_local.sh pytorch dlrm terabyte cpu --scenario Offline --max-ind-range=10000000 --data-sub-sample-rate=0.875 --samples-per-query-offline=1 --samples-to-aggregate-fix=2048 --max-batchsize=2048 --accuracy [--mlperf-bin-loader]
-./run_local.sh pytorch dlrm terabyte cpu --scenario Server  --max-ind-range=10000000 --data-sub-sample-rate=0.875 --samples-to-aggregate-quantile-file=./tools/dist_quantile.txt --max-batchsize=2048
+./run_local.sh pytorch dlrm terabyte cpu --scenario Offline --max-ind-range=10000000 --data-sub-sample-rate=0.875 --samples-to-aggregate-quantile-file=./tools/dist_quantile.txt --max-batchsize=2048 [--mlperf-bin-loader]
+./run_local.sh pytorch dlrm terabyte cpu --scenario Offline --max-ind-range=10000000 --data-sub-sample-rate=0.875 --samples-to-aggregate-quantile-file=./tools/dist_quantile.txt --max-batchsize=2048 --samples-per-query-offline=1 --accuracy [--mlperf-bin-loader]
 ```
-3. Criteo Terabyte
+Server scenario perf and accuracy modes
 ```
-./run_local.sh pytorch dlrm terabyte cpu --scenario Offline --max-ind-range=40000000 --samples-per-query-offline=1 --samples-to-aggregate-fix=2048 --max-batchsize=2048 --accuracy [--mlperf-bin-loader]
-./run_local.sh pytorch dlrm terabyte cpu --scenario Server  --max-ind-range=40000000 --samples-to-aggregate-quantile-file=./tools/dist_quantile.txt --max-batchsize=2048
+./run_local.sh pytorch dlrm terabyte cpu --scenario Server  --max-ind-range=10000000 --data-sub-sample-rate=0.875 --samples-to-aggregate-quantile-file=./tools/dist_quantile.txt --max-batchsize=2048 [--mlperf-bin-loader]
+./run_local.sh pytorch dlrm terabyte cpu --scenario Server  --max-ind-range=10000000 --data-sub-sample-rate=0.875 --samples-to-aggregate-quantile-file=./tools/dist_quantile.txt --max-batchsize=2048 --accuracy [--mlperf-bin-loader]
 ```
+
+3. Criteo Terabyte (official)
+
+Offline scenario perf and accuracy modes
+```
+./run_local.sh pytorch dlrm terabyte cpu --scenario Offline --max-ind-range=40000000 --samples-to-aggregate-quantile-file=./tools/dist_quantile.txt --max-batchsize=2048 --samples-per-query-offline=204800 [--mlperf-bin-loader]
+./run_local.sh pytorch dlrm terabyte cpu --scenario Offline --max-ind-range=40000000 --samples-to-aggregate-quantile-file=./tools/dist_quantile.txt --max-batchsize=2048 --samples-per-query-offline=204800 --accuracy [--mlperf-bin-loader]
+```
+Server scenario perf and accuracy modes
+```
+./run_local.sh pytorch dlrm terabyte cpu --scenario Server  --max-ind-range=40000000 --samples-to-aggregate-quantile-file=./tools/dist_quantile.txt --max-batchsize=2048 [--mlperf-bin-loader]
+./run_local.sh pytorch dlrm terabyte cpu --scenario Server  --max-ind-range=40000000 --samples-to-aggregate-quantile-file=./tools/dist_quantile.txt --max-batchsize=2048 --accuracy [--mlperf-bin-loader]
+```
+
 Note that the code support (i) original and (ii) mlperf binary loader, that have slightly different performance characteristics. The latter loader can be enabled by adding `--mlperf-bin-loader` to the command line.
 
 Note that this script will pre-process the data during the first run and reuse it over sub-sequent runs. The pre-processing of data can take a significant amount of time during the first run.

diff --git a/v0.5/recommendation/python/main.py b/v0.5/recommendation/python/main.py
@@ -124,6 +124,7 @@ def get_args():
     parser.add_argument("--count-samples", type=int, help="dataset items to use")
     parser.add_argument("--count-queries", type=int, help="number of queries to use")
     parser.add_argument("--samples-per-query-multistream", type=int, help="query length for multi-stream scenario (in terms of aggregated samples)")
+    # --samples-per-query-offline is equivalent to perf_sample_count
     parser.add_argument("--samples-per-query-offline", type=int, default=2048, help="query length for offline scenario (in terms of aggregated samples)")
     parser.add_argument("--samples-to-aggregate-fix", type=int, help="number of samples to be treated as one")
     parser.add_argument("--samples-to-aggregate-min", type=int, help="min number of samples to be treated as one in random query size")