Skip to content

Commit 8ecf78e

Browse files
committed
Auto-merge updates from auto-update branch
2 parents 149bc16 + f1ab5ed commit 8ecf78e

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

51 files changed

+1151
-1161
lines changed

closed/MLCommons/systems/RTX4090x1-nvidia_original-gpu-tensorrt-vdefault-default_config.json

+8-8
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
{
2-
"accelerator_frequency": "2610000 MHz",
2+
"accelerator_frequency": "2520000 MHz",
33
"accelerator_host_interconnect": "N/A",
44
"accelerator_interconnect": "N/A",
55
"accelerator_interconnect_topology": "",
6-
"accelerator_memory_capacity": "23.54595947265625 GB",
6+
"accelerator_memory_capacity": "23.64971923828125 GB",
77
"accelerator_memory_configuration": "N/A",
88
"accelerator_model_name": "NVIDIA GeForce RTX 4090",
99
"accelerator_on-chip_memories": "",
@@ -16,17 +16,17 @@
1616
"host_network_card_count": "1",
1717
"host_networking": "Gig Ethernet",
1818
"host_networking_topology": "N/A",
19-
"host_processor_caches": "L1d cache: 512 KiB, L1i cache: 512 KiB, L2 cache: 16 MiB, L3 cache: 64 MiB",
20-
"host_processor_core_count": "16",
21-
"host_processor_frequency": "5881.0000",
19+
"host_processor_caches": "L1d cache: 576 KiB, L1i cache: 384 KiB, L2 cache: 24 MiB, L3 cache: ",
20+
"host_processor_core_count": "24",
21+
"host_processor_frequency": "5800.0000",
2222
"host_processor_interconnect": "",
23-
"host_processor_model_name": "AMD Ryzen 9 7950X 16-Core Processor",
23+
"host_processor_model_name": "13th Gen Intel(R) Core(TM) i9-13900K",
2424
"host_processors_per_node": "1",
25-
"host_storage_capacity": "6.8T",
25+
"host_storage_capacity": "9.4T",
2626
"host_storage_type": "SSD",
2727
"hw_notes": "",
2828
"number_of_nodes": "1",
29-
"operating_system": "Ubuntu 20.04 (linux-6.8.0-51-generic-glibc2.31)",
29+
"operating_system": "Ubuntu 20.04 (linux-6.8.0-49-generic-glibc2.31)",
3030
"other_software_stack": "Python: 3.8.10, GCC-9.4.0, Using Docker , CUDA 12.2",
3131
"status": "available",
3232
"submitter": "MLCommons",

open/MLCommons/measurements/RTX4090x1-nvidia_original-gpu-tensorrt-vdefault-default_config/stable-diffusion-xl/offline/README.md

+6-12
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
This experiment is generated using the [MLCommons Collective Mind automation framework (CM)](https://github.com/mlcommons/cm4mlops).
2-
31
*Check [CM MLPerf docs](https://docs.mlcommons.org/inference) for more details.*
42

53
## Host platform
@@ -19,7 +17,7 @@ pip install -U cmind
1917

2018
cm rm cache -f
2119

22-
cm pull repo mlcommons@mlperf-automations --checkout=ca9263aff2a56ee495a03382fb678506581d9da9
20+
cm pull repo mlcommons@mlperf-automations --checkout=48ea6b46a7606d1c5d74909e94d5599dbe7ff9e1
2321

2422
cm run script \
2523
--tags=app,mlperf,inference,generic,_nvidia,_sdxl,_tensorrt,_cuda,_valid,_r4.1-dev_default,_offline \
@@ -41,8 +39,8 @@ cm run script \
4139
--env.CM_RUN_MLPERF_SUBMISSION_PREPROCESSOR=yes \
4240
--env.CM_MLPERF_INFERENCE_PULL_CODE_CHANGES=yes \
4341
--env.CM_MLPERF_INFERENCE_PULL_SRC_CHANGES=yes \
44-
--env.OUTPUT_BASE_DIR=/home/arjun/gh_action_results \
45-
--env.CM_MLPERF_INFERENCE_SUBMISSION_DIR=/home/arjun/gh_action_submissions \
42+
--env.OUTPUT_BASE_DIR=/cm-mount/home/arjun/gh_action_results \
43+
--env.CM_MLPERF_INFERENCE_SUBMISSION_DIR=/cm-mount/home/arjun/gh_action_submissions \
4644
--env.CM_MLPERF_SUBMITTER=MLCommons \
4745
--env.CM_USE_DATASET_FROM_HOST=yes \
4846
--env.CM_USE_MODEL_FROM_HOST=yes \
@@ -71,7 +69,7 @@ cm run script \
7169
--env.CM_DOCKER_REUSE_EXISTING_CONTAINER=yes \
7270
--env.CM_DOCKER_DETACHED_MODE=yes \
7371
--env.CM_MLPERF_INFERENCE_RESULTS_DIR_=/home/arjun/gh_action_results/valid_results \
74-
--env.CM_DOCKER_CONTAINER_ID=c30d1a720abb \
72+
--env.CM_DOCKER_CONTAINER_ID=2578b35d628e \
7573
--env.CM_MLPERF_LOADGEN_COMPLIANCE_TEST=TEST04 \
7674
--add_deps_recursive.compiler.tags=gcc \
7775
--add_deps_recursive.coco2014-original.tags=_full \
@@ -104,11 +102,7 @@ cm run script \
104102
--v=False \
105103
--print_env=False \
106104
--print_deps=False \
107-
--dump_version_info=True \
108-
--env.OUTPUT_BASE_DIR=/cm-mount/home/arjun/gh_action_results \
109-
--env.CM_MLPERF_INFERENCE_SUBMISSION_DIR=/cm-mount/home/arjun/gh_action_submissions \
110-
--env.SDXL_CHECKPOINT_PATH=/home/cmuser/CM/repos/local/cache/d0f05efed7544e3a/stable_diffusion_fp16 \
111-
--env.MLPERF_SCRATCH_PATH=/home/cmuser/CM/repos/local/cache/5b2b0cc913a4453a
105+
--dump_version_info=True
112106
```
113107
*Note that if you want to use the [latest automation recipes](https://docs.mlcommons.org/inference) for MLPerf (CM scripts),
114108
you should simply reload mlcommons@mlperf-automations without the `--checkout` option and clean the CM cache as follows:*
@@ -129,4 +123,4 @@ Model Precision: int8
129123
### Accuracy Results
130124

131125
### Performance Results
132-
`Samples per second`: `0.698`
126+
`Samples per second`: `0.697739`
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,30 @@
1-
[2024-12-29 07:34:13,589 main.py:229 INFO] Detected system ID: KnownSystem.RTX4090x1
1+
[2025-01-01 07:35:05,442 main.py:229 INFO] Detected system ID: KnownSystem.RTX4090x1
22
/home/cmuser/.local/lib/python3.8/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
33
warnings.warn(_BETA_TRANSFORMS_WARNING)
44
/home/cmuser/.local/lib/python3.8/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
55
warnings.warn(_BETA_TRANSFORMS_WARNING)
6-
[2024-12-29 07:34:14,715 generate_conf_files.py:107 INFO] Generated measurements/ entries for RTX4090x1_TRT/stable-diffusion-xl/Offline
7-
[2024-12-29 07:34:14,715 __init__.py:46 INFO] Running command: python3 -m code.stable-diffusion-xl.tensorrt.harness --logfile_outdir="/cm-mount/home/arjun/gh_action_results/valid_results/RTX4090x1-nvidia_original-gpu-tensorrt-vdefault-default_config/stable-diffusion-xl/offline/accuracy" --logfile_prefix="mlperf_log_" --performance_sample_count=5000 --test_mode="AccuracyOnly" --gpu_batch_size=2 --mlperf_conf_path="/home/cmuser/CM/repos/local/cache/c1d8c371d52d46a3/inference/mlperf.conf" --tensor_path="build/preprocessed_data/coco2014-tokenized-sdxl/5k_dataset_final/" --use_graphs=true --user_conf_path="/home/cmuser/CM/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/0f2b4a4ab1aa48d092f808fe52515e2a.conf" --gpu_inference_streams=1 --gpu_copy_streams=1 --gpu_engines="./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIP-Offline-gpu-b2-fp16.custom_k_99_MaxP.plan,./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIPWithProj-Offline-gpu-b2-fp16.custom_k_99_MaxP.plan,./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-UNetXL-Offline-gpu-b2-int8.custom_k_99_MaxP.plan,./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-VAE-Offline-gpu-b2-fp32.custom_k_99_MaxP.plan" --scenario Offline --model stable-diffusion-xl
8-
[2024-12-29 07:34:14,715 __init__.py:53 INFO] Overriding Environment
6+
[2025-01-01 07:35:06,464 generate_conf_files.py:107 INFO] Generated measurements/ entries for RTX4090x1_TRT/stable-diffusion-xl/Offline
7+
[2025-01-01 07:35:06,464 __init__.py:46 INFO] Running command: python3 -m code.stable-diffusion-xl.tensorrt.harness --logfile_outdir="/cm-mount/home/arjun/gh_action_results/valid_results/RTX4090x1-nvidia_original-gpu-tensorrt-vdefault-default_config/stable-diffusion-xl/offline/accuracy" --logfile_prefix="mlperf_log_" --performance_sample_count=5000 --test_mode="AccuracyOnly" --gpu_batch_size=2 --mlperf_conf_path="/home/cmuser/CM/repos/local/cache/c1d8c371d52d46a3/inference/mlperf.conf" --tensor_path="build/preprocessed_data/coco2014-tokenized-sdxl/5k_dataset_final/" --use_graphs=true --user_conf_path="/home/cmuser/CM/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/8255eff47682486f875ae6f8c8cd3191.conf" --gpu_inference_streams=1 --gpu_copy_streams=1 --gpu_engines="./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIP-Offline-gpu-b2-fp16.custom_k_99_MaxP.plan,./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIPWithProj-Offline-gpu-b2-fp16.custom_k_99_MaxP.plan,./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-UNetXL-Offline-gpu-b2-int8.custom_k_99_MaxP.plan,./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-VAE-Offline-gpu-b2-fp32.custom_k_99_MaxP.plan" --scenario Offline --model stable-diffusion-xl
8+
[2025-01-01 07:35:06,464 __init__.py:53 INFO] Overriding Environment
99
/home/cmuser/.local/lib/python3.8/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
1010
warnings.warn(_BETA_TRANSFORMS_WARNING)
1111
/home/cmuser/.local/lib/python3.8/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
1212
warnings.warn(_BETA_TRANSFORMS_WARNING)
13-
[2024-12-29 07:34:16,327 backend.py:71 INFO] Loading TensorRT engine: ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIP-Offline-gpu-b2-fp16.custom_k_99_MaxP.plan.
14-
[2024-12-29 07:34:16,428 backend.py:71 INFO] Loading TensorRT engine: ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIPWithProj-Offline-gpu-b2-fp16.custom_k_99_MaxP.plan.
15-
[2024-12-29 07:34:16,936 backend.py:71 INFO] Loading TensorRT engine: ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-UNetXL-Offline-gpu-b2-int8.custom_k_99_MaxP.plan.
16-
[2024-12-29 07:34:17,974 backend.py:71 INFO] Loading TensorRT engine: ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-VAE-Offline-gpu-b2-fp32.custom_k_99_MaxP.plan.
17-
[2024-12-29 07:34:18,939 backend.py:96 INFO] Enabling cuda graphs for unet
18-
[2024-12-29 07:34:19,149 backend.py:154 INFO] captured graph for BS=1
19-
[2024-12-29 07:34:19,402 backend.py:154 INFO] captured graph for BS=2
20-
[2024-12-29 07:34:19,402 harness.py:207 INFO] Start Warm Up!
21-
[2024-12-29 07:34:25,225 harness.py:209 INFO] Warm Up Done!
22-
[2024-12-29 07:34:25,225 harness.py:211 INFO] Start Test!
23-
[2024-12-29 09:33:49,131 backend.py:801 INFO] [Server] Received 5000 total samples
24-
[2024-12-29 09:33:49,132 backend.py:809 INFO] [Device 0] Reported 5000 samples
25-
[2024-12-29 09:33:49,132 harness.py:214 INFO] Test Done!
26-
[2024-12-29 09:33:49,132 harness.py:216 INFO] Destroying SUT...
27-
[2024-12-29 09:33:49,132 harness.py:219 INFO] Destroying QSL...
13+
[2025-01-01 07:35:07,887 backend.py:71 INFO] Loading TensorRT engine: ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIP-Offline-gpu-b2-fp16.custom_k_99_MaxP.plan.
14+
[2025-01-01 07:35:07,986 backend.py:71 INFO] Loading TensorRT engine: ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIPWithProj-Offline-gpu-b2-fp16.custom_k_99_MaxP.plan.
15+
[2025-01-01 07:35:08,499 backend.py:71 INFO] Loading TensorRT engine: ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-UNetXL-Offline-gpu-b2-int8.custom_k_99_MaxP.plan.
16+
[2025-01-01 07:35:09,539 backend.py:71 INFO] Loading TensorRT engine: ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-VAE-Offline-gpu-b2-fp32.custom_k_99_MaxP.plan.
17+
[2025-01-01 07:35:10,501 backend.py:96 INFO] Enabling cuda graphs for unet
18+
[2025-01-01 07:35:10,710 backend.py:154 INFO] captured graph for BS=1
19+
[2025-01-01 07:35:10,963 backend.py:154 INFO] captured graph for BS=2
20+
[2025-01-01 07:35:10,963 harness.py:207 INFO] Start Warm Up!
21+
[2025-01-01 07:35:16,788 harness.py:209 INFO] Warm Up Done!
22+
[2025-01-01 07:35:16,788 harness.py:211 INFO] Start Test!
23+
[2025-01-01 09:34:41,246 backend.py:801 INFO] [Server] Received 5000 total samples
24+
[2025-01-01 09:34:41,247 backend.py:809 INFO] [Device 0] Reported 5000 samples
25+
[2025-01-01 09:34:41,247 harness.py:214 INFO] Test Done!
26+
[2025-01-01 09:34:41,247 harness.py:216 INFO] Destroying SUT...
27+
[2025-01-01 09:34:41,247 harness.py:219 INFO] Destroying QSL...
2828
benchmark : Benchmark.SDXL
2929
buffer_manager_thread_count : 0
3030
data_dir : /home/cmuser/CM/repos/local/cache/5b2b0cc913a4453a/data
@@ -33,7 +33,7 @@ gpu_copy_streams : 1
3333
gpu_inference_streams : 1
3434
input_dtype : int32
3535
input_format : linear
36-
log_dir : /home/cmuser/CM/repos/local/cache/dfbf240f980947f5/repo/closed/NVIDIA/build/logs/2024.12.29-07.34.12
36+
log_dir : /home/cmuser/CM/repos/local/cache/dfbf240f980947f5/repo/closed/NVIDIA/build/logs/2025.01.01-07.35.04
3737
mlperf_conf_path : /home/cmuser/CM/repos/local/cache/c1d8c371d52d46a3/inference/mlperf.conf
3838
model_path : /home/cmuser/CM/repos/local/cache/5b2b0cc913a4453a/models/SDXL/
3939
offline_expected_qps : 0.0
@@ -44,7 +44,7 @@ system : SystemConfiguration(host_cpu_conf=CPUConfiguration(layout={CPU(name='13
4444
tensor_path : build/preprocessed_data/coco2014-tokenized-sdxl/5k_dataset_final/
4545
test_mode : AccuracyOnly
4646
use_graphs : True
47-
user_conf_path : /home/cmuser/CM/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/0f2b4a4ab1aa48d092f808fe52515e2a.conf
47+
user_conf_path : /home/cmuser/CM/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/8255eff47682486f875ae6f8c8cd3191.conf
4848
system_id : RTX4090x1
4949
config_name : RTX4090x1_stable-diffusion-xl_Offline
5050
workload_setting : WorkloadSetting(HarnessType.Custom, AccuracyTarget.k_99, PowerSetting.MaxP)
@@ -60,7 +60,7 @@ cpu_freq : None
6060
[I] Loading bytes from ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIPWithProj-Offline-gpu-b2-fp16.custom_k_99_MaxP.plan
6161
[I] Loading bytes from ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-UNetXL-Offline-gpu-b2-int8.custom_k_99_MaxP.plan
6262
[I] Loading bytes from ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-VAE-Offline-gpu-b2-fp32.custom_k_99_MaxP.plan
63-
[2024-12-29 09:33:49,425 run_harness.py:166 INFO] Result: Accuracy run detected.
63+
[2025-01-01 09:34:41,537 run_harness.py:166 INFO] Result: Accuracy run detected.
6464

6565
======================== Result summaries: ========================
6666

0 commit comments

Comments
 (0)