1
- [2024-12-27 19:22:11,757 main.py:229 INFO] Detected system ID: KnownSystem.RTX4090x2
2
- [2024-12-27 19:22:12,105 harness.py:249 INFO] The harness will load 3 plugins: ['build/plugins/pixelShuffle3DPlugin/libpixelshuffle3dplugin.so', 'build/plugins/conv3D1X1X1K4Plugin/libconv3D1X1X1K4Plugin.so', 'build/plugins/conv3D3X3X3C1K32Plugin/libconv3D3X3X3C1K32Plugin.so']
3
- [2024-12-27 19:22:12,105 generate_conf_files.py:107 INFO] Generated measurements/ entries for RTX4090x2_TRT/3d-unet-99.9/Offline
4
- [2024-12-27 19:22:12,105 __init__.py:46 INFO] Running command: ./build/bin/harness_3dunet --plugins="build/plugins/pixelShuffle3DPlugin/libpixelshuffle3dplugin.so,build/plugins/conv3D1X1X1K4Plugin/libconv3D1X1X1K4Plugin.so,build/plugins/conv3D3X3X3C1K32Plugin/libconv3D3X3X3C1K32Plugin.so" --logfile_outdir="/cm-mount/home/arjun/gh_action_results/valid_results/RTX4090x2-nvidia_original-gpu-tensorrt-vdefault-default_config/3d-unet-99.9/offline/accuracy" --logfile_prefix="mlperf_log_" --performance_sample_count=43 --test_mode="AccuracyOnly" --gpu_copy_streams=1 --gpu_inference_streams=1 --use_deque_limit=true --gpu_batch_size=8 --map_path="data_maps/kits19/val_map.txt" --mlperf_conf_path="/home/cmuser/CM/repos/local/cache/5860c00d55d14786/inference/mlperf.conf" --tensor_path="build/preprocessed_data/KiTS19/inference/int8" --use_graphs=false --user_conf_path="/home/cmuser/CM/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/41707d0ae3a44394912cc5401db89bb4.conf" --unet3d_sw_gaussian_patch_path="/home/cmuser/CM/repos/local/cache/4db00c74da1e44c8/preprocessed_data/KiTS19/etc/gaussian_patches.npy" --gpu_engines="./build/engines/RTX4090x2/3d-unet/Offline/3d-unet-Offline-gpu-b8-int8.custom_k_99_9_MaxP.plan" --max_dlas=0 --slice_overlap_patch_kernel_cg_impl=false --scenario Offline --model 3d-unet
5
- [2024-12-27 19:22:12,105 __init__.py:53 INFO] Overriding Environment
1
+ [2024-12-28 19:37:47,865 main.py:229 INFO] Detected system ID: KnownSystem.RTX4090x2
2
+ [2024-12-28 19:37:48,216 harness.py:249 INFO] The harness will load 3 plugins: ['build/plugins/pixelShuffle3DPlugin/libpixelshuffle3dplugin.so', 'build/plugins/conv3D1X1X1K4Plugin/libconv3D1X1X1K4Plugin.so', 'build/plugins/conv3D3X3X3C1K32Plugin/libconv3D3X3X3C1K32Plugin.so']
3
+ [2024-12-28 19:37:48,217 generate_conf_files.py:107 INFO] Generated measurements/ entries for RTX4090x2_TRT/3d-unet-99.9/Offline
4
+ [2024-12-28 19:37:48,217 __init__.py:46 INFO] Running command: ./build/bin/harness_3dunet --plugins="build/plugins/pixelShuffle3DPlugin/libpixelshuffle3dplugin.so,build/plugins/conv3D1X1X1K4Plugin/libconv3D1X1X1K4Plugin.so,build/plugins/conv3D3X3X3C1K32Plugin/libconv3D3X3X3C1K32Plugin.so" --logfile_outdir="/cm-mount/home/arjun/gh_action_results/valid_results/RTX4090x2-nvidia_original-gpu-tensorrt-vdefault-default_config/3d-unet-99.9/offline/accuracy" --logfile_prefix="mlperf_log_" --performance_sample_count=43 --test_mode="AccuracyOnly" --gpu_copy_streams=1 --gpu_inference_streams=1 --use_deque_limit=true --gpu_batch_size=8 --map_path="data_maps/kits19/val_map.txt" --mlperf_conf_path="/home/cmuser/CM/repos/local/cache/5860c00d55d14786/inference/mlperf.conf" --tensor_path="build/preprocessed_data/KiTS19/inference/int8" --use_graphs=false --user_conf_path="/home/cmuser/CM/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/51ed06465eef4935a9103379699403b7.conf" --unet3d_sw_gaussian_patch_path="/home/cmuser/CM/repos/local/cache/4db00c74da1e44c8/preprocessed_data/KiTS19/etc/gaussian_patches.npy" --gpu_engines="./build/engines/RTX4090x2/3d-unet/Offline/3d-unet-Offline-gpu-b8-int8.custom_k_99_9_MaxP.plan" --max_dlas=0 --slice_overlap_patch_kernel_cg_impl=false --scenario Offline --model 3d-unet
5
+ [2024-12-28 19:37:48,217 __init__.py:53 INFO] Overriding Environment
6
6
benchmark : Benchmark.UNET3D
7
7
buffer_manager_thread_count : 0
8
8
data_dir : /home/cmuser/CM/repos/local/cache/4db00c74da1e44c8/data
@@ -11,7 +11,7 @@ gpu_copy_streams : 1
11
11
gpu_inference_streams : 1
12
12
input_dtype : int8
13
13
input_format : linear
14
- log_dir : /home/cmuser/CM/repos/local/cache/94a57f78972843c6/repo/closed/NVIDIA/build/logs/2024.12.27 -19.22.10
14
+ log_dir : /home/cmuser/CM/repos/local/cache/94a57f78972843c6/repo/closed/NVIDIA/build/logs/2024.12.28 -19.37.46
15
15
map_path : data_maps/kits19/val_map.txt
16
16
mlperf_conf_path : /home/cmuser/CM/repos/local/cache/5860c00d55d14786/inference/mlperf.conf
17
17
offline_expected_qps : 0.0
@@ -25,7 +25,7 @@ test_mode : AccuracyOnly
25
25
unet3d_sw_gaussian_patch_path : /home/cmuser/CM/repos/local/cache/4db00c74da1e44c8/preprocessed_data/KiTS19/etc/gaussian_patches.npy
26
26
use_deque_limit : True
27
27
use_graphs : False
28
- user_conf_path : /home/cmuser/CM/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/41707d0ae3a44394912cc5401db89bb4 .conf
28
+ user_conf_path : /home/cmuser/CM/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/51ed06465eef4935a9103379699403b7 .conf
29
29
system_id : RTX4090x2
30
30
config_name : RTX4090x2_3d-unet_Offline
31
31
workload_setting : WorkloadSetting(HarnessType.Custom, AccuracyTarget.k_99_9, PowerSetting.MaxP)
@@ -39,18 +39,18 @@ power_limit : None
39
39
cpu_freq : None
40
40
&&&& RUNNING MLPerf_Inference_3DUNet_Harness # ./build/bin/harness_3dunet
41
41
[I] mlperf.conf path: /home/cmuser/CM/repos/local/cache/5860c00d55d14786/inference/mlperf.conf
42
- [I] user.conf path: /home/cmuser/CM/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/41707d0ae3a44394912cc5401db89bb4 .conf
42
+ [I] user.conf path: /home/cmuser/CM/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/51ed06465eef4935a9103379699403b7 .conf
43
43
Creating QSL.
44
44
Finished Creating QSL.
45
45
Setting up SUT.
46
46
[I] [TRT] Loaded engine size: 31 MiB
47
- [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +1 , GPU +8, now: CPU 86, GPU 1097 (MiB)
47
+ [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0 , GPU +8, now: CPU 86, GPU 1097 (MiB)
48
48
[I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +10, now: CPU 86, GPU 1107 (MiB)
49
49
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +29, now: CPU 0, GPU 29 (MiB)
50
50
[I] Device:0: ./build/engines/RTX4090x2/3d-unet/Offline/3d-unet-Offline-gpu-b8-int8.custom_k_99_9_MaxP.plan has been successfully loaded.
51
51
[I] [TRT] Loaded engine size: 31 MiB
52
52
[W] [TRT] Using an engine plan file across different models of devices is not recommended and is likely to affect performance or even cause errors.
53
- [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +1 , GPU +8, now: CPU 122, GPU 841 (MiB)
53
+ [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0 , GPU +8, now: CPU 122, GPU 841 (MiB)
54
54
[I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +10, now: CPU 122, GPU 851 (MiB)
55
55
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +30, now: CPU 0, GPU 59 (MiB)
56
56
[I] Device:1: ./build/engines/RTX4090x2/3d-unet/Offline/3d-unet-Offline-gpu-b8-int8.custom_k_99_9_MaxP.plan has been successfully loaded.
@@ -65,32 +65,33 @@ Setting up SUT.
65
65
[I] Creating batcher thread: 1 EnableBatcherThreadPerDevice: true
66
66
Finished setting up SUT.
67
67
Starting warmup. Running for a minimum of 5 seconds.
68
- Finished warmup. Ran for 5.42451s .
68
+ Finished warmup. Ran for 5.42915s .
69
69
Starting running actual test.
70
70
71
71
No warnings encountered during test.
72
72
73
73
No errors encountered during test.
74
74
Finished running actual test.
75
75
Device Device:0 processed:
76
- 3 batches of size 2
76
+ 4 batches of size 2
77
77
2 batches of size 3
78
78
4 batches of size 4
79
- 171 batches of size 8
79
+ 1 batches of size 5
80
+ 170 batches of size 8
80
81
Memcpy Calls: 0
81
- PerSampleCudaMemcpy Calls: 23
82
+ PerSampleCudaMemcpy Calls: 22
82
83
BatchedCudaMemcpy Calls: 0
83
84
Device Device:1 processed:
84
- 5 batches of size 2
85
+ 4 batches of size 2
85
86
3 batches of size 4
86
- 3 batches of size 5
87
- 166 batches of size 8
87
+ 2 batches of size 5
88
+ 167 batches of size 8
88
89
Memcpy Calls: 0
89
- PerSampleCudaMemcpy Calls: 20
90
+ PerSampleCudaMemcpy Calls: 21
90
91
BatchedCudaMemcpy Calls: 0
91
92
&&&& PASSED MLPerf_Inference_3DUNet_Harness # ./build/bin/harness_3dunet
92
- [2024-12-27 19:22:24,990 run_harness.py:166 INFO] Result: Accuracy run detected.
93
- [2024-12-27 19:22:24,991 __init__.py:46 INFO] Running command: python3 code/3d-unet/tensorrt/accuracy_kits.py --log_file /cm-mount/home/arjun/gh_action_results/valid_results/RTX4090x2-nvidia_original-gpu-tensorrt-vdefault-default_config/3d-unet-99.9/offline/accuracy/mlperf_log_accuracy.json
93
+ [2024-12-28 19:38:01,125 run_harness.py:166 INFO] Result: Accuracy run detected.
94
+ [2024-12-28 19:38:01,125 __init__.py:46 INFO] Running command: python3 code/3d-unet/tensorrt/accuracy_kits.py --log_file /cm-mount/home/arjun/gh_action_results/valid_results/RTX4090x2-nvidia_original-gpu-tensorrt-vdefault-default_config/3d-unet-99.9/offline/accuracy/mlperf_log_accuracy.json
94
95
Loading necessary metadata...
95
96
Loading loadgen accuracy log...
96
97
Running postprocessing...
0 commit comments