1
- [2025-01-29 23:50:31,220 main.py:229 INFO] Detected system ID: KnownSystem.e7bff0656085
2
- [2025-01-29 23:50:31,303 harness.py:249 INFO] The harness will load 2 plugins: ['build/plugins/NMSOptPlugin/libnmsoptplugin.so', 'build/plugins/retinanetConcatPlugin/libretinanetconcatplugin.so']
3
- [2025-01-29 23:50:31,304 generate_conf_files.py:107 INFO] Generated measurements/ entries for e7bff0656085_TRT /retinanet/MultiStream
4
- [2025-01-29 23:50:31,304 __init__.py:46 INFO] Running command: ./build/bin/harness_default --plugins="build/plugins/NMSOptPlugin/libnmsoptplugin.so,build/plugins/retinanetConcatPlugin/libretinanetconcatplugin.so" --logfile_outdir="/mlc-mount/home/arjun/gh_action_results/valid_results/RTX4090x2-nvidia_original-gpu-tensorrt-vdefault-default_config/retinanet/multistream/accuracy" --logfile_prefix="mlperf_log_" --performance_sample_count=64 --test_mode="AccuracyOnly" --gpu_copy_streams=1 --gpu_inference_streams=1 --use_deque_limit=true --gpu_batch_size=2 --map_path="data_maps/open-images-v6-mlperf/val_map.txt" --mlperf_conf_path="/home/mlcuser/MLC/repos/local/cache/get-git-repo_4cd85a18/inference/mlperf.conf" --tensor_path="build/preprocessed_data/open-images-v6-mlperf/validation/Retinanet/int8_linear" --use_graphs=true --user_conf_path="/home/mlcuser/MLC/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/22a091d0057b427a93d508033599dd79.conf" --gpu_engines="./build/engines/e7bff0656085/retinanet/MultiStream/retinanet-MultiStream-gpu-b2-int8.lwis_k_99_MaxP.plan" --max_dlas=0 --scenario MultiStream --model retinanet --response_postprocess openimageeffnms
5
- [2025-01-29 23:50:31,304 __init__.py:53 INFO] Overriding Environment
1
+ [2025-02-02 02:07:19,844 main.py:229 INFO] Detected system ID: KnownSystem.dd805e2fec5f
2
+ [2025-02-02 02:07:19,926 harness.py:249 INFO] The harness will load 2 plugins: ['build/plugins/NMSOptPlugin/libnmsoptplugin.so', 'build/plugins/retinanetConcatPlugin/libretinanetconcatplugin.so']
3
+ [2025-02-02 02:07:19,927 generate_conf_files.py:107 INFO] Generated measurements/ entries for dd805e2fec5f_TRT /retinanet/MultiStream
4
+ [2025-02-02 02:07:19,927 __init__.py:46 INFO] Running command: ./build/bin/harness_default --plugins="build/plugins/NMSOptPlugin/libnmsoptplugin.so,build/plugins/retinanetConcatPlugin/libretinanetconcatplugin.so" --logfile_outdir="/mlc-mount/home/arjun/gh_action_results/valid_results/RTX4090x2-nvidia_original-gpu-tensorrt-vdefault-default_config/retinanet/multistream/accuracy" --logfile_prefix="mlperf_log_" --performance_sample_count=64 --test_mode="AccuracyOnly" --gpu_copy_streams=1 --gpu_inference_streams=1 --use_deque_limit=true --gpu_batch_size=2 --map_path="data_maps/open-images-v6-mlperf/val_map.txt" --mlperf_conf_path="/home/mlcuser/MLC/repos/local/cache/get-git-repo_14157262/inference/mlperf.conf" --tensor_path="build/preprocessed_data/open-images-v6-mlperf/validation/Retinanet/int8_linear" --use_graphs=true --user_conf_path="/home/mlcuser/MLC/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/2daada4f809841509c848f618114c672.conf" --gpu_engines="./build/engines/dd805e2fec5f/retinanet/MultiStream/retinanet-MultiStream-gpu-b2-int8.lwis_k_99_MaxP.plan" --max_dlas=0 --scenario MultiStream --model retinanet --response_postprocess openimageeffnms
5
+ [2025-02-02 02:07:19,927 __init__.py:53 INFO] Overriding Environment
6
6
benchmark : Benchmark.Retinanet
7
7
buffer_manager_thread_count : 0
8
8
data_dir : /home/mlcuser/MLC/repos/local/cache/get-mlperf-inference-nvidia-scratch-space_5aab030f/data
@@ -12,23 +12,23 @@ gpu_copy_streams : 1
12
12
gpu_inference_streams : 1
13
13
input_dtype : int8
14
14
input_format : linear
15
- log_dir : /home/mlcuser/MLC/repos/local/cache/get-git-repo_8953de2a /repo/closed/NVIDIA/build/logs/2025.01.29-23.50.29
15
+ log_dir : /home/mlcuser/MLC/repos/local/cache/get-git-repo_0ab377fc /repo/closed/NVIDIA/build/logs/2025.02.02-02.07.18
16
16
map_path : data_maps/open-images-v6-mlperf/val_map.txt
17
- mlperf_conf_path : /home/mlcuser/MLC/repos/local/cache/get-git-repo_4cd85a18 /inference/mlperf.conf
17
+ mlperf_conf_path : /home/mlcuser/MLC/repos/local/cache/get-git-repo_14157262 /inference/mlperf.conf
18
18
multi_stream_expected_latency_ns : 0
19
19
multi_stream_samples_per_query : 8
20
20
multi_stream_target_latency_percentile : 99
21
21
precision : int8
22
22
preprocessed_data_dir : /home/mlcuser/MLC/repos/local/cache/get-mlperf-inference-nvidia-scratch-space_5aab030f/preprocessed_data
23
23
scenario : Scenario.MultiStream
24
- system : SystemConfiguration(host_cpu_conf=CPUConfiguration(layout={CPU(name='Intel(R) Xeon(R) w7-2495X', architecture=<CPUArchitecture.x86_64: AliasedName(name='x86_64', aliases=(), patterns=())>, core_count=24, threads_per_core=2): 1}), host_mem_conf=MemoryConfiguration(host_memory_capacity=Memory(quantity=197.33452799999998, byte_suffix=<ByteSuffix.GB: (1000, 3)>, _num_bytes=197334528000), comparison_tolerance=0.05), accelerator_conf=AcceleratorConfiguration(layout=defaultdict(<class 'int'>, {GPU(name='NVIDIA GeForce RTX 4090', accelerator_type=<AcceleratorType.Discrete: AliasedName(name='Discrete', aliases=(), patterns=())>, vram=Memory(quantity=23.98828125, byte_suffix=<ByteSuffix.GiB: (1024, 3)>, _num_bytes=25757220864), max_power_limit=450.0, pci_id='0x268410DE', compute_sm=89): 1, GPU(name='NVIDIA GeForce RTX 4090', accelerator_type=<AcceleratorType.Discrete: AliasedName(name='Discrete', aliases=(), patterns=())>, vram=Memory(quantity=23.98828125, byte_suffix=<ByteSuffix.GiB: (1024, 3)>, _num_bytes=25757220864), max_power_limit=500.0, pci_id='0x268410DE', compute_sm=89): 1})), numa_conf=NUMAConfiguration(numa_nodes={}, num_numa_nodes=1), system_id='e7bff0656085')
24
+ system : SystemConfiguration(host_cpu_conf=CPUConfiguration(layout={CPU(name='Intel(R) Xeon(R) w7-2495X', architecture=<CPUArchitecture.x86_64: AliasedName(name='x86_64', aliases=(), patterns=())>, core_count=24, threads_per_core=2): 1}), host_mem_conf=MemoryConfiguration(host_memory_capacity=Memory(quantity=197.33452799999998, byte_suffix=<ByteSuffix.GB: (1000, 3)>, _num_bytes=197334528000), comparison_tolerance=0.05), accelerator_conf=AcceleratorConfiguration(layout=defaultdict(<class 'int'>, {GPU(name='NVIDIA GeForce RTX 4090', accelerator_type=<AcceleratorType.Discrete: AliasedName(name='Discrete', aliases=(), patterns=())>, vram=Memory(quantity=23.98828125, byte_suffix=<ByteSuffix.GiB: (1024, 3)>, _num_bytes=25757220864), max_power_limit=450.0, pci_id='0x268410DE', compute_sm=89): 1, GPU(name='NVIDIA GeForce RTX 4090', accelerator_type=<AcceleratorType.Discrete: AliasedName(name='Discrete', aliases=(), patterns=())>, vram=Memory(quantity=23.98828125, byte_suffix=<ByteSuffix.GiB: (1024, 3)>, _num_bytes=25757220864), max_power_limit=500.0, pci_id='0x268410DE', compute_sm=89): 1})), numa_conf=NUMAConfiguration(numa_nodes={}, num_numa_nodes=1), system_id='dd805e2fec5f')
25
25
tensor_path : build/preprocessed_data/open-images-v6-mlperf/validation/Retinanet/int8_linear
26
26
test_mode : AccuracyOnly
27
27
use_deque_limit : True
28
28
use_graphs : True
29
- user_conf_path : /home/mlcuser/MLC/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/22a091d0057b427a93d508033599dd79 .conf
30
- system_id : e7bff0656085
31
- config_name : e7bff0656085_retinanet_MultiStream
29
+ user_conf_path : /home/mlcuser/MLC/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/2daada4f809841509c848f618114c672 .conf
30
+ system_id : dd805e2fec5f
31
+ config_name : dd805e2fec5f_retinanet_MultiStream
32
32
workload_setting : WorkloadSetting(HarnessType.LWIS, AccuracyTarget.k_99, PowerSetting.MaxP)
33
33
optimization_level : plugin-enabled
34
34
num_profiles : 1
@@ -39,82 +39,82 @@ skip_file_checks : False
39
39
power_limit : None
40
40
cpu_freq : None
41
41
&&&& RUNNING Default_Harness # ./build/bin/harness_default
42
- [I] mlperf.conf path: /home/mlcuser/MLC/repos/local/cache/get-git-repo_4cd85a18 /inference/mlperf.conf
43
- [I] user.conf path: /home/mlcuser/MLC/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/22a091d0057b427a93d508033599dd79 .conf
42
+ [I] mlperf.conf path: /home/mlcuser/MLC/repos/local/cache/get-git-repo_14157262 /inference/mlperf.conf
43
+ [I] user.conf path: /home/mlcuser/MLC/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/2daada4f809841509c848f618114c672 .conf
44
44
Creating QSL.
45
45
Finished Creating QSL.
46
46
Setting up SUT.
47
47
[I] [TRT] Loaded engine size: 73 MiB
48
- [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +7 , GPU +10, now: CPU 126 , GPU 881 (MiB)
49
- [I] [TRT] [MemUsageChange] Init cuDNN: CPU +1 , GPU +10, now: CPU 127, GPU 891 (MiB)
48
+ [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +6 , GPU +10, now: CPU 125 , GPU 881 (MiB)
49
+ [I] [TRT] [MemUsageChange] Init cuDNN: CPU +2 , GPU +10, now: CPU 127, GPU 891 (MiB)
50
50
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +68, now: CPU 0, GPU 68 (MiB)
51
- [I] Device:0.GPU: [0] ./build/engines/e7bff0656085 /retinanet/MultiStream/retinanet-MultiStream-gpu-b2-int8.lwis_k_99_MaxP.plan has been successfully loaded.
51
+ [I] Device:0.GPU: [0] ./build/engines/dd805e2fec5f /retinanet/MultiStream/retinanet-MultiStream-gpu-b2-int8.lwis_k_99_MaxP.plan has been successfully loaded.
52
52
[I] [TRT] Loaded engine size: 73 MiB
53
53
[W] [TRT] Using an engine plan file across different models of devices is not recommended and is likely to affect performance or even cause errors.
54
- [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +6, GPU +10, now: CPU 160, GPU 625 (MiB)
55
- [I] [TRT] [MemUsageChange] Init cuDNN: CPU +2 , GPU +10, now: CPU 162 , GPU 635 (MiB)
54
+ [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +6, GPU +10, now: CPU 160, GPU 624 (MiB)
55
+ [I] [TRT] [MemUsageChange] Init cuDNN: CPU +1 , GPU +10, now: CPU 161 , GPU 634 (MiB)
56
56
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +69, now: CPU 0, GPU 137 (MiB)
57
- [I] Device:1.GPU: [0] ./build/engines/e7bff0656085 /retinanet/MultiStream/retinanet-MultiStream-gpu-b2-int8.lwis_k_99_MaxP.plan has been successfully loaded.
57
+ [I] Device:1.GPU: [0] ./build/engines/dd805e2fec5f /retinanet/MultiStream/retinanet-MultiStream-gpu-b2-int8.lwis_k_99_MaxP.plan has been successfully loaded.
58
58
[E] [TRT] 3: [runtime.cpp::~Runtime::401] Error Code 3: API Usage Error (Parameter check failed at: runtime/rt/runtime.cpp::~Runtime::401, condition: mEngineCounter.use_count() == 1 Destroying a runtime before destroying deserialized engines created by the runtime leads to undefined behavior.)
59
- [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0 , GPU +8, now: CPU 89, GPU 893 (MiB)
59
+ [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +1 , GPU +8, now: CPU 89, GPU 893 (MiB)
60
60
[I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +8, now: CPU 89, GPU 901 (MiB)
61
61
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +1, GPU +1528, now: CPU 1, GPU 1665 (MiB)
62
- [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0, GPU +8, now: CPU 90 , GPU 637 (MiB)
63
- [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0 , GPU +8, now: CPU 90, GPU 645 (MiB)
64
- [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +1528 , now: CPU 1, GPU 3193 (MiB)
62
+ [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0, GPU +8, now: CPU 89 , GPU 636 (MiB)
63
+ [I] [TRT] [MemUsageChange] Init cuDNN: CPU +1 , GPU +8, now: CPU 90, GPU 644 (MiB)
64
+ [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +1527 , now: CPU 1, GPU 3192 (MiB)
65
65
[I] Start creating CUDA graphs
66
66
[I] Capture 2 CUDA graphs
67
67
[I] Capture 2 CUDA graphs
68
68
[I] Finish creating CUDA graphs
69
69
[I] Creating batcher thread: 0 EnableBatcherThreadPerDevice: false
70
70
Finished setting up SUT.
71
71
Starting warmup. Running for a minimum of 5 seconds.
72
- Finished warmup. Ran for 5.14365s .
72
+ Finished warmup. Ran for 5.14387s .
73
73
Starting running actual test.
74
74
75
75
No warnings encountered during test.
76
76
77
77
No errors encountered during test.
78
78
Finished running actual test.
79
79
Device Device:0.GPU processed:
80
- 6196 batches of size 2
80
+ 6204 batches of size 2
81
81
Memcpy Calls: 0
82
82
PerSampleCudaMemcpy Calls: 0
83
- BatchedCudaMemcpy Calls: 6196
83
+ BatchedCudaMemcpy Calls: 6204
84
84
Device Device:1.GPU processed:
85
- 6196 batches of size 2
85
+ 6188 batches of size 2
86
86
Memcpy Calls: 0
87
87
PerSampleCudaMemcpy Calls: 0
88
- BatchedCudaMemcpy Calls: 6196
88
+ BatchedCudaMemcpy Calls: 6188
89
89
&&&& PASSED Default_Harness # ./build/bin/harness_default
90
- [2025-01-29 23:51:08,313 run_harness.py:166 INFO] Result: Accuracy run detected.
91
- [2025-01-29 23:51:08,313 __init__.py:46 INFO] Running command: python3 /home/mlcuser/MLC/repos/local/cache/get-git-repo_8953de2a /repo/closed/NVIDIA/build/inference/vision/classification_and_detection/tools/accuracy-openimages.py --mlperf-accuracy-file /mlc-mount/home/arjun/gh_action_results/valid_results/RTX4090x2-nvidia_original-gpu-tensorrt-vdefault-default_config/retinanet/multistream/accuracy/mlperf_log_accuracy.json --openimages-dir /home/mlcuser/MLC/repos/local/cache/get-mlperf-inference-nvidia-scratch-space_5aab030f/preprocessed_data/open-images-v6-mlperf --output-file build/retinanet-results.json
90
+ [2025-02-02 02:07:58,440 run_harness.py:166 INFO] Result: Accuracy run detected.
91
+ [2025-02-02 02:07:58,440 __init__.py:46 INFO] Running command: python3 /home/mlcuser/MLC/repos/local/cache/get-git-repo_0ab377fc /repo/closed/NVIDIA/build/inference/vision/classification_and_detection/tools/accuracy-openimages.py --mlperf-accuracy-file /mlc-mount/home/arjun/gh_action_results/valid_results/RTX4090x2-nvidia_original-gpu-tensorrt-vdefault-default_config/retinanet/multistream/accuracy/mlperf_log_accuracy.json --openimages-dir /home/mlcuser/MLC/repos/local/cache/get-mlperf-inference-nvidia-scratch-space_5aab030f/preprocessed_data/open-images-v6-mlperf --output-file build/retinanet-results.json
92
92
loading annotations into memory...
93
- Done (t=0.50s )
93
+ Done (t=0.44s )
94
94
creating index...
95
95
index created!
96
96
Loading and preparing results...
97
- DONE (t=17.48s )
97
+ DONE (t=17.83s )
98
98
creating index...
99
99
index created!
100
100
Running per image evaluation...
101
101
Evaluate annotation type *bbox*
102
- DONE (t=132.60s ).
102
+ DONE (t=132.09s ).
103
103
Accumulating evaluation results...
104
- DONE (t=32.29s ).
104
+ DONE (t=31.97s ).
105
105
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.373
106
106
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.522
107
- Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.404
107
+ Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.403
108
108
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.023
109
109
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.125
110
110
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.413
111
111
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.419
112
- Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.598
112
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.599
113
113
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.628
114
- Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.082
115
- Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.343
116
- Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.677
117
- mAP=37.340 %
114
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.083
115
+ Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.344
116
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.678
117
+ mAP=37.330 %
118
118
119
119
======================== Result summaries: ========================
120
120
0 commit comments