1
- [2024-12-28 15:24:37,200 main.py:229 INFO] Detected system ID: KnownSystem.RTX4090x1
2
- [2024-12-28 15:24:37,731 generate_conf_files.py:107 INFO] Generated measurements/ entries for RTX4090x1_TRT/bert-99.9/Offline
3
- [2024-12-28 15:24:37,731 __init__.py:46 INFO] Running command: ./build/bin/harness_bert --logfile_outdir="/cm-mount/home/arjun/gh_action_results/valid_results/RTX4090x1-nvidia_original-gpu-tensorrt-vdefault-default_config/bert-99.9/offline/accuracy" --logfile_prefix="mlperf_log_" --performance_sample_count=10833 --test_mode="AccuracyOnly" --gpu_batch_size=256 --mlperf_conf_path="/home/cmuser/CM/repos/local/cache/85453ba5383a47d1 /inference/mlperf.conf" --tensor_path="build/preprocessed_data/squad_tokenized/input_ids.npy,build/preprocessed_data/squad_tokenized/segment_ids.npy,build/preprocessed_data/squad_tokenized/input_mask.npy" --use_graphs=false --user_conf_path="/home/cmuser/CM/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/b449747f5f5a4135a402e6c9015639c8 .conf" --gpu_inference_streams=2 --gpu_copy_streams=2 --gpu_engines="./build/engines/RTX4090x1/bert/Offline/bert-Offline-gpu-fp16_S_384_B_256_P_2_vs.custom_k_99_9_MaxP.plan" --scenario Offline --model bert
4
- [2024-12-28 15:24:37,731 __init__.py:53 INFO] Overriding Environment
1
+ [2024-12-31 23:51:14,202 main.py:229 INFO] Detected system ID: KnownSystem.RTX4090x1
2
+ [2024-12-31 23:51:14,747 generate_conf_files.py:107 INFO] Generated measurements/ entries for RTX4090x1_TRT/bert-99.9/Offline
3
+ [2024-12-31 23:51:14,747 __init__.py:46 INFO] Running command: ./build/bin/harness_bert --logfile_outdir="/cm-mount/home/arjun/gh_action_results/valid_results/RTX4090x1-nvidia_original-gpu-tensorrt-vdefault-default_config/bert-99.9/offline/accuracy" --logfile_prefix="mlperf_log_" --performance_sample_count=10833 --test_mode="AccuracyOnly" --gpu_batch_size=256 --mlperf_conf_path="/home/cmuser/CM/repos/local/cache/551e61f86b914205 /inference/mlperf.conf" --tensor_path="build/preprocessed_data/squad_tokenized/input_ids.npy,build/preprocessed_data/squad_tokenized/segment_ids.npy,build/preprocessed_data/squad_tokenized/input_mask.npy" --use_graphs=false --user_conf_path="/home/cmuser/CM/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/4fb75fc446ab4ac1b7d16df1c26e1ed7 .conf" --gpu_inference_streams=2 --gpu_copy_streams=2 --gpu_engines="./build/engines/RTX4090x1/bert/Offline/bert-Offline-gpu-fp16_S_384_B_256_P_2_vs.custom_k_99_9_MaxP.plan" --scenario Offline --model bert
4
+ [2024-12-31 23:51:14,747 __init__.py:53 INFO] Overriding Environment
5
5
benchmark : Benchmark.BERT
6
6
buffer_manager_thread_count : 0
7
7
coalesced_tensor : True
@@ -11,8 +11,8 @@ gpu_copy_streams : 2
11
11
gpu_inference_streams : 2
12
12
input_dtype : int32
13
13
input_format : linear
14
- log_dir : /home/cmuser/CM/repos/local/cache/ba8d5f2a6bc546f9/repo/closed/NVIDIA/build/logs/2024.12.28-15.24.32
15
- mlperf_conf_path : /home/cmuser/CM/repos/local/cache/85453ba5383a47d1 /inference/mlperf.conf
14
+ log_dir : /home/cmuser/CM/repos/local/cache/ba8d5f2a6bc546f9/repo/closed/NVIDIA/build/logs/2024.12.31-23.51.03
15
+ mlperf_conf_path : /home/cmuser/CM/repos/local/cache/551e61f86b914205 /inference/mlperf.conf
16
16
offline_expected_qps : 0.0
17
17
precision : fp16
18
18
preprocessed_data_dir : /home/cmuser/CM/repos/local/cache/a8c152aef5494496/preprocessed_data
@@ -21,7 +21,7 @@ system : SystemConfiguration(host_cpu_conf=CPUConfiguration(layout={CPU(name='AM
21
21
tensor_path : build/preprocessed_data/squad_tokenized/input_ids.npy,build/preprocessed_data/squad_tokenized/segment_ids.npy,build/preprocessed_data/squad_tokenized/input_mask.npy
22
22
test_mode : AccuracyOnly
23
23
use_graphs : False
24
- user_conf_path : /home/cmuser/CM/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/b449747f5f5a4135a402e6c9015639c8 .conf
24
+ user_conf_path : /home/cmuser/CM/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/4fb75fc446ab4ac1b7d16df1c26e1ed7 .conf
25
25
system_id : RTX4090x1
26
26
config_name : RTX4090x1_bert_Offline
27
27
workload_setting : WorkloadSetting(HarnessType.Custom, AccuracyTarget.k_99_9, PowerSetting.MaxP)
@@ -34,41 +34,41 @@ skip_file_checks : True
34
34
power_limit : None
35
35
cpu_freq : None
36
36
&&&& RUNNING BERT_HARNESS # ./build/bin/harness_bert
37
- I1228 15:24:37.834396 19732 main_bert.cc:163] Found 1 GPUs
38
- I1228 15:24:38.761662 19732 bert_server.cc:147] Engine Path: ./build/engines/RTX4090x1/bert/Offline/bert-Offline-gpu-fp16_S_384_B_256_P_2_vs.custom_k_99_9_MaxP.plan
39
- [I] [TRT] Loaded engine size: 699 MiB
40
- [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +6 , GPU +10, now: CPU 863 , GPU 1518 (MiB)
41
- [I] [TRT] [MemUsageChange] Init cuDNN: CPU +2 , GPU +10, now: CPU 865, GPU 1528 (MiB)
37
+ I1231 23:51:14.815665 19726 main_bert.cc:163] Found 1 GPUs
38
+ I1231 23:51:15.253394 19726 bert_server.cc:147] Engine Path: ./build/engines/RTX4090x1/bert/Offline/bert-Offline-gpu-fp16_S_384_B_256_P_2_vs.custom_k_99_9_MaxP.plan
39
+ [I] [TRT] Loaded engine size: 700 MiB
40
+ [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +7 , GPU +10, now: CPU 864 , GPU 1518 (MiB)
41
+ [I] [TRT] [MemUsageChange] Init cuDNN: CPU +1 , GPU +10, now: CPU 865, GPU 1528 (MiB)
42
42
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +576, now: CPU 0, GPU 576 (MiB)
43
- I1228 15:24:40.453316 19732 bert_server.cc:208] Engines Creation Completed
44
- I1228 15:24:40.507620 19732 bert_core_vs.cc:385] Engine - Device Memory requirements: 1409287680
45
- I1228 15:24:40.507630 19732 bert_core_vs.cc:393] Engine - Number of Optimization Profiles: 2
46
- I1228 15:24:40.507637 19732 bert_core_vs.cc:415] Engine - Profile 0 maxDims 98304 Bmax=256 Smax=384
43
+ I1231 23:51:19.299175 19726 bert_server.cc:208] Engines Creation Completed
44
+ I1231 23:51:19.348824 19726 bert_core_vs.cc:385] Engine - Device Memory requirements: 1409287680
45
+ I1231 23:51:19.348850 19726 bert_core_vs.cc:393] Engine - Number of Optimization Profiles: 2
46
+ I1231 23:51:19.348858 19726 bert_core_vs.cc:415] Engine - Profile 0 maxDims 98304 Bmax=256 Smax=384
47
47
[I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0, GPU +8, now: CPU 166, GPU 2866 (MiB)
48
48
[I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +8, now: CPU 166, GPU 2874 (MiB)
49
- I1228 15:24:40.577661 19732 bert_core_vs.cc:426] Setting Opt.Prof. to 0
49
+ I1231 23:51:19.440239 19726 bert_core_vs.cc:426] Setting Opt.Prof. to 0
50
50
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +1, GPU +0, now: CPU 1, GPU 576 (MiB)
51
- I1228 15:24:40.577692 19732 bert_core_vs.cc:444] Context creation complete. Max supported batchSize: 256
52
- I1228 15:24:40.579841 19732 bert_core_vs.cc:476] Setup complete
53
- I1228 15:24:40.580044 19732 bert_core_vs.cc:385] Engine - Device Memory requirements: 1409287680
54
- I1228 15:24:40.580049 19732 bert_core_vs.cc:393] Engine - Number of Optimization Profiles: 2
55
- I1228 15:24:40.580052 19732 bert_core_vs.cc:415] Engine - Profile 1 maxDims 98304 Bmax=256 Smax=384
51
+ I1231 23:51:19.440282 19726 bert_core_vs.cc:444] Context creation complete. Max supported batchSize: 256
52
+ I1231 23:51:19.442037 19726 bert_core_vs.cc:476] Setup complete
53
+ I1231 23:51:19.444537 19726 bert_core_vs.cc:385] Engine - Device Memory requirements: 1409287680
54
+ I1231 23:51:19.444542 19726 bert_core_vs.cc:393] Engine - Number of Optimization Profiles: 2
55
+ I1231 23:51:19.444546 19726 bert_core_vs.cc:415] Engine - Profile 1 maxDims 98304 Bmax=256 Smax=384
56
56
[I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0, GPU +8, now: CPU 289, GPU 4352 (MiB)
57
- [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +10, now: CPU 289, GPU 4362 (MiB)
57
+ [I] [TRT] [MemUsageChange] Init cuDNN: CPU +1, GPU +10, now: CPU 290, GPU 4362 (MiB)
58
+ I1231 23:51:19.721244 19726 bert_core_vs.cc:426] Setting Opt.Prof. to 1
58
59
[I] [TRT] Could not set default profile 0 for execution context. Profile index must be set explicitly.
59
- I1228 15:24:40.651274 19732 bert_core_vs.cc:426] Setting Opt.Prof. to 1
60
60
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +0, now: CPU 1, GPU 576 (MiB)
61
- I1228 15:24:40.651684 19732 bert_core_vs.cc:444] Context creation complete. Max supported batchSize: 256
62
- I1228 15:24:40.652937 19732 bert_core_vs.cc:476] Setup complete
63
- I1228 15:24:41.248067 19732 main_bert.cc:184] Starting running actual test.
64
- I1228 15:24:47.760198 19732 main_bert.cc:190] Finished running actual test.
61
+ I1231 23:51:19.721933 19726 bert_core_vs.cc:444] Context creation complete. Max supported batchSize: 256
62
+ I1231 23:51:19.723507 19726 bert_core_vs.cc:476] Setup complete
63
+ I1231 23:51:20.460515 19726 main_bert.cc:184] Starting running actual test.
64
+ I1231 23:51:41.768021 19726 main_bert.cc:190] Finished running actual test.
65
65
66
- No warnings encountered during test .
66
+ 3797 warnings encountered. See detailed log .
67
67
68
68
No errors encountered during test.
69
- [2024-12-28 15:24:48,231 run_harness.py:166 INFO] Result: Accuracy run detected.
70
- [2024-12-28 15:24:48,231 __init__.py:46 INFO] Running command: PYTHONPATH=code/bert/tensorrt/helpers python3 /home/cmuser/CM/repos/local/cache/ba8d5f2a6bc546f9/repo/closed/NVIDIA/build/inference/language/bert/accuracy-squad.py --log_file /cm-mount/home/arjun/gh_action_results/valid_results/RTX4090x1-nvidia_original-gpu-tensorrt-vdefault-default_config/bert-99.9/offline/accuracy/mlperf_log_accuracy.json --vocab_file build/models/bert/vocab.txt --val_data /home/cmuser/CM/repos/local/cache/a8c152aef5494496/data/squad/dev-v1.1.json --out_file /cm-mount/home/arjun/gh_action_results/valid_results/RTX4090x1-nvidia_original-gpu-tensorrt-vdefault-default_config/bert-99.9/offline/accuracy/predictions.json --output_dtype float16
71
- {"exact_match": 83.6802270577105 , "f1": 90.88066528372401 }
69
+ [2024-12-31 23:51:42,156 run_harness.py:166 INFO] Result: Accuracy run detected.
70
+ [2024-12-31 23:51:42,156 __init__.py:46 INFO] Running command: PYTHONPATH=code/bert/tensorrt/helpers python3 /home/cmuser/CM/repos/local/cache/ba8d5f2a6bc546f9/repo/closed/NVIDIA/build/inference/language/bert/accuracy-squad.py --log_file /cm-mount/home/arjun/gh_action_results/valid_results/RTX4090x1-nvidia_original-gpu-tensorrt-vdefault-default_config/bert-99.9/offline/accuracy/mlperf_log_accuracy.json --vocab_file build/models/bert/vocab.txt --val_data /home/cmuser/CM/repos/local/cache/a8c152aef5494496/data/squad/dev-v1.1.json --out_file /cm-mount/home/arjun/gh_action_results/valid_results/RTX4090x1-nvidia_original-gpu-tensorrt-vdefault-default_config/bert-99.9/offline/accuracy/predictions.json --output_dtype float16
71
+ {"exact_match": 83.67076631977294 , "f1": 90.8832407068292 }
72
72
Reading examples...
73
73
Loading cached features from 'eval_features.pickle'...
74
74
Loading LoadGen logs...
0 commit comments