generated from mlcommons/mlperf_inference_submissions
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Results from GH action on NVIDIA_RTX4090x1
- Loading branch information
1 parent
3de9ab5
commit c095160
Showing
81 changed files
with
39,964 additions
and
0 deletions.
There are no files selected for viewing
43 changes: 43 additions & 0 deletions
43
...ts/RTX4090x1-nvidia-gpu-TensorRT-default_config/retinanet/multistream/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
*Check [CM MLPerf docs](https://docs.mlcommons.org/inference) for more details.* | ||
|
||
## Host platform | ||
|
||
* OS version: Linux-6.8.0-51-generic-x86_64-with-glibc2.29 | ||
* CPU version: x86_64 | ||
* Python version: 3.8.10 (default, Jan 17 2025, 14:40:23) | ||
[GCC 9.4.0] | ||
* MLC version: unknown | ||
|
||
## CM Run Command | ||
|
||
See [CM installation guide](https://docs.mlcommons.org/inference/install/). | ||
|
||
```bash | ||
pip install -U mlcflow | ||
|
||
mlc rm cache -f | ||
|
||
mlc pull repo mlcommons@mlperf-automations --checkout=02683cf5e8beb0cc5baaf27802daafc08fe42e67 | ||
|
||
|
||
``` | ||
*Note: to use the [latest automation recipes](https://docs.mlcommons.org/inference) for MLPerf instead of the commit pinned above, | ||
remove and re-pull mlcommons@mlperf-automations without `--checkout`, then clear the MLC cache, as follows:* | ||
|
||
```bash | ||
mlc rm repo mlcommons@mlperf-automations | ||
mlc pull repo mlcommons@mlperf-automations | ||
mlc rm cache -f | ||
|
||
``` | ||
|
||
## Results | ||
|
||
Platform: RTX4090x1-nvidia-gpu-TensorRT-default_config | ||
|
||
Model Precision: int8 | ||
|
||
### Accuracy Results | ||
|
||
### Performance Results | ||
`Samples per query`: `11960473.0` |
7 changes: 7 additions & 0 deletions
7
...RT-default_config/retinanet/multistream/RTX4090x1-nvidia-gpu-TensorRT-default_config.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{ | ||
"starting_weights_filename": "https://zenodo.org/record/6617981/files/resnext50_32x4d_fpn.pth", | ||
"retraining": "no", | ||
"input_data_types": "int8", | ||
"weight_data_types": "int8", | ||
"weight_transformations": "quantization, affine fusion" | ||
} |
105 changes: 105 additions & 0 deletions
105
...s/RTX4090x1-nvidia-gpu-TensorRT-default_config/retinanet/multistream/accuracy_console.out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
[2025-01-31 13:36:22,880 main.py:229 INFO] Detected system ID: KnownSystem.ab508c0ea568 | ||
[2025-01-31 13:36:22,961 harness.py:249 INFO] The harness will load 2 plugins: ['build/plugins/NMSOptPlugin/libnmsoptplugin.so', 'build/plugins/retinanetConcatPlugin/libretinanetconcatplugin.so'] | ||
[2025-01-31 13:36:22,962 generate_conf_files.py:107 INFO] Generated measurements/ entries for ab508c0ea568_TRT/retinanet/MultiStream | ||
[2025-01-31 13:36:22,962 __init__.py:46 INFO] Running command: ./build/bin/harness_default --plugins="build/plugins/NMSOptPlugin/libnmsoptplugin.so,build/plugins/retinanetConcatPlugin/libretinanetconcatplugin.so" --logfile_outdir="/mlc-mount/home/arjun/gh_action_results/valid_results/RTX4090x1-nvidia_original-gpu-tensorrt-vdefault-default_config/retinanet/multistream/accuracy" --logfile_prefix="mlperf_log_" --performance_sample_count=64 --test_mode="AccuracyOnly" --gpu_copy_streams=1 --gpu_inference_streams=1 --use_deque_limit=true --gpu_batch_size=2 --map_path="data_maps/open-images-v6-mlperf/val_map.txt" --mlperf_conf_path="/home/mlcuser/MLC/repos/local/cache/get-git-repo_02ea1bfc/inference/mlperf.conf" --tensor_path="build/preprocessed_data/open-images-v6-mlperf/validation/Retinanet/int8_linear" --use_graphs=true --user_conf_path="/home/mlcuser/MLC/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/16e46cedee994e58a8cd7ad1a4822c10.conf" --gpu_engines="./build/engines/ab508c0ea568/retinanet/MultiStream/retinanet-MultiStream-gpu-b2-int8.lwis_k_99_MaxP.plan" --max_dlas=0 --scenario MultiStream --model retinanet --response_postprocess openimageeffnms | ||
[2025-01-31 13:36:22,962 __init__.py:53 INFO] Overriding Environment | ||
benchmark : Benchmark.Retinanet | ||
buffer_manager_thread_count : 0 | ||
data_dir : /home/mlcuser/MLC/repos/local/cache/get-mlperf-inference-nvidia-scratch-space_fe95ede4/data | ||
disable_beta1_smallk : True | ||
gpu_batch_size : 2 | ||
gpu_copy_streams : 1 | ||
gpu_inference_streams : 1 | ||
input_dtype : int8 | ||
input_format : linear | ||
log_dir : /home/mlcuser/MLC/repos/local/cache/get-git-repo_e7fa5107/repo/closed/NVIDIA/build/logs/2025.01.31-13.36.21 | ||
map_path : data_maps/open-images-v6-mlperf/val_map.txt | ||
mlperf_conf_path : /home/mlcuser/MLC/repos/local/cache/get-git-repo_02ea1bfc/inference/mlperf.conf | ||
multi_stream_expected_latency_ns : 0 | ||
multi_stream_samples_per_query : 8 | ||
multi_stream_target_latency_percentile : 99 | ||
precision : int8 | ||
preprocessed_data_dir : /home/mlcuser/MLC/repos/local/cache/get-mlperf-inference-nvidia-scratch-space_fe95ede4/preprocessed_data | ||
scenario : Scenario.MultiStream | ||
system : SystemConfiguration(host_cpu_conf=CPUConfiguration(layout={CPU(name='AMD Ryzen 9 7950X 16-Core Processor', architecture=<CPUArchitecture.x86_64: AliasedName(name='x86_64', aliases=(), patterns=())>, core_count=16, threads_per_core=2): 1}), host_mem_conf=MemoryConfiguration(host_memory_capacity=Memory(quantity=131.080068, byte_suffix=<ByteSuffix.GB: (1000, 3)>, _num_bytes=131080068000), comparison_tolerance=0.05), accelerator_conf=AcceleratorConfiguration(layout=defaultdict(<class 'int'>, {GPU(name='NVIDIA GeForce RTX 4090', accelerator_type=<AcceleratorType.Discrete: AliasedName(name='Discrete', aliases=(), patterns=())>, vram=Memory(quantity=23.98828125, byte_suffix=<ByteSuffix.GiB: (1024, 3)>, _num_bytes=25757220864), max_power_limit=450.0, pci_id='0x268410DE', compute_sm=89): 1})), numa_conf=None, system_id='ab508c0ea568') | ||
tensor_path : build/preprocessed_data/open-images-v6-mlperf/validation/Retinanet/int8_linear | ||
test_mode : AccuracyOnly | ||
use_deque_limit : True | ||
use_graphs : True | ||
user_conf_path : /home/mlcuser/MLC/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/16e46cedee994e58a8cd7ad1a4822c10.conf | ||
system_id : ab508c0ea568 | ||
config_name : ab508c0ea568_retinanet_MultiStream | ||
workload_setting : WorkloadSetting(HarnessType.LWIS, AccuracyTarget.k_99, PowerSetting.MaxP) | ||
optimization_level : plugin-enabled | ||
num_profiles : 1 | ||
config_ver : lwis_k_99_MaxP | ||
accuracy_level : 99% | ||
inference_server : lwis | ||
skip_file_checks : False | ||
power_limit : None | ||
cpu_freq : None | ||
&&&& RUNNING Default_Harness # ./build/bin/harness_default | ||
[I] mlperf.conf path: /home/mlcuser/MLC/repos/local/cache/get-git-repo_02ea1bfc/inference/mlperf.conf | ||
[I] user.conf path: /home/mlcuser/MLC/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/16e46cedee994e58a8cd7ad1a4822c10.conf | ||
Creating QSL. | ||
Finished Creating QSL. | ||
Setting up SUT. | ||
[I] [TRT] Loaded engine size: 73 MiB | ||
[I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +6, GPU +10, now: CPU 124, GPU 888 (MiB) | ||
[I] [TRT] [MemUsageChange] Init cuDNN: CPU +2, GPU +10, now: CPU 126, GPU 898 (MiB) | ||
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +68, now: CPU 0, GPU 68 (MiB) | ||
[I] Device:0.GPU: [0] ./build/engines/ab508c0ea568/retinanet/MultiStream/retinanet-MultiStream-gpu-b2-int8.lwis_k_99_MaxP.plan has been successfully loaded. | ||
[E] [TRT] 3: [runtime.cpp::~Runtime::401] Error Code 3: API Usage Error (Parameter check failed at: runtime/rt/runtime.cpp::~Runtime::401, condition: mEngineCounter.use_count() == 1 Destroying a runtime before destroying deserialized engines created by the runtime leads to undefined behavior.) | ||
[I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0, GPU +8, now: CPU 53, GPU 900 (MiB) | ||
[I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +8, now: CPU 53, GPU 908 (MiB) | ||
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +1528, now: CPU 0, GPU 1596 (MiB) | ||
[I] Start creating CUDA graphs | ||
[I] Capture 2 CUDA graphs | ||
[I] Finish creating CUDA graphs | ||
[I] Creating batcher thread: 0 EnableBatcherThreadPerDevice: false | ||
Finished setting up SUT. | ||
Starting warmup. Running for a minimum of 5 seconds. | ||
Finished warmup. Ran for 5.14309s. | ||
Starting running actual test. | ||
|
||
No warnings encountered during test. | ||
|
||
No errors encountered during test. | ||
Finished running actual test. | ||
Device Device:0.GPU processed: | ||
12392 batches of size 2 | ||
Memcpy Calls: 0 | ||
PerSampleCudaMemcpy Calls: 0 | ||
BatchedCudaMemcpy Calls: 12392 | ||
&&&& PASSED Default_Harness # ./build/bin/harness_default | ||
[2025-01-31 13:37:50,565 run_harness.py:166 INFO] Result: Accuracy run detected. | ||
[2025-01-31 13:37:50,565 __init__.py:46 INFO] Running command: python3 /home/mlcuser/MLC/repos/local/cache/get-git-repo_e7fa5107/repo/closed/NVIDIA/build/inference/vision/classification_and_detection/tools/accuracy-openimages.py --mlperf-accuracy-file /mlc-mount/home/arjun/gh_action_results/valid_results/RTX4090x1-nvidia_original-gpu-tensorrt-vdefault-default_config/retinanet/multistream/accuracy/mlperf_log_accuracy.json --openimages-dir /home/mlcuser/MLC/repos/local/cache/get-mlperf-inference-nvidia-scratch-space_fe95ede4/preprocessed_data/open-images-v6-mlperf --output-file build/retinanet-results.json | ||
loading annotations into memory... | ||
Done (t=0.45s) | ||
creating index... | ||
index created! | ||
Loading and preparing results... | ||
DONE (t=20.10s) | ||
creating index... | ||
index created! | ||
Running per image evaluation... | ||
Evaluate annotation type *bbox* | ||
DONE (t=131.75s). | ||
Accumulating evaluation results... | ||
DONE (t=34.34s). | ||
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.373 | ||
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.522 | ||
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.404 | ||
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.023 | ||
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.125 | ||
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.412 | ||
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.419 | ||
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.598 | ||
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.627 | ||
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.083 | ||
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.344 | ||
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.677 | ||
mAP=37.312% | ||
|
||
======================== Result summaries: ======================== | ||
|
27 changes: 27 additions & 0 deletions
27
...urements/RTX4090x1-nvidia-gpu-TensorRT-default_config/retinanet/multistream/cpu_info.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
{ | ||
"MLC_HOST_CPU_WRITE_PROTECT_SUPPORT": "yes", | ||
"MLC_HOST_CPU_MICROCODE": "0xa601206", | ||
"MLC_HOST_CPU_FPU_SUPPORT": "yes", | ||
"MLC_HOST_CPU_FPU_EXCEPTION_SUPPORT": "yes", | ||
"MLC_HOST_CPU_BUGS": "sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass srso", | ||
"MLC_HOST_CPU_TLB_SIZE": "3584 4K pages", | ||
"MLC_HOST_CPU_CFLUSH_SIZE": "64", | ||
"MLC_HOST_CPU_ARCHITECTURE": "x86_64", | ||
"MLC_HOST_CPU_TOTAL_CORES": "32", | ||
"MLC_HOST_CPU_ON_LINE_CPUS_LIST": "0-31", | ||
"MLC_HOST_CPU_THREADS_PER_CORE": "2", | ||
"MLC_HOST_CPU_PHYSICAL_CORES_PER_SOCKET": "16", | ||
"MLC_HOST_CPU_SOCKETS": "1", | ||
"MLC_HOST_CPU_NUMA_NODES": "1", | ||
"MLC_HOST_CPU_VENDOR_ID": "AuthenticAMD", | ||
"MLC_HOST_CPU_FAMILY": "25", | ||
"MLC_HOST_CPU_MODEL_NAME": "AMD Ryzen 9 7950X 16-Core Processor", | ||
"MLC_HOST_CPU_MAX_MHZ": "5881.0000", | ||
"MLC_HOST_CPU_L1D_CACHE_SIZE": "512 KiB", | ||
"MLC_HOST_CPU_L1I_CACHE_SIZE": "512 KiB", | ||
"MLC_HOST_CPU_L2_CACHE_SIZE": "16 MiB", | ||
"MLC_HOST_CPU_L3_CACHE_SIZE": "64 MiB", | ||
"MLC_HOST_CPU_TOTAL_LOGICAL_CORES": "32", | ||
"MLC_HOST_MEMORY_CAPACITY": "128G", | ||
"MLC_HOST_DISK_CAPACITY": "6.8T" | ||
} |
Oops, something went wrong.