File tree Expand file tree Collapse file tree 2 files changed +7
-7
lines changed Expand file tree Collapse file tree 2 files changed +7
-7
lines changed Original file line number Diff line number Diff line change @@ -657,7 +657,6 @@ run_ucx_perftest_cuda_device() {
657
657
ucx_perftest=" $ucx_inst /bin/ucx_perftest"
658
658
ucp_test_args=" -b $ucx_inst_ptest /test_types_ucp_device_cuda"
659
659
660
- # TODO: Run on all GPUs & NICs combinations
661
660
# TODO: Run on all GPUs & NICs combinations
662
661
ucp_client_args=" -a cuda:0 $( hostname) "
663
662
gda_tls=" cuda_copy,rc,rc_gda"
Original file line number Diff line number Diff line change @@ -11,9 +11,10 @@ ucp_device_cuda_partial_lat_1k_1thread -t ucp_put_partial_lat -m cuda -s 2
11
11
# Increase number of threads after following fixes:
12
12
# - Use thread-local memory instead of shared for requests (limit 48K)
13
13
# - Fix WQE size limit of 1024
14
- ucp_device_cuda_single_bw_1k_32threads -t ucp_put_single_bw -m cuda -s 1024 -n 10000 -T 32
15
- ucp_device_cuda_single_lat_1k_32threads -t ucp_put_single_lat -m cuda -s 1024 -n 10000 -T 32
16
- ucp_device_cuda_multi_bw_1k_32threads -t ucp_put_multi_bw -m cuda -s 256:8 -n 10000 -T 32 -O 2
17
- ucp_device_cuda_multi_lat_1k_32threads -t ucp_put_multi_lat -m cuda -s 256:8 -n 10000 -T 32 -O 2
18
- ucp_device_cuda_partial_bw_1k_32threads -t ucp_put_partial_bw -m cuda -s 256:8 -n 10000 -T 32 -O 2
19
- ucp_device_cuda_partial_lat_1k_32threads -t ucp_put_partial_lat -m cuda -s 256:8 -n 10000 -T 32 -O 2
14
+ # TODO - enable when wqe reserve is fixed.
15
+ # ucp_device_cuda_single_bw_1k_32threads -t ucp_put_single_bw -m cuda -s 1024 -n 10000 -T 32
16
+ # ucp_device_cuda_single_lat_1k_32threads -t ucp_put_single_lat -m cuda -s 1024 -n 10000 -T 32
17
+ # ucp_device_cuda_multi_bw_1k_32threads -t ucp_put_multi_bw -m cuda -s 256:8 -n 10000 -T 32 -O 2
18
+ # ucp_device_cuda_multi_lat_1k_32threads -t ucp_put_multi_lat -m cuda -s 256:8 -n 10000 -T 32 -O 2
19
+ # ucp_device_cuda_partial_bw_1k_32threads -t ucp_put_partial_bw -m cuda -s 256:8 -n 10000 -T 32 -O 2
20
+ # ucp_device_cuda_partial_lat_1k_32threads -t ucp_put_partial_lat -m cuda -s 256:8 -n 10000 -T 32 -O 2
You can’t perform that action at this time.
0 commit comments