Skip to content

Commit bb0794a

Browse files
authored
Merge pull request #10726 from rakhmets/topic/cuda-cpy-h2h-bw-1.19
UCT/CUDA/CUDA_COPY: H2H performance estimation is not supported - v1.19.x
2 parents 8a026df + e1285da commit bb0794a

File tree

3 files changed

+46
-1
lines changed

3 files changed

+46
-1
lines changed

src/ucp/proto/proto_common.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,21 @@ static void ucp_proto_common_tl_perf_reset(ucp_proto_common_tl_perf_t *tl_perf)
319319
tl_perf->max_frag = SIZE_MAX;
320320
}
321321

322+
/*
 * Populate the memory-type fields of a UCT performance query from the
 * protocol initialization parameters.
 *
 * @param [in]    params     Protocol init parameters; reg_mem_info.type and
 *                           super.rkey_config_key are read.
 * @param [inout] perf_attr  Performance attributes to update. Bits are OR-ed
 *                           into field_mask, so flags already set by the
 *                           caller are preserved.
 */
static void ucp_proto_common_perf_attr_set_mem_type(
323+
const ucp_proto_common_init_params_t *params,
324+
uct_perf_attr_t *perf_attr)
325+
{
326+
const ucp_rkey_config_key_t *rkey_config_key = params->super.rkey_config_key;
327+
328+
/* The local memory type is always reported, taken from reg_mem_info */
perf_attr->field_mask |= UCT_PERF_ATTR_FIELD_LOCAL_MEMORY_TYPE;
329+
perf_attr->local_memory_type = params->reg_mem_info.type;
330+
331+
/* The remote memory type is reported only when an rkey configuration key
   is present; otherwise the remote field and its mask bit are untouched */
if (rkey_config_key != NULL) {
332+
perf_attr->field_mask |= UCT_PERF_ATTR_FIELD_REMOTE_MEMORY_TYPE;
333+
perf_attr->remote_memory_type = rkey_config_key->mem_type;
334+
}
335+
}
336+
322337
ucs_status_t
323338
ucp_proto_common_get_lane_perf(const ucp_proto_common_init_params_t *params,
324339
ucp_lane_index_t lane,
@@ -367,6 +382,7 @@ ucp_proto_common_get_lane_perf(const ucp_proto_common_init_params_t *params,
367382
UCT_PERF_ATTR_FIELD_PATH_BANDWIDTH |
368383
UCT_PERF_ATTR_FIELD_LATENCY;
369384
perf_attr.operation = params->send_op;
385+
ucp_proto_common_perf_attr_set_mem_type(params, &perf_attr);
370386

371387
status = ucp_worker_iface_estimate_perf(wiface, &perf_attr);
372388
if (status != UCS_OK) {
@@ -647,6 +663,22 @@ ucp_proto_common_reg_md_map(const ucp_proto_common_init_params_t *params,
647663
return reg_md_map;
648664
}
649665

666+
/*
 * Check whether the transport interface behind a lane accepts the memory
 * types selected by the protocol parameters.
 *
 * The check is done by issuing a performance estimation query whose field
 * mask holds only the memory-type fields, and testing the returned status.
 *
 * @param [in] params  Protocol init parameters (source of the memory types
 *                     and of the worker).
 * @param [in] lane    Lane whose interface is queried.
 *
 * @return Non-zero if the query returned UCS_OK, 0 for any other status.
 */
static int ucp_proto_common_find_lanes_check_mem_type(
667+
const ucp_proto_common_init_params_t *params, ucp_lane_index_t lane)
668+
{
669+
/* Zero-initialized: no performance fields are requested by default */
uct_perf_attr_t perf_attr = {0};
670+
ucp_rsc_index_t rsc_index;
671+
ucp_worker_iface_t *wiface;
672+
673+
/* Request only the local (and, if known, remote) memory-type fields */
ucp_proto_common_perf_attr_set_mem_type(params, &perf_attr);
674+
675+
/* Resolve the lane to the worker interface that serves it */
rsc_index = ucp_proto_common_get_rsc_index(&params->super, lane);
676+
wiface = ucp_worker_iface(params->super.worker, rsc_index);
677+
/* TODO: Use memory reachability UCT API, when available, to check memory
678+
type support */
679+
/* Any status other than UCS_OK is treated as "memory types not supported" */
return uct_iface_estimate_perf(wiface->iface, &perf_attr) == UCS_OK;
680+
}
681+
650682
ucp_lane_index_t ucp_proto_common_find_lanes_with_min_frag(
651683
const ucp_proto_common_init_params_t *params, ucp_lane_type_t lane_type,
652684
uint64_t tl_cap_flags, ucp_lane_index_t max_lanes,
@@ -688,6 +720,10 @@ ucp_lane_index_t ucp_proto_common_find_lanes_with_min_frag(
688720
continue;
689721
}
690722

723+
if (!ucp_proto_common_find_lanes_check_mem_type(params, lane)) {
724+
continue;
725+
}
726+
691727
lanes[num_valid_lanes++] = lane;
692728
if (num_valid_lanes >= max_lanes) {
693729
break;

src/uct/cuda/cuda_copy/cuda_copy_iface.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,14 @@ uct_cuda_copy_estimate_perf(uct_iface_h tl_iface, uct_perf_attr_t *perf_attr)
198198
const double ss_factor = zcopy ? 1 : 0.95;
199199
uct_ppn_bandwidth_t bandwidth = {};
200200

201+
if ((src_mem_type == UCS_MEMORY_TYPE_HOST) &&
202+
(dst_mem_type == UCS_MEMORY_TYPE_HOST)) {
203+
ucs_trace("src_mem_type:%s to dst_mem_type:%s is not supported",
204+
ucs_memory_type_names[src_mem_type],
205+
ucs_memory_type_names[dst_mem_type]);
206+
return UCS_ERR_UNSUPPORTED;
207+
}
208+
201209
if (uct_perf_attr_has_bandwidth(perf_attr->field_mask)) {
202210
if (uct_ep_op_is_fetch(op)) {
203211
ucs_swap(&src_mem_type, &dst_mem_type);

test/gtest/uct/v2/test_uct_query.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@ UCS_TEST_P(test_uct_query, query_perf)
7979
UCT_PERF_ATTR_FIELD_SEND_POST_OVERHEAD |
8080
UCT_PERF_ATTR_FIELD_RECV_OVERHEAD |
8181
UCT_PERF_ATTR_FIELD_BANDWIDTH;
82-
EXPECT_EQ(iface_estimate_perf(&perf_attr), UCS_OK);
82+
EXPECT_EQ(iface_estimate_perf(&perf_attr),
83+
has_transport("cuda_copy") ? UCS_ERR_UNSUPPORTED : UCS_OK);
8384

8485
perf_attr.remote_memory_type = UCS_MEMORY_TYPE_CUDA;
8586
perf_attr.operation = UCT_EP_OP_PUT_SHORT;

0 commit comments

Comments
 (0)