Skip to content

Commit b59cff0

Browse files
committed
UCP/CONTEXT/WIREUP: Separate memory detection transport configuration
Move the memory detection transport configuration into a new variable, UCX_MEM_TLS. This way, setting UCX_TLS no longer affects the ability to detect GPU memory.
1 parent 6b5eede commit b59cff0

File tree

12 files changed

+279
-211
lines changed

12 files changed

+279
-211
lines changed

docs/source/faq.md

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -201,8 +201,6 @@ by `ucx_info -d` command.
201201
> **IMPORTANT NOTE**
202202
> In some cases restricting the transports can lead to unexpected and undefined behavior:
203203
> * Using *rc_verbs* or *rc_mlx5* also requires *ud_verbs* or *ud_mlx5* transport for bootstrap.
204-
> * Applications using GPU memory must also specify GPU transports for detecting and
205-
> handling non-host memory.
206204
207205
In addition to the built-in transports it's possible to use aliases which specify multiple transports.
208206

@@ -347,9 +345,6 @@ GPU memory (for example,
347345
and UCX compiled with GPU support. Then you can run the application as usual (for
348346
example, with MPI) and whenever GPU memory is passed to UCX, it either uses GPU-direct
349347
for zero copy operations, or copy the data to/from host memory.
350-
> NOTE When specifying UCX_TLS explicitly, must also specify cuda/rocm for GPU memory
351-
> support, otherwise the GPU memory will not be recognized.
352-
> For example: `UCX_TLS=rc,cuda` or `UCX_TLS=dc,rocm`
353348

354349
#### I'm running UCX with GPU memory and getting a segfault, why?
355350

src/ucp/core/ucp_context.c

Lines changed: 158 additions & 129 deletions
Large diffs are not rendered by default.

src/ucp/core/ucp_context.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,12 @@ KHASH_IMPL(ucp_context_imported_mem_hash, uint64_t, ucs_rcache_t*, 1,
3838

3939

4040
enum {
41-
/* The flag indicates that the resource may be used for auxiliary
42-
* wireup communications only */
43-
UCP_TL_RSC_FLAG_AUX = UCS_BIT(0)
41+
/* The flag indicates that the resource may be used for normal communication */
42+
UCP_TL_RSC_FLAG_COMM = UCS_BIT(0),
43+
/* The flag indicates that the resource may be used for auxiliary wireup */
44+
UCP_TL_RSC_FLAG_AUX = UCS_BIT(1),
45+
/* The flag indicates that the resource may be used for memory copy */
46+
UCP_TL_RSC_FLAG_MEM = UCS_BIT(2),
4447
};
4548

4649
#define UCP_OP_ATTR_INDEX_MASK (UCP_OP_ATTR_FLAG_NO_IMM_CMPL | \
@@ -228,6 +231,8 @@ struct ucp_config {
228231
ucs_config_names_array_t devices[UCT_DEVICE_TYPE_LAST];
229232
/** Array of transport names to use */
230233
ucs_config_allow_list_t tls;
234+
/** Array of transport names to use for memory copy */
235+
ucs_config_allow_list_t mem_tls;
231236
/** Array of protocol names to use */
232237
ucs_config_allow_list_t protos;
233238
/** Array of memory allocation methods */
@@ -587,6 +592,7 @@ typedef struct ucp_tl_iface_atomic_flags {
587592

588593
extern ucp_am_handler_t *ucp_am_handlers[];
589594
extern const char *ucp_feature_str[];
595+
extern const char *ucp_tl_rsc_flag_names[];
590596

591597

592598
void ucp_dump_payload(ucp_context_h context, char *buffer, size_t max,

src/ucp/core/ucp_worker.c

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -573,13 +573,26 @@ ucp_worker_iface_error_handler(void *arg, uct_ep_h uct_ep, ucs_status_t status)
573573
return status;
574574
}
575575

576+
static int ucp_worker_iface_can_recv_tag(const ucp_worker_iface_t *wiface)
577+
{
578+
const uint64_t cap_flags = UCT_IFACE_FLAG_CB_SYNC |
579+
UCT_IFACE_FLAG_TAG_EAGER_SHORT |
580+
UCT_IFACE_FLAG_TAG_EAGER_BCOPY |
581+
UCT_IFACE_FLAG_TAG_EAGER_ZCOPY |
582+
UCT_IFACE_FLAG_TAG_RNDV_ZCOPY;
583+
ucp_context_h context = wiface->worker->context;
584+
585+
return (wiface->attr.cap.flags & cap_flags) &&
586+
(context->tl_rscs[wiface->rsc_index].flags & UCP_TL_RSC_FLAG_COMM);
587+
}
588+
576589
void ucp_worker_iface_activate(ucp_worker_iface_t *wiface, unsigned uct_flags)
577590
{
578591
ucp_worker_h worker = wiface->worker;
579592

580-
ucs_trace("activate " UCP_WIFACE_FMT " a_count=%u a_ifaces=%u",
593+
ucs_trace("activate " UCP_WIFACE_FMT " a_count %u tag_ifaces %u",
581594
UCP_WIFACE_ARG(wiface), wiface->activate_count,
582-
worker->num_active_ifaces);
595+
worker->num_active_tag_ifaces);
583596

584597
if (wiface->activate_count++ > 0) {
585598
return; /* was already activated */
@@ -602,7 +615,9 @@ void ucp_worker_iface_activate(ucp_worker_iface_t *wiface, unsigned uct_flags)
602615
ucs_list_add_tail(&worker->arm_ifaces, &wiface->arm_list);
603616
}
604617

605-
++worker->num_active_ifaces;
618+
if (ucp_worker_iface_can_recv_tag(wiface)) {
619+
++worker->num_active_tag_ifaces;
620+
}
606621

607622
uct_iface_progress_enable(wiface->iface,
608623
UCT_PROGRESS_SEND | UCT_PROGRESS_RECV | uct_flags);
@@ -731,9 +746,9 @@ static void ucp_worker_iface_deactivate(ucp_worker_iface_t *wiface, int force)
731746
{
732747
ucp_worker_h worker = wiface->worker;
733748

734-
ucs_trace("deactivate " UCP_WIFACE_FMT " force=%d a_count=%u a_ifaces=%u",
749+
ucs_trace("deactivate " UCP_WIFACE_FMT " force=%d a_count %u tag_ifaces %u",
735750
UCP_WIFACE_ARG(wiface), force, wiface->activate_count,
736-
worker->num_active_ifaces);
751+
worker->num_active_tag_ifaces);
737752

738753
if (!force) {
739754
ucs_assertv(wiface->activate_count > 0, UCP_WIFACE_FMT,
@@ -744,7 +759,9 @@ static void ucp_worker_iface_deactivate(ucp_worker_iface_t *wiface, int force)
744759
return;
745760
}
746761

747-
--worker->num_active_ifaces;
762+
if (ucp_worker_iface_can_recv_tag(wiface)) {
763+
--worker->num_active_tag_ifaces;
764+
}
748765
}
749766

750767
/* Avoid progress on the interface to reduce overhead */
@@ -2485,17 +2502,17 @@ ucs_status_t ucp_worker_create(ucp_context_h context,
24852502
return UCS_ERR_NO_MEMORY;
24862503
}
24872504

2488-
worker->context = context;
2489-
worker->uuid = ucs_generate_uuid((uintptr_t)worker);
2490-
worker->flush_ops_count = 0;
2491-
worker->fence_seq = 0;
2492-
worker->inprogress = 0;
2493-
worker->rkey_config_count = 0;
2494-
worker->num_active_ifaces = 0;
2495-
worker->num_ifaces = 0;
2496-
worker->am_message_id = ucs_generate_uuid(0);
2497-
worker->rkey_ptr_cb_id = UCS_CALLBACKQ_ID_NULL;
2498-
worker->num_all_eps = 0;
2505+
worker->context = context;
2506+
worker->uuid = ucs_generate_uuid((uintptr_t)worker);
2507+
worker->flush_ops_count = 0;
2508+
worker->fence_seq = 0;
2509+
worker->inprogress = 0;
2510+
worker->rkey_config_count = 0;
2511+
worker->num_active_tag_ifaces = 0;
2512+
worker->num_ifaces = 0;
2513+
worker->am_message_id = ucs_generate_uuid(0);
2514+
worker->rkey_ptr_cb_id = UCS_CALLBACKQ_ID_NULL;
2515+
worker->num_all_eps = 0;
24992516
ucp_worker_keepalive_reset(worker);
25002517
ucs_queue_head_init(&worker->rkey_ptr_reqs);
25012518
ucs_list_head_init(&worker->arm_ifaces);

src/ucp/core/ucp_worker.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ typedef struct ucp_worker {
334334
ucp_worker_iface_t **ifaces; /* Array of pointers to interfaces,
335335
one for each resource */
336336
unsigned num_ifaces; /* Number of elements in ifaces array */
337-
unsigned num_active_ifaces; /* Number of activated ifaces */
337+
unsigned num_active_tag_ifaces; /* Number of activated ifaces that can receive tag messages */
338338
ucp_tl_bitmap_t scalable_tl_bitmap; /* Map of scalable tl resources */
339339
ucp_worker_cm_t *cms; /* Array of CMs, one for each component */
340340
ucs_mpool_set_t am_mps; /* Memory pool set for AM receives */

src/ucp/tag/offload.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ ucp_tag_offload_iface(ucp_worker_t *worker, ucp_tag_t tag)
5353
khiter_t hash_it;
5454
ucp_tag_t key_tag;
5555

56-
if (worker->num_active_ifaces == 1) {
56+
if (worker->num_active_tag_ifaces == 1) {
5757
ucs_assert(worker->tm.offload.iface != NULL);
5858
return worker->tm.offload.iface;
5959
}

src/ucp/tag/offload.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ ucp_tag_offload_unexp(ucp_worker_iface_t *wiface, ucp_tag_t tag, size_t length)
159159
avoid unwanted postings of receive buffers (those, which are expected to
160160
arrive from offload incapable iface) to the HW. */
161161
if (ucs_unlikely((length >= worker->tm.offload.thresh) &&
162-
(worker->num_active_ifaces > 1))) {
162+
(worker->num_active_tag_ifaces > 1))) {
163163
tag_key = worker->context->config.tag_sender_mask & tag;
164164
hash_it = kh_get(ucp_tag_offload_hash, &worker->tm.offload.tag_hash,
165165
tag_key);

src/ucp/wireup/select.c

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -410,13 +410,15 @@ static UCS_F_NOINLINE ucs_status_t ucp_wireup_select_transport(
410410
ucp_lane_index_t lane;
411411
char tls_info[256];
412412
char uct_info[256];
413+
char flags_str[64];
413414
char *p, *endp;
414415
uct_iface_attr_t *iface_attr;
415416
uct_md_attr_v2_t *md_attr;
416417
const uct_component_attr_t *cmpt_attr;
417418
int is_reachable;
418419
double score;
419420
uint8_t priority;
421+
uint8_t tl_rsc_flags;
420422
ucp_md_index_t md_index;
421423

422424
p = tls_info;
@@ -494,8 +496,14 @@ static UCS_F_NOINLINE ucs_status_t ucp_wireup_select_transport(
494496
md_attr = &context->tl_mds[md_index].attr;
495497
cmpt_attr = ucp_cmpt_attr_by_md_index(context, md_index);
496498

497-
if ((context->tl_rscs[rsc_index].flags & UCP_TL_RSC_FLAG_AUX) &&
498-
!(criteria->tl_rsc_flags & UCP_TL_RSC_FLAG_AUX)) {
499+
tl_rsc_flags = context->tl_rscs[rsc_index].flags;
500+
if (!(tl_rsc_flags & criteria->tl_rsc_flags)) {
501+
ucs_trace(UCT_TL_RESOURCE_DESC_FMT
502+
" : disabled because it doesn't support %s",
503+
UCT_TL_RESOURCE_DESC_ARG(resource),
504+
ucs_flags_str(flags_str, sizeof(flags_str),
505+
criteria->tl_rsc_flags,
506+
ucp_tl_rsc_flag_names));
499507
continue;
500508
}
501509

@@ -1083,7 +1091,9 @@ static void ucp_wireup_fill_aux_criteria(ucp_wireup_criteria_t *criteria,
10831091
ucp_wireup_fill_peer_err_criteria(criteria, ep_init_flags);
10841092
}
10851093

1086-
static void ucp_wireup_criteria_init(ucp_wireup_criteria_t *criteria)
1094+
static void
1095+
ucp_wireup_criteria_init(const ucp_wireup_select_params_t *select_params,
1096+
ucp_wireup_criteria_t *criteria)
10871097
{
10881098
criteria->title = "";
10891099
criteria->local_md_flags = 0;
@@ -1093,7 +1103,10 @@ static void ucp_wireup_criteria_init(ucp_wireup_criteria_t *criteria)
10931103
criteria->alloc_mem_types = 0;
10941104
criteria->is_keepalive = 0;
10951105
criteria->calc_score = NULL;
1096-
criteria->tl_rsc_flags = 0;
1106+
criteria->tl_rsc_flags = (select_params->ep_init_flags &
1107+
UCP_EP_INIT_FLAG_MEM_TYPE) ?
1108+
UCP_TL_RSC_FLAG_MEM :
1109+
UCP_TL_RSC_FLAG_COMM;
10971110
ucp_wireup_init_select_flags(&criteria->local_iface_flags, 0, 0);
10981111
ucp_wireup_init_select_flags(&criteria->remote_iface_flags, 0, 0);
10991112
memset(&criteria->remote_atomic_flags, 0,
@@ -1154,7 +1167,7 @@ ucp_wireup_add_rma_lanes(const ucp_wireup_select_params_t *select_params,
11541167
return UCS_OK;
11551168
}
11561169

1157-
ucp_wireup_criteria_init(&criteria);
1170+
ucp_wireup_criteria_init(select_params, &criteria);
11581171
if (ep_init_flags & UCP_EP_INIT_FLAG_MEM_TYPE) {
11591172
criteria.title = "copy across memory types";
11601173
ucp_wireup_init_select_flags(&criteria.local_iface_flags,
@@ -1221,7 +1234,7 @@ ucp_wireup_add_amo_lanes(const ucp_wireup_select_params_t *select_params,
12211234
return UCS_OK;
12221235
}
12231236

1224-
ucp_wireup_criteria_init(&criteria);
1237+
ucp_wireup_criteria_init(select_params, &criteria);
12251238
criteria.title = "atomic operations on %s memory";
12261239
criteria.local_atomic_flags = criteria.remote_atomic_flags;
12271240
criteria.calc_score = ucp_wireup_amo_score_func;
@@ -1464,13 +1477,15 @@ ucp_wireup_add_am_lane(const ucp_wireup_select_params_t *select_params,
14641477

14651478
/* Select one lane for active messages */
14661479
for (;;) {
1467-
ucp_wireup_criteria_init(&criteria);
1468-
criteria.title = "active messages";
1469-
criteria.calc_score = ucp_wireup_am_score_func;
1470-
criteria.lane_type = UCP_LANE_TYPE_AM;
1471-
criteria.tl_rsc_flags =
1472-
(ep_init_flags & UCP_EP_INIT_ALLOW_AM_AUX_TL) ?
1473-
UCP_TL_RSC_FLAG_AUX : 0;
1480+
ucp_wireup_criteria_init(select_params, &criteria);
1481+
criteria.calc_score = ucp_wireup_am_score_func;
1482+
criteria.lane_type = UCP_LANE_TYPE_AM;
1483+
if (ep_init_flags & UCP_EP_INIT_ALLOW_AM_AUX_TL) {
1484+
criteria.title = "auxiliary active messages";
1485+
criteria.tl_rsc_flags |= UCP_TL_RSC_FLAG_AUX;
1486+
} else {
1487+
criteria.title = "active messages";
1488+
}
14741489
ucp_wireup_init_select_flags(&criteria.local_iface_flags,
14751490
UCT_IFACE_FLAG_AM_BCOPY, 0);
14761491
ucp_wireup_init_select_flags(&criteria.remote_iface_flags,
@@ -1905,7 +1920,7 @@ ucp_wireup_add_am_bw_lanes(const ucp_wireup_select_params_t *select_params,
19051920
}
19061921

19071922
/* Select one lane for active messages */
1908-
ucp_wireup_criteria_init(&bw_info.criteria);
1923+
ucp_wireup_criteria_init(select_params, &bw_info.criteria);
19091924
bw_info.criteria.title = "high-bw active messages";
19101925
bw_info.criteria.calc_score = ucp_wireup_am_bw_score_func;
19111926
bw_info.criteria.lane_type = UCP_LANE_TYPE_AM_BW;
@@ -2074,7 +2089,7 @@ ucp_wireup_add_rma_bw_lanes(const ucp_wireup_select_params_t *select_params,
20742089
return UCS_OK;
20752090
}
20762091

2077-
ucp_wireup_criteria_init(&bw_info.criteria);
2092+
ucp_wireup_criteria_init(select_params, &bw_info.criteria);
20782093
bw_info.criteria.calc_score = ucp_wireup_rma_bw_score_func;
20792094
ucp_wireup_init_select_flags(&bw_info.criteria.local_iface_flags,
20802095
UCT_IFACE_FLAG_PENDING, 0);
@@ -2237,7 +2252,7 @@ ucp_wireup_add_tag_lane(const ucp_wireup_select_params_t *select_params,
22372252
return UCS_OK;
22382253
}
22392254

2240-
ucp_wireup_criteria_init(&criteria);
2255+
ucp_wireup_criteria_init(select_params, &criteria);
22412256
criteria.title = "tag_offload";
22422257
criteria.calc_score = ucp_wireup_am_score_func;
22432258
criteria.lane_type = UCP_LANE_TYPE_TAG;
@@ -2401,7 +2416,7 @@ ucp_wireup_add_keepalive_lane(const ucp_wireup_select_params_t *select_params,
24012416
tl_bitmap = &select_params->tl_bitmap;
24022417
}
24032418

2404-
ucp_wireup_criteria_init(&criteria);
2419+
ucp_wireup_criteria_init(select_params, &criteria);
24052420
criteria.title = "keepalive";
24062421
criteria.local_md_flags = 0;
24072422
criteria.is_keepalive = 1;

test/gtest/ucp/test_ucp_sockaddr.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3307,7 +3307,10 @@ class test_ucp_sockaddr_iface_activate : public test_ucp_sockaddr {
33073307
{
33083308
ucp_worker_h worker = e.worker();
33093309
for (unsigned i = 0; i < worker->num_ifaces; ++i) {
3310-
if (ucp_worker_iface_is_activated(worker->ifaces[i])) {
3310+
auto wiface = worker->ifaces[i];
3311+
if ((worker->context->tl_rscs[wiface->rsc_index].flags &
3312+
UCP_TL_RSC_FLAG_COMM) &&
3313+
ucp_worker_iface_is_activated(wiface)) {
33113314
return true;
33123315
}
33133316
}

test/gtest/ucp/test_ucp_tag.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,9 @@ class test_ucp_tag_limits : public test_ucp_tag {
431431
}
432432

433433
void init() {
434+
// Disable GPU memory support
435+
modify_config("MEM_TLS", "");
436+
434437
/* TODO: Currently all the tests are for intra-node communication only.
435438
* Find a way to create inter-node endpoint on a single node */
436439
test_ucp_tag::init();

0 commit comments

Comments
 (0)