Skip to content

Commit 6b5eede

Browse files
authored
Merge pull request #10799 from rakhmets/topic/direct-nic-fix-ib-send
UCT/CUDA/CUDA_COPY: Fixed mapping of DMA_BUF handle.
2 parents 881a419 + 2da859c commit 6b5eede

File tree

3 files changed

+40
-20
lines changed

3 files changed

+40
-20
lines changed

src/ucs/sys/topo/base/topo.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -929,6 +929,19 @@ ucs_status_t ucs_topo_sys_device_set_sys_dev_aux(ucs_sys_device_t sys_dev,
929929
return status;
930930
}
931931

932+
/*
 * Report whether the given system device has a sibling device recorded in
 * the global topology context.
 *
 * Returns nonzero when sys_dev is below ucs_topo_global_ctx.num_devices and
 * its sibling_sys_dev entry is not UCS_SYS_DEVICE_ID_UNKNOWN; returns 0
 * otherwise (including when sys_dev is out of range).
 */
int ucs_topo_device_has_sibling(ucs_sys_device_t sys_dev)
933+
{
934+
int result;
935+
936+
/* Read the device table only while holding the global topology lock. */
ucs_spin_lock(&ucs_topo_global_ctx.lock);
937+
/* The range check comes first so that '&&' short-circuits and the devices
 * array is never indexed with an out-of-range sys_dev. */
result = (sys_dev < ucs_topo_global_ctx.num_devices) &&
938+
(ucs_topo_global_ctx.devices[sys_dev].sibling_sys_dev !=
939+
UCS_SYS_DEVICE_ID_UNKNOWN);
940+
ucs_spin_unlock(&ucs_topo_global_ctx.lock);
941+
942+
return result;
943+
}
944+
932945
ucs_status_t
933946
ucs_topo_sys_device_set_user_value(ucs_sys_device_t sys_dev, uintptr_t value)
934947
{

src/ucs/sys/topo/base/topo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,9 @@ ucs_topo_is_reachable(ucs_sys_device_t sys_dev, ucs_sys_device_t sys_dev_mem);
305305
int ucs_topo_is_sibling(ucs_sys_device_t sys_dev, ucs_sys_device_t sys_dev_mem);
306306

307307

308+
/**
 * Check whether a system device has a sibling device recorded in the topology.
 *
 * @param [in] sys_dev  System device to check.
 *
 * @return Nonzero if @a sys_dev is within the range of known devices and its
 *         sibling device is not UCS_SYS_DEVICE_ID_UNKNOWN, 0 otherwise.
 */
int ucs_topo_device_has_sibling(ucs_sys_device_t sys_dev);
309+
310+
308311
/**
309312
* Get the number of registered system devices.
310313
*

src/uct/cuda/cuda_copy/cuda_copy_md.c

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -848,12 +848,13 @@ uct_cuda_copy_md_query_attributes(const uct_cuda_copy_md_t *md,
848848
return UCS_OK;
849849
}
850850

851-
static int uct_cuda_copy_md_get_dmabuf_fd(uintptr_t address, size_t length)
851+
static int uct_cuda_copy_md_get_dmabuf_fd(uintptr_t address, size_t length,
852+
ucs_sys_device_t sys_dev)
852853
{
853854
#if CUDA_VERSION >= 11070
855+
unsigned long long flags = 0;
854856
PFN_cuMemGetHandleForAddressRange_v11070 get_handle_func;
855857
CUresult cu_err;
856-
unsigned long long flags;
857858
int fd;
858859

859860
/* Get fxn ptr for cuMemGetHandleForAddressRange in case installed libcuda
@@ -880,31 +881,33 @@ static int uct_cuda_copy_md_get_dmabuf_fd(uintptr_t address, size_t length)
880881
#endif
881882

882883
#if CUDA_VERSION >= 12080
883-
flags = CU_MEM_RANGE_FLAG_DMA_BUF_MAPPING_TYPE_PCIE;
884-
cu_err = get_handle_func((void*)&fd, address, length,
885-
CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD, flags);
886-
if (cu_err == CUDA_SUCCESS) {
887-
goto out;
884+
/**
885+
* DMA_BUF handle mapped via PCIE BAR1 can only be used in conjunction with
886+
* mlx5dv_reg_dmabuf_mr. Other interfaces (e.g. ibv_reg_dmabuf_mr) may
887+
* successfully register the handle, but the subsequent remote ib operation
888+
* will fail.
889+
* Check if there is a sibling device to determine if the handle will be
890+
* used by a Direct NIC via mlx5dv_reg_dmabuf_mr.
891+
*/
892+
if (ucs_topo_device_has_sibling(sys_dev)) {
893+
flags = CU_MEM_RANGE_FLAG_DMA_BUF_MAPPING_TYPE_PCIE;
888894
}
889895
#endif
890896

891-
flags = 0;
892897
cu_err = get_handle_func((void*)&fd, address, length,
893898
CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD, flags);
894-
if (cu_err != CUDA_SUCCESS) {
895-
ucs_debug("cuMemGetHandleForAddressRange(address=0x%lx length=%zu "
896-
"DMA_BUF_FD) failed: %s",
897-
address, length, uct_cuda_base_cu_get_error_string(cu_err));
898-
return UCT_DMABUF_FD_INVALID;
899+
if (cu_err == CUDA_SUCCESS) {
900+
ucs_trace("dmabuf for address 0x%lx length %zu flags %llx is fd %d",
901+
address, length, flags, fd);
902+
return fd;
899903
}
900904

901-
out:
902-
ucs_trace("dmabuf for address 0x%lx length %zu flags %llx is fd %d",
903-
address, length, flags, fd);
904-
return fd;
905-
#else
906-
return UCT_DMABUF_FD_INVALID;
905+
ucs_debug("cuMemGetHandleForAddressRange(address=0x%lx length=%zu "
906+
"flags=%llx DMA_BUF_FD) failed: %s",
907+
address, length, flags,
908+
uct_cuda_base_cu_get_error_string(cu_err));
907909
#endif
910+
return UCT_DMABUF_FD_INVALID;
908911
}
909912

910913
ucs_status_t
@@ -970,7 +973,8 @@ uct_cuda_copy_md_mem_query(uct_md_h tl_md, const void *address, size_t length,
970973
ucs_get_page_size());
971974

972975
mem_attr->dmabuf_fd = uct_cuda_copy_md_get_dmabuf_fd(
973-
aligned_start, aligned_end - aligned_start);
976+
aligned_start, aligned_end - aligned_start,
977+
addr_mem_info.sys_dev);
974978
}
975979

976980
if (mem_attr->field_mask & UCT_MD_MEM_ATTR_FIELD_DMABUF_OFFSET) {

0 commit comments

Comments
 (0)