@@ -848,12 +848,13 @@ uct_cuda_copy_md_query_attributes(const uct_cuda_copy_md_t *md,
848
848
return UCS_OK ;
849
849
}
850
850
851
- static int uct_cuda_copy_md_get_dmabuf_fd (uintptr_t address , size_t length )
851
+ static int uct_cuda_copy_md_get_dmabuf_fd (uintptr_t address , size_t length ,
852
+ ucs_sys_device_t sys_dev )
852
853
{
853
854
#if CUDA_VERSION >= 11070
855
+ unsigned long long flags = 0 ;
854
856
PFN_cuMemGetHandleForAddressRange_v11070 get_handle_func ;
855
857
CUresult cu_err ;
856
- unsigned long long flags ;
857
858
int fd ;
858
859
859
860
/* Get fxn ptr for cuMemGetHandleForAddressRange in case installed libcuda
@@ -880,31 +881,33 @@ static int uct_cuda_copy_md_get_dmabuf_fd(uintptr_t address, size_t length)
880
881
#endif
881
882
882
883
#if CUDA_VERSION >= 12080
883
- flags = CU_MEM_RANGE_FLAG_DMA_BUF_MAPPING_TYPE_PCIE ;
884
- cu_err = get_handle_func ((void * )& fd , address , length ,
885
- CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD , flags );
886
- if (cu_err == CUDA_SUCCESS ) {
887
- goto out ;
884
+ /**
885
+ * DMA_BUF handle mapped via PCIE BAR1 can only be used in conjunction with
886
+ * mlx5dv_reg_dmabuf_mr. Other interfaces (e.g. ibv_reg_dmabuf_mr) may
887
+ * successfully register the handle, but the subsequent remote ib operation
888
+ * will fail.
889
+ * Check if there is a sibling device to determine if the handle will be
890
+ * used by a Direct NIC via mlx5dv_reg_dmabuf_mr.
891
+ */
892
+ if (ucs_topo_device_has_sibling (sys_dev )) {
893
+ flags = CU_MEM_RANGE_FLAG_DMA_BUF_MAPPING_TYPE_PCIE ;
888
894
}
889
895
#endif
890
896
891
- flags = 0 ;
892
897
cu_err = get_handle_func ((void * )& fd , address , length ,
893
898
CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD , flags );
894
- if (cu_err != CUDA_SUCCESS ) {
895
- ucs_debug ("cuMemGetHandleForAddressRange(address=0x%lx length=%zu "
896
- "DMA_BUF_FD) failed: %s" ,
897
- address , length , uct_cuda_base_cu_get_error_string (cu_err ));
898
- return UCT_DMABUF_FD_INVALID ;
899
+ if (cu_err == CUDA_SUCCESS ) {
900
+ ucs_trace ("dmabuf for address 0x%lx length %zu flags %llx is fd %d" ,
901
+ address , length , flags , fd );
902
+ return fd ;
899
903
}
900
904
901
- out :
902
- ucs_trace ("dmabuf for address 0x%lx length %zu flags %llx is fd %d" ,
903
- address , length , flags , fd );
904
- return fd ;
905
- #else
906
- return UCT_DMABUF_FD_INVALID ;
905
+ ucs_debug ("cuMemGetHandleForAddressRange(address=0x%lx length=%zu "
906
+ "flags=%llx DMA_BUF_FD) failed: %s" ,
907
+ address , length , flags ,
908
+ uct_cuda_base_cu_get_error_string (cu_err ));
907
909
#endif
910
+ return UCT_DMABUF_FD_INVALID ;
908
911
}
909
912
910
913
ucs_status_t
@@ -970,7 +973,8 @@ uct_cuda_copy_md_mem_query(uct_md_h tl_md, const void *address, size_t length,
970
973
ucs_get_page_size ());
971
974
972
975
mem_attr -> dmabuf_fd = uct_cuda_copy_md_get_dmabuf_fd (
973
- aligned_start , aligned_end - aligned_start );
976
+ aligned_start , aligned_end - aligned_start ,
977
+ addr_mem_info .sys_dev );
974
978
}
975
979
976
980
if (mem_attr -> field_mask & UCT_MD_MEM_ATTR_FIELD_DMABUF_OFFSET ) {
0 commit comments