opal/cuda: avoid direct access to cumem host numa memory
Signed-off-by: Akshay Venkatesh <[email protected]>

bot:notacherrypick
Akshay-Venkatesh committed Aug 13, 2024
1 parent 778476f commit dc7932b
Showing 2 changed files with 44 additions and 0 deletions.
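The change below makes mca_common_cuda_is_gpu_buffer() recognize host memory that was allocated through the CUDA virtual memory management (cuMem*) API with a host NUMA placement and treat it as pinned device memory instead of accessing it directly. As background, here is a minimal sketch of how an application might create such an allocation; it is not part of this commit, the helper name and NUMA node id are illustrative, and the access-descriptor setup may differ from what a real application needs. CU_MEM_LOCATION_TYPE_HOST_NUMA is only declared by sufficiently new CUDA toolkits, which is exactly what the configure check added in opal_check_cuda.m4 probes for.

/* Background sketch (not from this commit): allocating host NUMA memory with
 * the CUDA VMM API. Pointers backed by such allocations report
 * CU_MEMORYTYPE_HOST to cuPointerGetAttributes, which is the case the new
 * code has to detect. Error handling is omitted; node id 0 is illustrative. */
#include <cuda.h>
#include <stddef.h>

static void *alloc_host_numa(size_t bytes)
{
    CUmemAllocationProp prop = {0};
    CUmemGenericAllocationHandle handle;
    CUmemAccessDesc access = {0};
    CUdeviceptr ptr;
    size_t gran, size;

    prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
    prop.location.type = CU_MEM_LOCATION_TYPE_HOST_NUMA; /* CUDA 12.2+ */
    prop.location.id = 0;                                /* host NUMA node */

    /* Round the request up to the allocation granularity. */
    cuMemGetAllocationGranularity(&gran, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM);
    size = ((bytes + gran - 1) / gran) * gran;

    cuMemCreate(&handle, size, &prop, 0);
    cuMemAddressReserve(&ptr, size, 0, 0, 0);
    cuMemMap(ptr, size, 0, handle, 0);

    /* Grant read/write access; a real application would also add device
     * access descriptors (CU_MEM_LOCATION_TYPE_DEVICE) as needed. */
    access.location = prop.location;
    access.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
    cuMemSetAccess(ptr, size, &access, 1);

    return (void *) ptr;
}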
11 changes: 11 additions & 0 deletions config/opal_check_cuda.m4
@@ -1,5 +1,6 @@
dnl -*- shell-script -*-
dnl
dnl Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
dnl Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
dnl University Research and Technology
dnl Corporation. All rights reserved.
@@ -113,6 +114,12 @@ AS_IF([test "$opal_check_cuda_happy"="yes"],
[#include <$opal_cuda_incdir/cuda.h>]),
[])

# If we have CUDA support, check to see if we have support for cuMemCreate memory on host NUMA.
AS_IF([test "$opal_check_cuda_happy"="yes"],
[AC_CHECK_DECL([CU_MEM_LOCATION_TYPE_HOST_NUMA], [CUDA_HOST_NUMA_SUPPORT=1], [CUDA_HOST_NUMA_SUPPORT=0],
[#include <$opal_cuda_incdir/cuda.h>])],
[])

AC_MSG_CHECKING([if have cuda support])
if test "$opal_check_cuda_happy" = "yes"; then
AC_MSG_RESULT([yes (-I$opal_cuda_incdir)])
@@ -134,6 +141,10 @@ AM_CONDITIONAL([OPAL_cuda_sync_memops], [test "x$CUDA_SYNC_MEMOPS" = "x1"])
AC_DEFINE_UNQUOTED([OPAL_CUDA_SYNC_MEMOPS],$CUDA_SYNC_MEMOPS,
[Whether we have CUDA CU_POINTER_ATTRIBUTE_SYNC_MEMOPS support available])

AM_CONDITIONAL([OPAL_cuda_host_numa_support], [test "x$CUDA_HOST_NUMA_SUPPORT" = "x1"])
AC_DEFINE_UNQUOTED([OPAL_CUDA_HOST_NUMA_SUPPORT],$CUDA_HOST_NUMA_SUPPORT,
[Whether we have CU_MEM_LOCATION_TYPE_HOST_NUMA support available])

AM_CONDITIONAL([OPAL_cuda_get_attributes], [test "x$CUDA_GET_ATTRIBUTES" = "x1"])
AC_DEFINE_UNQUOTED([OPAL_CUDA_GET_ATTRIBUTES],$CUDA_GET_ATTRIBUTES,
[Whether we have CUDA cuPointerGetAttributes function available])
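The AC_CHECK_DECL probe above works by compiling a tiny test program that references the enumerator, roughly as follows (a simplified analogue of the generated conftest, not the literal autoconf output). If it compiles, configure defines OPAL_CUDA_HOST_NUMA_SUPPORT to 1, which gates the runtime changes in common_cuda.c below.

/* Simplified analogue of the AC_CHECK_DECL conftest: this compiles only if
 * cuda.h declares CU_MEM_LOCATION_TYPE_HOST_NUMA. */
#include <cuda.h>

int main(void)
{
    (void) CU_MEM_LOCATION_TYPE_HOST_NUMA;
    return 0;
}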
33 changes: 33 additions & 0 deletions opal/mca/common/cuda/common_cuda.c
@@ -1,4 +1,5 @@
/*
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
@@ -106,6 +107,10 @@ struct cudaFunctionTable {
int (*cuStreamDestroy)(CUstream);
#if OPAL_CUDA_GET_ATTRIBUTES
int (*cuPointerGetAttributes)(unsigned int, CUpointer_attribute *, void **, CUdeviceptr);
#if OPAL_CUDA_HOST_NUMA_SUPPORT
int (*cuMemRetainAllocationHandle)(CUmemGenericAllocationHandle*, void*);
int (*cuMemGetAllocationPropertiesFromHandle)(CUmemAllocationProp *, CUmemGenericAllocationHandle);
#endif
#endif /* OPAL_CUDA_GET_ATTRIBUTES */
};
typedef struct cudaFunctionTable cudaFunctionTable_t;
@@ -478,6 +483,10 @@ int mca_common_cuda_stage_one_init(void)
OPAL_CUDA_DLSYM(libcuda_handle, cuStreamDestroy);
#if OPAL_CUDA_GET_ATTRIBUTES
OPAL_CUDA_DLSYM(libcuda_handle, cuPointerGetAttributes);
#if OPAL_CUDA_HOST_NUMA_SUPPORT
OPAL_CUDA_DLSYM(libcuda_handle, cuMemRetainAllocationHandle);
OPAL_CUDA_DLSYM(libcuda_handle, cuMemGetAllocationPropertiesFromHandle);
#endif
#endif /* OPAL_CUDA_GET_ATTRIBUTES */
return 0;
}
@@ -1739,6 +1748,10 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t
CUcontext ctx = NULL, memCtx = NULL;
#if OPAL_CUDA_GET_ATTRIBUTES
uint32_t isManaged = 0;
#if OPAL_CUDA_HOST_NUMA_SUPPORT
CUmemAllocationProp prop = {};
CUmemGenericAllocationHandle alloc_handle;
#endif
/* With CUDA 7.0, we can get multiple attributes with a single call */
CUpointer_attribute attributes[3] = {CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
CU_POINTER_ATTRIBUTE_CONTEXT,
@@ -1764,7 +1777,27 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t
return 0;
} else if (memType == CU_MEMORYTYPE_HOST) {
/* Host memory, nothing to do here */
#if OPAL_CUDA_HOST_NUMA_SUPPORT
/* Check if memory is allocated using VMM API and see if host memory needs
* to be treated as pinned device memory */
res = cuFunc.cuMemRetainAllocationHandle(&alloc_handle, (void*)dbuf);
if (res != CUDA_SUCCESS) {
return 0;
}

res = cuFunc.cuMemGetAllocationPropertiesFromHandle(&prop, alloc_handle);
if (res != CUDA_SUCCESS) {
return 0;
}

if ((prop.location.type == CU_MEM_LOCATION_TYPE_HOST) ||
(prop.location.type == CU_MEM_LOCATION_TYPE_HOST_NUMA) ||
(prop.location.type == CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT)) {
memType = CU_MEMORYTYPE_DEVICE;
}
#else
return 0;
#endif
} else if (memType == 0) {
/* This can happen when CUDA is initialized but dbuf is not valid CUDA pointer */
return 0;
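
Pulled out of the diff context, the detection added to mca_common_cuda_is_gpu_buffer() follows the pattern sketched below. Unlike the excerpt above, this standalone version also drops the reference taken by cuMemRetainAllocationHandle via cuMemRelease once the properties have been read; the helper name is illustrative.

/* Sketch: returns 1 if ptr is backed by a cuMemCreate allocation placed on
 * the host (plain host, host NUMA, or the current host NUMA node), i.e.
 * memory that should be handled like pinned device memory rather than
 * accessed directly. */
#include <cuda.h>

static int is_vmm_host_memory(const void *ptr)
{
    CUmemGenericAllocationHandle handle;
    CUmemAllocationProp prop = {0};
    int on_host = 0;

    /* Fails for pointers that were not allocated through the cuMem* API. */
    if (cuMemRetainAllocationHandle(&handle, (void *) ptr) != CUDA_SUCCESS) {
        return 0;
    }

    if (cuMemGetAllocationPropertiesFromHandle(&prop, handle) == CUDA_SUCCESS) {
        on_host = (prop.location.type == CU_MEM_LOCATION_TYPE_HOST) ||
                  (prop.location.type == CU_MEM_LOCATION_TYPE_HOST_NUMA) ||
                  (prop.location.type == CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT);
    }

    /* Release the reference retained above. */
    cuMemRelease(handle);
    return on_host;
}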