Fixed threading issues with usm and kernel execution
USM adds some extra event handling so that any freeing of USM
memory is safe. There is already some attempt to make this thread
safe, but it leaves windows where we have added events to the USM
allocations but have not yet actually added the kernel execution to
the queue. This can mean that functions such as clMemBlockingFreeINTEL
end up waiting on an event in a queue too early.

A previous fix tried to reuse the context mutex, but this had issues because
that mutex cannot be held above a queue mutex. For this reason we have added a
new USM mutex, which is expected to sit above all other queue/context mutexes
and can be held for longer. The USM mutex is held across both the USM
event creation and the push to the queue, which means that
USM deletion correctly waits.
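
The following is a minimal, self-contained sketch of the idea (ToyEvent, ToyAllocation and the free-standing usm_mutex below are illustrative stand-ins, not the runtime's real types): because one mutex is held across both the event registration and the queue push, a concurrent blocking free can never observe an event that is not yet backed by queued work.

#include <condition_variable>
#include <mutex>
#include <vector>

struct ToyEvent {
  std::mutex m;
  std::condition_variable cv;
  bool complete = false;
  void signal() {
    const std::lock_guard<std::mutex> lock(m);
    complete = true;
    cv.notify_all();
  }
  void wait() {
    std::unique_lock<std::mutex> lock(m);
    cv.wait(lock, [this] { return complete; });
  }
};

struct ToyAllocation {
  std::vector<ToyEvent *> queued_events;  // events a blocking free must wait on
};

std::mutex usm_mutex;  // stand-in for the new, outermost USM mutex

// Enqueue path: register the event on the allocation *and* hand the work to
// the queue while usm_mutex is held, so a blocking free never sees an event
// that is not yet queued.
void enqueue_kernel_using(ToyAllocation &alloc, ToyEvent &ev,
                          std::vector<ToyEvent *> &queue) {
  const std::lock_guard<std::mutex> guard(usm_mutex);
  alloc.queued_events.push_back(&ev);  // analogous to the USM event creation
  queue.push_back(&ev);                // analogous to the push to the queue
}

// Blocking free path: snapshot the events under the same mutex, then wait on
// them outside it; every event seen here corresponds to already queued work.
void blocking_free(ToyAllocation &alloc) {
  std::vector<ToyEvent *> to_wait;
  {
    const std::lock_guard<std::mutex> guard(usm_mutex);
    to_wait = alloc.queued_events;
  }
  for (ToyEvent *e : to_wait) {
    e->wait();
  }
}
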
coldav committed Dec 20, 2023
1 parent 1d872df commit 9c915d9
Showing 5 changed files with 36 additions and 10 deletions.
11 changes: 10 additions & 1 deletion source/cl/include/cl/context.h
@@ -204,8 +204,17 @@ struct _cl_context final : public cl::base<_cl_context> {
/// @brief List of devices the context targets.
cargo::dynamic_array<cl_device_id> devices;
/// @brief Mutex to protect accesses for _cl_context::context which is not
/// thread safe.
/// thread safe except for USM, which has its own mutex. This must not be
/// used above a command queue mutex, as it may call the program destructor
/// during clean up.
std::mutex mutex;

/// @brief Mutex to protect accesses to USM allocations. Note that due to the
/// nature of USM allocations and queue related activities it sometimes needs
/// to be held beyond just accessing the list. It must not be taken below the
/// general context mutex or the queue mutex.
std::mutex usm_mutex;

/// @brief List of the context's enabled properties.
cargo::dynamic_array<cl_context_properties> properties;
#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory
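
As a rough illustration of the ordering these comments describe (ToyContext and ordered_locking_example are hypothetical, not the real _cl_context), the USM mutex is the outermost lock and the narrower mutexes may only nest inside it:

#include <mutex>

struct ToyContext {
  std::mutex usm_mutex;  // outermost: may stay held across queue operations
  std::mutex mutex;      // general context mutex: never taken above usm_mutex
};

void ordered_locking_example(ToyContext &ctx) {
  const std::lock_guard<std::mutex> usm_guard(ctx.usm_mutex);  // take first
  {
    const std::lock_guard<std::mutex> ctx_guard(ctx.mutex);  // nest inside
    // ... touch context state that needs the general mutex ...
  }
  // ... safe to interact with a command queue while usm_mutex is still held ...
}
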
@@ -361,6 +361,7 @@ cargo::expected<cl_mem_alloc_flags_intel, cl_int> parseProperties(
/// @param[in] context Context containing list of USM allocations to search.
/// @param[in] ptr Pointer to find an owning USM allocation for.
///
/// @note This is not thread safe; the USM mutex should be held above it.
/// @return Pointer to matching allocation on success, or nullptr on failure.
allocation_info *findAllocation(const cl_context context, const void *ptr);

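A hedged, caller-side sketch of the convention this note describes, assuming the repository's context and USM extension headers are available; lookup_usm_allocation is an illustrative helper, not part of the extension:

// Caller-side sketch: findAllocation itself takes no lock, so the caller
// holds the context's USM mutex around the lookup and for as long as the
// returned allocation_info is used.
extension::usm::allocation_info *lookup_usm_allocation(cl_context context,
                                                       const void *ptr) {
  const std::lock_guard<std::mutex> usm_guard(context->usm_mutex);
  return extension::usm::findAllocation(context, ptr);
  // Note: the real call sites keep the guard alive while the result is in
  // use; releasing it before the caller inspects the allocation, as here,
  // is only for brevity.
}
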
@@ -86,9 +86,7 @@ cargo::expected<cl_mem_alloc_flags_intel, cl_int> parseProperties(
}

allocation_info *findAllocation(const cl_context context, const void *ptr) {
// Lock context to ensure usm allocation iterators are valid
std::lock_guard<std::mutex> context_guard(context->mutex);

// Note this is not thread safe and the usm mutex should be locked above this.
auto ownsUsmPtr = [ptr](const std::unique_ptr<allocation_info> &usm_alloc) {
return usm_alloc->isOwnerOf(ptr);
};
@@ -661,6 +659,7 @@ cl_int intel_unified_shared_memory::SetKernelExecInfo(
}

const cl_context context = kernel->program->context;
std::lock_guard<std::mutex> context_guard(context->usm_mutex);
for (size_t i = 0; i < num_pointers; i++) {
indirect_allocs[i] = usm::findAllocation(context, usm_pointers[i]);
}
@@ -199,7 +199,7 @@ void *clHostMemAllocINTEL(cl_context context,
}

// Lock context for pushing to list of usm allocations
std::lock_guard<std::mutex> context_guard(context->mutex);
std::lock_guard<std::mutex> context_guard(context->usm_mutex);
if (context->usm_allocations.push_back(
std::move(new_usm_allocation.value()))) {
OCL_SET_IF_NOT_NULL(errcode_ret, CL_OUT_OF_HOST_MEMORY);
@@ -232,7 +232,7 @@ void *clDeviceMemAllocINTEL(cl_context context, cl_device_id device,
}

// Lock context for pushing to list of usm allocations
std::lock_guard<std::mutex> context_guard(context->mutex);
std::lock_guard<std::mutex> context_guard(context->usm_mutex);
if (context->usm_allocations.push_back(
std::move(new_usm_allocation.value()))) {
OCL_SET_IF_NOT_NULL(errcode_ret, CL_OUT_OF_HOST_MEMORY);
@@ -284,7 +284,7 @@ void *clSharedMemAllocINTEL(cl_context context, cl_device_id device,
}

// Lock context for pushing to list of usm allocations
std::lock_guard<std::mutex> context_guard(context->mutex);
std::lock_guard<std::mutex> context_guard(context->usm_mutex);
if (context->usm_allocations.push_back(
std::move(new_usm_allocation.value()))) {
OCL_SET_IF_NOT_NULL(errcode_ret, CL_OUT_OF_HOST_MEMORY);
@@ -302,7 +302,7 @@ cl_int clMemFreeINTEL(cl_context context, void *ptr) {
OCL_CHECK(ptr == NULL, return CL_SUCCESS);

// Lock context to ensure usm allocation iterators are valid
std::lock_guard<std::mutex> context_guard(context->mutex);
std::lock_guard<std::mutex> context_guard(context->usm_mutex);

auto isUsmPtr =
[ptr](const std::unique_ptr<extension::usm::allocation_info> &usm_alloc) {
@@ -329,7 +329,7 @@ cl_int clMemBlockingFreeINTEL(cl_context context, void *ptr) {
OCL_CHECK(ptr == NULL, return CL_SUCCESS);

// Lock context to ensure usm allocation iterators are valid
std::lock_guard<std::mutex> context_guard(context->mutex);
std::lock_guard<std::mutex> context_guard(context->usm_mutex);

auto isUsmPtr =
[ptr](const std::unique_ptr<extension::usm::allocation_info> &usm_alloc) {
@@ -392,6 +392,7 @@ cl_int clGetMemAllocInfoINTEL(cl_context context, const void *ptr,
tracer::TraceGuard<tracer::OpenCL> trace(__func__);

OCL_CHECK(!context, return CL_INVALID_CONTEXT);
std::lock_guard<std::mutex> context_guard(context->usm_mutex);

const extension::usm::allocation_info *const usm_alloc =
extension::usm::findAllocation(context, ptr);
@@ -498,6 +499,8 @@ cl_int MemFillImpl(cl_command_queue command_queue, void *dst_ptr,
}
cl_event return_event = *new_event;

std::lock_guard<std::mutex> context_guard(command_queue->context->usm_mutex);

// Find USM allocation from pointer
extension::usm::allocation_info *usm_alloc =
extension::usm::findAllocation(command_queue->context, dst_ptr);
@@ -618,7 +621,7 @@ cl_int clEnqueueMemcpyINTEL(cl_command_queue command_queue, cl_bool blocking,
return new_event.error();
}
cl_event return_event = *new_event;

std::lock_guard<std::mutex> context_guard(command_queue->context->usm_mutex);
cl::release_guard<cl_event> event_release_guard(return_event,
cl::ref_count_type::EXTERNAL);
{
@@ -783,6 +786,8 @@ cl_int clEnqueueMigrateMemINTEL(cl_command_queue command_queue, const void *ptr,
cl::release_guard<cl_event> event_release_guard(return_event,
cl::ref_count_type::EXTERNAL);
{
std::lock_guard<std::mutex> context_guard(
command_queue->context->usm_mutex);
const cl_context context = command_queue->context;
extension::usm::allocation_info *const usm_alloc =
extension::usm::findAllocation(context, ptr);
@@ -838,6 +843,8 @@ cl_int clEnqueueMemAdviseINTEL(cl_command_queue command_queue, const void *ptr,
cl::release_guard<cl_event> event_release_guard(return_event,
cl::ref_count_type::EXTERNAL);
{
std::lock_guard<std::mutex> context_guard(
command_queue->context->usm_mutex);
const cl_context context = command_queue->context;
extension::usm::allocation_info *const usm_alloc =
extension::usm::findAllocation(context, ptr);
10 changes: 10 additions & 0 deletions source/cl/source/kernel.cpp
@@ -1568,6 +1568,11 @@ CL_API_ENTRY cl_int CL_API_CALL cl::EnqueueNDRangeKernel(
cl_event return_event = nullptr;
cl_int error = 0;
#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory
// We need to lock the context's USM mutex for the remainder of the function
// to ensure any blocking operations such as clMemBlockingFreeINTEL stay in
// sync, as createBlockingEventForKernel adds to USM lists assuming that they
// reflect already queued events.
std::lock_guard<std::mutex> context_guard(command_queue->context->usm_mutex);
error = extension::usm::createBlockingEventForKernel(
command_queue, kernel, CL_COMMAND_NDRANGE_KERNEL, return_event);
OCL_CHECK(error != CL_SUCCESS, return error);
@@ -1643,6 +1648,11 @@ CL_API_ENTRY cl_int CL_API_CALL cl::EnqueueTask(cl_command_queue command_queue,

cl_event return_event = nullptr;
#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory
// We need to lock the context's USM mutex for the remainder of the function
// to ensure any blocking operations such as clMemBlockingFreeINTEL stay in
// sync, as createBlockingEventForKernel adds to USM lists assuming that they
// reflect already queued events.
std::lock_guard<std::mutex> context_guard(command_queue->context->usm_mutex);
error = extension::usm::createBlockingEventForKernel(
command_queue, kernel, CL_COMMAND_TASK, return_event);
OCL_CHECK(error != CL_SUCCESS, return error);
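
A heavily simplified skeleton of where the new guard sits in the enqueue path, assuming the repository's headers; this is not the real cl::EnqueueNDRangeKernel, and argument validation plus the actual command push are elided:

// Sketch only: shows the USM mutex guard relative to the USM event creation
// and the eventual push of the command onto the queue.
cl_int enqueue_kernel_sketch(cl_command_queue command_queue, cl_kernel kernel) {
  cl_event return_event = nullptr;
#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory
  // Held for the remainder of the function: the USM lists updated by
  // createBlockingEventForKernel must not be observed by a blocking free
  // before the command below is actually on the queue.
  const std::lock_guard<std::mutex> context_guard(
      command_queue->context->usm_mutex);
  const cl_int error = extension::usm::createBlockingEventForKernel(
      command_queue, kernel, CL_COMMAND_NDRANGE_KERNEL, return_event);
  OCL_CHECK(error != CL_SUCCESS, return error);
#endif
  // ... ND-range setup and the push onto command_queue elided ...
  return CL_SUCCESS;
}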
