Skip to content

Commit

Permalink
Merge pull request #268 from coldav/colin/fix_usm_thread_issues
Browse files Browse the repository at this point in the history
Fixed threading issues with usm and kernel execution
  • Loading branch information
coldav authored Dec 20, 2023
2 parents a19c02c + 9c915d9 commit 1e1b744
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 10 deletions.
11 changes: 10 additions & 1 deletion source/cl/include/cl/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,17 @@ struct _cl_context final : public cl::base<_cl_context> {
/// @brief List of devices the context targets.
cargo::dynamic_array<cl_device_id> devices;
/// @brief Mutex to protect accesses to _cl_context::context which is not
/// thread safe.
/// thread safe except for USM, which has its own mutex. This must not be
/// acquired above a command queue mutex, as it may call the program
/// destructor during clean-up.
std::mutex mutex;

/// @brief Mutex to protect accesses to USM allocations. Note: due to the
/// nature of USM allocations and queue-related activities, this lock
/// sometimes needs to be held beyond just accessing the list. It must not be
/// acquired below the general context mutex or the queue mutex.
std::mutex usm_mutex;

/// @brief List of the context's enabled properties.
cargo::dynamic_array<cl_context_properties> properties;
#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,7 @@ cargo::expected<cl_mem_alloc_flags_intel, cl_int> parseProperties(
/// @param[in] context Context containing list of USM allocations to search.
/// @param[in] ptr Pointer to find an owning USM allocation for.
///
/// @note this is not thread safe and a USM mutex should be used above it.
/// @return Pointer to matching allocation on success, or nullptr on failure.
allocation_info *findAllocation(const cl_context context, const void *ptr);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,7 @@ cargo::expected<cl_mem_alloc_flags_intel, cl_int> parseProperties(
}

allocation_info *findAllocation(const cl_context context, const void *ptr) {
// Lock context to ensure usm allocation iterators are valid
std::lock_guard<std::mutex> context_guard(context->mutex);

// Note: this is not thread safe; the USM mutex should be locked above this.
auto ownsUsmPtr = [ptr](const std::unique_ptr<allocation_info> &usm_alloc) {
return usm_alloc->isOwnerOf(ptr);
};
Expand Down Expand Up @@ -661,6 +659,7 @@ cl_int intel_unified_shared_memory::SetKernelExecInfo(
}

const cl_context context = kernel->program->context;
std::lock_guard<std::mutex> context_guard(context->usm_mutex);
for (size_t i = 0; i < num_pointers; i++) {
indirect_allocs[i] = usm::findAllocation(context, usm_pointers[i]);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ void *clHostMemAllocINTEL(cl_context context,
}

// Lock context for pushing to list of usm allocations
std::lock_guard<std::mutex> context_guard(context->mutex);
std::lock_guard<std::mutex> context_guard(context->usm_mutex);
if (context->usm_allocations.push_back(
std::move(new_usm_allocation.value()))) {
OCL_SET_IF_NOT_NULL(errcode_ret, CL_OUT_OF_HOST_MEMORY);
Expand Down Expand Up @@ -232,7 +232,7 @@ void *clDeviceMemAllocINTEL(cl_context context, cl_device_id device,
}

// Lock context for pushing to list of usm allocations
std::lock_guard<std::mutex> context_guard(context->mutex);
std::lock_guard<std::mutex> context_guard(context->usm_mutex);
if (context->usm_allocations.push_back(
std::move(new_usm_allocation.value()))) {
OCL_SET_IF_NOT_NULL(errcode_ret, CL_OUT_OF_HOST_MEMORY);
Expand Down Expand Up @@ -284,7 +284,7 @@ void *clSharedMemAllocINTEL(cl_context context, cl_device_id device,
}

// Lock context for pushing to list of usm allocations
std::lock_guard<std::mutex> context_guard(context->mutex);
std::lock_guard<std::mutex> context_guard(context->usm_mutex);
if (context->usm_allocations.push_back(
std::move(new_usm_allocation.value()))) {
OCL_SET_IF_NOT_NULL(errcode_ret, CL_OUT_OF_HOST_MEMORY);
Expand All @@ -302,7 +302,7 @@ cl_int clMemFreeINTEL(cl_context context, void *ptr) {
OCL_CHECK(ptr == NULL, return CL_SUCCESS);

// Lock context to ensure usm allocation iterators are valid
std::lock_guard<std::mutex> context_guard(context->mutex);
std::lock_guard<std::mutex> context_guard(context->usm_mutex);

auto isUsmPtr =
[ptr](const std::unique_ptr<extension::usm::allocation_info> &usm_alloc) {
Expand All @@ -329,7 +329,7 @@ cl_int clMemBlockingFreeINTEL(cl_context context, void *ptr) {
OCL_CHECK(ptr == NULL, return CL_SUCCESS);

// Lock context to ensure usm allocation iterators are valid
std::lock_guard<std::mutex> context_guard(context->mutex);
std::lock_guard<std::mutex> context_guard(context->usm_mutex);

auto isUsmPtr =
[ptr](const std::unique_ptr<extension::usm::allocation_info> &usm_alloc) {
Expand Down Expand Up @@ -392,6 +392,7 @@ cl_int clGetMemAllocInfoINTEL(cl_context context, const void *ptr,
tracer::TraceGuard<tracer::OpenCL> trace(__func__);

OCL_CHECK(!context, return CL_INVALID_CONTEXT);
std::lock_guard<std::mutex> context_guard(context->usm_mutex);

const extension::usm::allocation_info *const usm_alloc =
extension::usm::findAllocation(context, ptr);
Expand Down Expand Up @@ -498,6 +499,8 @@ cl_int MemFillImpl(cl_command_queue command_queue, void *dst_ptr,
}
cl_event return_event = *new_event;

std::lock_guard<std::mutex> context_guard(command_queue->context->usm_mutex);

// Find USM allocation from pointer
extension::usm::allocation_info *usm_alloc =
extension::usm::findAllocation(command_queue->context, dst_ptr);
Expand Down Expand Up @@ -618,7 +621,7 @@ cl_int clEnqueueMemcpyINTEL(cl_command_queue command_queue, cl_bool blocking,
return new_event.error();
}
cl_event return_event = *new_event;

std::lock_guard<std::mutex> context_guard(command_queue->context->usm_mutex);
cl::release_guard<cl_event> event_release_guard(return_event,
cl::ref_count_type::EXTERNAL);
{
Expand Down Expand Up @@ -783,6 +786,8 @@ cl_int clEnqueueMigrateMemINTEL(cl_command_queue command_queue, const void *ptr,
cl::release_guard<cl_event> event_release_guard(return_event,
cl::ref_count_type::EXTERNAL);
{
std::lock_guard<std::mutex> context_guard(
command_queue->context->usm_mutex);
const cl_context context = command_queue->context;
extension::usm::allocation_info *const usm_alloc =
extension::usm::findAllocation(context, ptr);
Expand Down Expand Up @@ -838,6 +843,8 @@ cl_int clEnqueueMemAdviseINTEL(cl_command_queue command_queue, const void *ptr,
cl::release_guard<cl_event> event_release_guard(return_event,
cl::ref_count_type::EXTERNAL);
{
std::lock_guard<std::mutex> context_guard(
command_queue->context->usm_mutex);
const cl_context context = command_queue->context;
extension::usm::allocation_info *const usm_alloc =
extension::usm::findAllocation(context, ptr);
Expand Down
10 changes: 10 additions & 0 deletions source/cl/source/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1568,6 +1568,11 @@ CL_API_ENTRY cl_int CL_API_CALL cl::EnqueueNDRangeKernel(
cl_event return_event = nullptr;
cl_int error = 0;
#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory
// We need to lock the context for the remainder of the function, as we need
// to ensure any blocking operations such as clMemBlockingFreeINTEL are
// entirely in sync, because createBlockingEventForKernel adds to USM lists
// assuming that they reflect already-queued events.
std::lock_guard<std::mutex> context_guard(command_queue->context->usm_mutex);
error = extension::usm::createBlockingEventForKernel(
command_queue, kernel, CL_COMMAND_NDRANGE_KERNEL, return_event);
OCL_CHECK(error != CL_SUCCESS, return error);
Expand Down Expand Up @@ -1643,6 +1648,11 @@ CL_API_ENTRY cl_int CL_API_CALL cl::EnqueueTask(cl_command_queue command_queue,

cl_event return_event = nullptr;
#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory
// We need to lock the context for the remainder of the function, as we need
// to ensure any blocking operations such as clMemBlockingFreeINTEL are
// entirely in sync, because createBlockingEventForKernel adds to USM lists
// assuming that they reflect already-queued events.
std::lock_guard<std::mutex> context_guard(command_queue->context->usm_mutex);
error = extension::usm::createBlockingEventForKernel(
command_queue, kernel, CL_COMMAND_TASK, return_event);
OCL_CHECK(error != CL_SUCCESS, return error);
Expand Down

0 comments on commit 1e1b744

Please sign in to comment.