diff --git a/source/cl/include/cl/context.h b/source/cl/include/cl/context.h
index 0592c58a3..17e42fc89 100644
--- a/source/cl/include/cl/context.h
+++ b/source/cl/include/cl/context.h
@@ -204,8 +204,17 @@ struct _cl_context final : public cl::base<_cl_context> {
   /// @brief List of devices the context targets.
   cargo::dynamic_array devices;
   /// @brief Mutex to protect accesses for _cl_context::context with is not
-  /// thread safe.
+  /// thread safe except for USM, which has its own mutex. This must not be
+  /// used above a command queue mutex, as it may call a program destructor
+  /// during clean up.
   std::mutex mutex;
+
+  /// @brief Mutex to protect accesses to USM allocations. Note that due to
+  /// the nature of USM allocations and queue-related activities it sometimes
+  /// needs to be held beyond just accessing the list. It must not be locked
+  /// below the general context mutex or the queue mutex.
+  std::mutex usm_mutex;
+
   /// @brief List of the context's enabled properties.
   cargo::dynamic_array properties;
 #ifdef OCL_EXTENSION_cl_intel_unified_shared_memory
diff --git a/source/cl/source/extension/include/extension/intel_unified_shared_memory.h b/source/cl/source/extension/include/extension/intel_unified_shared_memory.h
index 7b2a5a14e..0b5046a29 100644
--- a/source/cl/source/extension/include/extension/intel_unified_shared_memory.h
+++ b/source/cl/source/extension/include/extension/intel_unified_shared_memory.h
@@ -361,6 +361,7 @@ cargo::expected parseProperties(
 /// @param[in] context Context containing list of USM allocations to search.
 /// @param[in] ptr Pointer to find an owning USM allocation for.
 ///
+/// @note This is not thread safe; the USM mutex should be locked above it.
 /// @return Pointer to matching allocation on success, or nullptr on failure.
 allocation_info *findAllocation(const cl_context context, const void *ptr);
 
diff --git a/source/cl/source/extension/source/intel_unified_shared_memory/intel_unified_shared_memory.cpp b/source/cl/source/extension/source/intel_unified_shared_memory/intel_unified_shared_memory.cpp
index 753b28761..6e82feced 100644
--- a/source/cl/source/extension/source/intel_unified_shared_memory/intel_unified_shared_memory.cpp
+++ b/source/cl/source/extension/source/intel_unified_shared_memory/intel_unified_shared_memory.cpp
@@ -86,9 +86,7 @@ cargo::expected parseProperties(
 }
 
 allocation_info *findAllocation(const cl_context context, const void *ptr) {
-  // Lock context to ensure usm allocation iterators are valid
-  std::lock_guard<std::mutex> context_guard(context->mutex);
-
+  // Note: this is not thread safe; the USM mutex must be locked above this.
   auto ownsUsmPtr = [ptr](const std::unique_ptr &usm_alloc) {
     return usm_alloc->isOwnerOf(ptr);
   };
@@ -661,6 +659,7 @@ cl_int intel_unified_shared_memory::SetKernelExecInfo(
     }
 
     const cl_context context = kernel->program->context;
+    std::lock_guard<std::mutex> context_guard(context->usm_mutex);
     for (size_t i = 0; i < num_pointers; i++) {
       indirect_allocs[i] = usm::findAllocation(context, usm_pointers[i]);
     }
diff --git a/source/cl/source/extension/source/intel_unified_shared_memory/usm-exports.cpp b/source/cl/source/extension/source/intel_unified_shared_memory/usm-exports.cpp
index aeb67fcd3..707561cac 100644
--- a/source/cl/source/extension/source/intel_unified_shared_memory/usm-exports.cpp
+++ b/source/cl/source/extension/source/intel_unified_shared_memory/usm-exports.cpp
@@ -199,7 +199,7 @@ void *clHostMemAllocINTEL(cl_context context,
   }
 
   // Lock context for pushing to list of usm allocations
-  std::lock_guard<std::mutex> context_guard(context->mutex);
+  std::lock_guard<std::mutex> context_guard(context->usm_mutex);
   if (context->usm_allocations.push_back(
           std::move(new_usm_allocation.value()))) {
     OCL_SET_IF_NOT_NULL(errcode_ret, CL_OUT_OF_HOST_MEMORY);
@@ -232,7 +232,7 @@ void *clDeviceMemAllocINTEL(cl_context context, cl_device_id device,
   }
 
   // Lock context for pushing to list of usm allocations
-  std::lock_guard<std::mutex> context_guard(context->mutex);
+  std::lock_guard<std::mutex> context_guard(context->usm_mutex);
   if (context->usm_allocations.push_back(
           std::move(new_usm_allocation.value()))) {
     OCL_SET_IF_NOT_NULL(errcode_ret, CL_OUT_OF_HOST_MEMORY);
@@ -284,7 +284,7 @@ void *clSharedMemAllocINTEL(cl_context context, cl_device_id device,
   }
 
   // Lock context for pushing to list of usm allocations
-  std::lock_guard<std::mutex> context_guard(context->mutex);
+  std::lock_guard<std::mutex> context_guard(context->usm_mutex);
   if (context->usm_allocations.push_back(
           std::move(new_usm_allocation.value()))) {
     OCL_SET_IF_NOT_NULL(errcode_ret, CL_OUT_OF_HOST_MEMORY);
@@ -302,7 +302,7 @@ cl_int clMemFreeINTEL(cl_context context, void *ptr) {
   OCL_CHECK(ptr == NULL, return CL_SUCCESS);
 
   // Lock context to ensure usm allocation iterators are valid
-  std::lock_guard<std::mutex> context_guard(context->mutex);
+  std::lock_guard<std::mutex> context_guard(context->usm_mutex);
 
   auto isUsmPtr = [ptr](const std::unique_ptr &usm_alloc) {
@@ -329,7 +329,7 @@ cl_int clMemBlockingFreeINTEL(cl_context context, void *ptr) {
   OCL_CHECK(ptr == NULL, return CL_SUCCESS);
 
   // Lock context to ensure usm allocation iterators are valid
-  std::lock_guard<std::mutex> context_guard(context->mutex);
+  std::lock_guard<std::mutex> context_guard(context->usm_mutex);
 
   auto isUsmPtr = [ptr](const std::unique_ptr &usm_alloc) {
@@ -392,6 +392,7 @@ cl_int clGetMemAllocInfoINTEL(cl_context context, const void *ptr,
   tracer::TraceGuard trace(__func__);
   OCL_CHECK(!context, return CL_INVALID_CONTEXT);
 
+  std::lock_guard<std::mutex> context_guard(context->usm_mutex);
   const extension::usm::allocation_info *const usm_alloc =
       extension::usm::findAllocation(context, ptr);
@@ -498,6 +499,8 @@ cl_int MemFillImpl(cl_command_queue command_queue, void *dst_ptr,
   }
   cl_event return_event = *new_event;
 
+  std::lock_guard<std::mutex> context_guard(command_queue->context->usm_mutex);
+
   // Find USM allocation from pointer
   extension::usm::allocation_info *usm_alloc =
       extension::usm::findAllocation(command_queue->context, dst_ptr);
@@ -618,7 +621,7 @@ cl_int clEnqueueMemcpyINTEL(cl_command_queue command_queue, cl_bool blocking,
     return new_event.error();
   }
   cl_event return_event = *new_event;
-
+  std::lock_guard<std::mutex> context_guard(command_queue->context->usm_mutex);
   cl::release_guard event_release_guard(return_event,
                                         cl::ref_count_type::EXTERNAL);
   {
@@ -783,6 +786,8 @@ cl_int clEnqueueMigrateMemINTEL(cl_command_queue command_queue, const void *ptr,
   cl::release_guard event_release_guard(return_event,
                                         cl::ref_count_type::EXTERNAL);
   {
+    std::lock_guard<std::mutex> context_guard(
+        command_queue->context->usm_mutex);
     const cl_context context = command_queue->context;
     extension::usm::allocation_info *const usm_alloc =
         extension::usm::findAllocation(context, ptr);
@@ -838,6 +843,8 @@ cl_int clEnqueueMemAdviseINTEL(cl_command_queue command_queue, const void *ptr,
   cl::release_guard event_release_guard(return_event,
                                         cl::ref_count_type::EXTERNAL);
   {
+    std::lock_guard<std::mutex> context_guard(
+        command_queue->context->usm_mutex);
     const cl_context context = command_queue->context;
     extension::usm::allocation_info *const usm_alloc =
         extension::usm::findAllocation(context, ptr);
diff --git a/source/cl/source/kernel.cpp b/source/cl/source/kernel.cpp
index c6a6bf6ae..f96bd728b 100644
--- a/source/cl/source/kernel.cpp
+++ b/source/cl/source/kernel.cpp
@@ -1568,6 +1568,11 @@ CL_API_ENTRY cl_int CL_API_CALL cl::EnqueueNDRangeKernel(
   cl_event return_event = nullptr;
   cl_int error = 0;
 #ifdef OCL_EXTENSION_cl_intel_unified_shared_memory
+  // We need to hold the USM mutex for the remainder of the function to
+  // ensure blocking operations such as clMemBlockingFreeINTEL stay fully in
+  // sync, as createBlockingEventForKernel adds to USM lists assuming that
+  // they reflect already queued events.
+  std::lock_guard<std::mutex> context_guard(command_queue->context->usm_mutex);
   error = extension::usm::createBlockingEventForKernel(
       command_queue, kernel, CL_COMMAND_NDRANGE_KERNEL, return_event);
   OCL_CHECK(error != CL_SUCCESS, return error);
@@ -1643,6 +1648,11 @@ CL_API_ENTRY cl_int CL_API_CALL cl::EnqueueTask(cl_command_queue command_queue,
   cl_event return_event = nullptr;
 
 #ifdef OCL_EXTENSION_cl_intel_unified_shared_memory
+  // We need to hold the USM mutex for the remainder of the function to
+  // ensure blocking operations such as clMemBlockingFreeINTEL stay fully in
+  // sync, as createBlockingEventForKernel adds to USM lists assuming that
+  // they reflect already queued events.
+  std::lock_guard<std::mutex> context_guard(command_queue->context->usm_mutex);
   error = extension::usm::createBlockingEventForKernel(
       command_queue, kernel, CL_COMMAND_TASK, return_event);
   OCL_CHECK(error != CL_SUCCESS, return error);
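
For context when reviewing, below is a minimal standalone sketch (not part of the patch) of the locking pattern the diff adopts around findAllocation. The names Context, Allocation, find_allocation and touch are illustrative stand-ins, not the real _cl_context or extension::usm::findAllocation API: the caller takes the USM mutex first and keeps it held while the looked-up allocation is used, so a concurrent blocking free cannot invalidate the pointer mid-use.

// Sketch only: stand-in types mirroring the patch's locking pattern, not the
// real _cl_context / extension::usm::findAllocation implementation.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <mutex>
#include <vector>

struct Allocation {
  void *base;
  std::size_t size;
  // Returns true if ptr falls inside this allocation.
  bool owns(const void *ptr) const {
    const auto p = reinterpret_cast<std::uintptr_t>(ptr);
    const auto b = reinterpret_cast<std::uintptr_t>(base);
    return p >= b && p < b + size;
  }
};

struct Context {
  std::mutex mutex;      // general context mutex
  std::mutex usm_mutex;  // guards usm_allocations; never acquire below `mutex`
  std::vector<std::unique_ptr<Allocation>> usm_allocations;
};

// Not thread safe on its own: the caller must hold ctx.usm_mutex, and keep
// holding it for as long as the returned pointer is used, because another
// thread could otherwise free the allocation and invalidate it.
Allocation *find_allocation(Context &ctx, const void *ptr) {
  const auto it = std::find_if(
      ctx.usm_allocations.begin(), ctx.usm_allocations.end(),
      [ptr](const std::unique_ptr<Allocation> &a) { return a->owns(ptr); });
  return it == ctx.usm_allocations.end() ? nullptr : it->get();
}

// Caller takes the USM mutex first, then looks up and uses the allocation
// while still holding it, the same shape as the enqueue entry points above.
bool touch(Context &ctx, const void *ptr) {
  const std::lock_guard<std::mutex> guard(ctx.usm_mutex);
  Allocation *alloc = find_allocation(ctx, ptr);
  // ... work that relies on `alloc` staying valid would go here ...
  return alloc != nullptr;
}

int main() {
  Context ctx;
  static char buffer[64];
  ctx.usm_allocations.push_back(
      std::unique_ptr<Allocation>(new Allocation{buffer, sizeof(buffer)}));
  return touch(ctx, buffer + 8) ? 0 : 1;
}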