From 4c1f90aebf58ee713b2b2e75adb2dda4f1701ac6 Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Thu, 2 Nov 2023 17:11:44 +0000 Subject: [PATCH] [USM] Accept arbitrary pointers. The SYCL CTS reveals that the logic we had in clSetKernelArgMemPointerINTEL to detect whether the pointer arguments we passed in were valid, while supported by the cl_intel_unified_shared_memory specification, was causing valid SYCL programs to be rejected, and the specification had an open question on whether this is actually desired. This commit implements a change to just accept arbitrary pointer values, which allows us to also remove a bunch of USM special casing. --- source/cl/include/cl/kernel.h | 20 --- .../intel_unified_shared_memory.cpp | 9 -- .../usm-exports.cpp | 18 +-- .../extension/source/khr_command_buffer.cpp | 18 +-- source/cl/source/kernel.cpp | 148 ++++++------------ .../usm_kernel.cpp | 14 +- .../usm_arg_update.cpp | 9 +- 7 files changed, 73 insertions(+), 163 deletions(-) diff --git a/source/cl/include/cl/kernel.h b/source/cl/include/cl/kernel.h index dc12f5fbc..8e5c12179 100644 --- a/source/cl/include/cl/kernel.h +++ b/source/cl/include/cl/kernel.h @@ -262,17 +262,6 @@ struct _cl_kernel final : public cl::base<_cl_kernel> { /// @param value_size Size of the data. argument(compiler::ArgumentType type, const void *value, size_t value_size); -#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory - /// @brief Constructor to create an USM allocation argument. - /// - /// @param type Type of the kernel argument, must be a valid type to use - /// with a USM allocation, i.e a global or constant pointer. - /// @param usm_alloc USM allocation associated with parameter. - /// @param offset Byte offset from the start of the USM allocation. - argument(compiler::ArgumentType type, - extension::usm::allocation_info *usm_alloc, size_t offset); -#endif - /// @brief Copy constructor. /// /// @param other Argument to copy from. @@ -315,12 +304,6 @@ struct _cl_kernel final : public cl::base<_cl_kernel> { void *data; size_t size; } value; -#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory - struct { - extension::usm::allocation_info *usm_ptr; - size_t offset; - } usm; -#endif }; using info = compiler::KernelInfo::ArgumentInfo; @@ -336,9 +319,6 @@ struct _cl_kernel final : public cl::base<_cl_kernel> { memory_buffer, sampler, value, -#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory - usm, -#endif uninitialized }; diff --git a/source/cl/source/extension/source/intel_unified_shared_memory/intel_unified_shared_memory.cpp b/source/cl/source/extension/source/intel_unified_shared_memory/intel_unified_shared_memory.cpp index adcbcd5a6..81fdffcff 100644 --- a/source/cl/source/extension/source/intel_unified_shared_memory/intel_unified_shared_memory.cpp +++ b/source/cl/source/extension/source/intel_unified_shared_memory/intel_unified_shared_memory.cpp @@ -129,15 +129,6 @@ cl_int createBlockingEventForKernel(cl_command_queue queue, cl_kernel kernel, } return_event = *new_event; - // USM allocation set as kernel arguments - for (size_t i = 0, e = kernel->info->getNumArguments(); i < e; ++i) { - _cl_kernel::argument &arg = kernel->saved_args[i]; - if (arg.stype == _cl_kernel::argument::storage_type::usm) { - auto mux_error = arg.usm.usm_ptr->record_event(return_event); - OCL_CHECK(mux_error, return CL_OUT_OF_RESOURCES); - } - } - // USM allocations which have been set explicitly via clSetKernelExecInfo // to be used indirectly in the kernel. for (auto indirect_alloc : kernel->indirect_usm_allocs) { diff --git a/source/cl/source/extension/source/intel_unified_shared_memory/usm-exports.cpp b/source/cl/source/extension/source/intel_unified_shared_memory/usm-exports.cpp index d5e273464..e68142895 100644 --- a/source/cl/source/extension/source/intel_unified_shared_memory/usm-exports.cpp +++ b/source/cl/source/extension/source/intel_unified_shared_memory/usm-exports.cpp @@ -474,20 +474,12 @@ cl_int clSetKernelArgMemPointerINTEL(cl_kernel kernel, cl_uint arg_index, OCL_CHECK(!(arg_type->address_space == compiler::AddressSpace::GLOBAL || arg_type->address_space == compiler::AddressSpace::CONSTANT), return CL_INVALID_ARG_VALUE); - if (arg_value) { - const cl_context context = kernel->program->context; - extension::usm::allocation_info *const usm_alloc = - extension::usm::findAllocation(context, arg_value); - OCL_CHECK(nullptr == usm_alloc, return CL_INVALID_ARG_VALUE); - - const uint64_t offset = getUSMOffset(arg_value, usm_alloc); - - // Will create a mux_descriptor_info_buffer_s descriptor for the argument - // using the device specific mux_buffer assigned to the allocation. - kernel->saved_args[arg_index] = - _cl_kernel::argument(*arg_type, usm_alloc, offset); - } + // The cl_intel_unified_shared_memory specification has an open question on + // whether unknown pointers should be accepted. We accept them since the SYCL + // specification and the SYCL CTS imply this must be treated as valid. + kernel->saved_args[arg_index] = + _cl_kernel::argument(*arg_type, &arg_value, sizeof(void *)); return CL_SUCCESS; } diff --git a/source/cl/source/extension/source/khr_command_buffer.cpp b/source/cl/source/extension/source/khr_command_buffer.cpp index 58554b290..9ad05b552 100644 --- a/source/cl/source/extension/source/khr_command_buffer.cpp +++ b/source/cl/source/extension/source/khr_command_buffer.cpp @@ -1168,19 +1168,11 @@ CARGO_NODISCARD cl_int _cl_command_buffer_khr::updateCommandBuffer( // Construct Descriptor mux_descriptor_info_s descriptor; descriptor.type = - mux_descriptor_info_type_e::mux_descriptor_info_type_buffer; - - extension::usm::allocation_info *const usm_alloc = - extension::usm::findAllocation(command_queue->context, arg_value); - OCL_CHECK(nullptr == usm_alloc, return CL_INVALID_ARG_VALUE); - - const uint64_t offset = static_cast( - reinterpret_cast(arg_value) - - reinterpret_cast(usm_alloc->base_ptr)); - - auto mux_buffer = usm_alloc->getMuxBufferForDevice(device); - descriptor.buffer_descriptor = - mux_descriptor_info_buffer_s{mux_buffer, offset}; + mux_descriptor_info_type_e::mux_descriptor_info_type_plain_old_data; + void *data = new char[sizeof arg_value]; + memcpy(data, &arg_value, sizeof arg_value); + descriptor.plain_old_data_descriptor.data = data; + descriptor.plain_old_data_descriptor.length = sizeof arg_value; update_info.descriptors[update_index] = descriptor; update_index++; } diff --git a/source/cl/source/kernel.cpp b/source/cl/source/kernel.cpp index c47550050..c6a6bf6ae 100644 --- a/source/cl/source/kernel.cpp +++ b/source/cl/source/kernel.cpp @@ -46,6 +46,8 @@ mux_ndrange_options_t _cl_kernel::createKernelExecutionOptions( const std::array &global_size, mux_buffer_t printf_buffer, std::unique_ptr &descriptors) { + (void)device; + const size_t num_arguments = info->getNumArguments(); const bool printf = nullptr != printf_buffer; descriptors = std::unique_ptr( @@ -53,72 +55,48 @@ mux_ndrange_options_t _cl_kernel::createKernelExecutionOptions( for (size_t i = 0; i < num_arguments; i++) { _cl_kernel::argument &arg = saved_args[i]; -#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory - if (arg.stype == _cl_kernel::argument::storage_type::usm) { - // Find mux_buffer_t bound to USM allocation for device - auto mux_buffer = arg.usm.usm_ptr->getMuxBufferForDevice(device); - - descriptors[i].type = mux_descriptor_info_type_buffer; - descriptors[i].buffer_descriptor.buffer = mux_buffer; - descriptors[i].buffer_descriptor.offset = arg.usm.offset; - continue; - } -#endif - - switch (arg.type.kind) { - default: { - descriptors[i].type = mux_descriptor_info_type_plain_old_data; - descriptors[i].plain_old_data_descriptor.data = arg.value.data; - descriptors[i].plain_old_data_descriptor.length = arg.value.size; - break; + switch (arg.stype) { + case _cl_kernel::argument::storage_type::local_memory: { + descriptors[i].type = mux_descriptor_info_type_shared_local_buffer; + descriptors[i].shared_local_buffer_descriptor.size = + arg.local_memory_size; } - case compiler::ArgumentKind::POINTER: { - if (arg.type.address_space == compiler::AddressSpace::GLOBAL || - arg.type.address_space == compiler::AddressSpace::CONSTANT || - arg.type.address_space == compiler::AddressSpace::PRIVATE) { - if (nullptr != arg.memory_buffer) { - auto buffer = static_cast(arg.memory_buffer); - descriptors[i].type = mux_descriptor_info_type_buffer; - descriptors[i].buffer_descriptor.buffer = - buffer->mux_buffers[device_index]; - descriptors[i].buffer_descriptor.offset = 0; - } else { - descriptors[i].type = mux_descriptor_info_type_null_buffer; - } - break; - } else if (arg.type.address_space == compiler::AddressSpace::LOCAL) { - descriptors[i].type = mux_descriptor_info_type_shared_local_buffer; - descriptors[i].shared_local_buffer_descriptor.size = - arg.local_memory_size; - break; - } else { - descriptors[i].type = mux_descriptor_info_type_custom_buffer; - descriptors[i].custom_buffer_descriptor = {}; - if (mux_custom_buffer_capabilities_data & - device->mux_device->info->custom_buffer_capabilities) { - // Pass through custom buffer data. - descriptors[i].custom_buffer_descriptor.data = arg.value.data; - descriptors[i].custom_buffer_descriptor.size = arg.value.size; + continue; + + case _cl_kernel::argument::storage_type::memory_buffer: + switch (arg.type.kind) { + default: + OCL_ABORT("Unhandled argument type"); + continue; + + case compiler::ArgumentKind::POINTER: { + if (arg.memory_buffer) { + auto buffer = static_cast(arg.memory_buffer); + descriptors[i].type = mux_descriptor_info_type_buffer; + descriptors[i].buffer_descriptor.buffer = + buffer->mux_buffers[device_index]; + descriptors[i].buffer_descriptor.offset = 0; + } else { + descriptors[i].type = mux_descriptor_info_type_null_buffer; + } } - if (mux_custom_buffer_capabilities_address_space & - device->mux_device->info->custom_buffer_capabilities) { - // Pass through custom buffer address space. - descriptors[i].custom_buffer_descriptor.address_space = - arg.type.address_space; + continue; + + case compiler::ArgumentKind::IMAGE1D: // fall-through + case compiler::ArgumentKind::IMAGE1D_ARRAY: // fall-through + case compiler::ArgumentKind::IMAGE1D_BUFFER: // fall-through + case compiler::ArgumentKind::IMAGE2D_ARRAY: // fall-through + case compiler::ArgumentKind::IMAGE3D: // fall-through + case compiler::ArgumentKind::IMAGE2D: { + auto image = static_cast(arg.memory_buffer); + descriptors[i].type = mux_descriptor_info_type_image; + descriptors[i].image_descriptor.image = + image->mux_images[device_index]; } + continue; } - } break; - case compiler::ArgumentKind::IMAGE1D: // fall-through - case compiler::ArgumentKind::IMAGE1D_ARRAY: // fall-through - case compiler::ArgumentKind::IMAGE1D_BUFFER: // fall-through - case compiler::ArgumentKind::IMAGE2D_ARRAY: // fall-through - case compiler::ArgumentKind::IMAGE3D: // fall-through - case compiler::ArgumentKind::IMAGE2D: { - auto image = static_cast(arg.memory_buffer); - descriptors[i].type = mux_descriptor_info_type_image; - descriptors[i].image_descriptor.image = image->mux_images[device_index]; - } break; - case compiler::ArgumentKind::SAMPLER: { + + case _cl_kernel::argument::storage_type::sampler: { // HACK(Benie): This code is going away so hack in samplers to // Core.cpp because the target device abstraction is going to die and // there is no point putting effort into it. These hexidecimal values @@ -156,16 +134,22 @@ mux_ndrange_options_t _cl_kernel::createKernelExecutionOptions( case 0x10: descriptors[i].sampler_descriptor.sampler.filter_mode = mux_filter_mode_linear; - break; + continue; case 0x20: descriptors[i].sampler_descriptor.sampler.filter_mode = mux_filter_mode_nearest; - break; + continue; } - } break; - case compiler::ArgumentKind::UNKNOWN: - OCL_ASSERT(false, "Unhandled argument type!"); - break; + } + + case _cl_kernel::argument::storage_type::value: + descriptors[i].type = mux_descriptor_info_type_plain_old_data; + descriptors[i].plain_old_data_descriptor.data = arg.value.data; + descriptors[i].plain_old_data_descriptor.length = arg.value.size; + continue; + + case _cl_kernel::argument::storage_type::uninitialized: + continue; } } @@ -732,24 +716,6 @@ _cl_kernel::argument::argument(compiler::ArgumentType type, cl_mem mem) "Trying to create a memory argument with a non-memory type."); } -#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory -_cl_kernel::argument::argument(compiler::ArgumentType type, - extension::usm::allocation_info *usm_alloc, - size_t offset) - : type(type), stype(_cl_kernel::argument::storage_type::usm) { - OCL_ASSERT(compiler::ArgumentKind::POINTER == type.kind, - "Trying to create a USM allocation argument on a type other than " - "a pointer."); - OCL_ASSERT(type.address_space == compiler::AddressSpace::GLOBAL || - type.address_space == compiler::AddressSpace::CONSTANT, - "Trying to create a USM allocation argument from pointer type " - "without global or constant address space."); - - usm.usm_ptr = usm_alloc; - usm.offset = offset; -} -#endif - _cl_kernel::argument::argument(compiler::ArgumentType type, const void *data, size_t size) : type(type), stype(_cl_kernel::argument::storage_type::value) { @@ -779,11 +745,6 @@ _cl_kernel::argument::argument(const argument &other) std::memcpy(value.data, other.value.data, other.value.size); value.size = other.value.size; break; -#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory - case _cl_kernel::argument::storage_type::usm: - usm = other.usm; - break; -#endif case _cl_kernel::argument::storage_type::uninitialized: break; } @@ -805,11 +766,6 @@ _cl_kernel::argument::argument(argument &&other) value.data = other.value.data; value.size = other.value.size; break; -#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory - case _cl_kernel::argument::storage_type::usm: - usm = other.usm; - break; -#endif case _cl_kernel::argument::storage_type::uninitialized: break; } diff --git a/source/cl/test/UnitCL/source/cl_intel_unified_shared_memory/usm_kernel.cpp b/source/cl/test/UnitCL/source/cl_intel_unified_shared_memory/usm_kernel.cpp index 3f9f9f493..a33a7572b 100644 --- a/source/cl/test/UnitCL/source/cl_intel_unified_shared_memory/usm_kernel.cpp +++ b/source/cl/test/UnitCL/source/cl_intel_unified_shared_memory/usm_kernel.cpp @@ -67,8 +67,6 @@ struct USMKernelTest : public cl_intel_unified_shared_memory_Test { ASSERT_SUCCESS(clGetDeviceInfo( device, CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, sizeof(cross_capabilities), &cross_capabilities, nullptr)); - shared_mem_support = - (single_capabilities != 0) && (cross_capabilities != 0); } void TearDown() override { @@ -95,7 +93,6 @@ struct USMKernelTest : public cl_intel_unified_shared_memory_Test { } cl_device_unified_shared_memory_capabilities_intel host_capabilities = 0; - bool shared_mem_support = false; const size_t elements = 64; const size_t bytes = elements * sizeof(cl_int); @@ -153,11 +150,12 @@ TEST_P(USMSetKernelArgMemPointerTest, InvalidUsage) { err = clSetKernelArgMemPointerINTEL(kernel, 4, device_ptr); EXPECT_EQ_ERRCODE(err, CL_INVALID_ARG_INDEX); - if (!shared_mem_support) { - cl_uint usm_arg_index = GetParam(); - err = clSetKernelArgMemPointerINTEL(kernel, usm_arg_index, user_ptr); - EXPECT_EQ_ERRCODE(err, CL_INVALID_ARG_VALUE); - } + // The cl_intel_unified_shared_memory specification has an open question + // whether invalid pointers should result in an error. We accept this as Intel + // passes invalid pointers in valid SYCL code. + cl_uint usm_arg_index = GetParam(); + err = clSetKernelArgMemPointerINTEL(kernel, usm_arg_index, user_ptr); + ASSERT_SUCCESS(err); if (host_ptr) { err = clSetKernelArgMemPointerINTEL(kernel, 2, host_ptr); diff --git a/source/cl/test/UnitCL/source/cl_khr_command_buffer_mutable_dispatch/usm_arg_update.cpp b/source/cl/test/UnitCL/source/cl_khr_command_buffer_mutable_dispatch/usm_arg_update.cpp index 4261165db..86e5bcbaf 100644 --- a/source/cl/test/UnitCL/source/cl_khr_command_buffer_mutable_dispatch/usm_arg_update.cpp +++ b/source/cl/test/UnitCL/source/cl_khr_command_buffer_mutable_dispatch/usm_arg_update.cpp @@ -219,8 +219,6 @@ TEST_F(MutableDispatchUSMTest, InvalidArgIndex) { command_buffer, &mutable_config)); } -// Test clSetKernelMemPointerINTEL error code for CL_INVALID_ARG_VALUE if -// arg_value is not a valid argument index. TEST_F(MutableDispatchUSMTest, InvalidArgValue) { ASSERT_SUCCESS(clSetKernelArgMemPointerINTEL(kernel, 0, device_ptrs[0])); @@ -251,8 +249,11 @@ TEST_F(MutableDispatchUSMTest, InvalidArgValue) { cl_mutable_base_config_khr mutable_config{ CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, &dispatch_config}; - ASSERT_EQ_ERRCODE(CL_INVALID_ARG_VALUE, clUpdateMutableCommandsKHR( - command_buffer, &mutable_config)); + // The interaction between cl_intel_unified_shared_memory and + // cl_khr_command_buffer_mutable_dispatch is not specified but we assume that + // if clSetKernelArgMemPointerINTEL would not report invalid values, neither + // will clUpdateMutableCommandsKHR. + ASSERT_SUCCESS(clUpdateMutableCommandsKHR(command_buffer, &mutable_config)); } // Tests for updating USM arguments to a command-buffer kernel command are