Skip to content

Commit

Permalink
Merge pull request #189 from hvdijk/usm
Browse files Browse the repository at this point in the history
[USM] Accept arbitrary pointers.
  • Loading branch information
hvdijk authored Nov 6, 2023
2 parents 0b1216d + 4c1f90a commit 440c28b
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 163 deletions.
20 changes: 0 additions & 20 deletions source/cl/include/cl/kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -262,17 +262,6 @@ struct _cl_kernel final : public cl::base<_cl_kernel> {
/// @param value_size Size of the data.
argument(compiler::ArgumentType type, const void *value, size_t value_size);

#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory
/// @brief Constructor to create an USM allocation argument.
///
/// @param type Type of the kernel argument, must be a valid type to use
/// with a USM allocation, i.e a global or constant pointer.
/// @param usm_alloc USM allocation associated with parameter.
/// @param offset Byte offset from the start of the USM allocation.
argument(compiler::ArgumentType type,
extension::usm::allocation_info *usm_alloc, size_t offset);
#endif

/// @brief Copy constructor.
///
/// @param other Argument to copy from.
Expand Down Expand Up @@ -315,12 +304,6 @@ struct _cl_kernel final : public cl::base<_cl_kernel> {
void *data;
size_t size;
} value;
#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory
struct {
extension::usm::allocation_info *usm_ptr;
size_t offset;
} usm;
#endif
};

using info = compiler::KernelInfo::ArgumentInfo;
Expand All @@ -336,9 +319,6 @@ struct _cl_kernel final : public cl::base<_cl_kernel> {
memory_buffer,
sampler,
value,
#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory
usm,
#endif
uninitialized
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,15 +129,6 @@ cl_int createBlockingEventForKernel(cl_command_queue queue, cl_kernel kernel,
}
return_event = *new_event;

// USM allocation set as kernel arguments
for (size_t i = 0, e = kernel->info->getNumArguments(); i < e; ++i) {
_cl_kernel::argument &arg = kernel->saved_args[i];
if (arg.stype == _cl_kernel::argument::storage_type::usm) {
auto mux_error = arg.usm.usm_ptr->record_event(return_event);
OCL_CHECK(mux_error, return CL_OUT_OF_RESOURCES);
}
}

// USM allocations which have been set explicitly via clSetKernelExecInfo
// to be used indirectly in the kernel.
for (auto indirect_alloc : kernel->indirect_usm_allocs) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -474,20 +474,12 @@ cl_int clSetKernelArgMemPointerINTEL(cl_kernel kernel, cl_uint arg_index,
OCL_CHECK(!(arg_type->address_space == compiler::AddressSpace::GLOBAL ||
arg_type->address_space == compiler::AddressSpace::CONSTANT),
return CL_INVALID_ARG_VALUE);
if (arg_value) {
const cl_context context = kernel->program->context;

extension::usm::allocation_info *const usm_alloc =
extension::usm::findAllocation(context, arg_value);
OCL_CHECK(nullptr == usm_alloc, return CL_INVALID_ARG_VALUE);

const uint64_t offset = getUSMOffset(arg_value, usm_alloc);

// Will create a mux_descriptor_info_buffer_s descriptor for the argument
// using the device specific mux_buffer assigned to the allocation.
kernel->saved_args[arg_index] =
_cl_kernel::argument(*arg_type, usm_alloc, offset);
}
// The cl_intel_unified_shared_memory specification has an open question on
// whether unknown pointers should be accepted. We accept them since the SYCL
// specification and the SYCL CTS imply this must be treated as valid.
kernel->saved_args[arg_index] =
_cl_kernel::argument(*arg_type, &arg_value, sizeof(void *));

return CL_SUCCESS;
}
Expand Down
18 changes: 5 additions & 13 deletions source/cl/source/extension/source/khr_command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1168,19 +1168,11 @@ CARGO_NODISCARD cl_int _cl_command_buffer_khr::updateCommandBuffer(
// Construct Descriptor
mux_descriptor_info_s descriptor;
descriptor.type =
mux_descriptor_info_type_e::mux_descriptor_info_type_buffer;

extension::usm::allocation_info *const usm_alloc =
extension::usm::findAllocation(command_queue->context, arg_value);
OCL_CHECK(nullptr == usm_alloc, return CL_INVALID_ARG_VALUE);

const uint64_t offset = static_cast<uint64_t>(
reinterpret_cast<uintptr_t>(arg_value) -
reinterpret_cast<uintptr_t>(usm_alloc->base_ptr));

auto mux_buffer = usm_alloc->getMuxBufferForDevice(device);
descriptor.buffer_descriptor =
mux_descriptor_info_buffer_s{mux_buffer, offset};
mux_descriptor_info_type_e::mux_descriptor_info_type_plain_old_data;
void *data = new char[sizeof arg_value];
memcpy(data, &arg_value, sizeof arg_value);
descriptor.plain_old_data_descriptor.data = data;
descriptor.plain_old_data_descriptor.length = sizeof arg_value;
update_info.descriptors[update_index] = descriptor;
update_index++;
}
Expand Down
148 changes: 52 additions & 96 deletions source/cl/source/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,79 +46,57 @@ mux_ndrange_options_t _cl_kernel::createKernelExecutionOptions(
const std::array<size_t, cl::max::WORK_ITEM_DIM> &global_size,
mux_buffer_t printf_buffer,
std::unique_ptr<mux_descriptor_info_t[]> &descriptors) {
(void)device;

const size_t num_arguments = info->getNumArguments();
const bool printf = nullptr != printf_buffer;
descriptors = std::unique_ptr<mux_descriptor_info_t[]>(
new mux_descriptor_info_t[printf ? num_arguments + 1 : num_arguments]);

for (size_t i = 0; i < num_arguments; i++) {
_cl_kernel::argument &arg = saved_args[i];
#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory
if (arg.stype == _cl_kernel::argument::storage_type::usm) {
// Find mux_buffer_t bound to USM allocation for device
auto mux_buffer = arg.usm.usm_ptr->getMuxBufferForDevice(device);

descriptors[i].type = mux_descriptor_info_type_buffer;
descriptors[i].buffer_descriptor.buffer = mux_buffer;
descriptors[i].buffer_descriptor.offset = arg.usm.offset;
continue;
}
#endif

switch (arg.type.kind) {
default: {
descriptors[i].type = mux_descriptor_info_type_plain_old_data;
descriptors[i].plain_old_data_descriptor.data = arg.value.data;
descriptors[i].plain_old_data_descriptor.length = arg.value.size;
break;
switch (arg.stype) {
case _cl_kernel::argument::storage_type::local_memory: {
descriptors[i].type = mux_descriptor_info_type_shared_local_buffer;
descriptors[i].shared_local_buffer_descriptor.size =
arg.local_memory_size;
}
case compiler::ArgumentKind::POINTER: {
if (arg.type.address_space == compiler::AddressSpace::GLOBAL ||
arg.type.address_space == compiler::AddressSpace::CONSTANT ||
arg.type.address_space == compiler::AddressSpace::PRIVATE) {
if (nullptr != arg.memory_buffer) {
auto buffer = static_cast<cl_mem_buffer>(arg.memory_buffer);
descriptors[i].type = mux_descriptor_info_type_buffer;
descriptors[i].buffer_descriptor.buffer =
buffer->mux_buffers[device_index];
descriptors[i].buffer_descriptor.offset = 0;
} else {
descriptors[i].type = mux_descriptor_info_type_null_buffer;
}
break;
} else if (arg.type.address_space == compiler::AddressSpace::LOCAL) {
descriptors[i].type = mux_descriptor_info_type_shared_local_buffer;
descriptors[i].shared_local_buffer_descriptor.size =
arg.local_memory_size;
break;
} else {
descriptors[i].type = mux_descriptor_info_type_custom_buffer;
descriptors[i].custom_buffer_descriptor = {};
if (mux_custom_buffer_capabilities_data &
device->mux_device->info->custom_buffer_capabilities) {
// Pass through custom buffer data.
descriptors[i].custom_buffer_descriptor.data = arg.value.data;
descriptors[i].custom_buffer_descriptor.size = arg.value.size;
continue;

case _cl_kernel::argument::storage_type::memory_buffer:
switch (arg.type.kind) {
default:
OCL_ABORT("Unhandled argument type");
continue;

case compiler::ArgumentKind::POINTER: {
if (arg.memory_buffer) {
auto buffer = static_cast<cl_mem_buffer>(arg.memory_buffer);
descriptors[i].type = mux_descriptor_info_type_buffer;
descriptors[i].buffer_descriptor.buffer =
buffer->mux_buffers[device_index];
descriptors[i].buffer_descriptor.offset = 0;
} else {
descriptors[i].type = mux_descriptor_info_type_null_buffer;
}
}
if (mux_custom_buffer_capabilities_address_space &
device->mux_device->info->custom_buffer_capabilities) {
// Pass through custom buffer address space.
descriptors[i].custom_buffer_descriptor.address_space =
arg.type.address_space;
continue;

case compiler::ArgumentKind::IMAGE1D: // fall-through
case compiler::ArgumentKind::IMAGE1D_ARRAY: // fall-through
case compiler::ArgumentKind::IMAGE1D_BUFFER: // fall-through
case compiler::ArgumentKind::IMAGE2D_ARRAY: // fall-through
case compiler::ArgumentKind::IMAGE3D: // fall-through
case compiler::ArgumentKind::IMAGE2D: {
auto image = static_cast<cl_mem_image>(arg.memory_buffer);
descriptors[i].type = mux_descriptor_info_type_image;
descriptors[i].image_descriptor.image =
image->mux_images[device_index];
}
continue;
}
} break;
case compiler::ArgumentKind::IMAGE1D: // fall-through
case compiler::ArgumentKind::IMAGE1D_ARRAY: // fall-through
case compiler::ArgumentKind::IMAGE1D_BUFFER: // fall-through
case compiler::ArgumentKind::IMAGE2D_ARRAY: // fall-through
case compiler::ArgumentKind::IMAGE3D: // fall-through
case compiler::ArgumentKind::IMAGE2D: {
auto image = static_cast<cl_mem_image>(arg.memory_buffer);
descriptors[i].type = mux_descriptor_info_type_image;
descriptors[i].image_descriptor.image = image->mux_images[device_index];
} break;
case compiler::ArgumentKind::SAMPLER: {

case _cl_kernel::argument::storage_type::sampler: {
// HACK(Benie): This code is going away so hack in samplers to
// Core.cpp because the target device abstraction is going to die and
// there is no point putting effort into it. These hexidecimal values
Expand Down Expand Up @@ -156,16 +134,22 @@ mux_ndrange_options_t _cl_kernel::createKernelExecutionOptions(
case 0x10:
descriptors[i].sampler_descriptor.sampler.filter_mode =
mux_filter_mode_linear;
break;
continue;
case 0x20:
descriptors[i].sampler_descriptor.sampler.filter_mode =
mux_filter_mode_nearest;
break;
continue;
}
} break;
case compiler::ArgumentKind::UNKNOWN:
OCL_ASSERT(false, "Unhandled argument type!");
break;
}

case _cl_kernel::argument::storage_type::value:
descriptors[i].type = mux_descriptor_info_type_plain_old_data;
descriptors[i].plain_old_data_descriptor.data = arg.value.data;
descriptors[i].plain_old_data_descriptor.length = arg.value.size;
continue;

case _cl_kernel::argument::storage_type::uninitialized:
continue;
}
}

Expand Down Expand Up @@ -732,24 +716,6 @@ _cl_kernel::argument::argument(compiler::ArgumentType type, cl_mem mem)
"Trying to create a memory argument with a non-memory type.");
}

#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory
_cl_kernel::argument::argument(compiler::ArgumentType type,
extension::usm::allocation_info *usm_alloc,
size_t offset)
: type(type), stype(_cl_kernel::argument::storage_type::usm) {
OCL_ASSERT(compiler::ArgumentKind::POINTER == type.kind,
"Trying to create a USM allocation argument on a type other than "
"a pointer.");
OCL_ASSERT(type.address_space == compiler::AddressSpace::GLOBAL ||
type.address_space == compiler::AddressSpace::CONSTANT,
"Trying to create a USM allocation argument from pointer type "
"without global or constant address space.");

usm.usm_ptr = usm_alloc;
usm.offset = offset;
}
#endif

_cl_kernel::argument::argument(compiler::ArgumentType type, const void *data,
size_t size)
: type(type), stype(_cl_kernel::argument::storage_type::value) {
Expand Down Expand Up @@ -779,11 +745,6 @@ _cl_kernel::argument::argument(const argument &other)
std::memcpy(value.data, other.value.data, other.value.size);
value.size = other.value.size;
break;
#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory
case _cl_kernel::argument::storage_type::usm:
usm = other.usm;
break;
#endif
case _cl_kernel::argument::storage_type::uninitialized:
break;
}
Expand All @@ -805,11 +766,6 @@ _cl_kernel::argument::argument(argument &&other)
value.data = other.value.data;
value.size = other.value.size;
break;
#ifdef OCL_EXTENSION_cl_intel_unified_shared_memory
case _cl_kernel::argument::storage_type::usm:
usm = other.usm;
break;
#endif
case _cl_kernel::argument::storage_type::uninitialized:
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,6 @@ struct USMKernelTest : public cl_intel_unified_shared_memory_Test {
ASSERT_SUCCESS(clGetDeviceInfo(
device, CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL,
sizeof(cross_capabilities), &cross_capabilities, nullptr));
shared_mem_support =
(single_capabilities != 0) && (cross_capabilities != 0);
}

void TearDown() override {
Expand All @@ -95,7 +93,6 @@ struct USMKernelTest : public cl_intel_unified_shared_memory_Test {
}

cl_device_unified_shared_memory_capabilities_intel host_capabilities = 0;
bool shared_mem_support = false;

const size_t elements = 64;
const size_t bytes = elements * sizeof(cl_int);
Expand Down Expand Up @@ -153,11 +150,12 @@ TEST_P(USMSetKernelArgMemPointerTest, InvalidUsage) {
err = clSetKernelArgMemPointerINTEL(kernel, 4, device_ptr);
EXPECT_EQ_ERRCODE(err, CL_INVALID_ARG_INDEX);

if (!shared_mem_support) {
cl_uint usm_arg_index = GetParam();
err = clSetKernelArgMemPointerINTEL(kernel, usm_arg_index, user_ptr);
EXPECT_EQ_ERRCODE(err, CL_INVALID_ARG_VALUE);
}
// The cl_intel_unified_shared_memory specification has an open question
// whether invalid pointers should result in an error. We accept this as Intel
// passes invalid pointers in valid SYCL code.
cl_uint usm_arg_index = GetParam();
err = clSetKernelArgMemPointerINTEL(kernel, usm_arg_index, user_ptr);
ASSERT_SUCCESS(err);

if (host_ptr) {
err = clSetKernelArgMemPointerINTEL(kernel, 2, host_ptr);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,6 @@ TEST_F(MutableDispatchUSMTest, InvalidArgIndex) {
command_buffer, &mutable_config));
}

// Test clSetKernelMemPointerINTEL error code for CL_INVALID_ARG_VALUE if
// arg_value is not a valid argument index.
TEST_F(MutableDispatchUSMTest, InvalidArgValue) {
ASSERT_SUCCESS(clSetKernelArgMemPointerINTEL(kernel, 0, device_ptrs[0]));

Expand Down Expand Up @@ -251,8 +249,11 @@ TEST_F(MutableDispatchUSMTest, InvalidArgValue) {
cl_mutable_base_config_khr mutable_config{
CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, &dispatch_config};

ASSERT_EQ_ERRCODE(CL_INVALID_ARG_VALUE, clUpdateMutableCommandsKHR(
command_buffer, &mutable_config));
// The interaction between cl_intel_unified_shared_memory and
// cl_khr_command_buffer_mutable_dispatch is not specified but we assume that
// if clSetKernelArgMemPointerINTEL would not report invalid values, neither
// will clUpdateMutableCommandsKHR.
ASSERT_SUCCESS(clUpdateMutableCommandsKHR(command_buffer, &mutable_config));
}

// Tests for updating USM arguments to a command-buffer kernel command are
Expand Down

0 comments on commit 440c28b

Please sign in to comment.