Skip to content

Commit

Permalink
Merge pull request #259 from frasercrmck/usm-fill-free-user
Browse files Browse the repository at this point in the history
[usm] Support filling and freeing arbitrary host pointers
  • Loading branch information
frasercrmck authored Dec 20, 2023
2 parents d77f61e + 70e68c7 commit 2d186ab
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 67 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -313,11 +313,11 @@ cl_int clMemFreeINTEL(cl_context context, void *ptr) {
std::find_if(context->usm_allocations.begin(),
context->usm_allocations.end(), isUsmPtr);

OCL_CHECK(context->usm_allocations.end() == usm_alloc_iterator,
return CL_INVALID_VALUE);
if (context->usm_allocations.end() != usm_alloc_iterator) {
// Remove now empty shared pointer from list
context->usm_allocations.erase(usm_alloc_iterator);
}

// Remove now empty shared pointer from list
context->usm_allocations.erase(usm_alloc_iterator);
return CL_SUCCESS;
}

Expand All @@ -340,8 +340,9 @@ cl_int clMemBlockingFreeINTEL(cl_context context, void *ptr) {
std::find_if(context->usm_allocations.begin(),
context->usm_allocations.end(), isUsmPtr);

OCL_CHECK(context->usm_allocations.end() == usm_alloc_iterator,
return CL_INVALID_VALUE);
if (context->usm_allocations.end() == usm_alloc_iterator) {
return CL_SUCCESS;
}

// Implicitly flush all the queues that the events belong to
std::unordered_set<_cl_command_queue *> flushed_queues;
Expand Down Expand Up @@ -508,12 +509,31 @@ cl_int MemFillImpl(cl_command_queue command_queue, void *dst_ptr,
cl::release_guard<cl_event> event_release_guard(return_event,
cl::ref_count_type::EXTERNAL);
{
mux_buffer_t mux_buffer = nullptr;
const cl_device_id device = command_queue->device;

uint64_t offset = 0;
std::unique_ptr<UserDataWrapper> dst_user_data;
// TODO CA-3084 Unresolved issue in extension doc whether fill on arbitrary
// host pointer should be allowed.
OCL_CHECK(usm_alloc == nullptr, return CL_INVALID_VALUE);
if (usm_alloc == nullptr) {
// Source pointer is to arbitrary user data, heap allocate a wrapper class
// so we can use Mux memory constructs to work with it.
auto wrapper = UserDataWrapper::create(device, size, dst_ptr);
OCL_CHECK(!wrapper, return CL_OUT_OF_RESOURCES);
dst_user_data.reset(*wrapper);
mux_buffer = dst_user_data->mux_buffer;
} else {
// Push Mux fill buffer operation
auto mux_error =
ExamineUSMAlloc(usm_alloc, device, return_event, mux_buffer);
OCL_CHECK(mux_error, return CL_OUT_OF_RESOURCES);
offset = getUSMOffset(dst_ptr, usm_alloc);
}

auto mux_error = usm_alloc->record_event(return_event);
OCL_CHECK(mux_error, return CL_OUT_OF_RESOURCES);
// TODO CA-2863 Define correct return code for this situation where device
// USM allocation device is not the same as command queue device
OCL_CHECK(mux_buffer == nullptr, return CL_INVALID_COMMAND_QUEUE);

std::lock_guard<std::mutex> lock(
command_queue->context->getCommandQueueMutex());
Expand All @@ -522,16 +542,7 @@ cl_int MemFillImpl(cl_command_queue command_queue, void *dst_ptr,
{event_wait_list, num_events_in_wait_list}, return_event);
OCL_CHECK(!mux_command_buffer, return CL_OUT_OF_RESOURCES);

// Push Mux fill buffer operation
const cl_device_id device = command_queue->device;
mux_buffer_t mux_buffer = usm_alloc->getMuxBufferForDevice(device);

// TODO CA-2863 Define correct return code for this situation where device
// USM allocation device is not the same as command queue device
OCL_CHECK(mux_buffer == nullptr, return CL_INVALID_COMMAND_QUEUE);

const uint64_t offset = getUSMOffset(dst_ptr, usm_alloc);
mux_error =
auto mux_error =
muxCommandFillBuffer(*mux_command_buffer, mux_buffer, offset, size,
pattern, pattern_size, 0, nullptr, nullptr);
if (mux_error) {
Expand All @@ -541,6 +552,38 @@ cl_int MemFillImpl(cl_command_queue command_queue, void *dst_ptr,
}
return error;
}

// If the destination operand was user data, we need to manually copy
// the destination Mux buffer back to the user supplied `dst_ptr` by
// mapping the buffer.
if (dst_user_data) {
mux_error = muxCommandUserCallback(
*mux_command_buffer,
[](mux_queue_t, mux_command_buffer_t, void *user_data) {
static_cast<UserDataWrapper *>(user_data)->readFromDevice();
},
dst_user_data.get(), 0, nullptr, nullptr);

if (mux_error) {
auto error = cl::getErrorFrom(mux_error);
if (return_event) {
return_event->complete(error);
}
return error;
}
}

// UserDataWrapper objects used to encapsulate user pointer operands are
// heap allocated. Free them once the command has completed.
auto raw_dst_user_data = dst_user_data.release();
if (auto error = command_queue->registerDispatchCallback(
*mux_command_buffer, return_event, [raw_dst_user_data]() {
if (raw_dst_user_data) {
delete raw_dst_user_data;
}
})) {
return error;
}
}

if (nullptr != event) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,6 @@ TEST_F(USMTests, MemFree_InvalidUsage) {
err = clMemBlockingFreeINTEL(nullptr, malloc_ptr);
EXPECT_EQ_ERRCODE(err, CL_INVALID_CONTEXT);

err = clMemFreeINTEL(context, malloc_ptr);
EXPECT_EQ_ERRCODE(err, CL_INVALID_VALUE);

err = clMemBlockingFreeINTEL(context, malloc_ptr);
EXPECT_EQ_ERRCODE(err, CL_INVALID_VALUE);

free(malloc_ptr);
}

Expand Down Expand Up @@ -87,6 +81,19 @@ TEST_F(USMTests, MemFree_ValidUsage) {

err = clMemFreeINTEL(context, device_ptr);
EXPECT_SUCCESS(err);

// Freeing arbitrary host data is permitted by the spec
{
void *malloc_ptr = malloc(256);

err = clMemFreeINTEL(context, malloc_ptr);
EXPECT_SUCCESS(err);

err = clMemBlockingFreeINTEL(context, malloc_ptr);
EXPECT_SUCCESS(err);

free(malloc_ptr);
}
}

namespace {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ struct USMMemFillTest : public cl_intel_unified_shared_memory_Test {
cl_intel_unified_shared_memory_Test::TearDown();
}

void validateResults(T *ptr);

static const size_t bytes = sizeof(T) * elements;
static const cl_uint align = sizeof(T);

Expand Down Expand Up @@ -203,6 +205,30 @@ using OpenCLTypes = ::testing::Types<cl_int>;
#endif
TYPED_TEST_SUITE(USMMemFillTest, OpenCLTypes);

template <typename TypeParam>
void USMMemFillTest<TypeParam>::validateResults(TypeParam *ptr) {
// Verify results
const TypeParam pattern1 = test_patterns<TypeParam>::pattern1;
const TypeParam pattern2 = test_patterns<TypeParam>::pattern2;

const char *tested_type_string = test_patterns<TypeParam>::as_string;
// Elements [0,1] should be set to pattern1
EXPECT_TRUE(0 == std::memcmp(&ptr[0], &pattern1, sizeof(TypeParam)))
<< "For type " << tested_type_string;
EXPECT_TRUE(0 == std::memcmp(&ptr[1], &pattern1, sizeof(TypeParam)))
<< "For type " << tested_type_string;

// Elements [2,6] should still be zero
TypeParam zero[5];
std::memset(zero, 0, sizeof(TypeParam) * 5);
EXPECT_TRUE(0 == std::memcmp(&ptr[2], zero, sizeof(TypeParam) * 5))
<< "For type " << tested_type_string;

// Final element [7] should be pattern2
EXPECT_TRUE(0 == std::memcmp(&ptr[7], &pattern2, sizeof(pattern2)))
<< "For type " << tested_type_string;
}

// Test expected behaviour of clEnqueueMemFillINTEL for a host USM allocation
TYPED_TEST(USMMemFillTest, HostAllocation) {
// gtest TYPED_TEST uses a derived class template, requiring us to visit
Expand Down Expand Up @@ -239,27 +265,7 @@ TYPED_TEST(USMMemFillTest, HostAllocation) {
EXPECT_SUCCESS(clFinish(queue));

// Verify results
TypeParam *host_validation_ptr = reinterpret_cast<TypeParam *>(host_ptr);
const char *tested_type_string = test_patterns<TypeParam>::as_string;
// Elements [0,1] should be set to pattern
EXPECT_TRUE(0 ==
std::memcmp(&host_validation_ptr[0], &pattern, sizeof(TypeParam)))
<< "For type " << tested_type_string;
EXPECT_TRUE(0 ==
std::memcmp(&host_validation_ptr[1], &pattern, sizeof(TypeParam)))
<< "For type " << tested_type_string;

// Elements [2,6] should still be zero
TypeParam zero[5];
std::memset(zero, 0, sizeof(TypeParam) * 5);
EXPECT_TRUE(0 ==
std::memcmp(&host_validation_ptr[2], zero, sizeof(TypeParam) * 5))
<< "For type " << tested_type_string;

// Final element [7] should be pattern2
EXPECT_TRUE(0 ==
std::memcmp(&host_validation_ptr[7], &pattern2, sizeof(pattern2)))
<< "For type " << tested_type_string;
this->validateResults(reinterpret_cast<TypeParam *>(host_ptr));
}

// Test expected behaviour of clEnqueueMemFillINTEL for a device USM allocation
Expand Down Expand Up @@ -314,28 +320,55 @@ TYPED_TEST(USMMemFillTest, DeviceAllocation) {

// Verify results
if (host_ptr) {
TypeParam *host_validation_ptr = reinterpret_cast<TypeParam *>(host_ptr);
const char *tested_type_string = test_patterns<TypeParam>::as_string;
// Elements [0,1] should be set to pattern
EXPECT_TRUE(
0 == std::memcmp(&host_validation_ptr[0], &pattern, sizeof(TypeParam)))
<< "For type " << tested_type_string;
EXPECT_TRUE(
0 == std::memcmp(&host_validation_ptr[1], &pattern, sizeof(TypeParam)))
<< "For type " << tested_type_string;

// Elements [2,6] should still be zero
TypeParam zero[5];
std::memset(zero, 0, sizeof(TypeParam) * 5);
EXPECT_TRUE(
0 == std::memcmp(&host_validation_ptr[2], zero, sizeof(TypeParam) * 5))
<< "For type " << tested_type_string;

// Final element [7] should be pattern2
EXPECT_TRUE(
0 == std::memcmp(&host_validation_ptr[7], &pattern2, sizeof(pattern2)))
<< "For type " << tested_type_string;
this->validateResults(reinterpret_cast<TypeParam *>(host_ptr));
}

for (cl_event &event : wait_events) {
EXPECT_SUCCESS(clReleaseEvent(event));
}
}

// Test expected behaviour of clEnqueueMemFillINTEL for a user allocation
TYPED_TEST(USMMemFillTest, UserAllocation) {
// gtest TYPED_TEST uses a derived class template, requiring us to visit
// members of USMMemFillTest via 'this'.
auto bytes = this->bytes;
auto queue = this->queue;
auto getPointerOffset = this->getPointerOffset;
auto clEnqueueMemFillINTEL = this->clEnqueueMemFillINTEL;

std::vector<TypeParam, UCL::aligned_allocator<TypeParam, sizeof(TypeParam)>>
user_data(elements);

std::array<cl_event, 3> wait_events;
// Zero initialize user memory containing 8 TypeParam elements before
// beginning testing
const TypeParam zero_pattern = test_patterns<TypeParam>::zero_pattern;
cl_int err = clEnqueueMemFillINTEL(queue, user_data.data(), &zero_pattern,
sizeof(zero_pattern), bytes, 0, nullptr,
&wait_events[0]);

// Fill first two elements
const TypeParam pattern = test_patterns<TypeParam>::pattern1;
err = clEnqueueMemFillINTEL(queue, user_data.data(), &pattern,
sizeof(pattern), sizeof(pattern) * 2, 1,
&wait_events[0], &wait_events[1]);
EXPECT_SUCCESS(err);

// Fill last element
const TypeParam pattern2 = test_patterns<TypeParam>::pattern2;
const size_t offset = bytes - sizeof(TypeParam);

void *offset_device_ptr = getPointerOffset(user_data.data(), offset);
err = clEnqueueMemFillINTEL(queue, offset_device_ptr, &pattern2,
sizeof(pattern2), sizeof(pattern2), 1,
&wait_events[0], &wait_events[2]);
EXPECT_SUCCESS(err);

EXPECT_SUCCESS(clFinish(queue));

// Verify results
this->validateResults(reinterpret_cast<TypeParam *>(user_data.data()));

for (cl_event &event : wait_events) {
EXPECT_SUCCESS(clReleaseEvent(event));
Expand Down

0 comments on commit 2d186ab

Please sign in to comment.