diff --git a/source/cl/source/extension/source/intel_unified_shared_memory/usm-exports.cpp b/source/cl/source/extension/source/intel_unified_shared_memory/usm-exports.cpp index 707561cac..85cf0f3a1 100644 --- a/source/cl/source/extension/source/intel_unified_shared_memory/usm-exports.cpp +++ b/source/cl/source/extension/source/intel_unified_shared_memory/usm-exports.cpp @@ -313,11 +313,11 @@ cl_int clMemFreeINTEL(cl_context context, void *ptr) { std::find_if(context->usm_allocations.begin(), context->usm_allocations.end(), isUsmPtr); - OCL_CHECK(context->usm_allocations.end() == usm_alloc_iterator, - return CL_INVALID_VALUE); + if (context->usm_allocations.end() != usm_alloc_iterator) { + // Remove now empty shared pointer from list + context->usm_allocations.erase(usm_alloc_iterator); + } - // Remove now empty shared pointer from list - context->usm_allocations.erase(usm_alloc_iterator); return CL_SUCCESS; } @@ -340,8 +340,9 @@ cl_int clMemBlockingFreeINTEL(cl_context context, void *ptr) { std::find_if(context->usm_allocations.begin(), context->usm_allocations.end(), isUsmPtr); - OCL_CHECK(context->usm_allocations.end() == usm_alloc_iterator, - return CL_INVALID_VALUE); + if (context->usm_allocations.end() == usm_alloc_iterator) { + return CL_SUCCESS; + } // Implicitly flush all the queues that the events belong to std::unordered_set<_cl_command_queue *> flushed_queues; @@ -508,12 +509,31 @@ cl_int MemFillImpl(cl_command_queue command_queue, void *dst_ptr, cl::release_guard event_release_guard(return_event, cl::ref_count_type::EXTERNAL); { + mux_buffer_t mux_buffer = nullptr; + const cl_device_id device = command_queue->device; + + uint64_t offset = 0; + std::unique_ptr dst_user_data; // TODO CA-3084 Unresolved issue in extension doc whether fill on arbitrary // host pointer should be allowed. - OCL_CHECK(usm_alloc == nullptr, return CL_INVALID_VALUE); + if (usm_alloc == nullptr) { + // Source pointer is to arbitrary user data, heap allocate a wrapper class + // so we can use Mux memory constructs to work with it. + auto wrapper = UserDataWrapper::create(device, size, dst_ptr); + OCL_CHECK(!wrapper, return CL_OUT_OF_RESOURCES); + dst_user_data.reset(*wrapper); + mux_buffer = dst_user_data->mux_buffer; + } else { + // Push Mux fill buffer operation + auto mux_error = + ExamineUSMAlloc(usm_alloc, device, return_event, mux_buffer); + OCL_CHECK(mux_error, return CL_OUT_OF_RESOURCES); + offset = getUSMOffset(dst_ptr, usm_alloc); + } - auto mux_error = usm_alloc->record_event(return_event); - OCL_CHECK(mux_error, return CL_OUT_OF_RESOURCES); + // TODO CA-2863 Define correct return code for this situation where device + // USM allocation device is not the same as command queue device + OCL_CHECK(mux_buffer == nullptr, return CL_INVALID_COMMAND_QUEUE); std::lock_guard lock( command_queue->context->getCommandQueueMutex()); @@ -522,16 +542,7 @@ cl_int MemFillImpl(cl_command_queue command_queue, void *dst_ptr, {event_wait_list, num_events_in_wait_list}, return_event); OCL_CHECK(!mux_command_buffer, return CL_OUT_OF_RESOURCES); - // Push Mux fill buffer operation - const cl_device_id device = command_queue->device; - mux_buffer_t mux_buffer = usm_alloc->getMuxBufferForDevice(device); - - // TODO CA-2863 Define correct return code for this situation where device - // USM allocation device is not the same as command queue device - OCL_CHECK(mux_buffer == nullptr, return CL_INVALID_COMMAND_QUEUE); - - const uint64_t offset = getUSMOffset(dst_ptr, usm_alloc); - mux_error = + auto mux_error = muxCommandFillBuffer(*mux_command_buffer, mux_buffer, offset, size, pattern, pattern_size, 0, nullptr, nullptr); if (mux_error) { @@ -541,6 +552,38 @@ cl_int MemFillImpl(cl_command_queue command_queue, void *dst_ptr, } return error; } + + // If the destination operand was user data, we need to manually copy + // the destination Mux buffer back to the user supplied `dst_ptr` by + // mapping the buffer. + if (dst_user_data) { + mux_error = muxCommandUserCallback( + *mux_command_buffer, + [](mux_queue_t, mux_command_buffer_t, void *user_data) { + static_cast(user_data)->readFromDevice(); + }, + dst_user_data.get(), 0, nullptr, nullptr); + + if (mux_error) { + auto error = cl::getErrorFrom(mux_error); + if (return_event) { + return_event->complete(error); + } + return error; + } + } + + // UserDataWrapper objects used to encapsulate user pointer operands are + // heap allocated. Free them once the command has completed. + auto raw_dst_user_data = dst_user_data.release(); + if (auto error = command_queue->registerDispatchCallback( + *mux_command_buffer, return_event, [raw_dst_user_data]() { + if (raw_dst_user_data) { + delete raw_dst_user_data; + } + })) { + return error; + } } if (nullptr != event) { diff --git a/source/cl/test/UnitCL/source/cl_intel_unified_shared_memory/usm_free.cpp b/source/cl/test/UnitCL/source/cl_intel_unified_shared_memory/usm_free.cpp index 92e934744..7de80c174 100644 --- a/source/cl/test/UnitCL/source/cl_intel_unified_shared_memory/usm_free.cpp +++ b/source/cl/test/UnitCL/source/cl_intel_unified_shared_memory/usm_free.cpp @@ -32,12 +32,6 @@ TEST_F(USMTests, MemFree_InvalidUsage) { err = clMemBlockingFreeINTEL(nullptr, malloc_ptr); EXPECT_EQ_ERRCODE(err, CL_INVALID_CONTEXT); - err = clMemFreeINTEL(context, malloc_ptr); - EXPECT_EQ_ERRCODE(err, CL_INVALID_VALUE); - - err = clMemBlockingFreeINTEL(context, malloc_ptr); - EXPECT_EQ_ERRCODE(err, CL_INVALID_VALUE); - free(malloc_ptr); } @@ -87,6 +81,19 @@ TEST_F(USMTests, MemFree_ValidUsage) { err = clMemFreeINTEL(context, device_ptr); EXPECT_SUCCESS(err); + + // Freeing arbitrary host data is permitted by the spec + { + void *malloc_ptr = malloc(256); + + err = clMemFreeINTEL(context, malloc_ptr); + EXPECT_SUCCESS(err); + + err = clMemBlockingFreeINTEL(context, malloc_ptr); + EXPECT_SUCCESS(err); + + free(malloc_ptr); + } } namespace { diff --git a/source/cl/test/UnitCL/source/cl_intel_unified_shared_memory/usm_mem_fill.cpp b/source/cl/test/UnitCL/source/cl_intel_unified_shared_memory/usm_mem_fill.cpp index d50854cc7..4f9197dfd 100644 --- a/source/cl/test/UnitCL/source/cl_intel_unified_shared_memory/usm_mem_fill.cpp +++ b/source/cl/test/UnitCL/source/cl_intel_unified_shared_memory/usm_mem_fill.cpp @@ -68,6 +68,8 @@ struct USMMemFillTest : public cl_intel_unified_shared_memory_Test { cl_intel_unified_shared_memory_Test::TearDown(); } + void validateResults(T *ptr); + static const size_t bytes = sizeof(T) * elements; static const cl_uint align = sizeof(T); @@ -203,6 +205,30 @@ using OpenCLTypes = ::testing::Types; #endif TYPED_TEST_SUITE(USMMemFillTest, OpenCLTypes); +template +void USMMemFillTest::validateResults(TypeParam *ptr) { + // Verify results + const TypeParam pattern1 = test_patterns::pattern1; + const TypeParam pattern2 = test_patterns::pattern2; + + const char *tested_type_string = test_patterns::as_string; + // Elements [0,1] should be set to pattern1 + EXPECT_TRUE(0 == std::memcmp(&ptr[0], &pattern1, sizeof(TypeParam))) + << "For type " << tested_type_string; + EXPECT_TRUE(0 == std::memcmp(&ptr[1], &pattern1, sizeof(TypeParam))) + << "For type " << tested_type_string; + + // Elements [2,6] should still be zero + TypeParam zero[5]; + std::memset(zero, 0, sizeof(TypeParam) * 5); + EXPECT_TRUE(0 == std::memcmp(&ptr[2], zero, sizeof(TypeParam) * 5)) + << "For type " << tested_type_string; + + // Final element [7] should be pattern2 + EXPECT_TRUE(0 == std::memcmp(&ptr[7], &pattern2, sizeof(pattern2))) + << "For type " << tested_type_string; +} + // Test expected behaviour of clEnqueueMemFillINTEL for a host USM allocation TYPED_TEST(USMMemFillTest, HostAllocation) { // gtest TYPED_TEST uses a derived class template, requiring us to visit @@ -239,27 +265,7 @@ TYPED_TEST(USMMemFillTest, HostAllocation) { EXPECT_SUCCESS(clFinish(queue)); // Verify results - TypeParam *host_validation_ptr = reinterpret_cast(host_ptr); - const char *tested_type_string = test_patterns::as_string; - // Elements [0,1] should be set to pattern - EXPECT_TRUE(0 == - std::memcmp(&host_validation_ptr[0], &pattern, sizeof(TypeParam))) - << "For type " << tested_type_string; - EXPECT_TRUE(0 == - std::memcmp(&host_validation_ptr[1], &pattern, sizeof(TypeParam))) - << "For type " << tested_type_string; - - // Elements [2,6] should still be zero - TypeParam zero[5]; - std::memset(zero, 0, sizeof(TypeParam) * 5); - EXPECT_TRUE(0 == - std::memcmp(&host_validation_ptr[2], zero, sizeof(TypeParam) * 5)) - << "For type " << tested_type_string; - - // Final element [7] should be pattern2 - EXPECT_TRUE(0 == - std::memcmp(&host_validation_ptr[7], &pattern2, sizeof(pattern2))) - << "For type " << tested_type_string; + this->validateResults(reinterpret_cast(host_ptr)); } // Test expected behaviour of clEnqueueMemFillINTEL for a device USM allocation @@ -314,28 +320,55 @@ TYPED_TEST(USMMemFillTest, DeviceAllocation) { // Verify results if (host_ptr) { - TypeParam *host_validation_ptr = reinterpret_cast(host_ptr); - const char *tested_type_string = test_patterns::as_string; - // Elements [0,1] should be set to pattern - EXPECT_TRUE( - 0 == std::memcmp(&host_validation_ptr[0], &pattern, sizeof(TypeParam))) - << "For type " << tested_type_string; - EXPECT_TRUE( - 0 == std::memcmp(&host_validation_ptr[1], &pattern, sizeof(TypeParam))) - << "For type " << tested_type_string; - - // Elements [2,6] should still be zero - TypeParam zero[5]; - std::memset(zero, 0, sizeof(TypeParam) * 5); - EXPECT_TRUE( - 0 == std::memcmp(&host_validation_ptr[2], zero, sizeof(TypeParam) * 5)) - << "For type " << tested_type_string; - - // Final element [7] should be pattern2 - EXPECT_TRUE( - 0 == std::memcmp(&host_validation_ptr[7], &pattern2, sizeof(pattern2))) - << "For type " << tested_type_string; + this->validateResults(reinterpret_cast(host_ptr)); + } + + for (cl_event &event : wait_events) { + EXPECT_SUCCESS(clReleaseEvent(event)); } +} + +// Test expected behaviour of clEnqueueMemFillINTEL for a user allocation +TYPED_TEST(USMMemFillTest, UserAllocation) { + // gtest TYPED_TEST uses a derived class template, requiring us to visit + // members of USMMemFillTest via 'this'. + auto bytes = this->bytes; + auto queue = this->queue; + auto getPointerOffset = this->getPointerOffset; + auto clEnqueueMemFillINTEL = this->clEnqueueMemFillINTEL; + + std::vector> + user_data(elements); + + std::array wait_events; + // Zero initialize user memory containing 8 TypeParam elements before + // beginning testing + const TypeParam zero_pattern = test_patterns::zero_pattern; + cl_int err = clEnqueueMemFillINTEL(queue, user_data.data(), &zero_pattern, + sizeof(zero_pattern), bytes, 0, nullptr, + &wait_events[0]); + + // Fill first two elements + const TypeParam pattern = test_patterns::pattern1; + err = clEnqueueMemFillINTEL(queue, user_data.data(), &pattern, + sizeof(pattern), sizeof(pattern) * 2, 1, + &wait_events[0], &wait_events[1]); + EXPECT_SUCCESS(err); + + // Fill last element + const TypeParam pattern2 = test_patterns::pattern2; + const size_t offset = bytes - sizeof(TypeParam); + + void *offset_device_ptr = getPointerOffset(user_data.data(), offset); + err = clEnqueueMemFillINTEL(queue, offset_device_ptr, &pattern2, + sizeof(pattern2), sizeof(pattern2), 1, + &wait_events[0], &wait_events[2]); + EXPECT_SUCCESS(err); + + EXPECT_SUCCESS(clFinish(queue)); + + // Verify results + this->validateResults(reinterpret_cast(user_data.data())); for (cl_event &event : wait_events) { EXPECT_SUCCESS(clReleaseEvent(event));