Skip to content

Commit 36419ab

Browse files
authored
[UR][Offload] Fixes for enqueue UR CTS tests (#19926)
A small selection of fixes to increase the pass rate of the enqueue CTS unit tests: * Blocking memory reads/writes now properly wait on the queue. * `urKernelSetArgMemObj` added to the function table. * Debug print removed. * Layout of kernel arguments now matches the HIP target if Offload is on an AMD device. * `urEnqueueEventsWaitWithBarrierExt` has been implemented (it simply forwards to the non-ext version). * `UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP` set to false.
1 parent 48e397c commit 36419ab

File tree

9 files changed

+76
-58
lines changed

9 files changed

+76
-58
lines changed

unified-runtime/source/adapters/offload/device.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
194194
case UR_DEVICE_INFO_IMAGE_SRGB:
195195
case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY:
196196
case UR_DEVICE_INFO_LINKER_AVAILABLE:
197+
case UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP:
197198
return ReturnValue(false);
198199
case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT:
199200
case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT:

unified-runtime/source/adapters/offload/enqueue.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
141141
return doWait<true>(hQueue, numEventsInWaitList, phEventWaitList, phEvent);
142142
}
143143

144+
// This function only makes sense for level_zero, the flag in properties is
145+
// ignored
146+
UR_APIEXPORT ur_result_t urEnqueueEventsWaitWithBarrierExt(
147+
ur_queue_handle_t hQueue, const ur_exp_enqueue_ext_properties_t *,
148+
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
149+
ur_event_handle_t *phEvent) {
150+
return urEnqueueEventsWaitWithBarrier(hQueue, numEventsInWaitList,
151+
phEventWaitList, phEvent);
152+
}
153+
144154
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
145155
ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
146156
const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
@@ -235,6 +245,8 @@ ur_result_t doMemcpy(ur_command_t Command, ur_queue_handle_t hQueue,
235245
OL_RETURN_ON_ERR(waitOnEvents(Queue, phEventWaitList, numEventsInWaitList));
236246

237247
if (blocking) {
248+
// Ensure all work in the queue is complete
249+
OL_RETURN_ON_ERR(olSyncQueue(Queue));
238250
OL_RETURN_ON_ERR(
239251
olMemcpy(nullptr, DestPtr, DestDevice, SrcPtr, SrcDevice, size));
240252
if (phEvent) {

unified-runtime/source/adapters/offload/ur_interface_loader.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable(
118118
pDdiTable->pfnRelease = urKernelRelease;
119119
pDdiTable->pfnRetain = urKernelRetain;
120120
pDdiTable->pfnSetArgLocal = nullptr;
121-
pDdiTable->pfnSetArgMemObj = nullptr;
121+
pDdiTable->pfnSetArgMemObj = urKernelSetArgMemObj;
122122
pDdiTable->pfnSetArgPointer = urKernelSetArgPointer;
123123
pDdiTable->pfnSetArgSampler = nullptr;
124124
pDdiTable->pfnSetArgValue = urKernelSetArgValue;
@@ -172,6 +172,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable(
172172
pDdiTable->pfnDeviceGlobalVariableWrite = urEnqueueDeviceGlobalVariableWrite;
173173
pDdiTable->pfnEventsWait = urEnqueueEventsWait;
174174
pDdiTable->pfnEventsWaitWithBarrier = urEnqueueEventsWaitWithBarrier;
175+
pDdiTable->pfnEventsWaitWithBarrierExt = urEnqueueEventsWaitWithBarrierExt;
175176
pDdiTable->pfnKernelLaunch = urEnqueueKernelLaunch;
176177
pDdiTable->pfnMemBufferCopy = urEnqueueMemBufferCopy;
177178
pDdiTable->pfnMemBufferCopyRect = nullptr;

unified-runtime/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ struct urEnqueueEventsWaitWithBarrierOrderingTest : uur::urProgramTest {
8888

8989
auto entry_points =
9090
uur::KernelsEnvironment::instance->GetEntryPointNames(program_name);
91-
std::cout << entry_points[0];
9291

9392
ASSERT_SUCCESS(urKernelCreate(program, "_ZTS3Add", &add_kernel));
9493
ASSERT_SUCCESS(urKernelCreate(program, "_ZTS3Mul", &mul_kernel));

unified-runtime/test/conformance/source/environment.cpp

Lines changed: 9 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -191,56 +191,6 @@ KernelsEnvironment::parseKernelOptions(int argc, char **argv,
191191
return options;
192192
}
193193

194-
std::string
195-
KernelsEnvironment::getDefaultTargetName(ur_platform_handle_t platform) {
196-
if (instance->GetDevices().size() == 0) {
197-
error = "no devices available on the platform";
198-
return {};
199-
}
200-
201-
ur_backend_t backend;
202-
if (urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, sizeof(backend),
203-
&backend, nullptr)) {
204-
error = "failed to get backend from platform.";
205-
return {};
206-
}
207-
208-
switch (backend) {
209-
case UR_BACKEND_OPENCL:
210-
case UR_BACKEND_LEVEL_ZERO:
211-
return "spir64";
212-
case UR_BACKEND_CUDA:
213-
return "nvptx64-nvidia-cuda";
214-
case UR_BACKEND_HIP:
215-
return "amdgcn-amd-amdhsa";
216-
case UR_BACKEND_OFFLOAD: {
217-
// All Offload platforms report this backend, use the platform name to select
218-
// the actual underlying backend.
219-
std::vector<char> PlatformName;
220-
size_t PlatformNameSize = 0;
221-
urPlatformGetInfo(platform, UR_PLATFORM_INFO_NAME, 0, nullptr,
222-
&PlatformNameSize);
223-
PlatformName.resize(PlatformNameSize);
224-
urPlatformGetInfo(platform, UR_PLATFORM_INFO_NAME, PlatformNameSize,
225-
PlatformName.data(), nullptr);
226-
if (std::strcmp(PlatformName.data(), "CUDA") == 0) {
227-
return "nvptx64-nvidia-cuda";
228-
} else if (std::strcmp(PlatformName.data(), "AMDGPU") == 0) {
229-
return "amdgcn-amd-amdhsa";
230-
} else {
231-
error = "Could not detect target for Offload platform";
232-
return {};
233-
}
234-
}
235-
case UR_BACKEND_NATIVE_CPU:
236-
error = "native_cpu doesn't support kernel tests yet";
237-
return {};
238-
default:
239-
error = "unknown target.";
240-
return {};
241-
}
242-
}
243-
244194
std::string
245195
KernelsEnvironment::getKernelSourcePath(const std::string &kernel_name,
246196
const std::string &target_name) {
@@ -256,12 +206,17 @@ void KernelsEnvironment::LoadSource(
256206
// We don't have a way to build device code for native cpu yet.
257207
UUR_KNOWN_FAILURE_ON_PARAM(platform, uur::NativeCPU{});
258208

259-
std::string target_name = getDefaultTargetName(platform);
260-
if (target_name.empty()) {
261-
FAIL() << error;
209+
if (instance->GetDevices().size() == 0) {
210+
FAIL() << "no devices available on the platform";
211+
}
212+
213+
std::string triple_name;
214+
auto Err = GetPlatformTriple(platform, triple_name);
215+
if (Err) {
216+
FAIL() << "GetPlatformTriple failed with error " << Err << "\n";
262217
}
263218

264-
return LoadSource(kernel_name, target_name, binary_out);
219+
return LoadSource(kernel_name, triple_name, binary_out);
265220
}
266221

267222
void KernelsEnvironment::LoadSource(

unified-runtime/test/conformance/testing/include/uur/environment.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ struct KernelsEnvironment : DevicesEnvironment {
9090
const std::string &kernels_default_dir);
9191
std::string getKernelSourcePath(const std::string &kernel_name,
9292
const std::string &target_name);
93-
std::string getDefaultTargetName(ur_platform_handle_t platform);
9493

9594
KernelOptions kernel_options;
9695
// mapping between kernels (full_path + kernel_name) and their saved source.

unified-runtime/test/conformance/testing/include/uur/fixtures.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1472,7 +1472,9 @@ struct KernelLaunchHelper {
14721472
ur_backend_t backend;
14731473
ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND,
14741474
sizeof(backend), &backend, nullptr));
1475-
if (backend == UR_BACKEND_HIP) {
1475+
std::string target_name;
1476+
ASSERT_SUCCESS(GetPlatformTriple(platform, target_name));
1477+
if (target_name == "amdgcn-amd-amdhsa") {
14761478
// this emulates the three offset params for buffer accessor on AMD.
14771479
size_t val = 0;
14781480
ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index + 1,

unified-runtime/test/conformance/testing/include/uur/utils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,8 @@ ur_result_t GetTimestampRecordingSupport(ur_device_handle_t device,
416416
bool &support);
417417
ur_result_t GetUSMContextMemcpyExpSupport(ur_device_handle_t device,
418418
bool &support);
419+
ur_result_t GetPlatformTriple(ur_platform_handle_t platform,
420+
std::string &Triple);
419421

420422
ur_device_partition_property_t makePartitionByCountsDesc(uint32_t count);
421423

unified-runtime/test/conformance/testing/source/utils.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -649,6 +649,53 @@ ur_result_t GetUSMContextMemcpyExpSupport(ur_device_handle_t device,
649649
device, UR_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP, support);
650650
}
651651

652+
// Looks up the device-code target triple to use for `platform` and stores it
// in `triple`.
//
// The triple is selected from the platform's backend:
//   * OpenCL / Level Zero -> "spir64"
//   * CUDA                -> "nvptx64-nvidia-cuda"
//   * HIP                 -> "amdgcn-amd-amdhsa"
//   * Offload             -> chosen from the platform name ("CUDA" or
//                            "AMDGPU"), see below.
//
// Returns UR_RESULT_SUCCESS when `triple` has been written. On any failure
// `triple` is left untouched and one of the following is returned:
//   * the error reported by urPlatformGetInfo, if a query fails;
//   * UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION for an Offload platform whose
//     name is neither "CUDA" nor "AMDGPU";
//   * UR_RESULT_ERROR_UNSUPPORTED_FEATURE for the native CPU backend;
//   * UR_RESULT_ERROR_INVALID_ENUMERATION for any other backend value.
ur_result_t GetPlatformTriple(ur_platform_handle_t platform,
                              std::string &triple) {
  ur_backend_t backend;
  if (auto Err = urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND,
                                   sizeof(backend), &backend, nullptr)) {
    return Err;
  }

  switch (backend) {
  case UR_BACKEND_OPENCL:
  case UR_BACKEND_LEVEL_ZERO:
    triple = "spir64";
    break;
  case UR_BACKEND_CUDA:
    triple = "nvptx64-nvidia-cuda";
    break;
  case UR_BACKEND_HIP:
    triple = "amdgcn-amd-amdhsa";
    break;
  case UR_BACKEND_OFFLOAD: {
    // All Offload platforms report this backend, use the platform name to
    // select the actual underlying backend.
    size_t PlatformNameSize = 0;
    if (auto Err = urPlatformGetInfo(platform, UR_PLATFORM_INFO_NAME, 0,
                                     nullptr, &PlatformNameSize)) {
      return Err;
    }

    // One extra zeroed byte keeps the buffer non-empty and NUL-terminated
    // regardless of what the adapter writes, so building a string from it is
    // always well defined.
    std::vector<char> PlatformName(PlatformNameSize + 1, '\0');
    if (PlatformNameSize != 0) {
      if (auto Err =
              urPlatformGetInfo(platform, UR_PLATFORM_INFO_NAME,
                                PlatformNameSize, PlatformName.data(),
                                nullptr)) {
        return Err;
      }
    }

    const std::string Name(PlatformName.data());
    if (Name == "CUDA") {
      triple = "nvptx64-nvidia-cuda";
    } else if (Name == "AMDGPU") {
      triple = "amdgcn-amd-amdhsa";
    } else {
      return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
    }
    break;
  }
  case UR_BACKEND_NATIVE_CPU:
    return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
  default:
    return UR_RESULT_ERROR_INVALID_ENUMERATION;
  }

  return UR_RESULT_SUCCESS;
}
698+
652699
ur_device_partition_property_t makePartitionByCountsDesc(uint32_t count) {
653700
ur_device_partition_property_t desc;
654701
desc.type = UR_DEVICE_PARTITION_BY_COUNTS;

0 commit comments

Comments
 (0)