Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ int main() {
});
gpu_queue.wait();
// CHECK: DeviceSanitizer: invalid-argument on kernel
// CHECK: The {{[0-9]+}}th argument {{.*}} is allocated in other context
// CHECK: The {{[0-9]+}}th argument {{.*}} is allocated in other device
// CHECK: {{.*}} is located inside of Device USM region

sycl::free(data, cpu_queue);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ ur_result_t setupContext(ur_context_handle_t Context, uint32_t numDevices,
(void *)DI->Handle, (void *)Context);
DI->Shadow = ShadowMemory;
CI->DeviceList.emplace_back(hDevice);
CI->AllocInfosMap[hDevice];
}
}
return UR_RESULT_SUCCESS;
Expand Down Expand Up @@ -1623,6 +1622,30 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgPointer(
return result;
}

__urdlllocal ur_result_t UR_APICALL urKernelSetExecInfo(
/// [in] handle of the kernel object
ur_kernel_handle_t hKernel,
/// [in] name of the execution attribute
ur_kernel_exec_info_t propName,
/// [in] size in byte the attribute value
size_t propSize,
/// [in][optional] pointer to execution info properties.
const ur_kernel_exec_info_properties_t *pProperties,
/// [in][typename(propName, propSize)] pointer to memory location holding
/// the property value.
const void *pPropValue) {
UR_LOG_L(getContext()->logger, DEBUG, "==== urKernelSetExecInfo");

UR_CALL(getContext()->urDdiTable.Kernel.pfnSetExecInfo(
hKernel, propName, propSize, pProperties, pPropValue));
auto &KI = getAsanInterceptor()->getOrCreateKernelInfo(hKernel);
if (propName == UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS) {
KI.IsIndirectAccess = *ur_cast<const bool *>(pPropValue);
}

return UR_RESULT_SUCCESS;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Intercept function for urDeviceGetInfo
__urdlllocal ur_result_t UR_APICALL urDeviceGetInfo(
Expand Down Expand Up @@ -1928,6 +1951,7 @@ __urdlllocal ur_result_t UR_APICALL urGetKernelProcAddrTable(
pDdiTable->pfnSetArgMemObj = ur_sanitizer_layer::asan::urKernelSetArgMemObj;
pDdiTable->pfnSetArgLocal = ur_sanitizer_layer::asan::urKernelSetArgLocal;
pDdiTable->pfnSetArgPointer = ur_sanitizer_layer::asan::urKernelSetArgPointer;
pDdiTable->pfnSetExecInfo = ur_sanitizer_layer::asan::urKernelSetExecInfo;

return result;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,11 @@ ur_result_t AsanInterceptor::allocateMemory(ur_context_handle_t Context,
AI->print();

// For updating shadow memory
if (Device) { // Device/Shared USM
ContextInfo->insertAllocInfo({Device}, AI);
if (DeviceInfo) { // Device/Shared USM
DeviceInfo->insertAllocInfo(AI);
} else { // Host USM
ContextInfo->insertAllocInfo(ContextInfo->DeviceList, AI);
for (const auto &Device : ContextInfo->DeviceList)
getDeviceInfo(Device)->insertAllocInfo(AI);
}

// For memory release
Expand Down Expand Up @@ -212,9 +213,10 @@ ur_result_t AsanInterceptor::releaseMemory(ur_context_handle_t Context,
AllocInfo->ReleaseStack = GetCurrentBacktrace();

if (AllocInfo->Type == AllocType::HOST_USM) {
ContextInfo->insertAllocInfo(ContextInfo->DeviceList, AllocInfo);
for (const auto &Device : ContextInfo->DeviceList)
getDeviceInfo(Device)->insertAllocInfo(AllocInfo);
} else {
ContextInfo->insertAllocInfo({AllocInfo->Device}, AllocInfo);
getDeviceInfo(AllocInfo->Device)->insertAllocInfo(AllocInfo);
}

// If quarantine is disabled, USM is freed immediately
Expand Down Expand Up @@ -279,7 +281,7 @@ ur_result_t AsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,
(void)ArgPointer;
}
}
UR_CALL(updateShadowMemory(ContextInfo, DeviceInfo, InternalQueue));
UR_CALL(updateShadowMemory(DeviceInfo, InternalQueue));

UR_CALL(prepareLaunch(ContextInfo, DeviceInfo, InternalQueue, Kernel,
LaunchInfo));
Expand Down Expand Up @@ -423,16 +425,14 @@ AsanInterceptor::enqueueAllocInfo(std::shared_ptr<DeviceInfo> &DeviceInfo,
}

ur_result_t
AsanInterceptor::updateShadowMemory(std::shared_ptr<ContextInfo> &ContextInfo,
std::shared_ptr<DeviceInfo> &DeviceInfo,
AsanInterceptor::updateShadowMemory(std::shared_ptr<DeviceInfo> &DeviceInfo,
ur_queue_handle_t Queue) {
auto &AllocInfos = ContextInfo->AllocInfosMap[DeviceInfo->Handle];
std::scoped_lock<ur_shared_mutex> Guard(AllocInfos.Mutex);
std::scoped_lock<ur_shared_mutex> Guard(DeviceInfo->AllocInfos.Mutex);

for (auto &AI : AllocInfos.List) {
for (auto &AI : DeviceInfo->AllocInfos.List) {
UR_CALL(enqueueAllocInfo(DeviceInfo, Queue, AI));
}
AllocInfos.List.clear();
DeviceInfo->AllocInfos.List.clear();

return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -585,7 +585,7 @@ AsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) {
GetCurrentBacktrace(),
{}});

ContextInfo->insertAllocInfo({Device}, AI);
getDeviceInfo(Device)->insertAllocInfo(AI);
ProgramInfo->AllocInfoForGlobals.emplace(AI);

std::scoped_lock<ur_shared_mutex> Guard(m_AllocationMapMutex);
Expand Down Expand Up @@ -754,7 +754,7 @@ ur_result_t AsanInterceptor::prepareLaunch(
continue;
}
if (auto ValidateResult = ValidateUSMPointer(
ContextInfo->Handle, DeviceInfo->Handle, (uptr)Ptr)) {
Kernel, ContextInfo->Handle, DeviceInfo->Handle, (uptr)Ptr)) {
ReportInvalidKernelArgument(Kernel, ArgIndex, (uptr)Ptr, ValidateResult,
PtrPair.second);
if (ValidateResult.Type != ValidateUSMResult::MAYBE_HOST_POINTER) {
Expand Down Expand Up @@ -801,7 +801,7 @@ ur_result_t AsanInterceptor::prepareLaunch(
if (LaunchInfo.LocalWorkSize.empty()) {
LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim);
auto URes = getContext()->urDdiTable.Kernel.pfnGetSuggestedLocalWorkSize(
Kernel, Queue, LaunchInfo.WorkDim, LaunchInfo.GlobalWorkOffset,
Kernel, Queue, LaunchInfo.WorkDim, LaunchInfo.GlobalWorkOffset.data(),
LaunchInfo.GlobalWorkSize, LaunchInfo.LocalWorkSize.data());
if (URes != UR_RESULT_SUCCESS) {
if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,16 @@ struct DeviceInfo {
std::queue<std::shared_ptr<AllocInfo>> Quarantine;
size_t QuarantineSize = 0;

AllocInfoList AllocInfos;

// Device handles are special and alive in the whole process lifetime,
// so we needn't retain&release here.
explicit DeviceInfo(ur_device_handle_t Device) : Handle(Device) {}

void insertAllocInfo(std::shared_ptr<AllocInfo> &AI) {
std::scoped_lock<ur_shared_mutex> Guard(AllocInfos.Mutex);
AllocInfos.List.emplace_back(AI);
}
};

struct QueueInfo {
Expand Down Expand Up @@ -88,6 +95,8 @@ struct KernelInfo {
bool IsInstrumented = false;
// check shadow bounds
bool IsCheckShadowBounds = false;
// might have indirect access
bool IsIndirectAccess = false;

// lock this mutex if following fields are accessed
ur_shared_mutex Mutex;
Expand Down Expand Up @@ -147,7 +156,6 @@ struct ContextInfo {
std::atomic<int32_t> RefCount = 1;

std::vector<ur_device_handle_t> DeviceList;
std::unordered_map<ur_device_handle_t, AllocInfoList> AllocInfosMap;

ur_shared_mutex InternalQueueMapMutex;
std::unordered_map<ur_device_handle_t, std::optional<ManagedQueue>>
Expand All @@ -169,15 +177,6 @@ struct ContextInfo {

~ContextInfo();

void insertAllocInfo(const std::vector<ur_device_handle_t> &Devices,
std::shared_ptr<AllocInfo> &AI) {
for (auto Device : Devices) {
auto &AllocInfos = AllocInfosMap[Device];
std::scoped_lock<ur_shared_mutex> Guard(AllocInfos.Mutex);
AllocInfos.List.emplace_back(AI);
}
}

ur_usm_pool_handle_t getUSMPool();

ur_queue_handle_t getInternalQueue(ur_device_handle_t);
Expand Down Expand Up @@ -249,7 +248,7 @@ struct LaunchInfo {
ur_context_handle_t Context = nullptr;
ur_device_handle_t Device = nullptr;
const size_t *GlobalWorkSize = nullptr;
const size_t *GlobalWorkOffset = nullptr;
std::vector<size_t> GlobalWorkOffset;
std::vector<size_t> LocalWorkSize;
uint32_t WorkDim = 0;

Expand All @@ -259,12 +258,19 @@ struct LaunchInfo {
const size_t *GlobalWorkSize, const size_t *LocalWorkSize,
const size_t *GlobalWorkOffset, uint32_t WorkDim)
: Context(Context), Device(Device), GlobalWorkSize(GlobalWorkSize),
GlobalWorkOffset(GlobalWorkOffset), WorkDim(WorkDim),
Data(Context, Device) {
WorkDim(WorkDim), Data(Context, Device) {
if (LocalWorkSize) {
this->LocalWorkSize =
std::vector<size_t>(LocalWorkSize, LocalWorkSize + WorkDim);
}
// UR doesn't allow GlobalWorkOffset is null, we need to construct a zero
// value array if user doesn't specify its value.
if (GlobalWorkOffset) {
this->GlobalWorkOffset =
std::vector<size_t>(GlobalWorkOffset, GlobalWorkOffset + WorkDim);
} else {
this->GlobalWorkOffset = std::vector<size_t>(WorkDim, 0);
}
[[maybe_unused]] auto Result =
getContext()->urDdiTable.Context.pfnRetain(Context);
assert(Result == UR_RESULT_SUCCESS);
Expand Down Expand Up @@ -375,8 +381,7 @@ class AsanInterceptor {
ur_shared_mutex KernelLaunchMutex;

private:
ur_result_t updateShadowMemory(std::shared_ptr<ContextInfo> &ContextInfo,
std::shared_ptr<DeviceInfo> &DeviceInfo,
ur_result_t updateShadowMemory(std::shared_ptr<DeviceInfo> &DeviceInfo,
ur_queue_handle_t Queue);

ur_result_t enqueueAllocInfo(std::shared_ptr<DeviceInfo> &DeviceInfo,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ bool IsSameDevice(ur_device_handle_t Device1, ur_device_handle_t Device2) {

} // namespace

ValidateUSMResult ValidateUSMPointer(ur_context_handle_t Context,
ValidateUSMResult ValidateUSMPointer(ur_kernel_handle_t Kernel,
ur_context_handle_t Context,
ur_device_handle_t Device, uptr Ptr) {
assert(Ptr != 0 && "Don't validate nullptr here");

Expand All @@ -53,7 +54,8 @@ ValidateUSMResult ValidateUSMPointer(ur_context_handle_t Context,

auto AllocInfo = AllocInfoItOp.value()->second;

if (AllocInfo->Context != Context) {
auto &KI = getAsanInterceptor()->getOrCreateKernelInfo(Kernel);
if (!KI.IsIndirectAccess && AllocInfo->Context != Context) {
return ValidateUSMResult::fail(ValidateUSMResult::BAD_CONTEXT, AllocInfo);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ struct ValidateUSMResult {
}
};

ValidateUSMResult ValidateUSMPointer(ur_context_handle_t Context,
ValidateUSMResult ValidateUSMPointer(ur_kernel_handle_t Kernel,
ur_context_handle_t Context,
ur_device_handle_t Device, uptr Ptr);

} // namespace asan
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,7 @@ ur_result_t MsanInterceptor::prepareLaunch(
if (LaunchInfo.LocalWorkSize.empty()) {
LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim);
auto URes = getContext()->urDdiTable.Kernel.pfnGetSuggestedLocalWorkSize(
Kernel, Queue, LaunchInfo.WorkDim, LaunchInfo.GlobalWorkOffset,
Kernel, Queue, LaunchInfo.WorkDim, LaunchInfo.GlobalWorkOffset.data(),
LaunchInfo.GlobalWorkSize, LaunchInfo.LocalWorkSize.data());
if (URes != UR_RESULT_SUCCESS) {
if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,20 +225,27 @@ struct USMLaunchInfo {
ur_context_handle_t Context = nullptr;
ur_device_handle_t Device = nullptr;
const size_t *GlobalWorkSize = nullptr;
const size_t *GlobalWorkOffset = nullptr;
std::vector<size_t> GlobalWorkOffset;
std::vector<size_t> LocalWorkSize;
uint32_t WorkDim = 0;

USMLaunchInfo(ur_context_handle_t Context, ur_device_handle_t Device,
const size_t *GlobalWorkSize, const size_t *LocalWorkSize,
const size_t *GlobalWorkOffset, uint32_t WorkDim)
: Data(Context, Device), Context(Context), Device(Device),
GlobalWorkSize(GlobalWorkSize), GlobalWorkOffset(GlobalWorkOffset),
WorkDim(WorkDim) {
GlobalWorkSize(GlobalWorkSize), WorkDim(WorkDim) {
if (LocalWorkSize) {
this->LocalWorkSize =
std::vector<size_t>(LocalWorkSize, LocalWorkSize + WorkDim);
}
// UR doesn't allow GlobalWorkOffset is null, we need to construct a zero
// value array if user doesn't specify its value.
if (GlobalWorkOffset) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

global work offset can be null, see:

/// [in][optional] pointer to an array of workDim unsigned values that
/// specify the offset used to calculate the global ID of a work-item
const size_t *pGlobalWorkOffset,

If something is segfaulting when null is passed to launch kernel, please file a bug or let me know.

this->GlobalWorkOffset =
std::vector<size_t>(GlobalWorkOffset, GlobalWorkOffset + WorkDim);
} else {
this->GlobalWorkOffset = std::vector<size_t>(WorkDim, 0);
}
}
~USMLaunchInfo();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ ur_result_t TsanInterceptor::prepareLaunch(std::shared_ptr<ContextInfo> &,
if (LaunchInfo.LocalWorkSize.empty()) {
LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim);
auto URes = getContext()->urDdiTable.Kernel.pfnGetSuggestedLocalWorkSize(
Kernel, Queue, LaunchInfo.WorkDim, LaunchInfo.GlobalWorkOffset,
Kernel, Queue, LaunchInfo.WorkDim, LaunchInfo.GlobalWorkOffset.data(),
LaunchInfo.GlobalWorkSize, LaunchInfo.LocalWorkSize.data());
if (URes != UR_RESULT_SUCCESS) {
if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ struct LaunchInfo {
ur_context_handle_t Context = nullptr;
ur_device_handle_t Device = nullptr;
const size_t *GlobalWorkSize = nullptr;
const size_t *GlobalWorkOffset = nullptr;
std::vector<size_t> GlobalWorkOffset;
std::vector<size_t> LocalWorkSize;
uint32_t WorkDim = 0;
TsanRuntimeDataWrapper Data;
Expand All @@ -190,8 +190,7 @@ struct LaunchInfo {
const size_t *GlobalWorkSize, const size_t *LocalWorkSize,
const size_t *GlobalWorkOffset, uint32_t WorkDim)
: Context(Context), Device(Device), GlobalWorkSize(GlobalWorkSize),
GlobalWorkOffset(GlobalWorkOffset), WorkDim(WorkDim),
Data(Context, Device) {
WorkDim(WorkDim), Data(Context, Device) {
[[maybe_unused]] auto Result =
getContext()->urDdiTable.Context.pfnRetain(Context);
assert(Result == UR_RESULT_SUCCESS);
Expand All @@ -201,6 +200,14 @@ struct LaunchInfo {
this->LocalWorkSize =
std::vector<size_t>(LocalWorkSize, LocalWorkSize + WorkDim);
}
// UR doesn't allow GlobalWorkOffset is null, we need to construct a zero
// value array if user doesn't specify its value.
if (GlobalWorkOffset) {
this->GlobalWorkOffset =
std::vector<size_t>(GlobalWorkOffset, GlobalWorkOffset + WorkDim);
} else {
this->GlobalWorkOffset = std::vector<size_t>(WorkDim, 0);
}
}

~LaunchInfo() {
Expand Down
Loading