Skip to content

Commit

Permalink
Fix P2P support for implicit scaling
Browse files Browse the repository at this point in the history
When using implicit scaling, two dma-buf handles, one per tile, are
needed to support DMA access from a peer device.

Related-To: LOCI-3122

Signed-off-by: Jaime Arteaga <[email protected]>
  • Loading branch information
Jaime Arteaga authored and Compute-Runtime-Automation committed Jun 1, 2022
1 parent 09c68a6 commit 325db6a
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 11 deletions.
16 changes: 14 additions & 2 deletions level_zero/core/source/driver/driver_handle_imp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -554,10 +554,22 @@ NEO::GraphicsAllocation *DriverHandleImp::getPeerAllocation(Device *device,
} else {
alloc = allocData->gpuAllocations.getDefaultGraphicsAllocation();
UNRECOVERABLE_IF(alloc == nullptr);
uint64_t handle = alloc->peekInternalHandle(this->getMemoryManager());
ze_ipc_memory_flags_t flags = {};

peerPtr = this->importFdHandle(device, flags, handle, &alloc);
if (!deviceImp->isSubdevice && deviceImp->isImplicitScalingCapable()) {
uint32_t numHandles = alloc->getNumHandles();
UNRECOVERABLE_IF(numHandles == 0);
std::vector<NEO::osHandle> handles;
for (uint32_t i = 0; i < numHandles; i++) {
int handle = static_cast<int>(alloc->peekInternalHandle(this->getMemoryManager(), i));
handles.push_back(handle);
}
peerPtr = this->importFdHandles(device, flags, handles, &alloc);
} else {
uint64_t handle = alloc->peekInternalHandle(this->getMemoryManager());
peerPtr = this->importFdHandle(device, flags, handle, &alloc);
}

if (peerPtr == nullptr) {
return nullptr;
}
Expand Down
32 changes: 29 additions & 3 deletions level_zero/core/test/unit_tests/fixtures/memory_ipc_fixture.h
Original file line number Diff line number Diff line change
Expand Up @@ -443,14 +443,18 @@ class IpcImplicitScalingMockGraphicsAllocation : public NEO::MemoryAllocation {
// Mock override: always reports two handles for this allocation.
// NOTE(review): presumably one handle per tile on the implicit-scaling path — confirm.
uint32_t getNumHandles() override {
return 2u;
}

// Mock override: reports the allocation as not resident, whatever context id is passed.
bool isResident(uint32_t contextId) const override {
return false;
}
};

class MemoryManagerOpenIpcMock : public MemoryManagerIpcMock {
public:
// Forwards the execution environment to the MemoryManagerIpcMock base; the constructor body is empty.
MemoryManagerOpenIpcMock(NEO::ExecutionEnvironment &executionEnvironment) : MemoryManagerIpcMock(executionEnvironment) {}

NEO::GraphicsAllocation *allocateGraphicsMemoryWithProperties(const AllocationProperties &properties) override {
auto alloc = new IpcImplicitScalingMockGraphicsAllocation(0,
auto alloc = new IpcImplicitScalingMockGraphicsAllocation(properties.rootDeviceIndex,
NEO::AllocationType::BUFFER,
reinterpret_cast<void *>(sharedHandleAddress++),
0x1000,
Expand All @@ -465,7 +469,21 @@ class MemoryManagerOpenIpcMock : public MemoryManagerIpcMock {
if (failOnCreateGraphicsAllocationFromSharedHandle) {
return nullptr;
}
auto alloc = new IpcImplicitScalingMockGraphicsAllocation(0,
auto alloc = new IpcImplicitScalingMockGraphicsAllocation(properties.rootDeviceIndex,
NEO::AllocationType::BUFFER,
reinterpret_cast<void *>(sharedHandleAddress++),
0x1000,
0,
sizeof(uint32_t),
MemoryPool::System4KBPages);
alloc->setGpuBaseAddress(0xabcd);
return alloc;
}
NEO::GraphicsAllocation *createGraphicsAllocationFromMultipleSharedHandles(std::vector<osHandle> handles, AllocationProperties &properties, bool requireSpecificBitness, bool isHostIpcAllocation) override {
if (failOnCreateGraphicsAllocationFromSharedHandle) {
return nullptr;
}
auto alloc = new IpcImplicitScalingMockGraphicsAllocation(properties.rootDeviceIndex,
NEO::AllocationType::BUFFER,
reinterpret_cast<void *>(sharedHandleAddress++),
0x1000,
Expand All @@ -476,7 +494,7 @@ class MemoryManagerOpenIpcMock : public MemoryManagerIpcMock {
return alloc;
}
NEO::GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex, AllocationType allocType) override {
auto alloc = new NEO::MockGraphicsAllocation(0,
auto alloc = new NEO::MockGraphicsAllocation(0u,
NEO::AllocationType::BUFFER,
reinterpret_cast<void *>(sharedHandleAddress++),
0x1000,
Expand All @@ -487,6 +505,14 @@ class MemoryManagerOpenIpcMock : public MemoryManagerIpcMock {
return alloc;
};

// Mock override: releases the allocation by deleting it directly.
void freeGraphicsMemory(GraphicsAllocation *gfxAllocation) override {
delete gfxAllocation;
}

// Mock override: releases the allocation by deleting it directly.
// The isImportedAllocation flag is not consulted.
void freeGraphicsMemory(GraphicsAllocation *gfxAllocation, bool isImportedAllocation) override {
delete gfxAllocation;
}

uint64_t sharedHandleAddress = 0x1234;

bool failOnCreateGraphicsAllocationFromSharedHandle = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1851,7 +1851,7 @@ TEST_F(MultipleDevicesTest, givenTheSameDeviceThenCanAccessPeerReturnsTrue) {
EXPECT_TRUE(canAccess);
}

TEST_F(MultipleDevicesTest, givenTwoRootDevicesFromSameFamilyThenCanAccessPeerSuccessfullyCompletes) {
TEST_F(MultipleDevicesDisabledImplicitScalingTest, givenTwoRootDevicesFromSameFamilyThenCanAccessPeerSuccessfullyCompletes) {
L0::Device *device0 = driverHandle->devices[0];
L0::Device *device1 = driverHandle->devices[1];

Expand All @@ -1864,7 +1864,7 @@ TEST_F(MultipleDevicesTest, givenTwoRootDevicesFromSameFamilyThenCanAccessPeerSu
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
}

HWTEST_F(MultipleDevicesTest, givenTwoRootDevicesFromSameFamilyAndDeviceLostSynchronizeThenCanAccessPeerReturnsDeviceLost) {
HWTEST_F(MultipleDevicesDisabledImplicitScalingTest, givenTwoRootDevicesFromSameFamilyAndDeviceLostSynchronizeThenCanAccessPeerReturnsDeviceLost) {
constexpr size_t devicesCount{2};
ASSERT_LE(devicesCount, driverHandle->devices.size());

Expand Down Expand Up @@ -2095,7 +2095,7 @@ TEST_F(MultipleDevicesP2PDevice0Access1Atomic1Device1Access1Atomic1Test, WhenCal
EXPECT_TRUE(p2pProperties.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ATOMICS);
}

TEST_F(MultipleDevicesTest, givenTwoRootDevicesFromSameFamilyThenCanAccessPeerReturnsTrue) {
TEST_F(MultipleDevicesDisabledImplicitScalingTest, givenTwoRootDevicesFromSameFamilyThenCanAccessPeerReturnsTrue) {
L0::Device *device0 = driverHandle->devices[0];
L0::Device *device1 = driverHandle->devices[1];

Expand All @@ -2109,7 +2109,7 @@ TEST_F(MultipleDevicesTest, givenTwoRootDevicesFromSameFamilyThenCanAccessPeerRe
EXPECT_TRUE(canAccess);
}

TEST_F(MultipleDevicesTest, givenCanAccessPeerCalledTwiceThenCanAccessPeerReturnsSameValueEachTime) {
TEST_F(MultipleDevicesDisabledImplicitScalingTest, givenCanAccessPeerCalledTwiceThenCanAccessPeerReturnsSameValueEachTime) {
L0::Device *device0 = driverHandle->devices[0];
L0::Device *device1 = driverHandle->devices[1];

Expand Down
46 changes: 45 additions & 1 deletion level_zero/core/test/unit_tests/sources/memory/test_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1990,6 +1990,7 @@ struct MultipleDevicePeerAllocationTest : public ::testing::Test {
}

void SetUp() override {
DebugManagerStateRestore restorer;
NEO::MockCompilerEnableGuard mock(true);
DebugManager.flags.CreateMultipleSubDevices.set(numSubDevices);
VariableBackup<bool> mockDeviceFlagBackup(&MockDevice::createSingleDevice, false);
Expand Down Expand Up @@ -2017,11 +2018,20 @@ struct MultipleDevicePeerAllocationTest : public ::testing::Test {
for (auto i = 0u; i < numRootDevices; i++) {
auto device = driverHandle->devices[i];
context->getDevices().insert(std::make_pair(device->toHandle(), device));
L0::DeviceImp *deviceImp = static_cast<L0::DeviceImp *>(device);
for (auto j = 0u; j < deviceImp->subDevices.size(); j++) {
auto subDevice = deviceImp->subDevices[j];
context->getDevices().insert(std::make_pair(subDevice->toHandle(), subDevice));
}
auto neoDevice = device->getNEODevice();
context->rootDeviceIndices.push_back(neoDevice->getRootDeviceIndex());
context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()});
}
context->rootDeviceIndices.remove_duplicates();

currSvmAllocsManager = new NEO::SVMAllocsManager(currMemoryManager, driverHandle->devices[0]->isImplicitScalingCapable());
prevSvmAllocsManager = driverHandle->svmAllocsManager;
driverHandle->svmAllocsManager = currSvmAllocsManager;
}

void createKernel() {
Expand All @@ -2034,13 +2044,16 @@ struct MultipleDevicePeerAllocationTest : public ::testing::Test {
}

// Puts the previous SVM allocs manager and memory manager back on the driver handle
// and deletes the ones held in currSvmAllocsManager / currMemoryManager.
void TearDown() override {
driverHandle->svmAllocsManager = prevSvmAllocsManager;
// NOTE(review): the SVM allocs manager is deleted before the memory manager; it appears
// to be constructed on top of currMemoryManager, so keep this order unless confirmed otherwise.
delete currSvmAllocsManager;
driverHandle->setMemoryManager(prevMemoryManager);
delete currMemoryManager;
}

DebugManagerStateRestore restorer;
NEO::MemoryManager *prevMemoryManager = nullptr;
NEO::MemoryManager *currMemoryManager = nullptr;
NEO::SVMAllocsManager *prevSvmAllocsManager = nullptr;
NEO::SVMAllocsManager *currSvmAllocsManager = nullptr;
std::unique_ptr<DriverHandleImp> driverHandle;

std::unique_ptr<UltDeviceFactory> deviceFactory;
Expand Down Expand Up @@ -2312,6 +2325,37 @@ HWTEST2_F(MultipleDevicePeerAllocationTest,
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
}

// Allocates device memory on root device 0, then checks that a command list initialized
// on the first sub-device of root device 1 can append a blit fill targeting that pointer.
HWTEST2_F(MultipleDevicePeerAllocationTest,
          givenSubDeviceAllocationPassedToAppendBlitFillUsingDevice1ThenSuccessIsReturned,
          IsAtLeastSkl) {
    DebugManagerStateRestore stateRestore;

    // The memory belongs to root device 0; the command list runs on a sub-device of root device 1.
    L0::Device *ownerDevice = driverHandle->devices[0];
    L0::Device *peerRoot = driverHandle->devices[1];
    auto *peerRootImp = static_cast<L0::DeviceImp *>(peerRoot);
    L0::Device *peerSubDevice = peerRootImp->subDevices[0];

    const size_t allocSize = 1024;
    const size_t allocAlignment = 1u;
    ze_device_mem_alloc_desc_t allocDesc = {};
    void *devicePtr = nullptr;
    const ze_result_t allocResult = context->allocDeviceMem(ownerDevice->toHandle(),
                                                            &allocDesc,
                                                            allocSize, allocAlignment, &devicePtr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, allocResult);
    EXPECT_NE(nullptr, devicePtr);

    auto cmdList = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
    cmdList->initialize(peerSubDevice, NEO::EngineGroupType::RenderCompute, 0u);

    uint32_t fillPattern = 1;
    const ze_result_t fillResult = cmdList->appendBlitFill(devicePtr, &fillPattern, sizeof(fillPattern), allocSize, nullptr, 0, nullptr);
    EXPECT_EQ(fillResult, ZE_RESULT_SUCCESS);

    const ze_result_t freeResult = context->freeMem(devicePtr);
    ASSERT_EQ(freeResult, ZE_RESULT_SUCCESS);
}

HWTEST2_F(MultipleDevicePeerAllocationTest,
givenDeviceAllocationPassedToAppendBlitFillUsingDevice0ThenSuccessIsReturned,
IsAtLeastSkl) {
Expand Down
2 changes: 1 addition & 1 deletion shared/source/memory_manager/graphics_allocation.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
// Returns the inspection id stored in usageInfos for the given context id.
uint32_t getInspectionId(uint32_t contextId) const { return usageInfos[contextId].inspectionId; }
// Stores newInspectionId in usageInfos for the given context id.
void setInspectionId(uint32_t newInspectionId, uint32_t contextId) { usageInfos[contextId].inspectionId = newInspectionId; }

bool isResident(uint32_t contextId) const { return GraphicsAllocation::objectNotResident != getResidencyTaskCount(contextId); }
// True when the residency task count for the given context id differs from objectNotResident.
// NOTE(review): MOCKABLE_VIRTUAL presumably makes this overridable in test builds — confirm against the macro definition.
MOCKABLE_VIRTUAL bool isResident(uint32_t contextId) const { return GraphicsAllocation::objectNotResident != getResidencyTaskCount(contextId); }
// True when the residency task count for the given context id equals objectAlwaysResident.
bool isAlwaysResident(uint32_t contextId) const { return GraphicsAllocation::objectAlwaysResident == getResidencyTaskCount(contextId); }
void updateResidencyTaskCount(uint32_t newTaskCount, uint32_t contextId) {
if (usageInfos[contextId].residencyTaskCount != GraphicsAllocation::objectAlwaysResident || newTaskCount == GraphicsAllocation::objectNotResident) {
Expand Down

0 comments on commit 325db6a

Please sign in to comment.