Skip to content

Commit 85e485f

Browse files
committed
align readback copies between backends
1 parent 13ca4be commit 85e485f

6 files changed

Lines changed: 40 additions & 45 deletions

File tree

include/Support/Pipeline.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,19 @@ struct CPUBuffer {
157157
return Stride;
158158
return getSingleElementSize() * Channels;
159159
}
160+
161+
// The natural per-row byte size of this buffer when interpreted as a 2D
162+
// image (no padding): OutputProps.Width elements of getElementSize() bytes.
163+
uint32_t getImageRowBytes() const {
164+
// NOTE(review): the product is implicitly converted to the uint32_t return
// type; confirm the operand types cannot overflow 32 bits for supported
// image sizes.
return OutputProps.Width * getElementSize();
165+
}
166+
167+
// Copy a 2D image readback from a GPU mapping into Data[0], keeping the
168+
// source row order (no vertical flip). The host buffer is tightly packed
169+
// with top-left origin. SrcRowPitch is the source's per-row stride in bytes
170+
// and must be at least `getImageRowBytes()`; pass `getImageRowBytes()` when
171+
// the source is tightly packed (Vulkan / Metal), or the GPU's reported pitch
// when the source has row padding (e.g., D3D12's 256-byte aligned rows).
172+
void copyFromTexture(const void *Src, size_t SrcRowPitch);
160173
};
161174

162175
struct Result {

lib/API/DX/Device.cpp

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1581,18 +1581,14 @@ class DXDevice : public offloadtest::Device {
15811581
if (!IS.RTReadback)
15821582
return llvm::Error::success();
15831583

1584-
// Map readback and copy into host buffer, accounting for row pitch and
1585-
// flipping vertical orientation. DirectX render target origin is top-left,
1586-
// while our image writer expects bottom-left.
1587-
const CPUBuffer &B = *P.Bindings.RTargetBufferPtr;
15881584
void *Mapped = nullptr;
15891585
auto &Readback = llvm::cast<DXBuffer>(*IS.RTReadback);
15901586
if (auto Err = HR::toError(Readback.Buffer->Map(0, nullptr, &Mapped),
15911587
"Failed to map render target readback"))
15921588
return Err;
15931589

1594-
// Query the copy footprint to get the actual padded row pitch used by the
1595-
// copy operation.
1590+
// Query the copy footprint to get the actual padded row pitch used by
1591+
// the copy operation (D3D12 requires 256-byte aligned rows).
15961592
auto &RT = llvm::cast<DXTexture>(*IS.RT);
15971593
const D3D12_RESOURCE_DESC RTDesc = RT.Resource->GetDesc();
15981594
D3D12_PLACED_SUBRESOURCE_FOOTPRINT Placed = {};
@@ -1602,23 +1598,8 @@ class DXDevice : public offloadtest::Device {
16021598
Device->GetCopyableFootprints(&RTDesc, 0u, 1u, 0u, &Placed, &NumRows,
16031599
&RowSizeInBytes, &TotalBytes);
16041600

1605-
const uint32_t RowPitch = Placed.Footprint.RowPitch;
1606-
const uint32_t RowBytes =
1607-
static_cast<uint32_t>(B.getElementSize() * B.OutputProps.Width);
1608-
const uint32_t Height = static_cast<uint32_t>(B.OutputProps.Height);
1609-
1610-
const uint8_t *SrcBase = reinterpret_cast<uint8_t *>(Mapped);
1611-
uint8_t *DstBase =
1612-
reinterpret_cast<uint8_t *>(P.Bindings.RTargetBufferPtr->Data[0].get());
1613-
1614-
// Copy rows in reverse order.
1615-
for (uint32_t Y = 0; Y < Height; ++Y) {
1616-
const uint8_t *SrcRow = SrcBase + static_cast<size_t>(Y) * RowPitch;
1617-
uint8_t *DstRow =
1618-
DstBase + static_cast<size_t>(Height - 1 - Y) * RowBytes;
1619-
memcpy(DstRow, SrcRow, RowBytes);
1620-
}
1621-
1601+
P.Bindings.RTargetBufferPtr->copyFromTexture(Mapped,
1602+
Placed.Footprint.RowPitch);
16221603
Readback.Buffer->Unmap(0, nullptr);
16231604
return llvm::Error::success();
16241605
}

lib/API/MTL/MTLDevice.cpp

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -705,23 +705,9 @@ class MTLDevice : public offloadtest::Device {
705705
}
706706
}
707707
if (P.isGraphics()) {
708-
CPUBuffer *RTarget = P.Bindings.RTargetBufferPtr;
709-
const uint64_t Width = RTarget->OutputProps.Width;
710-
const uint64_t Height = RTarget->OutputProps.Height;
711-
const size_t ElemSize = RTarget->getElementSize();
712-
const size_t RowBytes = Width * ElemSize;
713-
714-
// Read from the readback buffer. The blit copied the texture data in
715-
// GPU layout order, so we flip rows here to produce an upright image.
716708
auto &FBReadback = llvm::cast<MTLBuffer>(*IS.FrameBufferReadback);
717-
const unsigned char *Src =
718-
reinterpret_cast<const unsigned char *>(FBReadback.Buf->contents());
719-
unsigned char *Buf =
720-
reinterpret_cast<unsigned char *>(RTarget->Data[0].get());
721-
for (uint64_t R = 0; R < Height; ++R) {
722-
const uint64_t SrcRow = (Height - 1) - R;
723-
memcpy(Buf + R * RowBytes, Src + SrcRow * RowBytes, RowBytes);
724-
}
709+
auto *RT = P.Bindings.RTargetBufferPtr;
710+
RT->copyFromTexture(FBReadback.Buf->contents(), RT->getImageRowBytes());
725711
}
726712
return llvm::Error::success();
727713
}

lib/API/VK/Device.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2448,8 +2448,8 @@ class VulkanDevice : public offloadtest::Device {
24482448
vkMapMemory(Device, Readback.Memory, 0, VK_WHOLE_SIZE, 0, &Mapped);
24492449
vkInvalidateMappedMemoryRanges(Device, 1, &Range);
24502450

2451-
const CPUBuffer &B = *P.Bindings.RTargetBufferPtr;
2452-
memcpy(B.Data[0].get(), Mapped, B.size());
2451+
auto *RT = P.Bindings.RTargetBufferPtr;
2452+
RT->copyFromTexture(Mapped, RT->getImageRowBytes());
24532453
vkUnmapMemory(Device, Readback.Memory);
24542454
}
24552455
return llvm::Error::success();

lib/Image/Image.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,10 @@ static llvm::Error writePNGImpl(ImageRef Img, llvm::StringRef OutputPath) {
159159
png_bytepp Rows =
160160
(png_bytepp)png_malloc(PNG, Img.getHeight() * sizeof(png_bytep));
161161
const uint64_t RowSize = Img.getWidth() * Img.getChannels() * Img.getDepth();
162-
// Step one row back from the end
163-
const uint8_t *Row = reinterpret_cast<const uint8_t *>(Img.data()) +
164-
(RowSize * Img.getHeight()) - RowSize;
165-
for (uint32_t I = 0; I < Img.getHeight(); ++I, Row -= RowSize)
162+
// The host buffer has top-left origin (matching every backend's readback
163+
// and the standard PNG row order), so we walk forward.
164+
const uint8_t *Row = reinterpret_cast<const uint8_t *>(Img.data());
165+
for (uint32_t I = 0; I < Img.getHeight(); ++I, Row += RowSize)
166166
Rows[I] = const_cast<png_bytep>(Row);
167167

168168
png_write_image(PNG, Rows);

lib/Support/Pipeline.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,21 @@ void PushConstantBlock::getContent(
2626
memcpy(Output.data() + V.OffsetInBytes, V.Data.data(), V.Data.size());
2727
}
2828

29+
// Copies OutputProps.Height rows of getImageRowBytes() bytes each from Src
// into Data[0]. Destination rows are tightly packed and written in the same
// order as the source rows (row Y -> row Y, no vertical flip); consecutive
// source rows are SrcRowPitch bytes apart.
//
// NOTE(review): nothing here checks that Data[0] holds Height * RowBytes
// bytes or that Src is non-null and large enough — presumably guaranteed by
// the callers; confirm.
void CPUBuffer::copyFromTexture(const void *Src, size_t SrcRowPitch) {
30+
const uint32_t Height = OutputProps.Height;
31+
const uint32_t RowBytes = getImageRowBytes();
32+
// Checked via assert only: a source pitch smaller than one image row means
// the caller passed the wrong stride.
assert(SrcRowPitch >= RowBytes && "Source row pitch is smaller than image");
33+
uint8_t *Dst = reinterpret_cast<uint8_t *>(Data[0].get());
34+
// Fast path: the source has no per-row padding, so the whole image is one
// contiguous range and a single memcpy suffices.
if (SrcRowPitch == RowBytes) {
35+
memcpy(Dst, Src, static_cast<size_t>(Height) * RowBytes);
36+
return;
37+
}
38+
// Padded source: copy row by row, taking only the first RowBytes of each
// SrcRowPitch-sized source row so the padding is dropped.
const uint8_t *S = reinterpret_cast<const uint8_t *>(Src);
39+
for (uint32_t Y = 0; Y < Height; ++Y)
40+
memcpy(Dst + static_cast<size_t>(Y) * RowBytes,
41+
S + static_cast<size_t>(Y) * SrcRowPitch, RowBytes);
42+
}
43+
2944
uint32_t PushConstantBlock::size() const {
3045
uint32_t Size = 0;
3146
for (const PushConstantValue &V : Values)

0 commit comments

Comments
 (0)