Skip to content

Commit e89cea2

Browse files
committed
WIP
1 parent da91552 commit e89cea2

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed

projects/hipblaslt/tensilelite/client/src/DataInitialization.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2070,6 +2070,12 @@ namespace TensileLite
20702070
auto swizzleKey
20712071
= std::make_tuple(toBitWidth(desc.dataType()), unrolledSize, tiledSize);
20722072

2073+
// ::Tensor::Manipulation::TensorDesc paddedShapDesc(paddedShape);
2074+
// std::cout << "original un-padded elems:" << desc.totalAllocatedElements() << std::endl;
2075+
// std::cout << "auto padded elems for swizzle:" << paddedShapDesc.flattenSize() << std::endl;
2076+
// std::cout << "default copy elems:" << p.maxElements << std::endl;
2077+
2078+
// Cache-hit
20732079
if(g_swizzleCache.count(swizzleKey))
20742080
{
20752081
if(swizzleKey != g_swizzleCache.back())
@@ -2086,8 +2092,11 @@ namespace TensileLite
20862092
ptr = p.gpuInput.valid.get();
20872093
}
20882094
}
2095+
// No Cache-hit, do pre-shuffle...
20892096
else
20902097
{
2098+
// TODO: no need to do any of the memory operations (copy, shuffle, etc.)
2099+
// but we still need to make sure we alloc/pass the input sizes (auto-padded = paddedShape)
20912100
auto tmpTensor = Tensor({tiledSize, unrolledSize}, desc.elementBytes());
20922101

20932102
memcpy(

0 commit comments

Comments
 (0)