File tree Expand file tree Collapse file tree 1 file changed +9
-0
lines changed
projects/hipblaslt/tensilelite/client/src Expand file tree Collapse file tree 1 file changed +9
-0
lines changed Original file line number Diff line number Diff line change @@ -2070,6 +2070,12 @@ namespace TensileLite
2070
2070
auto swizzleKey
2071
2071
= std::make_tuple (toBitWidth (desc.dataType ()), unrolledSize, tiledSize);
2072
2072
2073
+ // ::Tensor::Manipulation::TensorDesc paddedShapDesc(paddedShape);
2074
+ // std::cout << "original un-padded elems:" << desc.totalAllocatedElements() << std::endl;
2075
+ // std::cout << "auto padded elems for swizzle:" << paddedShapDesc.flattenSize() << std::endl;
2076
+ // std::cout << "default copy elems:" << p.maxElements << std::endl;
2077
+
2078
+ // Cache-hit
2073
2079
if (g_swizzleCache.count (swizzleKey))
2074
2080
{
2075
2081
if (swizzleKey != g_swizzleCache.back ())
@@ -2086,8 +2092,11 @@ namespace TensileLite
2086
2092
ptr = p.gpuInput .valid .get ();
2087
2093
}
2088
2094
}
2095
+ // Cache miss: do the pre-shuffle.
2089
2096
else
2090
2097
{
2098
+ // TODO: none of the memory operations (copy, shuffle, etc.) need to be done here,
2099
+ // but we still need to make sure we allocate/pass the input sizes (auto-padded = paddedShape).
2091
2100
auto tmpTensor = Tensor ({tiledSize, unrolledSize}, desc.elementBytes ());
2092
2101
2093
2102
memcpy (
You can’t perform that action at this time.
0 commit comments