diff --git a/docs/2025.html b/docs/2025.html
index 59179cf763..9d5044e3e3 100644
--- a/docs/2025.html
+++ b/docs/2025.html
@@ -46,6 +46,7 @@
New features
AMX-BF16 kernel DepthwiseConvolution_k5p2d1s1w4 for class SynetMergedConvolution16b.
AMX-BF16 kernel DepthwiseConvolution_k3p1d1s1w8 for class SynetMergedConvolution16b.
AMX-BF16 kernel DepthwiseConvolution_k3p1d1s1w6 for class SynetMergedConvolution16b.
+ Base implementation of class ResizerBf16Bilinear.
Improving
diff --git a/src/Simd/SimdBaseResizer.cpp b/src/Simd/SimdBaseResizer.cpp
index 231706e43b..c094ca6847 100644
--- a/src/Simd/SimdBaseResizer.cpp
+++ b/src/Simd/SimdBaseResizer.cpp
@@ -39,6 +39,8 @@ namespace Simd
return new ResizerShortBilinear(param);
else if (param.IsFloatBilinear())
return new ResizerFloatBilinear(param);
+ else if (param.IsBf16Bilinear())
+ return new ResizerBf16Bilinear(param);
else if (param.IsByteBicubic())
return new ResizerByteBicubic(param);
else if (param.IsByteArea2x2())
diff --git a/src/Simd/SimdBaseResizerBilinear.cpp b/src/Simd/SimdBaseResizerBilinear.cpp
index eaf9219fc3..412e4811b0 100644
--- a/src/Simd/SimdBaseResizerBilinear.cpp
+++ b/src/Simd/SimdBaseResizerBilinear.cpp
@@ -24,6 +24,7 @@
#include "Simd/SimdMemory.h"
#include "Simd/SimdResizer.h"
#include "Simd/SimdCopy.h"
+#include "Simd/SimdBFloat16.h"
namespace Simd
{
@@ -128,7 +129,7 @@ namespace Simd
}
}
- //---------------------------------------------------------------------
+ //-------------------------------------------------------------------------------------------------
ResizerShortBilinear::ResizerShortBilinear(const ResParam& param)
: Resizer(param)
@@ -247,25 +248,11 @@ namespace Simd
}
}
- //---------------------------------------------------------------------
+ //-------------------------------------------------------------------------------------------------
- ResizerFloatBilinear::ResizerFloatBilinear(const ResParam & param)
- : Resizer(param)
+ static void EstimateIndexAlpha(const ResParam& param, size_t srcSize, size_t dstSize, size_t channels, int32_t* indices, float* alphas)
{
- _ay.Resize(_param.dstH, false, _param.align);
- _iy.Resize(_param.dstH, false, _param.align);
- EstimateIndexAlpha(_param.srcH, _param.dstH, 1, _iy.data, _ay.data);
- size_t rs = _param.dstW * _param.channels;
- _ax.Resize(rs, false, _param.align);
- _ix.Resize(rs, false, _param.align);
- EstimateIndexAlpha(_param.srcW, _param.dstW, _param.channels, _ix.data, _ax.data);
- _bx[0].Resize(rs, false, _param.align);
- _bx[1].Resize(rs, false, _param.align);
- }
-
- void ResizerFloatBilinear::EstimateIndexAlpha(size_t srcSize, size_t dstSize, size_t channels, int32_t * indices, float * alphas)
- {
- if (_param.method == SimdResizeMethodBilinear)
+ if (param.method == SimdResizeMethodBilinear)
{
float scale = (float)srcSize / dstSize;
for (size_t i = 0; i < dstSize; ++i)
@@ -290,8 +277,8 @@ namespace Simd
alphas[offset] = alpha;
}
}
- }
- else if (_param.method == SimdResizeMethodBilinearCaffe)
+ }
+ else if (param.method == SimdResizeMethodBilinearCaffe)
{
float scale = dstSize > 1 ? float(srcSize - 1) / float(dstSize - 1) : 0.0f;
for (size_t i = 0; i < dstSize; ++i)
@@ -312,7 +299,7 @@ namespace Simd
}
}
}
- else if (_param.method == SimdResizeMethodBilinearPytorch)
+ else if (param.method == SimdResizeMethodBilinearPytorch)
{
float scale = (float)srcSize / dstSize;
for (size_t i = 0; i < dstSize; ++i)
@@ -342,6 +329,22 @@ namespace Simd
assert(0);
}
+ //-------------------------------------------------------------------------------------------------
+
+ ResizerFloatBilinear::ResizerFloatBilinear(const ResParam & param)
+ : Resizer(param)
+ {
+ _ay.Resize(_param.dstH, false, _param.align);
+ _iy.Resize(_param.dstH, false, _param.align);
+ EstimateIndexAlpha(_param, _param.srcH, _param.dstH, 1, _iy.data, _ay.data);
+ size_t rs = _param.dstW * _param.channels;
+ _ax.Resize(rs, false, _param.align);
+ _ix.Resize(rs, false, _param.align);
+ EstimateIndexAlpha(_param, _param.srcW, _param.dstW, _param.channels, _ix.data, _ax.data);
+ _bx[0].Resize(rs, false, _param.align);
+ _bx[1].Resize(rs, false, _param.align);
+ }
+
void ResizerFloatBilinear::Run(const uint8_t * src, size_t srcStride, uint8_t * dst, size_t dstStride)
{
Run((const float*)src, srcStride / sizeof(float), (float*)dst, dstStride / sizeof(float));
@@ -386,6 +389,67 @@ namespace Simd
dst[dx] = pbx[0][dx]*fy0 + pbx[1][dx]*fy1;
}
}
+
+ //-------------------------------------------------------------------------------------------------
+
+ ResizerBf16Bilinear::ResizerBf16Bilinear(const ResParam& param)
+ : Resizer(param)
+ {
+ _ay.Resize(_param.dstH, false, _param.align);
+ _iy.Resize(_param.dstH, false, _param.align);
+ EstimateIndexAlpha(_param, _param.srcH, _param.dstH, 1, _iy.data, _ay.data);
+ size_t rs = _param.dstW * _param.channels;
+ _ax.Resize(rs, false, _param.align);
+ _ix.Resize(rs, false, _param.align);
+ EstimateIndexAlpha(_param, _param.srcW, _param.dstW, _param.channels, _ix.data, _ax.data);
+ _bx[0].Resize(rs, false, _param.align);
+ _bx[1].Resize(rs, false, _param.align);
+ }
+
+ void ResizerBf16Bilinear::Run(const uint8_t* src, size_t srcStride, uint8_t* dst, size_t dstStride)
+ {
+ Run((const uint16_t*)src, srcStride / sizeof(uint16_t), (uint16_t*)dst, dstStride / sizeof(uint16_t));
+ }
+
+ void ResizerBf16Bilinear::Run(const uint16_t* src, size_t srcStride, uint16_t* dst, size_t dstStride)
+ {
+ size_t cn = _param.channels;
+ size_t rs = _param.dstW * cn;
+ float* pbx[2] = { _bx[0].data, _bx[1].data };
+ int32_t prev = -2;
+ for (size_t dy = 0; dy < _param.dstH; dy++, dst += dstStride)
+ {
+ float fy1 = _ay[dy];
+ float fy0 = 1.0f - fy1;
+ int32_t sy = _iy[dy];
+ int32_t k = 0;
+
+ if (sy == prev)
+ k = 2;
+ else if (sy == prev + 1)
+ {
+ Swap(pbx[0], pbx[1]);
+ k = 1;
+ }
+
+ prev = sy;
+
+ for (; k < 2; k++)
+ {
+ float* pb = pbx[k];
+ const uint16_t* ps = src + (sy + k) * srcStride;
+ for (size_t dx = 0; dx < rs; dx++)
+ {
+ int32_t sx = _ix[dx];
+ float fx = _ax[dx];
+ pb[dx] = BFloat16ToFloat32(ps[sx]) * (1.0f - fx) + BFloat16ToFloat32(ps[sx + cn]) * fx;
+ }
+ }
+
+ for (size_t dx = 0; dx < rs; dx++)
+ dst[dx] = Float32ToBFloat16(pbx[0][dx] * fy0 + pbx[1][dx] * fy1);
+ }
+ }
}
}
diff --git a/src/Simd/SimdResizer.h b/src/Simd/SimdResizer.h
index 84bf6eac64..5307f53dd5 100644
--- a/src/Simd/SimdResizer.h
+++ b/src/Simd/SimdResizer.h
@@ -71,6 +71,12 @@ namespace Simd
(method == SimdResizeMethodBilinear || method == SimdResizeMethodBilinearCaffe || method == SimdResizeMethodBilinearPytorch);
}
+ bool IsBf16Bilinear() const
+ {
+ return type == SimdResizeChannelBf16 &&
+ (method == SimdResizeMethodBilinear || method == SimdResizeMethodBilinearCaffe || method == SimdResizeMethodBilinearPytorch);
+ }
+
bool IsByteBicubic() const
{
return type == SimdResizeChannelByte && method == SimdResizeMethodBicubic;
@@ -180,8 +186,6 @@ namespace Simd
Array32i _ix, _iy;
Array32f _ax, _ay, _bx[2];
- void EstimateIndexAlpha(size_t srcSize, size_t dstSize, size_t channels, int32_t * indices, float * alphas);
-
virtual void Run(const float * src, size_t srcStride, float * dst, size_t dstStride);
public:
@@ -192,6 +196,22 @@ namespace Simd
//-------------------------------------------------------------------------------------------------
+ class ResizerBf16Bilinear : public Resizer
+ {
+ protected:
+ Array32i _ix, _iy;
+ Array32f _ax, _ay, _bx[2];
+
+ virtual void Run(const uint16_t* src, size_t srcStride, uint16_t* dst, size_t dstStride);
+
+ public:
+ ResizerBf16Bilinear(const ResParam& param);
+
+ virtual void Run(const uint8_t* src, size_t srcStride, uint8_t* dst, size_t dstStride);
+ };
+
+ //-------------------------------------------------------------------------------------------------
+
const int32_t BICUBIC_RANGE = 1 << SIMD_RESIZER_BICUBIC_BITS;
const int32_t BICUBIC_SHIFT = SIMD_RESIZER_BICUBIC_BITS * 2;
const int32_t BICUBIC_ROUND = 1 << (BICUBIC_SHIFT - 1);
diff --git a/src/Test/TestResize.cpp b/src/Test/TestResize.cpp
index 23eb089031..7fbe4f0acc 100644
--- a/src/Test/TestResize.cpp
+++ b/src/Test/TestResize.cpp
@@ -144,7 +144,7 @@ namespace Test
assert(0);
View src(srcW, srcH, format, NULL, TEST_ALIGN(srcW));
- if (format == View::Float)
+ if (type == SimdResizeChannelFloat)
FillRandom32f(src);
else if (type == SimdResizeChannelShort)
FillRandom16u(src);
@@ -184,9 +184,9 @@ namespace Test
TEST_EXECUTE_AT_LEAST_MIN_TIME(f2.Call(src, dst2, channels, type, method));
- if (format == View::Float)
+ if (type == SimdResizeChannelFloat)
result = result && Compare(dst1, dst2, EPS, true, 64, DifferenceAbsolute);
- else if (format == View::Float)
+ else if (type == SimdResizeChannelBf16)
{
View dst32f1(dstW, dstH, View::Float), dst32f2(dstW, dstH, View::Float);
for (size_t row = 0; row < dstH; row++)
@@ -196,7 +196,7 @@ namespace Test
}
result = result && Compare(dst32f1, dst32f2, EPS, true, 64, DifferenceAbsolute);
}
- else if(format == View::Int16)
+ else if(type == SimdResizeChannelShort)
result = result && Compare(dst1, dst2, 1, true, 64);
else
result = result && Compare(dst1, dst2, 0, true, 64);
@@ -251,6 +251,8 @@ namespace Test
{
bool result = true;
+ result = result && ResizerAutoTest(SimdResizeMethodBilinear, SimdResizeChannelBf16, 16, f1, f2);
+
result = result && ResizerAutoTest(SimdResizeMethodNearest, SimdResizeChannelBf16, 1, f1, f2);
result = result && ResizerAutoTest(SimdResizeMethodNearest, SimdResizeChannelBf16, 3, f1, f2);
result = result && ResizerAutoTest(SimdResizeMethodNearest, SimdResizeChannelBf16, 8, f1, f2);