+add Base implementation of class ResizerBf16Bilinear.

ermig1979 · Dec 26, 2024 · 3bdb232 · 3bdb232
1 parent f2af786
commit 3bdb232
Show file tree

Hide file tree

Showing 5 changed files with 116 additions and 27 deletions.
diff --git a/docs/2025.html b/docs/2025.html
@@ -46,6 +46,7 @@ <h5>New features</h5>
  <li>AMX-BF16 kernel DepthwiseConvolution_k5p2d1s1w4 for class SynetMergedConvolution16b.</li>
  <li>AMX-BF16 kernel DepthwiseConvolution_k3p1d1s1w8 for class SynetMergedConvolution16b.</li>
  <li>AMX-BF16 kernel DepthwiseConvolution_k3p1d1s1w6 for class SynetMergedConvolution16b.</li>
+ <li>Base implementation of class ResizerBf16Bilinear.</li>
 </ul>
 <h5>Improving</h5>
 <ul>

diff --git a/src/Simd/SimdBaseResizer.cpp b/src/Simd/SimdBaseResizer.cpp
@@ -39,6 +39,8 @@ namespace Simd
                 return new ResizerShortBilinear(param);
             else if (param.IsFloatBilinear())
                 return new ResizerFloatBilinear(param);
+            else if (param.IsBf16Bilinear())
+                return new ResizerBf16Bilinear(param);
             else if (param.IsByteBicubic())
                 return new ResizerByteBicubic(param);
             else if (param.IsByteArea2x2())

diff --git a/src/Simd/SimdBaseResizerBilinear.cpp b/src/Simd/SimdBaseResizerBilinear.cpp
@@ -24,6 +24,7 @@
 #include "Simd/SimdMemory.h"
 #include "Simd/SimdResizer.h"
 #include "Simd/SimdCopy.h"
+#include "Simd/SimdBFloat16.h"
 
 namespace Simd
 {
@@ -128,7 +129,7 @@ namespace Simd
             }
         }
 
-        //---------------------------------------------------------------------
+        //-------------------------------------------------------------------------------------------------
 
         ResizerShortBilinear::ResizerShortBilinear(const ResParam& param)
             : Resizer(param)
@@ -247,25 +248,11 @@ namespace Simd
             }
         }
 
-        //---------------------------------------------------------------------
+        //-------------------------------------------------------------------------------------------------
 
-        ResizerFloatBilinear::ResizerFloatBilinear(const ResParam & param)
-            : Resizer(param)
+        static void EstimateIndexAlpha(const ResParam& param, size_t srcSize, size_t dstSize, size_t channels, int32_t* indices, float* alphas)
         {
-            _ay.Resize(_param.dstH, false, _param.align);
-            _iy.Resize(_param.dstH, false, _param.align);
-            EstimateIndexAlpha(_param.srcH, _param.dstH, 1, _iy.data, _ay.data);
-            size_t rs = _param.dstW * _param.channels;
-            _ax.Resize(rs, false, _param.align);
-            _ix.Resize(rs, false, _param.align);
-            EstimateIndexAlpha(_param.srcW, _param.dstW, _param.channels, _ix.data, _ax.data);
-            _bx[0].Resize(rs, false, _param.align);
-            _bx[1].Resize(rs, false, _param.align);
-        }
-
-        void ResizerFloatBilinear::EstimateIndexAlpha(size_t srcSize, size_t dstSize, size_t channels, int32_t * indices, float * alphas)
-        {
-            if (_param.method == SimdResizeMethodBilinear)
+            if (param.method == SimdResizeMethodBilinear)
             {
                 float scale = (float)srcSize / dstSize;
                 for (size_t i = 0; i < dstSize; ++i)
@@ -290,8 +277,8 @@ namespace Simd
                         alphas[offset] = alpha;
                     }
                 }
-            }            
-            else if (_param.method == SimdResizeMethodBilinearCaffe)
+            }
+            else if (param.method == SimdResizeMethodBilinearCaffe)
             {
                 float scale = dstSize > 1 ? float(srcSize - 1) / float(dstSize - 1) : 0.0f;
                 for (size_t i = 0; i < dstSize; ++i)
@@ -312,7 +299,7 @@ namespace Simd
                     }
                 }
             }
-            else if (_param.method == SimdResizeMethodBilinearPytorch)
+            else if (param.method == SimdResizeMethodBilinearPytorch)
             {
                 float scale = (float)srcSize / dstSize;
                 for (size_t i = 0; i < dstSize; ++i)
@@ -342,6 +329,22 @@ namespace Simd
                 assert(0);
         }
 
+        //-------------------------------------------------------------------------------------------------
+
+        ResizerFloatBilinear::ResizerFloatBilinear(const ResParam & param) // Sizes and fills the vertical/horizontal index and alpha tables, and sizes the two float buffers _bx[0] and _bx[1].
+            : Resizer(param)
+        {
+            _ay.Resize(_param.dstH, false, _param.align); // vertical alphas: dstH entries
+            _iy.Resize(_param.dstH, false, _param.align); // vertical indices: dstH entries
+            EstimateIndexAlpha(_param, _param.srcH, _param.dstH, 1, _iy.data, _ay.data); // vertical pass is computed with channels = 1
+            size_t rs = _param.dstW * _param.channels; // element count of one destination row (width * channels)
+            _ax.Resize(rs, false, _param.align); // horizontal alphas: rs entries
+            _ix.Resize(rs, false, _param.align); // horizontal indices: rs entries
+            EstimateIndexAlpha(_param, _param.srcW, _param.dstW, _param.channels, _ix.data, _ax.data); // horizontal pass uses the real channel count
+            _bx[0].Resize(rs, false, _param.align); // two float buffers of rs elements each
+            _bx[1].Resize(rs, false, _param.align);
+        }
+
         void ResizerFloatBilinear::Run(const uint8_t * src, size_t srcStride, uint8_t * dst, size_t dstStride)
         {
             Run((const float*)src, srcStride / sizeof(float), (float*)dst, dstStride / sizeof(float));
@@ -386,6 +389,67 @@ namespace Simd
                     dst[dx] = pbx[0][dx]*fy0 + pbx[1][dx]*fy1;
             }
         }
+
+        //-------------------------------------------------------------------------------------------------
+
+        ResizerBf16Bilinear::ResizerBf16Bilinear(const ResParam& param) // Sizes and fills the vertical/horizontal index and alpha tables, and sizes the two float row buffers used by Run.
+            : Resizer(param)
+        {
+            _ay.Resize(_param.dstH, false, _param.align); // vertical alphas: one per destination row (read as _ay[dy] in Run)
+            _iy.Resize(_param.dstH, false, _param.align); // vertical indices: one source-row index per destination row (read as _iy[dy] in Run)
+            EstimateIndexAlpha(_param, _param.srcH, _param.dstH, 1, _iy.data, _ay.data); // vertical pass is computed with channels = 1
+            size_t rs = _param.dstW * _param.channels; // element count of one destination row (width * channels)
+            _ax.Resize(rs, false, _param.align); // horizontal alphas: rs entries (read as _ax[dx] in Run)
+            _ix.Resize(rs, false, _param.align); // horizontal indices: rs entries (read as _ix[dx] in Run)
+            EstimateIndexAlpha(_param, _param.srcW, _param.dstW, _param.channels, _ix.data, _ax.data); // horizontal pass uses the real channel count
+            _bx[0].Resize(rs, false, _param.align); // buffers stay float even though input/output are 16-bit: Run stores converted, interpolated rows here
+            _bx[1].Resize(rs, false, _param.align);
+        }
+
+        void ResizerBf16Bilinear::Run(const uint8_t* src, size_t srcStride, uint8_t* dst, size_t dstStride) // Byte-pointer entry point: forwards to the uint16_t overload.
+        {
+            Run((const uint16_t*)src, srcStride / sizeof(uint16_t), (uint16_t*)dst, dstStride / sizeof(uint16_t)); // strides are divided by sizeof(uint16_t), i.e. converted from bytes to 16-bit elements (integer division)
+        }
+
+        void ResizerBf16Bilinear::Run(const uint16_t* src, size_t srcStride, uint16_t* dst, size_t dstStride) // Bilinear resize of 16-bit BF16 data: horizontal interpolation into float buffers, then vertical blend converted back to BF16. Strides are in uint16_t elements.
+        {
+            size_t cn = _param.channels;
+            size_t rs = _param.dstW * cn; // element count of one destination row
+            float* pbx[2] = { _bx[0].data, _bx[1].data }; // pbx[0] holds interpolated source row sy, pbx[1] holds row sy + 1
+            int32_t prev = -2; // sentinel: for any sy >= 0 neither reuse test below matches on the first row
+            for (size_t dy = 0; dy < _param.dstH; dy++, dst += dstStride)
+            {
+                float fy1 = _ay[dy]; // weight of the row in pbx[1]
+                float fy0 = 1.0f - fy1; // weight of the row in pbx[0]
+                int32_t sy = _iy[dy];
+                int32_t k = 0; // first buffer that must be recomputed; 0 means both
+
+                if (sy == prev)
+                    k = 2; // same source rows as the previous destination row: reuse both buffers
+                else if (sy == prev + 1)
+                {
+                    Swap(pbx[0], pbx[1]); // previous row sy + 1 becomes the current row sy
+                    k = 1; // only pbx[1] has to be recomputed
+                }
+
+                prev = sy;
+
+                for (; k < 2; k++)
+                {
+                    float* pb = pbx[k];
+                    const uint16_t* ps = src + (sy + k) * srcStride; // source row sy + k
+                    for (size_t dx = 0; dx < rs; dx++)
+                    {
+                        int32_t sx = _ix[dx];
+                        float fx = _ax[dx];
+                        pb[dx] = BFloat16ToFloat32(ps[sx]) * (1.0f - fx) + BFloat16ToFloat32(ps[sx + cn]) * fx; // blends element sx with the element cn positions further (presumably the same channel of the next pixel); reads ps[sx + cn], so it relies on the index table keeping that in range - TODO confirm in EstimateIndexAlpha
+                    }
+                }
+
+                for (size_t dx = 0; dx < rs; dx++)
+                    dst[dx] = Float32ToBFloat16(pbx[0][dx] * fy0 + pbx[1][dx] * fy1); // vertical blend in float, then conversion back to BF16
+            }
+        }
     }
 }
 
diff --git a/src/Simd/SimdResizer.h b/src/Simd/SimdResizer.h
@@ -71,6 +71,12 @@ namespace Simd
                 (method == SimdResizeMethodBilinear || method == SimdResizeMethodBilinearCaffe || method == SimdResizeMethodBilinearPytorch);
         }
 
+        bool IsBf16Bilinear() const // True when the channel type is SimdResizeChannelBf16 and the method is one of the three bilinear variants listed below.
+        {
+            return type == SimdResizeChannelBf16 &&
+                (method == SimdResizeMethodBilinear || method == SimdResizeMethodBilinearCaffe || method == SimdResizeMethodBilinearPytorch);
+        }
+
         bool IsByteBicubic() const
         {
             return type == SimdResizeChannelByte && method == SimdResizeMethodBicubic;
@@ -180,8 +186,6 @@ namespace Simd
             Array32i _ix, _iy;
             Array32f _ax, _ay, _bx[2];
 
-            void EstimateIndexAlpha(size_t srcSize, size_t dstSize, size_t channels, int32_t * indices, float * alphas);
-
             virtual void Run(const float * src, size_t srcStride, float * dst, size_t dstStride);
 
         public:
@@ -192,6 +196,22 @@ namespace Simd
 
         //-------------------------------------------------------------------------------------------------
 
+        class ResizerBf16Bilinear : public Resizer // Bilinear resizer for SimdResizeChannelBf16 data (16-bit elements).
+        {
+        protected:
+            Array32i _ix, _iy; // horizontal / vertical source index tables
+            Array32f _ax, _ay, _bx[2]; // horizontal / vertical alpha (weight) tables and two float row buffers
+
+            virtual void Run(const uint16_t* src, size_t srcStride, uint16_t* dst, size_t dstStride); // typed worker; called by the uint8_t overload with strides divided by sizeof(uint16_t)
+
+        public:
+            ResizerBf16Bilinear(const ResParam& param);
+
+            virtual void Run(const uint8_t* src, size_t srcStride, uint8_t* dst, size_t dstStride); // byte-pointer entry point
+        };
+
+        //-------------------------------------------------------------------------------------------------
+
         const int32_t BICUBIC_RANGE = 1 << SIMD_RESIZER_BICUBIC_BITS;
         const int32_t BICUBIC_SHIFT = SIMD_RESIZER_BICUBIC_BITS * 2;
         const int32_t BICUBIC_ROUND = 1 << (BICUBIC_SHIFT - 1);

diff --git a/src/Test/TestResize.cpp b/src/Test/TestResize.cpp
@@ -144,7 +144,7 @@ namespace Test
             assert(0);
 
         View src(srcW, srcH, format, NULL, TEST_ALIGN(srcW));
-        if (format == View::Float)
+        if (type == SimdResizeChannelFloat)
             FillRandom32f(src);
         else if (type == SimdResizeChannelShort)
             FillRandom16u(src);
@@ -184,9 +184,9 @@ namespace Test
 
         TEST_EXECUTE_AT_LEAST_MIN_TIME(f2.Call(src, dst2, channels, type, method));
 
-        if (format == View::Float)
+        if (type == SimdResizeChannelFloat)
             result = result && Compare(dst1, dst2, EPS, true, 64, DifferenceAbsolute);
-        else if (format == View::Float)
+        else if (type == SimdResizeChannelBf16)
         {
             View dst32f1(dstW, dstH, View::Float), dst32f2(dstW, dstH, View::Float);
             for (size_t row = 0; row < dstH; row++)
@@ -196,7 +196,7 @@ namespace Test
             }
             result = result && Compare(dst32f1, dst32f2, EPS, true, 64, DifferenceAbsolute);
         }
-        else if(format == View::Int16)
+        else if(type == SimdResizeChannelShort)
             result = result && Compare(dst1, dst2, 1, true, 64);
         else
             result = result && Compare(dst1, dst2, 0, true, 64);
@@ -251,6 +251,8 @@ namespace Test
     {
         bool result = true;
 
+        result = result && ResizerAutoTest(SimdResizeMethodBilinear, SimdResizeChannelBf16, 16, f1, f2);
+
         result = result && ResizerAutoTest(SimdResizeMethodNearest, SimdResizeChannelBf16, 1, f1, f2);
         result = result && ResizerAutoTest(SimdResizeMethodNearest, SimdResizeChannelBf16, 3, f1, f2);
         result = result && ResizerAutoTest(SimdResizeMethodNearest, SimdResizeChannelBf16, 8, f1, f2);