diff --git a/src/Simd/SimdBaseResizerBilinear.cpp b/src/Simd/SimdBaseResizerBilinear.cpp index fb6b74a0ed..c9e80675bb 100644 --- a/src/Simd/SimdBaseResizerBilinear.cpp +++ b/src/Simd/SimdBaseResizerBilinear.cpp @@ -334,11 +334,10 @@ namespace Simd ResizerFloatBilinear::ResizerFloatBilinear(const ResParam & param) : Resizer(param) { - _rowBuf = _param.align < 16 || ((_param.channels < 4 && _param.align > 16) || _param.channels == 3) || _param.dstH >= _param.srcH; + _rowBuf = _param.align < 16 || (_param.channels < 4 && _param.align > 16) || _param.dstH >= _param.srcH; #if defined(SIMD_ARM_ENABLE) || defined(SIMD_ARM64_ENABLE) _rowBuf = true; #endif - _ay.Resize(_param.dstH, false, _param.align); _iy.Resize(_param.dstH, false, _param.align); EstimateIndexAlpha(_param, _param.srcH, _param.dstH, 1, 1, _iy.data, _ay.data); diff --git a/src/Simd/SimdSse41ResizerBilinear.cpp b/src/Simd/SimdSse41ResizerBilinear.cpp index 4a9aaeeade..7fc98884d8 100644 --- a/src/Simd/SimdSse41ResizerBilinear.cpp +++ b/src/Simd/SimdSse41ResizerBilinear.cpp @@ -598,7 +598,7 @@ namespace Simd void ResizerFloatBilinear::Run(const float* src, size_t srcStride, float* dst, size_t dstStride) { size_t cn = _param.channels, cnF = AlignLo(cn, F), cnT = cn - cnF, cnL = cnT - F; - size_t dw = _param.dstW, dw2= AlignLo(dw, 2), dw4 = AlignLo(dw, 4); + size_t dw = _param.dstW, dw2 = AlignLo(dw, 2), dw4 = AlignLo(dw, 4), dw1 = dw - 1; __m128 _1 = _mm_set1_ps(1.0f); if (_rowBuf) { @@ -770,6 +770,31 @@ namespace Simd StoreHalf<0>(dst + od, _mm_add_ps(_mm_mul_ps(r0, fy0), _mm_mul_ps(r1, fy1))); } } + else if (cn == 3) + { + size_t dx = 0, od = 0; + for (; dx < dw1; dx += 1, od += 3) + { + size_t os = _ix[dx]; + __m128 fx1 = _mm_set1_ps(_ax[dx]); + __m128 fx0 = _mm_sub_ps(_1, fx1); + __m128 r0 = _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(src0 + os), fx0), _mm_mul_ps(_mm_loadu_ps(src0 + os + 3), fx1)); + __m128 r1 = _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(src1 + os), fx0), _mm_mul_ps(_mm_loadu_ps(src1 + os + 3), fx1)); + _mm_storeu_ps(dst + od, _mm_add_ps(_mm_mul_ps(r0, fy0), _mm_mul_ps(r1, fy1))); + } + if (dx < dw) + { + size_t os = _ix[dx]; + __m128 fx1 = _mm_set1_ps(_ax[dx]); + __m128 fx0 = _mm_sub_ps(_1, fx1); + for (size_t ed = od + 3; od < ed; od++, os++) + { + __m128 r0 = _mm_add_ps(_mm_mul_ps(_mm_load_ss(src0 + os), fx0), _mm_mul_ps(_mm_load_ss(src0 + os + 3), fx1)); + __m128 r1 = _mm_add_ps(_mm_mul_ps(_mm_load_ss(src1 + os), fx0), _mm_mul_ps(_mm_load_ss(src1 + os + 3), fx1)); + _mm_store_ss(dst + od, _mm_add_ps(_mm_mul_ps(r0, fy0), _mm_mul_ps(r1, fy1))); + } + } + } else { for (size_t dx = 0; dx < dw; dx++) diff --git a/src/Test/TestResize.cpp b/src/Test/TestResize.cpp index 14b244514c..067656b07f 100644 --- a/src/Test/TestResize.cpp +++ b/src/Test/TestResize.cpp @@ -245,7 +245,7 @@ namespace Test bool ResizerAutoTest(const FuncRS & f1, const FuncRS & f2) { bool result = true; - result = result && ResizerAutoTest(SimdResizeMethodBilinear, SimdResizeChannelFloat, 2, f1, f2); + #if 1 #if defined(SIMD_X64_ENABLE) result = result && ResizerAutoTest(SimdResizeMethodBilinear, SimdResizeChannelFloat, 64, f1, f2);