Skip to content

Commit

Permalink
*improve Performance of SynetConvolution32f (NHWC, srcC=1, dstC=1).
Browse files Browse the repository at this point in the history
  • Loading branch information
ermig1979 committed Dec 26, 2024
1 parent 97d2ed3 commit 6f7b1e4
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 2 deletions.
1 change: 1 addition & 0 deletions docs/2025.html
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ <h5>Improving</h5>
<li>Extend using of AMX-BF16 optimization of function DepthwiseConvolution_k7p3d1s1w8.</li>
<li>Extend using of AVX-512BW optimization of function Convolution32fNhwcDepthwise_k7p3d1s1w4.</li>
<li>Extend using of AMX-BF16 optimization of function DepthwiseConvolution_k5p2d1s1w8.</li>
<li>Performance of SynetConvolution32f (NHWC, srcC=1, dstC=1).</li>
</ul>
<h5>Bug fixing</h5>
<ul>
Expand Down
2 changes: 2 additions & 0 deletions src/Simd/SimdAvx512bwSynetConvolution32fDirectNchw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,8 @@ namespace Simd
return false;
if (!(p.IsStride(1) || p.IsStride(2) || p.IsStride(3)))
return false;
if (p.srcC == 1 && p.dstC == 1 && p.IsStride(1) && p.IsDilation(1))
return true;
double k = double(p.srcC) / p.group * p.strideX * p.strideX * p.strideY / p.kernelX / p.kernelY;
return k < 2.0 && ((p.IsStride(1) && p.IsKernel(1)) || p.IsKernel(2) || p.IsKernel(3)
#if SIMD_ZMM_COUNT == 32 || 1
Expand Down
2 changes: 2 additions & 0 deletions src/Simd/SimdSse41SynetConvolution32fDirectNchw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,8 @@ namespace Simd
if (!(p.IsStride(1) || p.IsStride(2) || p.IsStride(3)))
return false;
double k = double(p.srcC) / p.group * p.strideX * p.strideX * p.strideY / p.kernelX / p.kernelY;
if (p.srcC == 1 && p.dstC == 1 && p.IsStride(1) && p.IsDilation(1))
return true;
return k < 2.0 && ((p.IsStride(1) && p.IsKernel(1)) || p.IsKernel(2) || p.IsKernel(3)) && p.trans == 0;
}

Expand Down
7 changes: 5 additions & 2 deletions src/Test/TestSynetConvolution32f.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ namespace Test
result = result && SynetConvolution32fForwardAutoTest(eps, Param(12, 2000, 5, 5, 255, _1, _1, _1, _0, _0, 1, a, t), f1, f2);
result = result && SynetConvolution32fForwardAutoTest(eps, Param(24, 2048, 6, 6, 255, _3, _1, _1, _0, _0, 1, a, t), f1, f2);
#endif
#if 1
#if 0
result = result && SynetConvolution32fForwardAutoTest(eps, Param(1, 816, 14, 14, 816, _5, _1, _1, _2, _2, 816, aRe, t), f1, f2);
result = result && SynetConvolution32fForwardAutoTest(eps, Param(1, 304, 16, 16, 304, _3, _1, _1, _1, _1, 304, aRe, t), f1, f2);
result = result && SynetConvolution32fForwardAutoTest(eps, Param(1, 304, 16, 16, 304, _7, _1, _1, _3, _3, 304, aPr, t), f1, f2);
Expand All @@ -253,6 +253,9 @@ namespace Test
result = result && SynetConvolution32fForwardAutoTest(eps, Param(1, 152, 32, 32, 152, _7, _1, _1, _3, _3, 152, aRe, t), f1, f2);
result = result && SynetConvolution32fForwardAutoTest(eps, Param(1, 76, 64, 64, 76, _7, _1, _1, _3, _3, 76, aRe, t), f1, f2);
#endif
#if 1
result = result && SynetConvolution32fForwardAutoTest(eps, Param(1, 1, 80, 240, 1, _3, _1, _1, _1, _1, 1, aRe, t), f1, f2);
#endif
#else
result = result && SynetConvolution32fForwardAutoTest(eps, Param(1, 256, 44, 44, 256, _1, _1, _1, _0, _0, 1, a, t), f1, f2);
#endif
Expand All @@ -270,7 +273,7 @@ namespace Test
const SimdBool tF = SimdFalse, tT = SimdTrue;

#ifdef NDEBUG
//result = result && SynetConvolution32fForwardAutoTest(eps, SimdConvolutionActivationGelu, tF, f1, f2);
result = result && SynetConvolution32fForwardAutoTest(eps, SimdConvolutionActivationGelu, tF, f1, f2);
result = result && SynetConvolution32fForwardAutoTest(eps, SimdConvolutionActivationRelu, tT, f1, f2);
#else
//result = result && SynetConvolution32fForwardAutoTest(eps, SimdConvolutionActivationGelu, tF, f1, f2);
Expand Down

0 comments on commit 6f7b1e4

Please sign in to comment.