ermig1979 · Centimo · Feb 3, 2025 · Feb 3, 2025 · Feb 3, 2025 · Feb 3, 2025
diff --git a/py/SimdPy/Simd.py b/py/SimdPy/Simd.py
@@ -570,7 +570,7 @@ def Init(dir = ""):
 		Lib.__lib.SimdResizerRun.restype = None
 
 
-		Lib.__lib.SimdSynetSetInput.argtypes = [ ctypes.c_void_p, ctypes.c_size_t, ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int32, ctypes.POINTER(ctypes.c_float), ctypes.POINTER(ctypes.c_float), ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int32 ]
+		Lib.__lib.SimdSynetSetInput.argtypes = [ ctypes.c_void_p, ctypes.c_size_t, ctypes.c_size_t, ctypes.c_size_t, ctypes.c_int32, ctypes.POINTER(ctypes.c_float), ctypes.POINTER(ctypes.c_float), ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int32, ctypes.c_bool ] # src, width, height, stride, srcFormat, lower, upper, dst, channels, dstFormat, swapChannels
 		Lib.__lib.SimdSynetSetInput.restype = None
 
 
@@ -1027,24 +1027,15 @@ def ResizerRun(resizer : ctypes.c_void_p, src : ctypes.c_void_p, srcStride : int
 	# @param dst - a pointer to the output 32-bit float image tensor.
 	# @param channels - a number of channels in the output image tensor. It can be 1 or 3.
 	# @param dstFormat - a format of output image tensor. There are supported following tensor formats: Simd.TensorFormat.Nchw, Simd.TensorFormat.Nhwc.
-	# @param isRgb - is channel order of output tensor is RGB or BGR. Its default value is false.
-	def SynetSetInput(src : ctypes.c_void_p, width: int, height: int, stride: int, srcFormat : Simd.PixelFormat, lower : array.array('f'), upper : array.array('f'), dst : ctypes.c_void_p, channels : int, dstFormat : Simd.TensorFormat, isRgb = False) :
+	# @param swapChannels - a flag indicating whether to swap the red and blue channels in the output tensor. Its default value is False.
+	def SynetSetInput(src : ctypes.c_void_p, width: int, height: int, stride: int, srcFormat : Simd.PixelFormat, lower : array.array('f'), upper : array.array('f'), dst : ctypes.c_void_p, channels : int, dstFormat : Simd.TensorFormat, swapChannels = False) :
 		if srcFormat != PixelFormat.Gray8 and srcFormat != PixelFormat.Bgr24 and srcFormat != PixelFormat.Bgra32 and srcFormat != PixelFormat.Rgb24 and srcFormat != PixelFormat.Rgba32 :
 			raise Exception("Incompatible image pixel format: {0}!".format(srcFormat))
 		if channels != 1 and channels != 3 :
 			raise Exception("Incompatible channel value: {0} !".format(channels))
 		lo = (ctypes.c_float * len(lower))(*lower)
 		up = (ctypes.c_float * len(upper))(*upper)
-		sF = srcFormat;
-		if srcFormat == PixelFormat.Bgr24 and isRgb : 
-			sF = PixelFormat.Rgb24
-		elif srcFormat == PixelFormat.Rgb24 and isRgb : 
-			sF = PixelFormat.Bgr24
-		elif srcFormat == PixelFormat.Bgra32 and isRgb : 
-			sF = PixelFormat.Rgba32
-		elif srcFormat == PixelFormat.Rgba32 and isRgb : 
-			sF = PixelFormat.Bgra32
-		Lib.__lib.SimdSynetSetInput(src, width, height, stride, sF.value, lo, up, dst, channels, dstFormat.value)
+		Lib.__lib.SimdSynetSetInput(src, width, height, stride, srcFormat.value, lo, up, dst, channels, dstFormat.value, swapChannels)
 
     ## Creates wrap affine context.
     # @param srcW - a width of input image.
@@ -1779,9 +1770,9 @@ def Resized(src : Image, width :int, height: int, method = Simd.ResizeMethod.Bil
 # @param dst - a pointer to the output 32-bit float image tensor.
 # @param channels - a number of channels in the output image tensor. It can be 1 or 3.
 # @param format - a format of output image tensor. There are supported following tensor formats: Simd.TensorFormat.Nchw, Simd.TensorFormat.Nhwc.
-# @param isRgb - is channel order of output tensor is RGB or BGR. Its default value is false.
-def SynetSetInput(src : Image, lower : array.array('f'), upper : array.array('f'), dst : ctypes.c_void_p, channels : int, format : Simd.TensorFormat, isRgb = False) :
-	Lib.SynetSetInput(src.Data(), src.Width(), src.Height(), src.Stride(), src.Format(), lower, upper, dst, channels, format, isRgb)
+# @param swapChannels - a flag indicating whether to swap the red and blue channels in the output tensor. Its default value is False.
+def SynetSetInput(src : Image, lower : array.array('f'), upper : array.array('f'), dst : ctypes.c_void_p, channels : int, format : Simd.TensorFormat, swapChannels = False) :
+	Lib.SynetSetInput(src.Data(), src.Width(), src.Height(), src.Stride(), src.Format(), lower, upper, dst, channels, format, swapChannels)
 
 ##  @ingroup python
 # Performs warp affine for current image.

diff --git a/py/SimdPy/Test.py b/py/SimdPy/Test.py
@@ -139,7 +139,7 @@ def SynetSetInputTest(args) :
 	lower = [0.0, 0.0, 0.0]
 	upper = [1.0, 1.0, 1.0]
 	input = Simd.Lib.Allocate(channels * height * width * 4, Simd.Lib.Alignment())
-	Simd.SynetSetInput(resized, lower, upper, input, channels, Simd.TensorFormat.Nhwc, True)
+	Simd.SynetSetInput(resized, lower, upper, input, channels, Simd.TensorFormat.Nhwc)
 	Simd.Lib.Free(input)
 
 ###################################################################################################

diff --git a/src/Simd/SimdLib.cpp b/src/Simd/SimdLib.cpp
@@ -5940,32 +5940,59 @@ SIMD_API void SimdSynetScale8iForward(void* context, const uint8_t* src, uint8_t
 #endif
 }
 
-SIMD_API void SimdSynetSetInput(const uint8_t * src, size_t width, size_t height, size_t stride, SimdPixelFormatType srcFormat,
-    const float * lower, const float * upper, float * dst, size_t channels, SimdTensorFormatType dstFormat)
-{
-    SIMD_EMPTY();
+SIMD_API void SimdSynetSetInput(
+    const uint8_t* src,
+    size_t width,
+    size_t height,
+    size_t stride,
+    SimdPixelFormatType srcPixelFormat,
+    const float* lower,
+    const float* upper,
+    float* dst,
+    size_t channels,
+    SimdTensorFormatType dstTensorFormat,
+    bool swapChannels
+) {
+    SIMD_EMPTY();
+
+    switch (srcPixelFormat)
+    {
+        case SimdPixelFormatGray8:
+            break;
+        case SimdPixelFormatBgr24:
+        case SimdPixelFormatRgb24:
+            srcPixelFormat = !swapChannels ? SimdPixelFormatBgr24 : SimdPixelFormatRgb24;
+            break;
+        case SimdPixelFormatBgra32:
+        case SimdPixelFormatRgba32:
+            srcPixelFormat = !swapChannels ? SimdPixelFormatBgra32 : SimdPixelFormatRgba32;
+            break;
+        default:
+            assert(0);
+    }
+
 #if defined(SIMD_SYNET_ENABLE)
 #ifdef SIMD_AVX512BW_ENABLE
     if (Avx512bw::Enable && width >= Avx512bw::A)
-        Avx512bw::SynetSetInput(src, width, height, stride, srcFormat, lower, upper, dst, channels, dstFormat);
+        Avx512bw::SynetSetInput(src, width, height, stride, srcPixelFormat, lower, upper, dst, channels, dstTensorFormat);
     else
 #endif
 #ifdef SIMD_AVX2_ENABLE
     if (Avx2::Enable && width >= Avx2::A)
-        Avx2::SynetSetInput(src, width, height, stride, srcFormat, lower, upper, dst, channels, dstFormat);
+        Avx2::SynetSetInput(src, width, height, stride, srcPixelFormat, lower, upper, dst, channels, dstTensorFormat);
     else
 #endif
 #ifdef SIMD_SSE41_ENABLE
     if (Sse41::Enable && width >= Sse41::A)
-        Sse41::SynetSetInput(src, width, height, stride, srcFormat, lower, upper, dst, channels, dstFormat);
+        Sse41::SynetSetInput(src, width, height, stride, srcPixelFormat, lower, upper, dst, channels, dstTensorFormat);
     else
 #endif
 #ifdef SIMD_NEON_ENABLE
     if (Neon::Enable && width >= Neon::A)
-        Neon::SynetSetInput(src, width, height, stride, srcFormat, lower, upper, dst, channels, dstFormat);
+        Neon::SynetSetInput(src, width, height, stride, srcPixelFormat, lower, upper, dst, channels, dstTensorFormat);
     else
 #endif
-        Base::SynetSetInput(src, width, height, stride, srcFormat, lower, upper, dst, channels, dstFormat);
+        Base::SynetSetInput(src, width, height, stride, srcPixelFormat, lower, upper, dst, channels, dstTensorFormat);
 #else
     assert(0);
 #endif

diff --git a/src/Simd/SimdLib.h b/src/Simd/SimdLib.h
@@ -29,6 +29,7 @@
 #define __SimdLib_h__
 
 #include <stddef.h>
+#include <stdbool.h>
 
 #if defined(_MSC_VER) || defined(__CODEGEARC__)
 
@@ -7607,13 +7608,13 @@ extern "C"
                     dst[(c*height + y)*width + x] = src[stride*y + width*4 + c]*(upper[c] - lower[c])/255 + lower[c];
         \endverbatim
 
-        Note that there are following relationships: 
+        Note that there are following relationships:
         \verbatim
         upper[c] = (1 - mean[c]) / std[c];
         lower[c] = - mean[c] / std[c];
         \endverbatim
-        Also this algorithm assumes that channel order of output tensor is BGR. 
-        In case of RGB channel order you need to change parameter srcFormat: ::SimdPixelFormatBgr24 <-> ::SimdPixelFormatRgb24, ::SimdPixelFormatBgra32 <-> ::SimdPixelFormatRgba32. 
+        By default, this algorithm preserves the channel order of the input image (RGB or BGR) in the output tensor.
+        To swap the red and blue channels in the output tensor, set the 'swapChannels' parameter to true.
         Note that real format of pixel data of input image is not need to change.
 
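-        \note This function has a C++ wrappers: Simd::SynetSetInput(const View<A> & src, const float * lower, const float * upper, float * dst, size_t channels, SimdTensorFormatType format, bool isRgb = false).
+        \note This function has a C++ wrapper: Simd::SynetSetInput(const View<A> & src, const float * lower, const float * upper, float * dst, size_t channels, SimdTensorFormatType tensorFormat, bool swapChannels = false).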
@@ -7622,17 +7623,29 @@ extern "C"
         \param [in] width - a width of input image and output image tensor.
         \param [in] height - a height of input image and output image tensor.
         \param [in] stride - a row size of input image.
-        \param [in] srcFormat - a pixel format of input image. There are supported following pixel formats: ::SimdPixelFormatGray8, ::SimdPixelFormatBgr24, ::SimdPixelFormatBgra32, ::SimdPixelFormatRgb24, ::SimdPixelFormatRgba32.
+        \param [in] srcPixelFormat - a pixel format of input image. There are supported following pixel formats: ::SimdPixelFormatGray8, ::SimdPixelFormatBgr24, ::SimdPixelFormatBgra32, ::SimdPixelFormatRgb24, ::SimdPixelFormatRgba32.
         \param [in] lower - a pointer to the array with lower bound of values of the output tensor. The size of the array have to correspond number of channels in the output image tensor.
         \param [in] upper - a pointer to the array with upper bound of values of the output tensor. The size of the array have to correspond number of channels in the output image tensor.
         \param [out] dst - a pointer to the output 32-bit float image tensor.
         \param [in] channels - a number of channels in the output image tensor. It can be 1 or 3.
-        \param [in] dstFormat - a format of output image tensor. There are supported following tensor formats: ::SimdTensorFormatNchw, ::SimdTensorFormatNhwc.
-    */
-    SIMD_API void SimdSynetSetInput(const uint8_t * src, size_t width, size_t height, size_t stride, SimdPixelFormatType srcFormat, 
-        const float * lower, const float * upper, float * dst, size_t channels, SimdTensorFormatType dstFormat);
-
-    /*! @ingroup synet_other
+        \param [in] dstTensorFormat - a format of output image tensor. There are supported following tensor formats: ::SimdTensorFormatNchw, ::SimdTensorFormatNhwc.
+        \param [in] swapChannels - a flag indicating whether to swap the red and blue channels in the output tensor.
+    */
+    SIMD_API void SimdSynetSetInput(
+        const uint8_t* src,
+        size_t width,
+        size_t height,
+        size_t stride,
+        SimdPixelFormatType srcPixelFormat,
+        const float* lower,
+        const float* upper,
+        float* dst,
+        size_t channels,
+        SimdTensorFormatType dstTensorFormat,
+        bool swapChannels
+    );
+
+    /*! @ingroup synet_other
 
         \fn void SimdSynetShuffleLayerForward(const float * src0, const float * src1, size_t channels0, size_t channels1, size_t spatial, float * dst0, float * dst1, SimdTensorFormatType format, int type);
 

diff --git a/src/Simd/SimdLib.hpp b/src/Simd/SimdLib.hpp
@@ -2732,7 +2732,7 @@ namespace Simd
 
         All images must have the same width, height and pixel format.
 
-        \note This function is a C++ wrapper for function� ::SimdRecursiveBilateralFilterInit and ::SimdRecursiveBilateralFilterRun.
+        \note This function is a C++ wrapper for functions ::SimdRecursiveBilateralFilterInit and ::SimdRecursiveBilateralFilterRun.
 
         \param [in] src - an original input image.
         \param [out] dst - a filtered output image.
@@ -3927,24 +3927,33 @@ namespace Simd
         \param [in] upper - a pointer to the array with upper bound of values of the output tensor. The size of the array have to correspond number of channels in the output image tensor.
         \param [out] dst - a pointer to the output 32-bit float image tensor.
         \param [in] channels - a number of channels in the output image tensor. It can be 1 or 3.
-        \param [in] format - a format of output image tensor. There are supported following tensor formats: ::SimdTensorFormatNchw, ::SimdTensorFormatNhwc.
-        \param [in] isRgb - is channel order of output tensor is RGB or BGR. Its default value is false.
-    */
-    template<template<class> class A> SIMD_INLINE void SynetSetInput(const View<A> & src, const float * lower, const float * upper, float * dst, size_t channels, SimdTensorFormatType format, bool isRgb = false)
-    {
-        assert(format == SimdTensorFormatNchw || format == SimdTensorFormatNhwc);
-        SimdPixelFormatType srcFormat;
-        switch (src.format)
-        {
-        case View<A>::Gray8: srcFormat = SimdPixelFormatGray8; break;
-        case View<A>::Bgr24: srcFormat = isRgb ? SimdPixelFormatRgb24 : SimdPixelFormatBgr24; break;
-        case View<A>::Bgra32: srcFormat = isRgb ? SimdPixelFormatRgba32 : SimdPixelFormatBgra32; break;
-        case View<A>::Rgb24: srcFormat = isRgb ? SimdPixelFormatBgr24 : SimdPixelFormatRgb24; break;
-        case View<A>::Rgba32: srcFormat = isRgb ? SimdPixelFormatBgra32 : SimdPixelFormatRgba32; break;
-        deafult :
-            assert(0);
-        }
-        SimdSynetSetInput(src.data, src.width, src.height, src.stride, srcFormat, lower, upper, dst, channels, format);
+        \param [in] tensorFormat - a format of output image tensor. There are supported following tensor formats: ::SimdTensorFormatNchw, ::SimdTensorFormatNhwc.
+        \param [in] swapChannels - if true, the red and blue channels are swapped in the output tensor.
+          If false (default), the channel order of the input image is preserved.
+    */
+    template<template<class> class A> SIMD_INLINE void SynetSetInput(
+        const View<A>& src,
+        const float* lower,
+        const float* upper,
+        float* dst,
+        size_t channels,
+        SimdTensorFormatType tensorFormat,
+        bool swapChannels = false
+    ) {
+        assert(tensorFormat == SimdTensorFormatNchw || tensorFormat == SimdTensorFormatNhwc);
+        SimdSynetSetInput(
+            src.data,
+            src.width,
+            src.height,
+            src.stride,
+            static_cast< SimdPixelFormatType>(src.format),
+            lower,
+            upper,
+            dst,
+            channels,
+            tensorFormat,
+            swapChannels
+        );
     }
 
     /*! @ingroup texture_estimation

diff --git a/src/Simd/SimdView.hpp b/src/Simd/SimdView.hpp
@@ -1228,7 +1228,9 @@ namespace Simd
         case Gray8:     return CV_8UC1;
         case Uv16:      return CV_8UC2;
         case Bgr24:     return CV_8UC3;
+        case Rgb24:     return CV_8UC3;
         case Bgra32:    return CV_8UC4;
+        case Rgba32:    return CV_8UC4;
         case Int16:     return CV_16SC1;
         case Int32:     return CV_32SC1;
         case Float:     return CV_32FC1;

diff --git a/src/Test/TestSynetConversion.cpp b/src/Test/TestSynetConversion.cpp
@@ -226,15 +226,52 @@ namespace Test
 
     namespace
     {
+        template<class>
+        constexpr bool ALWAYS_FALSE = false; // Type-dependent 'false', so the static_asserts below fire only when a template is actually instantiated.
+
+        using BaseFunc = decltype(Simd::Base::SynetSetInput); // Function type of Simd::Base::SynetSetInput.
+        using APIFunc = decltype(SimdSynetSetInput); // Function type of the C API function SimdSynetSetInput.
+        using VoidFuncPtr = void (*)(void); // Common storage type for a pointer to either of the two function types.
+
+        struct FuncWrapper { // Holds a pointer to either a BaseFunc or an APIFunc together with a tag recording which one it is.
+            enum class Type {
+                BASE, // The wrapper was constructed from a BaseFunc*.
+                API // The wrapper was constructed from an APIFunc*.
+            };
+
+            template< class FuncType>
+            FuncWrapper(FuncType*) { static_assert(ALWAYS_FALSE<FuncType>, "There is no such specialization"); } // Primary template: any function type without an explicit specialization is rejected at compile time.
+
+            template<class FuncType>
+            FuncType* Get() const { static_assert(ALWAYS_FALSE<FuncType>, "There is no such specialization"); } // Primary template: only the BaseFunc and APIFunc specializations are usable.
+
+        private:
+            VoidFuncPtr funcPtr; // The wrapped function pointer, cast to VoidFuncPtr.
+            Type funcType; // Records which function type funcPtr was constructed from.
+        };
+
+        template<> FuncWrapper::FuncWrapper< BaseFunc>(BaseFunc* f) : funcPtr((VoidFuncPtr)f), funcType(Type::BASE) {} // Wraps a BaseFunc pointer and tags it as BASE.
+        template<> FuncWrapper::FuncWrapper< APIFunc>(APIFunc* f) : funcPtr((VoidFuncPtr)f), funcType(Type::API) {} // Wraps an APIFunc pointer and tags it as API.
+
+        template<>
+        BaseFunc* FuncWrapper::Get< BaseFunc>() const { // Returns the stored pointer cast back to BaseFunc*.
+            assert(funcType == Type::BASE); // The wrapper must have been constructed from a BaseFunc*.
+            return (BaseFunc*)funcPtr;
+        }
+
+        template<>
+        APIFunc* FuncWrapper::Get< APIFunc>() const { // Returns the stored pointer cast back to APIFunc*.
+            assert(funcType == Type::API); // The wrapper must have been constructed from an APIFunc*.
+            return (APIFunc*)funcPtr;
+        }
+
         struct FuncSI
         {
-            typedef void(*FuncPtr)(const uint8_t* src, size_t width, size_t height, size_t stride, SimdPixelFormatType pixelFormat,
-                const float* lower, const float* upper, float* dst, size_t channels, SimdTensorFormatType dstFormat);
-
-            FuncPtr func;
+            FuncWrapper funcPtr;
             String desc;
 
-            FuncSI(const FuncPtr& f, const String& d) : func(f), desc(d) {}
+            template< class FuncType>
+            FuncSI(FuncType* f, const String& d) : funcPtr(f), desc(d) {}
 
             void Update(size_t c, size_t h, size_t w, View::Format src, SimdTensorFormatType dst)
             {
@@ -244,7 +281,42 @@ namespace Test
             void Call(const View& src, const float* lower, const float* upper, size_t channels, Tensor32f& dst) const
             {
                 TEST_PERFORMANCE_TEST(desc);
-                func(src.data, src.width, src.height, src.stride, (SimdPixelFormatType)src.format, lower, upper, dst.Data(), channels, dst.Format());
+                funcPtr.Get< BaseFunc >()(
+                    src.data,
+                    src.width,
+                    src.height,
+                    src.stride,
+                    (SimdPixelFormatType) src.format,
+                    lower,
+                    upper,
+                    dst.Data(),
+                    channels,
+                    dst.Format()
+                );
+            }
+
+            void Call(
+                const View& src,
+                const float* lower,
+                const float* upper,
+                size_t channels,
+                Tensor32f& dst,
+                bool swapChannels
+            ) const {
+                TEST_PERFORMANCE_TEST(desc);
+                funcPtr.Get< APIFunc >()(
+                    src.data,
+                    src.width,
+                    src.height,
+                    src.stride,
+                    (SimdPixelFormatType) src.format,
+                    lower,
+                    upper,
+                    dst.Data(),
+                    channels,
+                    dst.Format(),
+                    swapChannels
+                );
             }
         };
     }
@@ -274,7 +346,9 @@ namespace Test
 
         TEST_EXECUTE_AT_LEAST_MIN_TIME(f1.Call(src, lower, upper, c, dst1));
 
-        TEST_EXECUTE_AT_LEAST_MIN_TIME(f2.Call(src, lower, upper, c, dst2));
+        TEST_EXECUTE_AT_LEAST_MIN_TIME(
+            f2.Call(src, lower, upper, c, dst2,srcFormat == View::Format::Rgb24 || srcFormat == View::Format::Rgba32)
+        );
 
         result = result && Compare(dst1, dst2, EPS*EPS, true, 64, DifferenceBoth);