Adds a new version of boxfilter_2d that can output into a vvec of a d…

…ifferent element time than the input.
ABRG-Models · Nov 23, 2023 · a16d3f7 · a16d3f7
1 parent 7c2d0f2
commit a16d3f7
Show file tree

Hide file tree

Showing 3 changed files with 144 additions and 0 deletions.
diff --git a/morph/MathAlgo.h b/morph/MathAlgo.h
@@ -525,6 +525,81 @@ namespace morph {
             }
         }
 
+        // Apply a 2d, horizontally wrapping box filter. Test to see if boxside is odd and disallow
+        // even (not tested). Assume the data in the vvec relates to a rectangle of width w.
+        // Specify both input and output type
+        template<typename T, typename T_o = T, int boxside, int w, bool onlysum = false>
+        static void boxfilter_2d (const morph::vvec<T>& data, morph::vvec<T_o>& result)
+        {
+            // static_assert (boxside * boxside * input_max <= std::numeric_limits<T_o>::max(), "boxside*boxside*Tmax should fit in T_o");
+
+            if constexpr (boxside%2 == 0) {
+                throw std::runtime_error ("boxfilter_2d was not designed for even box filter squares (set boxside template param. to an odd value)");
+            }
+            if (result.size() != data.size()) {
+                throw std::runtime_error ("The input data vector is not the same size as the result vector.");
+            }
+
+            // Divide by boxarea without accounting for edges (wrapping will sort horz edges)
+            static constexpr T_o oneover_boxa = T_o{1} / (static_cast<T_o>(boxside)*static_cast<T_o>(boxside));
+            static constexpr int halfbox = boxside / 2;
+            static constexpr int halfbox_p1 = halfbox + 1;
+
+            morph::vec<T_o, w> colsum;
+            colsum.zero();
+            T_o rowsum = T{0};
+
+            int h = data.size() / w;
+
+            // process data row by row
+            for (int y = -halfbox; y < h; ++y) {
+
+                // 1. Accumulate column sums; pull out last row.
+                if (y+halfbox < h) {
+                    if (y >= halfbox_p1) {
+                        for (int x = 0; x < w; ++x) {
+                            // Add to the next row from the data array and subtract the last (bottom) row of the colsum
+                            colsum[x] += data[(y+halfbox)*w+x]  -  data[(y-halfbox_p1)*w+x];
+                            // T_o       T                         T
+                        }
+                    } else {
+                        for (int x = 0; x < w; ++x) {
+                            // Just add to the next row from the data array
+                            colsum[x] += data[(y+halfbox)*w+x];
+                        }
+                    }
+                } else {
+                    if (y >= halfbox_p1) {
+                        // Just subtract
+                        for (int x = 0; x < w; ++x) {
+                            colsum[x] -= data[(y-halfbox_p1)*w+x];
+                        }
+                    } // else no op on colsum[x]
+                }
+
+                rowsum = T{0};
+                if (y>=0) {
+                    // 2. Initialise rowsum. This happens after we have accumulated colsums. Init rowsum as the sum of the end col
+                    for (int i = -halfbox_p1; i < 0; ++i) { rowsum += colsum[i+w]; }
+                    for (int i = 0; i < halfbox; ++i) { rowsum += colsum[i]; }
+
+                    // 3. Compute the sum along the row, and write this into result
+                    int l = -halfbox_p1;
+                    int r = halfbox;
+                    for (int x = 0; x < w; ++x) {
+                        // A modulus where -x modulus w gives always a positive index: (w + (a % w)) % w
+                        rowsum += colsum[(w + (r++ % w)) % w] - colsum[(w + (l++ % w)) % w];
+
+                        if constexpr (onlysum == true) {
+                            result[y*w + x] = rowsum;
+                        } else {
+                            result[y*w + x] = rowsum * oneover_boxa;
+                        }
+                    }
+                }
+            }
+        }
+
         // Boxfilter that works with a runtime-configured width, w
         template<typename T, int boxside, bool onlysum = false>
         static void boxfilter_2d (const morph::vvec<T>& data, morph::vvec<T>& result, const int w)

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -444,3 +444,6 @@ endif()
 # morph::Tools
 add_executable(testTools testTools.cpp)
 add_test(testTools testTools)
+
+add_executable(testboxfilter testboxfilter.cpp)
+add_test(testboxfilter testboxfilter)
diff --git a/tests/testboxfilter.cpp b/tests/testboxfilter.cpp
@@ -0,0 +1,66 @@
+// Test (and profile) the box filter
+#include <morph/MathAlgo.h>
+#include <morph/vvec.h>
+#include <chrono>
+#include <cstdint>
+
+int main()
+{
+    using namespace std::chrono;
+    using sc = std::chrono::steady_clock;
+
+    constexpr int img_w = 256;
+    constexpr int img_h = 64;
+    constexpr int data_sz = img_w * img_h;
+
+    // Single precision
+    morph::vvec<float> input_f (data_sz, 0.0f);
+    morph::vvec<float> output_f (data_sz, 0.0f);
+    input_f.randomize();
+
+    sc::time_point t0_f = sc::now();
+    morph::MathAlgo::boxfilter_2d<float, 17, img_w> (input_f, output_f);
+    sc::time_point t1_f = sc::now();
+
+    sc::duration t_d_f = t1_f - t0_f;
+    std::cout << data_sz << " pixels boxfiltered (17x17, float) in " << duration_cast<microseconds>(t_d_f).count() << " us\n";
+
+    // Double precision
+    morph::vvec<double> input_d (data_sz, 0.0f);
+    morph::vvec<double> output_d (data_sz, 0.0f);
+    input_d.randomize();
+
+    sc::time_point t0_d = sc::now();
+    morph::MathAlgo::boxfilter_2d<double, 17, img_w> (input_d, output_d);
+    sc::time_point t1_d = sc::now();
+
+    sc::duration t_d_d = t1_d - t0_d;
+    std::cout << data_sz << " pixels boxfiltered (17x17, double) in " << duration_cast<microseconds>(t_d_d).count() << " us\n";
+
+    // Multi precision
+    sc::time_point t0_m = sc::now();
+    morph::MathAlgo::boxfilter_2d<double, float, 17, img_w> (input_d, output_f);
+    sc::time_point t1_m = sc::now();
+
+    sc::duration t_d_m = t1_m - t0_m;
+    std::cout << data_sz << " pixels boxfiltered (17x17, double in, float out) in " << duration_cast<microseconds>(t_d_m).count() << " us\n";
+
+
+    // Multi precision with uint8_t input
+    morph::vvec<uint8_t> input_u8 (data_sz, 0);
+    input_u8.randomize();
+    unsigned int uisum = input_u8.sum<unsigned int>();
+    unsigned int uisum2 = input_u8.sum(); // auto template param deduction? No.
+    std::cout << "input_u8: " << uisum << " or " << uisum2  << std::endl;
+
+    sc::time_point t0_u = sc::now();
+    morph::MathAlgo::boxfilter_2d<uint8_t, float, 17, img_w> (input_u8, output_f);
+    sc::time_point t1_u = sc::now();
+
+    std::cout << "output_flt: " << output_f.sum() << std::endl;
+
+    sc::duration t_d_u = t1_u - t0_u;
+    std::cout << data_sz << " pixels boxfiltered (17x17, uint8_t in, float out) in " << duration_cast<microseconds>(t_d_u).count() << " us\n";
+
+    return 0;
+}