gnuradio · marcusmueller · Sep 23, 2024 · Jun 30, 2024 · Jun 30, 2024
diff --git a/lib/ofdm_bouzegzi_c_impl.cc b/lib/ofdm_bouzegzi_c_impl.cc
@@ -23,9 +23,14 @@
 #endif
 
 #include "ofdm_bouzegzi_c_impl.h"
+#include <gnuradio/gr_complex.h>
 #include <gnuradio/io_signature.h>
+#include <gnuradio/logger.h>
+#include <spdlog/fmt/ranges.h>
 #include <volk/volk.h>
+#include <volk/volk_alloc.hh>
 #include <complex>
+#include <memory>
 
 namespace gr {
 namespace inspector {
@@ -48,173 +53,111 @@ ofdm_bouzegzi_c_impl::ofdm_bouzegzi_c_impl(double samp_rate,
                                            const std::vector<int>& beta)
     : gr::sync_block("ofdm_bouzegzi_c",
                      gr::io_signature::make(1, 1, sizeof(gr_complex)),
-                     gr::io_signature::make(0, 0, 0))
+                     gr::io_signature::make(0, 0, 0)),
+      d_Nb(Nb),
+      /* TODO: 2024-06-30b: Almost certain most of these buffers are redundant, and
+       * DEFAULT_LEN makes no sense, whatsoever. Fix. See Fixme comment in work.
+       */
+      d_x1(DEFAULT_LEN),
+      d_y1(DEFAULT_LEN),
+      d_x2(DEFAULT_LEN),
+      d_y2(DEFAULT_LEN),
+      d_tmp1(DEFAULT_LEN),
+      d_tmp2(DEFAULT_LEN),
+      d_real_pre(DEFAULT_LEN),
+      d_imag_pre(DEFAULT_LEN),
+      d_osc_vec(DEFAULT_LEN),
+      d_sig_shift(DEFAULT_LEN),
+      d_res(DEFAULT_LEN),
+      d_alpha(alpha),
+      d_beta(beta),
+      d_fft(std::make_unique<fft::fft_complex_fwd>(DEFAULT_FFT_LEN))
 {
-    d_samp_rate = samp_rate;
-    d_Nb = Nb;
-    d_alpha = alpha;
-    d_beta = beta;
-    d_fft = new fft::fft_complex_fwd(1024, true);
+    set_min_noutput_items(MIN_NOUTPUT_ITEMS);
     message_port_register_out(pmt::intern("ofdm_out"));
-    d_len = 10000;
-
-    // generate vectors
-    d_x1 = (float*)volk_malloc(sizeof(float) * d_len,
-                               volk_get_alignment()); // real part of sig
-    d_y1 = (float*)volk_malloc(sizeof(float) * d_len,
-                               volk_get_alignment()); // imag part of sig
-    d_x2 = (float*)volk_malloc(sizeof(float) * d_len,
-                               volk_get_alignment()); // real part of oscillation
-    d_y2 = (float*)volk_malloc(sizeof(float) * d_len,
-                               volk_get_alignment()); // imag part of oscillation
-    d_tmp1 = (float*)volk_malloc(sizeof(float) * d_len, volk_get_alignment());
-    d_tmp2 = (float*)volk_malloc(sizeof(float) * d_len, volk_get_alignment());
-    d_real_pre = (float*)volk_malloc(sizeof(float) * d_len, volk_get_alignment());
-    d_imag_pre = (float*)volk_malloc(sizeof(float) * d_len, volk_get_alignment());
-    d_sig_shift =
-        (gr_complex*)volk_malloc(sizeof(gr_complex) * (d_len), volk_get_alignment());
-    d_res = (gr_complex*)volk_malloc(sizeof(gr_complex) * (d_len), volk_get_alignment());
-    d_osc_vec = (float*)volk_malloc(sizeof(float) * d_len, volk_get_alignment());
 }
 
 /*
  * Our virtual destructor.
  */
-ofdm_bouzegzi_c_impl::~ofdm_bouzegzi_c_impl()
-{
-    volk_free(d_x1);
-    volk_free(d_y1);
-    volk_free(d_tmp1);
-    volk_free(d_tmp2);
-    volk_free(d_real_pre);
-    volk_free(d_imag_pre);
-    volk_free(d_sig_shift);
-    volk_free(d_res);
-    volk_free(d_osc_vec);
-    volk_free(d_x2);
-    volk_free(d_y2);
-}
+ofdm_bouzegzi_c_impl::~ofdm_bouzegzi_c_impl() {}
 
-gr_complex ofdm_bouzegzi_c_impl::autocorr_orig(const gr_complex* sig, int a, int b, int p)
+float ofdm_bouzegzi_c_impl::autocorr(
+    const gr_complex* sig, int a, int b, int p, unsigned int length)
 {
-    std::cout << "----------- ORIG ------------" << std::endl;
-    int M = d_len;
-    gr_complex R = gr_complex(0, 0);
-
-    for (int m = 0; m < 100; m++) {
-        R += sig[m + a] * std::conj(sig[m]) *
-             std::exp(gr_complex(0, -1) * gr_complex(2 * M_PI * p * m / (a / b + a), 0));
-        std::cout << sig[m + a] * std::conj(sig[m]) *
-                         std::exp(gr_complex(0, -1) *
-                                  gr_complex(2 * M_PI * p * m / (a / b + a), 0))
-                  << " ";
-    }
-    std::cout << std::endl;
-
-    R = R / gr_complex(M - a, 0); // normalize
-    // std::cout << "R = " << R << std::endl;
-    return R;
-}
-float ofdm_bouzegzi_c_impl::autocorr(const gr_complex* sig, int a, int b, int p)
-{
-    // std::cout << "----------- VOLK ------------" << std::endl;
-    int M = d_len;
-    /*
-    gr_complex f[M];
-    for(int i = 0; i < M; i++) {
-      f[i] = sig[i] * std::exp(gr_complex(0,1)*gr_complex(2*M_PI*i*p/(a/b+a),0));
-    }
-
-
-    gr_complex f_fft[M];
-    gr_complex g_fft[M];
-    gr_complex R_vec[M];
-
-    // fast convolution
-    do_fft(f, f_fft);
-    do_fft(sig, g_fft);
-    for(int i = 0; i < d_len; i++) {
-      R_vec[i] = f_fft[i]*g_fft[i]; // convolution
-    }
-    gr_complex result[M];
-    rescale_fft(false);
-    do_fft(R_vec, result);
-    gr_complex R = result[a];
+    /* TODO: 2024-06-30a: DEFINITELY make this cooler.
+     * We have a lot of unnecessary vector members, and here we make (allocate and
+     * deallocate two new ones every single iteration, "m_vec" and "pre".
      */
-
-    gr_complex R = gr_complex(0, 0);
-
-    // TODO: make this cooler
-    __GR_VLA(float, m_vec, M);
-    for (unsigned int i = 0; i < M; i++) {
+    volk::vector<float> m_vec(length);
+    for (unsigned int i = 0; i < length; i++) {
         m_vec[i] = i;
     }
     // create oscillation argument
-    volk_32f_s32f_multiply_32f(d_osc_vec, m_vec, -2 * M_PI * p / (a / b + a), M);
+    /* TODO: The integer division done here seems wrong, but I don't know the original
+     * intent. Figure out whether p/(a/b+a) is correct here.
+     */
+    volk_32f_s32f_multiply_32f(
+        d_osc_vec.data(), m_vec.data(), -2 * M_PI * p / (a / b + a), length);
 
-    volk_32fc_deinterleave_real_32f(d_x1, sig, M);
-    volk_32fc_deinterleave_imag_32f(d_y1, sig, M);
-    volk_32f_cos_32f(d_x2, d_osc_vec, M); // create cosine vector
-    volk_32f_sin_32f(d_y2, d_osc_vec, M); // create sine vector
+    volk_32fc_deinterleave_real_32f(d_x1.data(), sig, length);
+    volk_32fc_deinterleave_imag_32f(d_y1.data(), sig, length);
+    volk_32f_cos_32f(d_x2.data(), d_osc_vec.data(), length); // create cosine vector
+    volk_32f_sin_32f(d_y2.data(), d_osc_vec.data(), length); // create sine vector
 
 
-    volk_32f_x2_multiply_32f(d_tmp1, d_x1, d_x2, M);
-    volk_32f_x2_multiply_32f(d_tmp2, d_y1, d_y2, M);
-    volk_32f_x2_add_32f(d_real_pre, d_tmp1, d_tmp2, M); // final real part
+    volk_32f_x2_multiply_32f(d_tmp1.data(), d_x1.data(), d_x2.data(), length);
+    volk_32f_x2_multiply_32f(d_tmp2.data(), d_y1.data(), d_y2.data(), length);
+    volk_32f_x2_add_32f(
+        d_real_pre.data(), d_tmp1.data(), d_tmp2.data(), length); // final real part
 
-    volk_32f_x2_multiply_32f(d_tmp1, d_x1, d_y2, M);
-    volk_32f_x2_multiply_32f(d_tmp2, d_x2, d_y1, M);
-    volk_32f_x2_subtract_32f(d_imag_pre, d_tmp1, d_tmp2, M); // final imag part
+    volk_32f_x2_multiply_32f(d_tmp1.data(), d_x1.data(), d_y2.data(), length);
+    volk_32f_x2_multiply_32f(d_tmp2.data(), d_x2.data(), d_y1.data(), length);
+    volk_32f_x2_subtract_32f(
+        d_imag_pre.data(), d_tmp1.data(), d_tmp2.data(), length); // final imag part
 
 
-    __GR_VLA(gr_complex, pre, M);
-    for (unsigned int i = 0; i < M; i++) {
+    volk::vector<gr_complex> pre(length);
+    for (unsigned int i = 0; i < length; i++) {
         pre[i] = d_real_pre[i] + gr_complex(0, 1) * d_imag_pre[i];
     }
 
-    memcpy(d_sig_shift, &sig[a], (M - a) * sizeof(gr_complex));
-    volk_32fc_x2_multiply_32fc(d_res, d_sig_shift, pre, M - a);
+    memcpy(d_sig_shift.data(), &sig[a], (length - a) * sizeof(gr_complex));
+    volk_32fc_x2_multiply_32fc(d_res.data(), d_sig_shift.data(), pre.data(), length - a);
 
-    for (unsigned int i = 0; i < M - a; i++) {
+    gr_complex R{ 0, 0 };
+    for (unsigned int i = 0; i < length - a; i++) {
         R += d_res[i];
-        // std::cout << d_res[i] << " ";
     }
-    // std::cout << std::endl;
 
     /*
     for(int m = 0; m < M-a; m++) {
       R += sig[m+a] * std::conj(sig[m]) *
               std::exp(gr_complex(0,-1)*gr_complex(2*M_PI*p*m/(a/b+a),0));
     }
     */
-    R = R / gr_complex(M - a, 0); // normalize
-    // std::cout << "R = " << R << std::endl;
+    R = R / static_cast<float>(length - a); // normalize
     return std::abs(R);
 }
 
-void ofdm_bouzegzi_c_impl::rescale_fft(bool forward)
+void ofdm_bouzegzi_c_impl::rescale_fft(unsigned int length)
 {
-    delete d_fft;
-    d_fft = new fft::fft_complex_fwd(d_len, forward);
-    d_fft->set_nthreads(4);
+    d_fft = std::make_unique<fft::fft_complex_fwd>(length, 4 /*threads*/);
 }
 
-void ofdm_bouzegzi_c_impl::do_fft(const gr_complex* in, gr_complex* out)
-{
-    memcpy(d_fft->get_inbuf(), in, d_len * sizeof(gr_complex));
-    d_fft->execute();
-    memcpy(out, d_fft->get_outbuf(), d_len * sizeof(gr_complex));
-}
 
-float ofdm_bouzegzi_c_impl::cost_func(const gr_complex* sig, int a, int b)
+float ofdm_bouzegzi_c_impl::cost_func(const gr_complex* sig,
+                                      int a,
+                                      int b,
+                                      unsigned int length)
 {
     float J = 0;
-    __GR_VLA(float, power, 2 * d_Nb + 1);
-    __GR_VLA(float, R, 2 * d_Nb + 1);
+    volk::vector<float> power(2 * d_Nb + 1);
+    volk::vector<float> R(2 * d_Nb + 1);
     for (int p = -d_Nb; p <= d_Nb; p++) {
-        R[p + d_Nb] = autocorr(sig, a, b, p);
+        R[p + d_Nb] = autocorr(sig, a, b, p, length);
     }
-    volk_32f_s32f_power_32f(power, R, 2.0, 2 * d_Nb + 1);
+    volk_32f_s32f_power_32f(power.data(), R.data(), 2.0, 2 * d_Nb + 1);
     for (int i = 0; i < 2 * d_Nb + 1; i++) {
         J += power[i];
     }
@@ -227,14 +170,27 @@ int ofdm_bouzegzi_c_impl::work(int noutput_items,
                                gr_vector_void_star& output_items)
 {
     const gr_complex* in = (const gr_complex*)input_items[0];
-    d_len = noutput_items;
-    rescale_fft(true);
-    // std::cout << "len = " << d_len << std::endl;
-
+    /*
+     * FIXME: 2024-06-30 This a terrible idea and will lead to inconsistent results
+     * instead of choosing a different FFT size every time, the author should have
+     * chose an input multiple and worked with the same length.
+     *
+     * However, mission right now is only to make this compile, and if you're inclined to
+     * use this block, please fix this.
+     */
+    rescale_fft(noutput_items);
+
+    // TODO: after fixing the above, remove the next lines
+    d_logger->warn("Calculating based on FFT length {}. If this number changes, your "
+                   "results are inconsistent. There is a fix for that, which you should "
+                   "contribute upstream -- see {}'s FIXME 2024-06-30!",
+                   noutput_items,
+                   __FILE__);
     // we need a max number of items for analysis
-    if (d_len < 7000) {
+    if (noutput_items < static_cast<int>(MIN_NOUTPUT_ITEMS)) {
         return 0;
     }
+#warning This block has consistency issues. See the FIXME 2024-06-30 comment above. DO NOT USE IN PRODUCTION.
 
     // Do <+signal processing+>
     float J = 0.0;
@@ -244,23 +200,24 @@ int ofdm_bouzegzi_c_impl::work(int noutput_items,
 
     for (std::vector<int>::iterator a = d_alpha.begin(); a != d_alpha.end(); ++a) {
         for (std::vector<int>::iterator b = d_beta.begin(); b != d_beta.end(); ++b) {
-            J_new = cost_func(in, *a, *b);
-            // std::cout << "a = " << *a << ", b = " << *b << ", J = " << J_new <<
-            // std::endl;
+            J_new = cost_func(in, *a, *b, noutput_items);
             if (J_new > J) {
                 J = J_new;
                 a_res = *a;
                 b_res = *b;
             }
         }
     }
-    std::cout << "-------- Result -------" << std::endl;
-    std::cout << "FFT len = " << a_res << std::endl;
-    std::cout << "CP len = " << 1.0 / b_res * a_res << std::endl;
+    d_logger->trace("-------- Result -------\n"
+                    "FFT len = {}\n"
+                    "CP len = {}\n",
+                    a_res,
+                    1.0 / b_res * a_res);
 
     // Tell runtime system how many output items we produced.
     return noutput_items;
 }
 
 } /* namespace inspector */
-} /* namespace gr */
+} // namespace gr
+/* namespace gr */
diff --git a/lib/ofdm_bouzegzi_c_impl.h b/lib/ofdm_bouzegzi_c_impl.h
@@ -23,40 +23,52 @@
 
 #include <gnuradio/fft/fft.h>
 #include <gnuradio/inspector/ofdm_bouzegzi_c.h>
+#include <volk/volk_alloc.hh>
+#include <memory>
 
 namespace gr {
 namespace inspector {
 
 class ofdm_bouzegzi_c_impl : public ofdm_bouzegzi_c
 {
+public:
+    static constexpr unsigned int DEFAULT_FFT_LEN = 1024;
+    static constexpr unsigned int MIN_NOUTPUT_ITEMS = 7000;
+    static constexpr unsigned int DEFAULT_LEN = 10000;
+
 private:
-    int d_Nb, d_len;
-    double d_samp_rate;
-    float *d_x1, *d_y1, *d_x2, *d_y2, *d_tmp1, *d_tmp2, *d_real_pre, *d_imag_pre,
-        *d_osc_vec;
-    gr_complex *d_sig_shift, *d_res;
+    int d_Nb;
+    volk::vector<float> d_x1;
+    volk::vector<float> d_y1;
+    volk::vector<float> d_x2;
+    volk::vector<float> d_y2;
+    volk::vector<float> d_tmp1;
+    volk::vector<float> d_tmp2;
+    volk::vector<float> d_real_pre;
+    volk::vector<float> d_imag_pre;
+    volk::vector<float> d_osc_vec;
+    volk::vector<gr_complex> d_sig_shift, d_res;
     std::vector<int> d_alpha, d_beta;
-    fft::fft_complex_fwd* d_fft;
+    std::unique_ptr<fft::fft_complex_fwd> d_fft;
+
+    void rescale_fft(unsigned int length);
+
+    float autocorr(const gr_complex* sig, int a, int b, int p, unsigned int length);
+    float cost_func(const gr_complex* sig, int a, int b, unsigned int length);
 
 public:
     ofdm_bouzegzi_c_impl(double samp_rate,
                          int Nb,
                          const std::vector<int>& alpha,
                          const std::vector<int>& beta);
 
-    ~ofdm_bouzegzi_c_impl();
-
-    void rescale_fft(bool forward);
-    void do_fft(const gr_complex* in, gr_complex* out);
+    ~ofdm_bouzegzi_c_impl() override;
 
-    float autocorr(const gr_complex* sig, int a, int b, int p);
-    gr_complex autocorr_orig(const gr_complex* sig, int a, int b, int p);
-    float cost_func(const gr_complex* sig, int a, int b);
 
     // Where all the action really happens
     int work(int noutput_items,
              gr_vector_const_void_star& input_items,
-             gr_vector_void_star& output_items);
+             gr_vector_void_star& output_items) override;
 };
 
 } // namespace inspector