ik1xpv · hayguen · Jan 10, 2021 · Jan 8, 2021 · Jan 8, 2021 · Jan 8, 2021
diff --git a/Core/config.h b/Core/config.h
@@ -79,8 +79,11 @@ enum rf_mode { NOMODE = 0, HFMODE = 0x1, VHFMODE = 0x2 };
 extern bool saveADCsamplesflag;
 
 const int transferSize = 131072;
+const int transferSamples = 131072 / sizeof(int16_t);
 
 const uint32_t DEFAULT_ADC_FREQ = 64000000;	// ADC sampling frequency
 
+const uint32_t DEFAULT_TRANSFERS_PER_SEC = DEFAULT_ADC_FREQ / transferSamples;
+
 #endif // _CONFIG_H_
 
diff --git a/Core/fft_mt_r2iq.cpp b/Core/fft_mt_r2iq.cpp
@@ -19,19 +19,35 @@ The name r2iq as Real 2 I+Q stream
 
 #include "fir.h"
 
+#include <assert.h>
 #include <string.h>
 #include <algorithm>
-#include <assert.h>
+#include <utility>
+
+// assure, that ADC is not oversteered?
+#define PRINT_INPUT_RANGE  0
+
 
 struct r2iqThreadArg {
 
-	fftwf_plan plan_t2f_r2c;          // fftw plan buffers Freq to Time complex to complex per decimation ratio
-	fftwf_plan plan_f2t_c2c;          // fftw plan buffers Time to Freq real to complex per buffer
-	fftwf_plan plans_f2t_c2c[NDECIDX];
+	r2iqThreadArg()
+	{
+#if PRINT_INPUT_RANGE
+		MinMaxBlockCount = 0;
+		MinValue = 0;
+		MaxValue = 0;
+#endif
+	}
+
 	float *ADCinTime;                // point to each threads input buffers [nftt][n]
 	fftwf_complex *ADCinFreq;         // buffers in frequency
 	fftwf_complex *inFreqTmp;         // tmp decimation output buffers (after tune shift)
 	fftwf_complex *outTimeTmp;        // tmp decimation output buffers baseband time cplx
+#if PRINT_INPUT_RANGE
+	int MinMaxBlockCount;
+	int16_t MinValue;
+	int16_t MaxValue;
+#endif
 };
 
 r2iqControlClass::r2iqControlClass()
@@ -78,6 +94,24 @@ fft_mt_r2iq::fft_mt_r2iq() :
 	}
 	GainScale = BBRF103_GAINFACTOR;
 
+#ifndef NDEBUG
+	int mratio = 1;  // 1,2,4,8,16,..
+	const float Astop = 120.0f;
+	const float relPass = 0.85f;  // 85% of Nyquist should be usable
+	const float relStop = 1.1f;   // 'some' alias back into transition band is OK
+	printf("\n***************************************************************************\n");
+	printf("Filter tap estimation, Astop = %.1f dB, relPass = %.2f, relStop = %.2f\n", Astop, relPass, relStop);
+	for (int d = 0; d < NDECIDX; d++)
+	{
+		float Bw = 64.0f / mratio;
+		int ntaps = KaiserWindow(0, Astop, relPass * Bw / 128.0f, relStop * Bw / 128.0f, nullptr);
+		printf("decimation %2d: KaiserWindow(Astop = %.1f dB, Fpass = %.3f,Fstop = %.3f, Bw %.3f @ %f ) => %d taps\n",
+			d, Astop, relPass * Bw, relStop * Bw, Bw, 128.0f, ntaps);
+		mratio = mratio * 2;
+	}
+	printf("***************************************************************************\n");
+#endif
+
 }
 
 fft_mt_r2iq::~fft_mt_r2iq()
@@ -90,17 +124,18 @@ fft_mt_r2iq::~fft_mt_r2iq()
 	}
 	fftwf_free(filterHw);
 
+	fftwf_destroy_plan(plan_t2f_r2c);
+	for (int d = 0; d < NDECIDX; d++)
+	{
+		fftwf_destroy_plan(plans_f2t_c2c[d]);
+	}
+
 	for (unsigned t = 0; t < processor_count; t++) {
 		auto th = threadArgs[t];
 		fftwf_free(th->ADCinTime);
 		fftwf_free(th->ADCinFreq);
 		fftwf_free(th->inFreqTmp);
 		fftwf_free(th->outTimeTmp);
-		fftwf_destroy_plan(th->plan_t2f_r2c);
-		for (int d = 0; d < NDECIDX; d++)
-		{
-			fftwf_destroy_plan(th->plans_f2t_c2c[d]);
-		}
 
 		delete threadArgs[t];
 	}
@@ -170,6 +205,7 @@ void fft_mt_r2iq::Init(float gain, int16_t **buffers, float** obuffers)
 	if (processor_count > N_MAX_R2IQ_THREADS)
 		processor_count = N_MAX_R2IQ_THREADS;
 
+	{
 		fftwf_plan filterplan_t2f_c2c; // time to frequency fft
 
 		DbgPrintf((char *) "r2iqCntrl initialization\n");
@@ -186,51 +222,18 @@ void fft_mt_r2iq::Init(float gain, int16_t **buffers, float** obuffers)
 			filterHw[d] = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*halfFft);     // halfFft
 		}
 
-		for (int t = 0; t < halfFft; t++)
-		{
-			pfilterht[t][0] = 0.0f;
-			pfilterht[t][1] = 0.0f;
-		}
-
 		filterplan_t2f_c2c = fftwf_plan_dft_1d(halfFft, pfilterht, filterHw[0], FFTW_FORWARD, FFTW_MEASURE);
 		float *pht = new float[halfFft / 4 + 1];
+		const float Astop = 120.0f;
+		const float relPass = 0.85f;  // 85% of Nyquist should be usable
+		const float relStop = 1.1f;   // 'some' alias back into transition band is OK
 		for (int d = 0; d < NDECIDX; d++)	// @todo when increasing NDECIDX
 		{
 			// @todo: have dynamic bandpass filter size - depending on decimation
 			//   to allow same stopband-attenuation for all decimations
-#if 0
-			// switch below does same as this
-			float Astop = std::min(120.0f, 130.0f - d * 10.0f);
-			float normFpass = ???;
-			float normFstop = (32.0f / (1 << d)) / 64.0f;
-			KaiserWindow(halfFft / 4 + 1, Astop, 0.7f / 64.0f, normFstop, pht);
-#else
-			switch (d)
-			{
-			// @todo: case 6 == NDECIDX -1 is missing!?!
-			case 5:
-				KaiserWindow(halfFft / 4 + 1, 80.0f, 0.7f/64.0f, 1.0f/64.0f, pht);
-				break;
-			case 4:
-				KaiserWindow(halfFft / 4 + 1, 90.0f, 1.6f/64.0f, 2.0f/64.0f, pht);
-				break;
-			case 3:
-				KaiserWindow(halfFft / 4 + 1, 100.0f, 3.6f/64.0f, 4.0f/64.0f, pht);
-				break;
-			case 2:
-				KaiserWindow(halfFft / 4 + 1, 110.0f, 7.6f/64.0f, 8.0f/64.0f, pht);
-				break;
-			case 1:
-				KaiserWindow(halfFft / 4 + 1, 120.0f, 15.7f/64.0f, 16.0f/64.0f, pht);
-				break;
-			default:
-				//assert(0);  --> case 6 == NDECIDX -1
-				// no-break
-			case 0:
-				KaiserWindow(halfFft / 4 + 1, 120.0f, 30.5f/64.0f, 32.0f/64.0f, pht);
-				break;
-			}
-#endif
+			float Bw = 64.0f / mratio[d];
+			// Bw *= 0.8f;  // easily visualize Kaiser filter's response
+			KaiserWindow(halfFft / 4 + 1, Astop, relPass * Bw / 128.0f, relStop * Bw / 128.0f, pht);
 
 			float gainadj = gain  / sqrtf(2.0f) * 2048.0f / (float)FFTN_R_ADC; // reference is FFTN_R_ADC == 2048
 
@@ -254,14 +257,15 @@ void fft_mt_r2iq::Init(float gain, int16_t **buffers, float** obuffers)
 			th->ADCinFreq = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*(halfFft + 1)); // 1024+1
 			th->inFreqTmp = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*(halfFft));    // 1024
 			th->outTimeTmp = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*(halfFft));
-			th->plan_t2f_r2c = fftwf_plan_dft_r2c_1d(2 * halfFft, th->ADCinTime, th->ADCinFreq, FFTW_MEASURE);
-			for (int d = 0; d < NDECIDX; d++)
-			{
-				th->plans_f2t_c2c[d] = fftwf_plan_dft_1d(mfftdim[d], th->inFreqTmp, th->outTimeTmp, FFTW_BACKWARD, FFTW_MEASURE);
-			}
+		}
 
+		plan_t2f_r2c = fftwf_plan_dft_r2c_1d(2 * halfFft, threadArgs[0]->ADCinTime, threadArgs[0]->ADCinFreq, FFTW_MEASURE);
+		for (int d = 0; d < NDECIDX; d++)
+		{
+			plans_f2t_c2c[d] = fftwf_plan_dft_1d(mfftdim[d], threadArgs[0]->inFreqTmp, threadArgs[0]->outTimeTmp, FFTW_BACKWARD, FFTW_MEASURE);
 		}
 	}
+}
 
 void * fft_mt_r2iq::r2iqThreadf(r2iqThreadArg *th) {
 
@@ -270,7 +274,7 @@ void * fft_mt_r2iq::r2iqThreadf(r2iqThreadArg *th) {
 	const int mratio = this->getRatio();
 	const fftwf_complex* filter = filterHw[decimate];
 	const bool lsb = this->getSideband();
-	th->plan_f2t_c2c = th->plans_f2t_c2c[decimate];
+	plan_f2t_c2c = &plans_f2t_c2c[decimate];
 
 	while (r2iqOn) {
 		const int16_t *dataADC;  // pointer to input data
@@ -301,7 +305,7 @@ void * fft_mt_r2iq::r2iqThreadf(r2iqThreadArg *th) {
 			int offset = ((transferSize / sizeof(int16_t)) / mratio) * moff;
 			pout = this->obuffers[modx] + offset;
 
-			this->bufIdx = ((this->bufIdx + 1) % QUEUE_SIZE);
+			this->bufIdx = (this->bufIdx + 1) % QUEUE_SIZE;
 
 			endloop = lastThread->ADCinTime + transferSize / sizeof(int16_t);
 			lastThread = th;
@@ -317,9 +321,18 @@ void * fft_mt_r2iq::r2iqThreadf(r2iqThreadArg *th) {
 		// directly inside the following loop (for "k < fftPerBuf")
 		//   just before the forward fft "fftwf_execute_dft_r2c" is called
 		// idea: this should improve cache/memory locality
+#if PRINT_INPUT_RANGE
+		std::pair<int16_t, int16_t> blockMinMax = std::make_pair<int16_t, int16_t>(0, 0);
+#endif
 		if (!this->getRand())        // plain samples no ADC rand set
 		{
-			for (int m = 0; m < transferSize / sizeof(int16_t); m++) {
+#if PRINT_INPUT_RANGE
+			auto minmax = std::minmax_element(dataADC, dataADC + transferSamples);
+			blockMinMax.first = *minmax.first;
+			blockMinMax.second = *minmax.second;
+#endif
+			for (int m = 0; m < transferSamples; m++)
+			{
 				*inloop++ = *dataADC++;
 			}
 		}
@@ -328,18 +341,43 @@ void * fft_mt_r2iq::r2iqThreadf(r2iqThreadArg *th) {
 			// @todo: can this get implemented without the RandTable[]?
 			//   for less cache trashing?
 			//   ideally with some simd commands ..
-			for (int m = 0; m < transferSize / sizeof(int16_t); m++) {
+			for (int m = 0; m < transferSamples; m++)
+			{
+#if PRINT_INPUT_RANGE
+				int16_t smp = RandTable[(uint16_t)*dataADC++];
+				blockMinMax.first = std::min(blockMinMax.first, smp);
+				blockMinMax.second = std::max(blockMinMax.second, smp);
+				*inloop++ = smp;
+#else
 				*inloop++ = (RandTable[(uint16_t)*dataADC++]);
+#endif
 			}
 		}
 
+#if PRINT_INPUT_RANGE
+		th->MinValue = std::min(blockMinMax.first, th->MinValue);
+		th->MaxValue = std::max(blockMinMax.second, th->MaxValue);
+		++th->MinMaxBlockCount;
+		if (th->MinMaxBlockCount * processor_count / 3 >= DEFAULT_TRANSFERS_PER_SEC )
+		{
+			float minBits = (th->MinValue < 0) ? (log10f((float)(-th->MinValue)) / log10f(2.0f)) : -1.0f;
+			float maxBits = (th->MaxValue > 0) ? (log10f((float)(th->MaxValue)) / log10f(2.0f)) : -1.0f;
+			printf("r2iq: min = %d (%.1f bits) %.2f%%, max = %d (%.1f bits) %.2f%%\n",
+				(int)th->MinValue, minBits, th->MinValue *-100.0f / 32768.0f,
+				(int)th->MaxValue, maxBits, th->MaxValue * 100.0f / 32768.0f);
+			th->MinValue = 0;
+			th->MaxValue = 0;
+			th->MinMaxBlockCount = 0;
+		}
+#endif
+
 		// decimate in frequency plus tuning
 
-		// Caculate the parameters for the first half
+		// Calculate the parameters for the first half
 		const auto count = std::min(mfft/2, halfFft - _mtunebin);
 		const auto source = &th->ADCinFreq[_mtunebin];
 
-		// Caculate the parameters for the second half
+		// Calculate the parameters for the second half
 		const auto start = std::max(0, mfft / 2 - _mtunebin);
 		const auto source2 = &th->ADCinFreq[_mtunebin - mfft / 2];
 		const auto filter2 = &filter[halfFft - mfft / 2];
@@ -352,7 +390,7 @@ void * fft_mt_r2iq::r2iqThreadf(r2iqThreadArg *th) {
 			{
 				// FFT first stage: time to frequency, real to complex
 				// 'full' transformation size: 2 * halfFft
-				fftwf_execute_dft_r2c(th->plan_t2f_r2c, th->ADCinTime + (3 * halfFft / 2) * k, th->ADCinFreq);
+				fftwf_execute_dft_r2c(plan_t2f_r2c, th->ADCinTime + (3 * halfFft / 2) * k, th->ADCinFreq);
 				// result now in th->ADCinFreq[]
 
 				// circular shift (mixing in full bins) and low/bandpass filtering (complex multiplication)
@@ -385,7 +423,7 @@ void * fft_mt_r2iq::r2iqThreadf(r2iqThreadArg *th) {
 
 				// 'shorter' inverse FFT transform (decimation); frequency (back) to COMPLEX time domain
 				// transform size: mfft = mfftdim[k] = halfFft / 2^k with k = mdecimation
-				fftwf_execute(th->plan_f2t_c2c);     //  c2c decimation
+				fftwf_execute_dft(*plan_f2t_c2c, th->inFreqTmp, th->outTimeTmp);     //  c2c decimation
 				// result now in th->outTimeTmp[]
 			}
 

diff --git a/Core/fft_mt_r2iq.h b/Core/fft_mt_r2iq.h
@@ -37,6 +37,10 @@ class fft_mt_r2iq : public r2iqControlClass
     int fftPerBuf; // number of ffts per buffer with 256|768 overlap
     fftwf_complex **filterHw;       // Hw complex to each decimation ratio
 
+	fftwf_plan plan_t2f_r2c;          // fftw plan buffers Freq to Time complex to complex per decimation ratio
+	fftwf_plan *plan_f2t_c2c;          // fftw plan buffers Time to Freq real to complex per buffer
+	fftwf_plan plans_f2t_c2c[NDECIDX];
+
     uint32_t processor_count;
     r2iqThreadArg* threadArgs[N_MAX_R2IQ_THREADS];
     std::condition_variable cvADCbufferAvailable;  // unlock when a sample buffer is ready

diff --git a/Core/fir.cpp b/Core/fir.cpp
@@ -24,7 +24,28 @@ static float Izero(float x)
     return(sum);
 }
 
-void KaiserWindow(int num_taps, float Astop, float normFpass, float normFstop, float *m_Coef)
+
+////////////////////////////////////////////////////////////////////
+// Create a FIR Low Pass filter
+// num_taps if > 0, forces filter design to be this number of taps
+//          if < 0, limits filter design to be max negative number of taps
+// Astop = Stopband Atenuation in dB (ie 40dB is 40dB stopband attenuation)
+// normFpass = Lowpass passband frequency - relative to samplerate
+// normFstop = Lowpass stopband frequency - relative to samplerate
+// Coef = pointer to array, where to put the resulting (real) coefficients
+//        might be nullptr, to estimate the number of coefficients
+// return the used/estimated number of coefficients
+//
+//           -------------
+//                        |
+//                         |
+//                          |
+//                           |
+//    Astop                   ---------------
+//                    Fpass   Fstop
+//
+////////////////////////////////////////////////////////////////////
+int KaiserWindow(int num_taps, float Astop, float normFpass, float normFstop, float *Coef)
 {
     int n;
     float Beta;
@@ -48,9 +69,21 @@ void KaiserWindow(int num_taps, float Astop, float normFpass, float normFstop, f
     Beta = K_PI * Alpha;
     */
 
-    //number of filter taps we fix to 1025
-    int m_NumTaps = num_taps; // (Astop - 8.0) / (2.285*K_2PI*(normFstop - normFpass) ) + 1;
+    // now estimate number of filter taps required based on filter specs
+    int m_NumTaps = (Astop - 8.0) / (2.285*K_2PI*(normFstop - normFpass) ) + 1;
+
+    // clamp range of filter taps
+    if (num_taps < 0 && m_NumTaps > -num_taps)
+        m_NumTaps = -num_taps;
+    if (m_NumTaps < 3)
+        m_NumTaps = 3;
+
+    // early exit, if the user only wanted to estimate the number of taps
+    if (num_taps <= 0 && !Coef)
+        return m_NumTaps;
 
+    if (num_taps > 0)
+        m_NumTaps = num_taps;
 
     float fCenter = .5f * (float)(m_NumTaps - 1);
     float izb = Izero(Beta);        //precalculate denominator since is same for all points
@@ -65,8 +98,8 @@ void KaiserWindow(int num_taps, float Astop, float normFpass, float normFstop, f
             c = (float)sinf(K_2PI * x * normFcut) / (K_PI * x);
         //calculate Kaiser window and multiply to get coefficient
         x = ((float)n - ((float)m_NumTaps - 1.0f) / 2.0f) / (((float)m_NumTaps - 1.0f) / 2.0f);
-        m_Coef[n] = Scale * c * Izero(Beta * sqrtf(1 - (x * x))) / izb;
+        Coef[n] = Scale * c * Izero(Beta * sqrtf(1 - (x * x))) / izb;
     }
 
-    return;
+    return m_NumTaps;
 }
diff --git a/Core/fir.h b/Core/fir.h
@@ -1,3 +1,3 @@
 #pragma once
 
-void KaiserWindow(int num_taps, float Astop, float normFpass, float normFstop, float *m_Coef);
+int KaiserWindow(int num_taps, float Astop, float normFpass, float normFstop, float * Coef);
diff --git a/Core/r2iq.h b/Core/r2iq.h
@@ -2,7 +2,7 @@
 #define R2IQ_H
 #include "license.txt" 
 
-#define NDECIDX 7  //number of srate
+#define NDECIDX 10  //number of srate
 
 #include <thread>
 #include <mutex>
@@ -39,9 +39,9 @@ class r2iqControlClass {
       // 128 Msps: 0 => 64Msps, 1 => 32Msps, 2=> 16Msps, 3 = 8Msps, 4 = 4Msps, 5 = 2Msps
     bool r2iqOn;        // r2iq on flag
     int16_t RandTable[65536];  // ADC RANDomize table used to whitening EMI from ADC data bus.
+    int mratio [NDECIDX];  // ratio
 
 private:
-    int mratio [NDECIDX];  // ratio
     bool randADC;       // randomized ADC output
     bool sideband;
 };