Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use Kaiser with 85% of Nyquist band .. #147

Merged
merged 4 commits into from
Jan 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Core/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,11 @@ enum rf_mode { NOMODE = 0, HFMODE = 0x1, VHFMODE = 0x2 };
extern bool saveADCsamplesflag;

const int transferSize = 131072;
const int transferSamples = 131072 / sizeof(int16_t);

const uint32_t DEFAULT_ADC_FREQ = 64000000; // ADC sampling frequency

const uint32_t DEFAULT_TRANSFERS_PER_SEC = DEFAULT_ADC_FREQ / transferSamples;

#endif // _CONFIG_H_

160 changes: 99 additions & 61 deletions Core/fft_mt_r2iq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,35 @@ The name r2iq as Real 2 I+Q stream

#include "fir.h"

#include <assert.h>
#include <string.h>
#include <algorithm>
#include <assert.h>
#include <utility>

// assure, that ADC is not oversteered?
#define PRINT_INPUT_RANGE 0
howard0su marked this conversation as resolved.
Show resolved Hide resolved


struct r2iqThreadArg {

fftwf_plan plan_t2f_r2c; // fftw plan buffers Freq to Time complex to complex per decimation ratio
fftwf_plan plan_f2t_c2c; // fftw plan buffers Time to Freq real to complex per buffer
fftwf_plan plans_f2t_c2c[NDECIDX];
r2iqThreadArg()
{
#if PRINT_INPUT_RANGE
MinMaxBlockCount = 0;
MinValue = 0;
MaxValue = 0;
#endif
}

float *ADCinTime; // point to each threads input buffers [nftt][n]
fftwf_complex *ADCinFreq; // buffers in frequency
fftwf_complex *inFreqTmp; // tmp decimation output buffers (after tune shift)
fftwf_complex *outTimeTmp; // tmp decimation output buffers baseband time cplx
#if PRINT_INPUT_RANGE
int MinMaxBlockCount;
int16_t MinValue;
int16_t MaxValue;
#endif
};

r2iqControlClass::r2iqControlClass()
Expand Down Expand Up @@ -78,6 +94,24 @@ fft_mt_r2iq::fft_mt_r2iq() :
}
GainScale = BBRF103_GAINFACTOR;

#ifndef NDEBUG
int mratio = 1; // 1,2,4,8,16,..
const float Astop = 120.0f;
const float relPass = 0.85f; // 85% of Nyquist should be usable
const float relStop = 1.1f; // 'some' alias back into transition band is OK
printf("\n***************************************************************************\n");
printf("Filter tap estimation, Astop = %.1f dB, relPass = %.2f, relStop = %.2f\n", Astop, relPass, relStop);
for (int d = 0; d < NDECIDX; d++)
{
float Bw = 64.0f / mratio;
int ntaps = KaiserWindow(0, Astop, relPass * Bw / 128.0f, relStop * Bw / 128.0f, nullptr);
printf("decimation %2d: KaiserWindow(Astop = %.1f dB, Fpass = %.3f,Fstop = %.3f, Bw %.3f @ %f ) => %d taps\n",
d, Astop, relPass * Bw, relStop * Bw, Bw, 128.0f, ntaps);
mratio = mratio * 2;
}
printf("***************************************************************************\n");
#endif

}

fft_mt_r2iq::~fft_mt_r2iq()
Expand All @@ -90,17 +124,18 @@ fft_mt_r2iq::~fft_mt_r2iq()
}
fftwf_free(filterHw);

fftwf_destroy_plan(plan_t2f_r2c);
for (int d = 0; d < NDECIDX; d++)
{
fftwf_destroy_plan(plans_f2t_c2c[d]);
}

for (unsigned t = 0; t < processor_count; t++) {
auto th = threadArgs[t];
fftwf_free(th->ADCinTime);
fftwf_free(th->ADCinFreq);
fftwf_free(th->inFreqTmp);
fftwf_free(th->outTimeTmp);
fftwf_destroy_plan(th->plan_t2f_r2c);
for (int d = 0; d < NDECIDX; d++)
{
fftwf_destroy_plan(th->plans_f2t_c2c[d]);
}

delete threadArgs[t];
}
Expand Down Expand Up @@ -170,6 +205,7 @@ void fft_mt_r2iq::Init(float gain, int16_t **buffers, float** obuffers)
if (processor_count > N_MAX_R2IQ_THREADS)
processor_count = N_MAX_R2IQ_THREADS;

{
fftwf_plan filterplan_t2f_c2c; // time to frequency fft

DbgPrintf((char *) "r2iqCntrl initialization\n");
Expand All @@ -186,51 +222,18 @@ void fft_mt_r2iq::Init(float gain, int16_t **buffers, float** obuffers)
filterHw[d] = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*halfFft); // halfFft
}

for (int t = 0; t < halfFft; t++)
{
pfilterht[t][0] = 0.0f;
pfilterht[t][1] = 0.0f;
}

filterplan_t2f_c2c = fftwf_plan_dft_1d(halfFft, pfilterht, filterHw[0], FFTW_FORWARD, FFTW_MEASURE);
float *pht = new float[halfFft / 4 + 1];
const float Astop = 120.0f;
const float relPass = 0.85f; // 85% of Nyquist should be usable
const float relStop = 1.1f; // 'some' alias back into transition band is OK
for (int d = 0; d < NDECIDX; d++) // @todo when increasing NDECIDX
{
// @todo: have dynamic bandpass filter size - depending on decimation
// to allow same stopband-attenuation for all decimations
#if 0
// switch below does same as this
float Astop = std::min(120.0f, 130.0f - d * 10.0f);
float normFpass = ???;
float normFstop = (32.0f / (1 << d)) / 64.0f;
KaiserWindow(halfFft / 4 + 1, Astop, 0.7f / 64.0f, normFstop, pht);
#else
switch (d)
{
// @todo: case 6 == NDECIDX -1 is missing!?!
case 5:
KaiserWindow(halfFft / 4 + 1, 80.0f, 0.7f/64.0f, 1.0f/64.0f, pht);
break;
case 4:
KaiserWindow(halfFft / 4 + 1, 90.0f, 1.6f/64.0f, 2.0f/64.0f, pht);
break;
case 3:
KaiserWindow(halfFft / 4 + 1, 100.0f, 3.6f/64.0f, 4.0f/64.0f, pht);
break;
case 2:
KaiserWindow(halfFft / 4 + 1, 110.0f, 7.6f/64.0f, 8.0f/64.0f, pht);
break;
case 1:
KaiserWindow(halfFft / 4 + 1, 120.0f, 15.7f/64.0f, 16.0f/64.0f, pht);
break;
default:
//assert(0); --> case 6 == NDECIDX -1
// no-break
case 0:
KaiserWindow(halfFft / 4 + 1, 120.0f, 30.5f/64.0f, 32.0f/64.0f, pht);
break;
}
#endif
float Bw = 64.0f / mratio[d];
// Bw *= 0.8f; // easily visualize Kaiser filter's response
KaiserWindow(halfFft / 4 + 1, Astop, relPass * Bw / 128.0f, relStop * Bw / 128.0f, pht);

float gainadj = gain / sqrtf(2.0f) * 2048.0f / (float)FFTN_R_ADC; // reference is FFTN_R_ADC == 2048

Expand All @@ -254,14 +257,15 @@ void fft_mt_r2iq::Init(float gain, int16_t **buffers, float** obuffers)
th->ADCinFreq = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*(halfFft + 1)); // 1024+1
th->inFreqTmp = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*(halfFft)); // 1024
th->outTimeTmp = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*(halfFft));
th->plan_t2f_r2c = fftwf_plan_dft_r2c_1d(2 * halfFft, th->ADCinTime, th->ADCinFreq, FFTW_MEASURE);
for (int d = 0; d < NDECIDX; d++)
{
th->plans_f2t_c2c[d] = fftwf_plan_dft_1d(mfftdim[d], th->inFreqTmp, th->outTimeTmp, FFTW_BACKWARD, FFTW_MEASURE);
}
}

plan_t2f_r2c = fftwf_plan_dft_r2c_1d(2 * halfFft, threadArgs[0]->ADCinTime, threadArgs[0]->ADCinFreq, FFTW_MEASURE);
for (int d = 0; d < NDECIDX; d++)
{
plans_f2t_c2c[d] = fftwf_plan_dft_1d(mfftdim[d], threadArgs[0]->inFreqTmp, threadArgs[0]->outTimeTmp, FFTW_BACKWARD, FFTW_MEASURE);
}
}
}

void * fft_mt_r2iq::r2iqThreadf(r2iqThreadArg *th) {

Expand All @@ -270,7 +274,7 @@ void * fft_mt_r2iq::r2iqThreadf(r2iqThreadArg *th) {
const int mratio = this->getRatio();
const fftwf_complex* filter = filterHw[decimate];
const bool lsb = this->getSideband();
th->plan_f2t_c2c = th->plans_f2t_c2c[decimate];
plan_f2t_c2c = &plans_f2t_c2c[decimate];

while (r2iqOn) {
const int16_t *dataADC; // pointer to input data
Expand Down Expand Up @@ -301,7 +305,7 @@ void * fft_mt_r2iq::r2iqThreadf(r2iqThreadArg *th) {
int offset = ((transferSize / sizeof(int16_t)) / mratio) * moff;
pout = this->obuffers[modx] + offset;

this->bufIdx = ((this->bufIdx + 1) % QUEUE_SIZE);
this->bufIdx = (this->bufIdx + 1) % QUEUE_SIZE;

endloop = lastThread->ADCinTime + transferSize / sizeof(int16_t);
lastThread = th;
Expand All @@ -317,9 +321,18 @@ void * fft_mt_r2iq::r2iqThreadf(r2iqThreadArg *th) {
// directly inside the following loop (for "k < fftPerBuf")
// just before the forward fft "fftwf_execute_dft_r2c" is called
// idea: this should improve cache/memory locality
#if PRINT_INPUT_RANGE
std::pair<int16_t, int16_t> blockMinMax = std::make_pair<int16_t, int16_t>(0, 0);
#endif
if (!this->getRand()) // plain samples no ADC rand set
{
for (int m = 0; m < transferSize / sizeof(int16_t); m++) {
#if PRINT_INPUT_RANGE
auto minmax = std::minmax_element(dataADC, dataADC + transferSamples);
blockMinMax.first = *minmax.first;
blockMinMax.second = *minmax.second;
#endif
for (int m = 0; m < transferSamples; m++)
{
*inloop++ = *dataADC++;
}
}
Expand All @@ -328,18 +341,43 @@ void * fft_mt_r2iq::r2iqThreadf(r2iqThreadArg *th) {
// @todo: can this get implemented without the RandTable[]?
// for less cache trashing?
// ideally with some simd commands ..
for (int m = 0; m < transferSize / sizeof(int16_t); m++) {
for (int m = 0; m < transferSamples; m++)
{
#if PRINT_INPUT_RANGE
int16_t smp = RandTable[(uint16_t)*dataADC++];
blockMinMax.first = std::min(blockMinMax.first, smp);
blockMinMax.second = std::max(blockMinMax.second, smp);
*inloop++ = smp;
#else
*inloop++ = (RandTable[(uint16_t)*dataADC++]);
#endif
}
}

#if PRINT_INPUT_RANGE
th->MinValue = std::min(blockMinMax.first, th->MinValue);
th->MaxValue = std::max(blockMinMax.second, th->MaxValue);
++th->MinMaxBlockCount;
if (th->MinMaxBlockCount * processor_count / 3 >= DEFAULT_TRANSFERS_PER_SEC )
{
float minBits = (th->MinValue < 0) ? (log10f((float)(-th->MinValue)) / log10f(2.0f)) : -1.0f;
float maxBits = (th->MaxValue > 0) ? (log10f((float)(th->MaxValue)) / log10f(2.0f)) : -1.0f;
printf("r2iq: min = %d (%.1f bits) %.2f%%, max = %d (%.1f bits) %.2f%%\n",
(int)th->MinValue, minBits, th->MinValue *-100.0f / 32768.0f,
(int)th->MaxValue, maxBits, th->MaxValue * 100.0f / 32768.0f);
th->MinValue = 0;
th->MaxValue = 0;
th->MinMaxBlockCount = 0;
}
#endif

// decimate in frequency plus tuning

// Caculate the parameters for the first half
// Calculate the parameters for the first half
const auto count = std::min(mfft/2, halfFft - _mtunebin);
const auto source = &th->ADCinFreq[_mtunebin];

// Caculate the parameters for the second half
// Calculate the parameters for the second half
const auto start = std::max(0, mfft / 2 - _mtunebin);
const auto source2 = &th->ADCinFreq[_mtunebin - mfft / 2];
const auto filter2 = &filter[halfFft - mfft / 2];
Expand All @@ -352,7 +390,7 @@ void * fft_mt_r2iq::r2iqThreadf(r2iqThreadArg *th) {
{
// FFT first stage: time to frequency, real to complex
// 'full' transformation size: 2 * halfFft
fftwf_execute_dft_r2c(th->plan_t2f_r2c, th->ADCinTime + (3 * halfFft / 2) * k, th->ADCinFreq);
fftwf_execute_dft_r2c(plan_t2f_r2c, th->ADCinTime + (3 * halfFft / 2) * k, th->ADCinFreq);
// result now in th->ADCinFreq[]

// circular shift (mixing in full bins) and low/bandpass filtering (complex multiplication)
Expand Down Expand Up @@ -385,7 +423,7 @@ void * fft_mt_r2iq::r2iqThreadf(r2iqThreadArg *th) {

// 'shorter' inverse FFT transform (decimation); frequency (back) to COMPLEX time domain
// transform size: mfft = mfftdim[k] = halfFft / 2^k with k = mdecimation
fftwf_execute(th->plan_f2t_c2c); // c2c decimation
fftwf_execute_dft(*plan_f2t_c2c, th->inFreqTmp, th->outTimeTmp); // c2c decimation
// result now in th->outTimeTmp[]
}

Expand Down
4 changes: 4 additions & 0 deletions Core/fft_mt_r2iq.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ class fft_mt_r2iq : public r2iqControlClass
int fftPerBuf; // number of ffts per buffer with 256|768 overlap
fftwf_complex **filterHw; // Hw complex to each decimation ratio

fftwf_plan plan_t2f_r2c; // fftw plan buffers Freq to Time complex to complex per decimation ratio
fftwf_plan *plan_f2t_c2c; // fftw plan buffers Time to Freq real to complex per buffer
fftwf_plan plans_f2t_c2c[NDECIDX];

uint32_t processor_count;
r2iqThreadArg* threadArgs[N_MAX_R2IQ_THREADS];
std::condition_variable cvADCbufferAvailable; // unlock when a sample buffer is ready
Expand Down
43 changes: 38 additions & 5 deletions Core/fir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,28 @@ static float Izero(float x)
return(sum);
}

void KaiserWindow(int num_taps, float Astop, float normFpass, float normFstop, float *m_Coef)

////////////////////////////////////////////////////////////////////
// Create a FIR Low Pass filter
// num_taps if > 0, forces filter design to be this number of taps
// if < 0, limits filter design to be max negative number of taps
// Astop = Stopband Atenuation in dB (ie 40dB is 40dB stopband attenuation)
// normFpass = Lowpass passband frequency - relative to samplerate
// normFstop = Lowpass stopband frequency - relative to samplerate
// Coef = pointer to array, where to put the resulting (real) coefficients
// might be nullptr, to estimate the number of coefficients
// return the used/estimated number of coefficients
//
// -------------
// |
// |
// |
// |
// Astop ---------------
// Fpass Fstop
//
////////////////////////////////////////////////////////////////////
int KaiserWindow(int num_taps, float Astop, float normFpass, float normFstop, float *Coef)
{
int n;
float Beta;
Expand All @@ -48,9 +69,21 @@ void KaiserWindow(int num_taps, float Astop, float normFpass, float normFstop, f
Beta = K_PI * Alpha;
*/

//number of filter taps we fix to 1025
int m_NumTaps = num_taps; // (Astop - 8.0) / (2.285*K_2PI*(normFstop - normFpass) ) + 1;
// now estimate number of filter taps required based on filter specs
int m_NumTaps = (Astop - 8.0) / (2.285*K_2PI*(normFstop - normFpass) ) + 1;

// clamp range of filter taps
if (num_taps < 0 && m_NumTaps > -num_taps)
m_NumTaps = -num_taps;
if (m_NumTaps < 3)
m_NumTaps = 3;

// early exit, if the user only wanted to estimate the number of taps
if (num_taps <= 0 && !Coef)
return m_NumTaps;

if (num_taps > 0)
m_NumTaps = num_taps;

float fCenter = .5f * (float)(m_NumTaps - 1);
float izb = Izero(Beta); //precalculate denominator since is same for all points
Expand All @@ -65,8 +98,8 @@ void KaiserWindow(int num_taps, float Astop, float normFpass, float normFstop, f
c = (float)sinf(K_2PI * x * normFcut) / (K_PI * x);
//calculate Kaiser window and multiply to get coefficient
x = ((float)n - ((float)m_NumTaps - 1.0f) / 2.0f) / (((float)m_NumTaps - 1.0f) / 2.0f);
m_Coef[n] = Scale * c * Izero(Beta * sqrtf(1 - (x * x))) / izb;
Coef[n] = Scale * c * Izero(Beta * sqrtf(1 - (x * x))) / izb;
}

return;
return m_NumTaps;
}
2 changes: 1 addition & 1 deletion Core/fir.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#pragma once

void KaiserWindow(int num_taps, float Astop, float normFpass, float normFstop, float *m_Coef);
int KaiserWindow(int num_taps, float Astop, float normFpass, float normFstop, float * Coef);
4 changes: 2 additions & 2 deletions Core/r2iq.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#define R2IQ_H
#include "license.txt"

#define NDECIDX 7 //number of srate
#define NDECIDX 10 //number of srate

#include <thread>
#include <mutex>
Expand Down Expand Up @@ -39,9 +39,9 @@ class r2iqControlClass {
// 128 Msps: 0 => 64Msps, 1 => 32Msps, 2=> 16Msps, 3 = 8Msps, 4 = 4Msps, 5 = 2Msps
bool r2iqOn; // r2iq on flag
int16_t RandTable[65536]; // ADC RANDomize table used to whitening EMI from ADC data bus.
int mratio [NDECIDX]; // ratio

private:
int mratio [NDECIDX]; // ratio
bool randADC; // randomized ADC output
bool sideband;
};
Expand Down