From 3230a6c01aa5a98500e2d043fa687635c35a7b9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Mon, 10 Mar 2025 23:17:33 +0100 Subject: [PATCH 1/8] bump manylinux --- .github/workflows/ffi-builds.yml | 4 ++-- soxr-sys/build.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ffi-builds.yml b/.github/workflows/ffi-builds.yml index 491a721cc..1e1b9311c 100644 --- a/.github/workflows/ffi-builds.yml +++ b/.github/workflows/ffi-builds.yml @@ -68,13 +68,13 @@ jobs: buildargs: --no-default-features --features "rustls-tls-webpki-roots" - os: ubuntu-latest platform: linux - build_image: quay.io/pypa/manylinux_2_28_x86_64 + build_image: quay.io/pypa/manylinux_2_34_x86_64 dylib: liblivekit_ffi.so target: x86_64-unknown-linux-gnu name: ffi-linux-x86_64 - os: ubuntu-24.04-arm platform: linux - build_image: quay.io/pypa/manylinux_2_28_aarch64 + build_image: quay.io/pypa/manylinux_2_34_aarch64 dylib: liblivekit_ffi.so target: aarch64-unknown-linux-gnu name: ffi-linux-arm64 diff --git a/soxr-sys/build.rs b/soxr-sys/build.rs index 17fef5ace..e214a0c60 100644 --- a/soxr-sys/build.rs +++ b/soxr-sys/build.rs @@ -7,7 +7,7 @@ fn main() { build.define("SOXR_LIB", "0"); build - .flag_if_supported("-std=gnu89") + .flag_if_supported("-std=gnu99") .flag_if_supported("-Wnested-externs") .flag_if_supported("-Wmissing-prototypes") .flag_if_supported("-Wstrict-prototypes") From af03d0018a09be9a56eeffd3c10678cb063a8019 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Tue, 11 Mar 2025 00:19:47 +0100 Subject: [PATCH 2/8] Update rint-clip.h --- soxr-sys/src/rint-clip.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/soxr-sys/src/rint-clip.h b/soxr-sys/src/rint-clip.h index bfb645847..32a03ea84 100644 --- a/soxr-sys/src/rint-clip.h +++ b/soxr-sys/src/rint-clip.h @@ -39,6 +39,7 @@ static void RINT_CLIP(RINT_T * const dest, FLOATX const * const src, COPY_SEED DITHER_VARS; for (; i < n; ++i) { + fe_clear_invalid(); FLOATD const d = src[i] DITHERING; RINT(dest[stride * i], d); if (fe_test_invalid()) { @@ -62,6 +63,7 @@ static size_t LSX_RINT_CLIP(void * * const dest0, FLOATX const * const src, #if defined FE_INVALID && defined FPU_RINT #define _ RINT(dest[i], src[i] DITHERING); ++i for (i = 0; i < (n & ~15u);) { + fe_clear_invalid(); COPY_SEED1; DITHER_VARS; DO_16; @@ -105,6 +107,7 @@ static size_t LSX_RINT_CLIP_2(void * * dest0, FLOATX const * const * srcs, for (j = 0; j < stride; ++j, ++dest) { FLOATX const * const src = srcs[j]; for (i = 0; i < (n & ~15u);) { + fe_clear_invalid(); COPY_SEED1; DITHER_VARS; DO_16; From e1c7543962807ce757e924c5768718ff0a68cd14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 21 Mar 2025 19:46:09 +0100 Subject: [PATCH 3/8] test patches --- soxr-sys/src/data-io.c | 4 ++-- soxr-sys/src/data-io.h | 4 ++-- soxr-sys/src/rint-clip.h | 10 +++++----- soxr-sys/src/soxr.c | 7 ++++--- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/soxr-sys/src/data-io.c b/soxr-sys/src/data-io.c index fb6167583..2a93fda12 100644 --- a/soxr-sys/src/data-io.c +++ b/soxr-sys/src/data-io.c @@ -172,7 +172,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */ #if WITH_CR64 || WITH_CR64S size_t /* clips */ _soxr_interleave(soxr_datatype_t data_type, void * * dest0, - double const * const * src, size_t n, unsigned ch, unsigned long * seed) + double const * const * src, size_t n, unsigned ch, unsigned long long * seed) { switch (data_type & 3) { case SOXR_FLOAT32: INTERLEAVE_TO(float, 0); @@ -198,7 +198,7 @@ size_t /* clips */ _soxr_interleave(soxr_datatype_t data_type, void * * dest0, #if WITH_CR32 || WITH_CR32S || WITH_VR32 size_t /* clips */ _soxr_interleave_f(soxr_datatype_t data_type, void * * dest0, - float const * const * src, size_t n, unsigned ch, unsigned long * seed) + float const * const * src, size_t n, unsigned ch, unsigned long long * seed) { switch (data_type & 3) { case SOXR_FLOAT32: INTERLEAVE_TO(float, 1); diff --git a/soxr-sys/src/data-io.h b/soxr-sys/src/data-io.h index 83a0a133d..28e2d8907 100644 --- a/soxr-sys/src/data-io.h +++ b/soxr-sys/src/data-io.h @@ -26,7 +26,7 @@ size_t /* clips */ _soxr_interleave( double const * const * src, size_t n, unsigned ch, - unsigned long * seed); + unsigned long long * seed); size_t /* clips */ _soxr_interleave_f( soxr_datatype_t data_type, @@ -34,6 +34,6 @@ size_t /* clips */ _soxr_interleave_f( float const * const * src, size_t n, unsigned ch, - unsigned long * seed); + unsigned long long * seed); #endif diff --git a/soxr-sys/src/rint-clip.h b/soxr-sys/src/rint-clip.h index 32a03ea84..3294f4eaf 100644 --- a/soxr-sys/src/rint-clip.h +++ b/soxr-sys/src/rint-clip.h @@ -4,12 +4,12 @@ #if defined DITHER #define DITHERING + (1./32)*(int)(((ran1>>=3)&31)-((ran2>>=3)&31)) -#define DITHER_RAND (seed = 1664525UL * seed + 1013904223UL) >> 3 -#define DITHER_VARS unsigned long ran1 = DITHER_RAND, ran2 = DITHER_RAND -#define SEED_ARG , unsigned long * seed0 +#define DITHER_RAND (seed = 1664525ULL * seed + 1013904223ULL) >> 3 +#define DITHER_VARS unsigned long long ran1 = DITHER_RAND, ran2 = DITHER_RAND +#define SEED_ARG , unsigned long long * seed0 #define SAVE_SEED *seed0 = seed -#define COPY_SEED unsigned long seed = *seed0; -#define COPY_SEED1 unsigned long seed1 = seed +#define COPY_SEED unsigned long long seed = *seed0; +#define COPY_SEED1 unsigned long long seed1 = seed #define PASS_SEED1 , &seed1 #define PASS_SEED , &seed #define FLOATD double diff --git a/soxr-sys/src/soxr.c b/soxr-sys/src/soxr.c index c2861ac7c..0ece116bf 100644 --- a/soxr-sys/src/soxr.c +++ b/soxr-sys/src/soxr.c @@ -64,7 +64,7 @@ typedef void * resampler_shared_t; /* Between channels. */ typedef void (* deinterleave_t)(sample_t * * dest, soxr_datatype_t data_type, void const * * src0, size_t n, unsigned ch); typedef size_t (* interleave_t)(soxr_datatype_t data_type, void * * dest, - sample_t const * const * src, size_t, unsigned, unsigned long *); + sample_t const * const * src, size_t, unsigned, unsigned long long *); struct soxr { unsigned num_channels; @@ -86,7 +86,7 @@ struct soxr { void * * channel_ptrs; size_t clips; - unsigned long seed; + unsigned long long seed; int flushing; }; @@ -428,7 +428,8 @@ soxr_t soxr_create( p->io_spec.scale *= datatype_full_scale[p->io_spec.otype & 3] / datatype_full_scale[p->io_spec.itype & 3]; - p->seed = (unsigned long)time(0) ^ (unsigned long)(size_t)p; + //p->seed = (unsigned long)time(0) ^ (unsigned long)(size_t)p; + p->seed = 0xc2ec33ef97a5ULL; /* Fixed dithering seed for deterministic int16 output */ #if WITH_CR32 || WITH_CR32S || WITH_VR32 if (0 From d4580101631188b7518d85b99b03ad2c2a0b40b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 21 Mar 2025 20:10:38 +0100 Subject: [PATCH 4/8] ? --- soxr-sys/build.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/soxr-sys/build.rs b/soxr-sys/build.rs index e214a0c60..7a8266294 100644 --- a/soxr-sys/build.rs +++ b/soxr-sys/build.rs @@ -7,7 +7,7 @@ fn main() { build.define("SOXR_LIB", "0"); build - .flag_if_supported("-std=gnu99") + .flag_if_supported("-std=gnu89") .flag_if_supported("-Wnested-externs") .flag_if_supported("-Wmissing-prototypes") .flag_if_supported("-Wstrict-prototypes") @@ -16,8 +16,8 @@ fn main() { .flag_if_supported("-Wextra") .flag_if_supported("-pedantic") .flag_if_supported("-Wundef") - .flag_if_supported("-Wpointer-arith") - .flag_if_supported("-Wno-long-long"); + .flag_if_supported("-Wpointer-arith"); + //.flag_if_supported("-Wno-long-long"); // TODO(theomonnom): Add SIMD support let sources = [ From 0779fb0670c3c3ee0eb73582aca73d3debedfc61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 21 Mar 2025 21:35:40 +0100 Subject: [PATCH 5/8] use cmake because i'm lazy --- .gitmodules | 3 + Cargo.lock | 21 +- soxr-sys/Cargo.toml | 4 +- soxr-sys/build.rs | 53 +- soxr-sys/generate_bindings.sh | 2 +- soxr-sys/soxr | 1 + soxr-sys/src/LICENCE | 23 - soxr-sys/src/aliases.h | 39 - soxr-sys/src/avfft32.c | 33 - soxr-sys/src/avfft32s.c | 32 - soxr-sys/src/ccrw2.h | 75 -- soxr-sys/src/cr-core.c | 314 ------ soxr-sys/src/cr.c | 588 ---------- soxr-sys/src/cr.h | 178 --- soxr-sys/src/cr32.c | 8 - soxr-sys/src/cr32s.c | 8 - soxr-sys/src/cr64.c | 8 - soxr-sys/src/cr64s.c | 8 - soxr-sys/src/data-io.c | 223 ---- soxr-sys/src/data-io.h | 39 - soxr-sys/src/dbesi0.c | 149 --- soxr-sys/src/dev32s.h | 54 - soxr-sys/src/dev64s.h | 42 - soxr-sys/src/fft4g.c | 1346 ----------------------- soxr-sys/src/fft4g.h | 23 - soxr-sys/src/fft4g32.c | 36 - soxr-sys/src/fft4g32s.c | 31 - soxr-sys/src/fft4g64.c | 35 - soxr-sys/src/fft4g_cache.h | 92 -- soxr-sys/src/fifo.h | 125 --- soxr-sys/src/filter.c | 277 ----- soxr-sys/src/filter.h | 44 - soxr-sys/src/half-coefs.h | 75 -- soxr-sys/src/half-fir.h | 61 -- soxr-sys/src/internal.h | 84 -- soxr-sys/src/math-wrap.h | 31 - soxr-sys/src/pffft-avx.h | 40 - soxr-sys/src/pffft-wrap.c | 110 -- soxr-sys/src/pffft.c | 1946 --------------------------------- soxr-sys/src/pffft.h | 197 ---- soxr-sys/src/pffft32.c | 39 - soxr-sys/src/pffft32s.c | 34 - soxr-sys/src/pffft64s.c | 34 - soxr-sys/src/poly-fir.h | 150 --- soxr-sys/src/poly-fir0.h | 56 - soxr-sys/src/rdft.h | 31 - soxr-sys/src/rdft_t.h | 24 - soxr-sys/src/rint-clip.h | 161 --- soxr-sys/src/rint.h | 102 -- soxr-sys/src/samplerate.h | 1 - soxr-sys/src/soxr-config.h | 28 - soxr-sys/src/soxr-lsr.c | 198 ---- soxr-sys/src/soxr-lsr.h | 78 -- soxr-sys/src/soxr.c | 843 -------------- soxr-sys/src/soxr.h | 344 ------ soxr-sys/src/soxr.rs | 237 ++-- soxr-sys/src/std-types.h | 48 - soxr-sys/src/util-simd.c | 89 -- soxr-sys/src/util32s.c | 8 - soxr-sys/src/util32s.h | 23 - soxr-sys/src/util64s.c | 8 - soxr-sys/src/util64s.h | 23 - soxr-sys/src/vr-coefs.c | 115 -- soxr-sys/src/vr-coefs.h | 94 -- soxr-sys/src/vr32.c | 651 ----------- 65 files changed, 96 insertions(+), 9781 deletions(-) create mode 160000 soxr-sys/soxr delete mode 100644 soxr-sys/src/LICENCE delete mode 100644 soxr-sys/src/aliases.h delete mode 100644 soxr-sys/src/avfft32.c delete mode 100644 soxr-sys/src/avfft32s.c delete mode 100644 soxr-sys/src/ccrw2.h delete mode 100644 soxr-sys/src/cr-core.c delete mode 100644 soxr-sys/src/cr.c delete mode 100644 soxr-sys/src/cr.h delete mode 100644 soxr-sys/src/cr32.c delete mode 100644 soxr-sys/src/cr32s.c delete mode 100644 soxr-sys/src/cr64.c delete mode 100644 soxr-sys/src/cr64s.c delete mode 100644 soxr-sys/src/data-io.c delete mode 100644 soxr-sys/src/data-io.h delete mode 100644 soxr-sys/src/dbesi0.c delete mode 100644 soxr-sys/src/dev32s.h delete mode 100644 soxr-sys/src/dev64s.h delete mode 100644 soxr-sys/src/fft4g.c delete mode 100644 soxr-sys/src/fft4g.h delete mode 100644 soxr-sys/src/fft4g32.c delete mode 100644 soxr-sys/src/fft4g32s.c delete mode 100644 soxr-sys/src/fft4g64.c delete mode 100644 soxr-sys/src/fft4g_cache.h delete mode 100644 soxr-sys/src/fifo.h delete mode 100644 soxr-sys/src/filter.c delete mode 100644 soxr-sys/src/filter.h delete mode 100644 soxr-sys/src/half-coefs.h delete mode 100644 soxr-sys/src/half-fir.h delete mode 100644 soxr-sys/src/internal.h delete mode 100644 soxr-sys/src/math-wrap.h delete mode 100644 soxr-sys/src/pffft-avx.h delete mode 100644 soxr-sys/src/pffft-wrap.c delete mode 100644 soxr-sys/src/pffft.c delete mode 100644 soxr-sys/src/pffft.h delete mode 100644 soxr-sys/src/pffft32.c delete mode 100644 soxr-sys/src/pffft32s.c delete mode 100644 soxr-sys/src/pffft64s.c delete mode 100644 soxr-sys/src/poly-fir.h delete mode 100644 soxr-sys/src/poly-fir0.h delete mode 100644 soxr-sys/src/rdft.h delete mode 100644 soxr-sys/src/rdft_t.h delete mode 100644 soxr-sys/src/rint-clip.h delete mode 100644 soxr-sys/src/rint.h delete mode 100644 soxr-sys/src/samplerate.h delete mode 100644 soxr-sys/src/soxr-config.h delete mode 100644 soxr-sys/src/soxr-lsr.c delete mode 100644 soxr-sys/src/soxr-lsr.h delete mode 100644 soxr-sys/src/soxr.c delete mode 100644 soxr-sys/src/soxr.h delete mode 100644 soxr-sys/src/std-types.h delete mode 100644 soxr-sys/src/util-simd.c delete mode 100644 soxr-sys/src/util32s.c delete mode 100644 soxr-sys/src/util32s.h delete mode 100644 soxr-sys/src/util64s.c delete mode 100644 soxr-sys/src/util64s.h delete mode 100644 soxr-sys/src/vr-coefs.c delete mode 100644 soxr-sys/src/vr-coefs.h delete mode 100644 soxr-sys/src/vr32.c diff --git a/.gitmodules b/.gitmodules index b70842c2a..bdbb4f029 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "yuv-sys/libyuv"] path = yuv-sys/libyuv url = https://chromium.googlesource.com/libyuv/libyuv +[submodule "soxr-sys/soxr"] + path = soxr-sys/soxr + url = https://github.com/dofuuz/soxr/ diff --git a/Cargo.lock b/Cargo.lock index f8a5eac4c..f21db717e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -431,9 +431,9 @@ checksum = "a2698f953def977c68f935bb0dfa959375ad4638570e969e2f1e9f433cbf1af6" [[package]] name = "cc" -version = "1.0.83" +version = "1.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +checksum = "e9e8aabfac534be767c909e0690571677d49f41bd8465ae876fe043d52ba5292" dependencies = [ "jobserver", "libc", @@ -495,6 +495,15 @@ dependencies = [ "libloading", ] +[[package]] +name = "cmake" +version = "0.1.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" +dependencies = [ + "cc", +] + [[package]] name = "codespan-reporting" version = "0.11.1" @@ -1481,9 +1490,9 @@ checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" [[package]] name = "jobserver" -version = "0.1.27" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" dependencies = [ "libc", ] @@ -1655,7 +1664,7 @@ dependencies = [ [[package]] name = "livekit-ffi" -version = "0.12.15" +version = "0.12.16" dependencies = [ "console-subscriber", "dashmap", @@ -2713,7 +2722,7 @@ dependencies = [ name = "soxr-sys" version = "0.1.0" dependencies = [ - "cc", + "cmake", "hound", ] diff --git a/soxr-sys/Cargo.toml b/soxr-sys/Cargo.toml index fde7a4c42..bd95141a5 100644 --- a/soxr-sys/Cargo.toml +++ b/soxr-sys/Cargo.toml @@ -5,11 +5,9 @@ authors = ["Theo Monnom . - - -Notes - -1. Re software in the `examples' directory: works that are not resampling -examples but are based on the given examples -- for example, applications using -the library -- shall not be considered to be derivative works of the examples. - -2. If building with pffft.c, see the licence embedded in that file. diff --git a/soxr-sys/src/aliases.h b/soxr-sys/src/aliases.h deleted file mode 100644 index d1a392f6e..000000000 --- a/soxr-sys/src/aliases.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#if defined SOXR_LIB - -#define lsx_bessel_I_0 _soxr_bessel_I_0 -#define lsx_cdft_f _soxr_cdft_f -#define lsx_cdft _soxr_cdft -#define lsx_clear_fft_cache_f _soxr_clear_fft_cache_f -#define lsx_clear_fft_cache _soxr_clear_fft_cache -#define lsx_ddct_f _soxr_ddct_f -#define lsx_ddct _soxr_ddct -#define lsx_ddst_f _soxr_ddst_f -#define lsx_ddst _soxr_ddst -#define lsx_design_lpf _soxr_design_lpf -#define lsx_dfct_f _soxr_dfct_f -#define lsx_dfct _soxr_dfct -#define lsx_dfst_f _soxr_dfst_f -#define lsx_dfst _soxr_dfst -#define lsx_fir_to_phase _soxr_fir_to_phase -#define lsx_f_resp _soxr_f_resp -#define lsx_init_fft_cache_f _soxr_init_fft_cache_f -#define lsx_init_fft_cache _soxr_init_fft_cache -#define lsx_inv_f_resp _soxr_inv_f_resp -#define lsx_kaiser_beta _soxr_kaiser_beta -#define lsx_kaiser_params _soxr_kaiser_params -#define lsx_make_lpf _soxr_make_lpf -#define lsx_ordered_convolve_f _soxr_ordered_convolve_f -#define lsx_ordered_convolve _soxr_ordered_convolve -#define lsx_ordered_partial_convolve_f _soxr_ordered_partial_convolve_f -#define lsx_ordered_partial_convolve _soxr_ordered_partial_convolve -#define lsx_rdft_f _soxr_rdft_f -#define lsx_rdft _soxr_rdft -#define lsx_safe_cdft_f _soxr_safe_cdft_f -#define lsx_safe_cdft _soxr_safe_cdft -#define lsx_safe_rdft_f _soxr_safe_rdft_f -#define lsx_safe_rdft _soxr_safe_rdft - -#endif diff --git a/soxr-sys/src/avfft32.c b/soxr-sys/src/avfft32.c deleted file mode 100644 index fe651f5db..000000000 --- a/soxr-sys/src/avfft32.c +++ /dev/null @@ -1,33 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#include -#include -#include -#include "filter.h" -#include "rdft_t.h" - -static void * forward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),DFT_R2C);} -static void * backward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),IDFT_C2R);} -static void rdft(int length, void * setup, float * h) {av_rdft_calc(setup, h); (void)length;} -static int multiplier(void) {return 2;} -static void nothing(void) {} -static int flags(void) {return 0;} - -fn_t _soxr_rdft32_cb[] = { - (fn_t)forward_setup, - (fn_t)backward_setup, - (fn_t)av_rdft_end, - (fn_t)rdft, - (fn_t)rdft, - (fn_t)rdft, - (fn_t)rdft, - (fn_t)_soxr_ordered_convolve_f, - (fn_t)_soxr_ordered_partial_convolve_f, - (fn_t)multiplier, - (fn_t)nothing, - (fn_t)malloc, - (fn_t)calloc, - (fn_t)free, - (fn_t)flags, -}; diff --git a/soxr-sys/src/avfft32s.c b/soxr-sys/src/avfft32s.c deleted file mode 100644 index 5a7e62db2..000000000 --- a/soxr-sys/src/avfft32s.c +++ /dev/null @@ -1,32 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#include -#include -#include "util32s.h" -#include "rdft_t.h" - -static void * forward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),DFT_R2C);} -static void * backward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),IDFT_C2R);} -static void rdft(int length, void * setup, float * h) {av_rdft_calc(setup, h); (void)length;} -static int multiplier(void) {return 2;} -static void nothing(void) {} -static int flags(void) {return RDFT_IS_SIMD;} - -fn_t _soxr_rdft32s_cb[] = { - (fn_t)forward_setup, - (fn_t)backward_setup, - (fn_t)av_rdft_end, - (fn_t)rdft, - (fn_t)rdft, - (fn_t)rdft, - (fn_t)rdft, - (fn_t)ORDERED_CONVOLVE_SIMD, - (fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD, - (fn_t)multiplier, - (fn_t)nothing, - (fn_t)SIMD_ALIGNED_MALLOC, - (fn_t)SIMD_ALIGNED_CALLOC, - (fn_t)SIMD_ALIGNED_FREE, - (fn_t)flags, -}; diff --git a/soxr-sys/src/ccrw2.h b/soxr-sys/src/ccrw2.h deleted file mode 100644 index 09331a4b1..000000000 --- a/soxr-sys/src/ccrw2.h +++ /dev/null @@ -1,75 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -/* Concurrent Control with "Readers" and "Writers", P.J. Courtois et al, 1971 */ - -#if !defined soxr_ccrw2_included -#define soxr_ccrw2_included - -#if defined SOXR_LIB -#include "internal.h" -#endif - -#if defined _OPENMP - -#include - -typedef struct { - int readcount, writecount; /* initial value = 0 */ - omp_lock_t mutex_1, mutex_2, mutex_3, w, r; /* initial value = 1 */ -} ccrw2_t; /* Problem #2: `writers-preference' */ - -#define ccrw2_become_reader(p) do {\ - omp_set_lock(&p.mutex_3);\ - omp_set_lock(&p.r);\ - omp_set_lock(&p.mutex_1);\ - if (++p.readcount == 1) omp_set_lock(&p.w);\ - omp_unset_lock(&p.mutex_1);\ - omp_unset_lock(&p.r);\ - omp_unset_lock(&p.mutex_3);\ -} while (0) -#define ccrw2_cease_reading(p) do {\ - omp_set_lock(&p.mutex_1);\ - if (!--p.readcount) omp_unset_lock(&p.w);\ - omp_unset_lock(&p.mutex_1);\ -} while (0) -#define ccrw2_become_writer(p) do {\ - omp_set_lock(&p.mutex_2);\ - if (++p.writecount == 1) omp_set_lock(&p.r);\ - omp_unset_lock(&p.mutex_2);\ - omp_set_lock(&p.w);\ -} while (0) -#define ccrw2_cease_writing(p) do {\ - omp_unset_lock(&p.w);\ - omp_set_lock(&p.mutex_2);\ - if (!--p.writecount) omp_unset_lock(&p.r);\ - omp_unset_lock(&p.mutex_2);\ -} while (0) -#define ccrw2_init(p) do {\ - omp_init_lock(&p.mutex_1);\ - omp_init_lock(&p.mutex_2);\ - omp_init_lock(&p.mutex_3);\ - omp_init_lock(&p.w);\ - omp_init_lock(&p.r);\ -} while (0) -#define ccrw2_clear(p) do {\ - omp_destroy_lock(&p.r);\ - omp_destroy_lock(&p.w);\ - omp_destroy_lock(&p.mutex_3);\ - omp_destroy_lock(&p.mutex_2);\ - omp_destroy_lock(&p.mutex_1);\ -} while (0) - -#else - -typedef int ccrw2_t; -#define ccrw2_become_reader(x) (void)(x) -#define ccrw2_cease_reading(x) (void)(x) -#define ccrw2_become_writer(x) (void)(x) -#define ccrw2_cease_writing(x) (void)(x) -#define ccrw2_init(x) (void)(x) -#define ccrw2_clear(x) (void)(x) - -#endif /* _OPENMP */ - -#endif diff --git a/soxr-sys/src/cr-core.c b/soxr-sys/src/cr-core.c deleted file mode 100644 index 159a5d976..000000000 --- a/soxr-sys/src/cr-core.c +++ /dev/null @@ -1,314 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-18 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. - * - * Constant-rate resampling engine-specific code. */ - -#include -#include -#include -#include - -#include "filter.h" - -#if defined SOXR_LIB - #include "internal.h" - #include "cr.h" - #if CORE_TYPE & CORE_DBL - typedef double sample_t; - #if CORE_TYPE & CORE_SIMD_DFT - #define RDFT_CB _soxr_rdft64s_cb - #else - #define RDFT_CB _soxr_rdft64_cb - #endif - #else - typedef float sample_t; - #if CORE_TYPE & CORE_SIMD_DFT - #define RDFT_CB _soxr_rdft32s_cb - #else - #define RDFT_CB _soxr_rdft32_cb - #endif - #endif - - #if CORE_TYPE & (CORE_SIMD_POLY|CORE_SIMD_HALF|CORE_SIMD_DFT) - #if CORE_TYPE & CORE_DBL - #include "util64s.h" - #include "dev64s.h" - #else - #include "util32s.h" - #include "dev32s.h" - #endif - #endif - - extern fn_t RDFT_CB[]; -#else - #define RDFT_CB 0 -#endif - - - -static void cubic_stage_fn(stage_t * p, fifo_t * output_fifo) -{ - sample_t const * input = stage_read_p(p); - int num_in = min(stage_occupancy(p), p->input_size); - int i, max_num_out = 1 + (int)(num_in * p->out_in_ratio); - sample_t * output = fifo_reserve(output_fifo, max_num_out); - - for (i = 0; p->at.integer < num_in; ++i, p->at.whole += p->step.whole) { - sample_t const * s = input + p->at.integer; - double x = p->at.fraction * (1 / MULT32); - double b = .5*(s[1]+s[-1])-*s, a = (1/6.)*(s[2]-s[1]+s[-1]-*s-4*b); - double c = s[1]-*s-a-b; - output[i] = (sample_t)(p->mult * (((a*x + b)*x + c)*x + *s)); - } - assert(max_num_out - i >= 0); - fifo_trim_by(output_fifo, max_num_out - i); - fifo_read(&p->fifo, p->at.integer, NULL); - p->at.integer = 0; -} - - - -#if defined __AVX__ - #define DEFINED_AVX 1 -#else - #define DEFINED_AVX 0 -#endif - -#if defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86 - #define DEFINED_X86 1 -#else - #define DEFINED_X86 0 -#endif - -#if defined __arm__ - #define DEFINED_ARM 1 -#else - #define DEFINED_ARM 0 -#endif - - - -#if CORE_TYPE & CORE_DBL - #define SIMD_AVX ((CORE_TYPE & CORE_SIMD_HALF) && DEFINED_AVX) - #define SIMD_SSE 0 -#else - #define SIMD_SSE ((CORE_TYPE & CORE_SIMD_HALF) && DEFINED_X86) - #define SIMD_AVX 0 -#endif - -#define SIMD_NEON ((CORE_TYPE & CORE_SIMD_HALF) && DEFINED_ARM) - - - -#include "half-coefs.h" - -#if !(CORE_TYPE & CORE_SIMD_HALF) -#define FUNCTION_H h7 -#define CONVOLVE ____ __ _ -#include "half-fir.h" -#endif - -#define FUNCTION_H h8 -#define CONVOLVE ____ ____ -#include "half-fir.h" - -#define FUNCTION_H h9 -#define CONVOLVE ____ ____ _ -#include "half-fir.h" - -#if CORE_TYPE & CORE_DBL - #define FUNCTION_H h10 - #define CONVOLVE ____ ____ __ - #include "half-fir.h" - - #define FUNCTION_H h11 - #define CONVOLVE ____ ____ __ _ - #include "half-fir.h" - - #define FUNCTION_H h12 - #define CONVOLVE ____ ____ ____ - #include "half-fir.h" - - #define FUNCTION_H h13 - #define CONVOLVE ____ ____ ____ _ - #include "half-fir.h" -#endif - -static half_fir_info_t const half_firs[] = { -#if !(CORE_TYPE & CORE_SIMD_HALF) - { 7, half_fir_coefs_7 , h7 , 0 , 120.65f}, -#endif - { 8, half_fir_coefs_8 , h8 , 0 , 136.51f}, - { 9, half_fir_coefs_9 , h9 , 0 , 152.32f}, -#if CORE_TYPE & CORE_DBL - {10, half_fir_coefs_10, h10, 0 , 168.08f}, - {11, half_fir_coefs_11, h11, 0 , 183.79f}, - {12, half_fir_coefs_12, h12, 0 , 199.46f}, - {13, half_fir_coefs_13, h13, 0 , 215.12f}, -#endif -}; - -#undef SIMD_AVX -#undef SIMD_NEON -#undef SIMD_SSE - - - -#if CORE_TYPE & CORE_DBL - #define SIMD_AVX ((CORE_TYPE & CORE_SIMD_POLY) && DEFINED_AVX) - #define SIMD_SSE 0 -#else - #define SIMD_SSE ((CORE_TYPE & CORE_SIMD_POLY) && DEFINED_X86) - #define SIMD_AVX 0 -#endif - -#define SIMD_NEON ((CORE_TYPE & CORE_SIMD_POLY) && DEFINED_ARM) - - - -#define COEFS (sample_t * __restrict)p->shared->poly_fir_coefs -#define VAR_LENGTH p->n -#define VAR_CONVOLVE(n) while (j < (n)) _ -#define VAR_POLY_PHASE_BITS p->phase_bits - - - -#define FUNCTION vpoly0 -#define FIR_LENGTH VAR_LENGTH -#define CONVOLVE(n) VAR_CONVOLVE(n) -#include "poly-fir0.h" - -#define FUNCTION vpoly1 -#define COEF_INTERP 1 -#define PHASE_BITS VAR_POLY_PHASE_BITS -#define FIR_LENGTH VAR_LENGTH -#define CONVOLVE(n) VAR_CONVOLVE(n) -#include "poly-fir.h" - -#define FUNCTION vpoly2 -#define COEF_INTERP 2 -#define PHASE_BITS VAR_POLY_PHASE_BITS -#define FIR_LENGTH VAR_LENGTH -#define CONVOLVE(n) VAR_CONVOLVE(n) -#include "poly-fir.h" - -#define FUNCTION vpoly3 -#define COEF_INTERP 3 -#define PHASE_BITS VAR_POLY_PHASE_BITS -#define FIR_LENGTH VAR_LENGTH -#define CONVOLVE(n) VAR_CONVOLVE(n) -#include "poly-fir.h" - - - -#if !(CORE_TYPE & CORE_SIMD_POLY) - -#define poly_fir_convolve_U100 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ -#define FUNCTION U100_0 -#define FIR_LENGTH U100_l -#define CONVOLVE(n) poly_fir_convolve_U100 -#include "poly-fir0.h" - -#define u100_l 11 -#define poly_fir_convolve_u100 _ _ _ _ _ _ _ _ _ _ _ -#define FUNCTION u100_0 -#define FIR_LENGTH u100_l -#define CONVOLVE(n) poly_fir_convolve_u100 -#include "poly-fir0.h" - -#define FUNCTION u100_1 -#define COEF_INTERP 1 -#define PHASE_BITS 8 -#define FIR_LENGTH u100_l -#define CONVOLVE(n) poly_fir_convolve_u100 -#include "poly-fir.h" - -#define FUNCTION u100_2 -#define COEF_INTERP 2 -#define PHASE_BITS 6 -#define FIR_LENGTH u100_l -#define CONVOLVE(n) poly_fir_convolve_u100 -#include "poly-fir.h" - -#endif - -#define u100_1_b 8 -#define u100_2_b 6 - - - -static poly_fir_t const poly_firs[] = { - {-1, {{0, vpoly0}, { 7.2f, vpoly1}, {5.0f, vpoly2}}}, - {-1, {{0, vpoly0}, { 9.4f, vpoly1}, {6.7f, vpoly2}}}, - {-1, {{0, vpoly0}, {12.4f, vpoly1}, {7.8f, vpoly2}}}, - {-1, {{0, vpoly0}, {13.6f, vpoly1}, {9.3f, vpoly2}}}, - {-1, {{0, vpoly0}, {10.5f, vpoly2}, {8.4f, vpoly3}}}, - {-1, {{0, vpoly0}, {11.85f,vpoly2}, {9.0f, vpoly3}}}, - - {-1, {{0, vpoly0}, { 8.0f, vpoly1}, {5.3f, vpoly2}}}, - {-1, {{0, vpoly0}, { 8.6f, vpoly1}, {5.7f, vpoly2}}}, - {-1, {{0, vpoly0}, {10.6f, vpoly1}, {6.75f,vpoly2}}}, - {-1, {{0, vpoly0}, {12.6f, vpoly1}, {8.6f, vpoly2}}}, - {-1, {{0, vpoly0}, { 9.6f, vpoly2}, {7.6f, vpoly3}}}, - {-1, {{0, vpoly0}, {11.4f, vpoly2}, {8.65f,vpoly3}}}, - -#if CORE_TYPE & CORE_SIMD_POLY - {10.62f, {{0, vpoly0}, {0, 0}, {0, 0}}}, - {-1, {{0, vpoly0}, {u100_1_b, vpoly1}, {u100_2_b, vpoly2}}}, -#else - {10.62f, {{U100_l, U100_0}, {0, 0}, {0, 0}}}, - {11.28f, {{u100_l, u100_0}, {u100_1_b, u100_1}, {u100_2_b, u100_2}}}, -#endif - {-1, {{0, vpoly0}, { 9, vpoly1}, { 6, vpoly2}}}, - {-1, {{0, vpoly0}, { 11, vpoly1}, { 7, vpoly2}}}, - {-1, {{0, vpoly0}, { 13, vpoly1}, { 8, vpoly2}}}, - {-1, {{0, vpoly0}, { 10, vpoly2}, { 8, vpoly3}}}, - {-1, {{0, vpoly0}, { 12, vpoly2}, { 9, vpoly3}}}, -}; - - - -static cr_core_t const cr_core = { - -#if CORE_TYPE & CORE_SIMD_POLY - {SIMD_ALIGNED_MALLOC, SIMD_ALIGNED_CALLOC, SIMD_ALIGNED_FREE}, -#else - {malloc, calloc, free}, -#endif - half_firs, array_length(half_firs), - 0, 0, - cubic_stage_fn, - poly_firs, RDFT_CB -}; - - - -#if defined SOXR_LIB - -#include "soxr.h" - -static char const * rate_create(void * channel, void * shared, double io_ratio, - soxr_quality_spec_t * q_spec, soxr_runtime_spec_t * r_spec, double scale) -{ - return _soxr_init(channel, shared, io_ratio, q_spec, r_spec, scale, - &cr_core, CORE_TYPE); -} - - - -static char const * id(void) {return CORE_STR;} - -fn_t RATE_CB[] = { - (fn_t)_soxr_input, - (fn_t)_soxr_process, - (fn_t)_soxr_output, - (fn_t)_soxr_flush, - (fn_t)_soxr_close, - (fn_t)_soxr_delay, - (fn_t)_soxr_sizes, - (fn_t)rate_create, - (fn_t)0, - (fn_t)id, -}; - -#endif diff --git a/soxr-sys/src/cr.c b/soxr-sys/src/cr.c deleted file mode 100644 index 4122db3ce..000000000 --- a/soxr-sys/src/cr.c +++ /dev/null @@ -1,588 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. - * - * Constant-rate resampling common code. */ - -#include -#include -#include -#include - -#include "filter.h" - -#if defined SOXR_LIB - #include "internal.h" - #define STATIC -#endif - -#include "cr.h" - -#define num_coefs4 ((core_flags&CORE_SIMD_POLY)? ((num_coefs+3)&~3) : num_coefs) - -#define coef_coef(C,T,x) \ - C((T*)result, interp_order, num_coefs4, j, x, num_coefs4 - 1 - i) - -#define STORE(C,T) { \ - if (interp_order > 2) coef_coef(C,T,3) = (T)d; \ - if (interp_order > 1) coef_coef(C,T,2) = (T)c; \ - if (interp_order > 0) coef_coef(C,T,1) = (T)b; \ - coef_coef(C,T,0) = (T)f0;} - -static real * prepare_poly_fir_coefs(double const * coefs, int num_coefs, - int num_phases, int interp_order, double multiplier, - core_flags_t core_flags, alloc_t const * mem) -{ - int i, j, length = num_coefs4 * num_phases * (interp_order + 1); - real * result = mem->calloc(1,(size_t)length << LOG2_SIZEOF_REAL(core_flags)); - double fm1 = coefs[0], f1 = 0, f2 = 0; - - for (i = num_coefs - 1; i >= 0; --i) - for (j = num_phases - 1; j >= 0; --j) { - double f0 = fm1, b = 0, c = 0, d = 0; /* = 0 to kill compiler warning */ - int pos = i * num_phases + j - 1; - fm1 = pos > 0 ? coefs[pos - 1] * multiplier : 0; - switch (interp_order) { - case 1: b = f1 - f0; break; - case 2: b = f1 - (.5 * (f2+f0) - f1) - f0; c = .5 * (f2+f0) - f1; break; - case 3: c=.5*(f1+fm1)-f0;d=(1/6.)*(f2-f1+fm1-f0-4*c);b=f1-f0-d-c; break; - default: assert(!interp_order); - } - switch (core_flags & 3) { - case 0: if (WITH_CR32 ) STORE(coef , float ); break; - case 1: if (WITH_CR64 ) STORE(coef , double); break; - case 2: if (WITH_CR32S) STORE(coef4, float ); break; - default:if (WITH_CR64S) STORE(coef4, double); break; - } - f2 = f1, f1 = f0; - } - return result; -} - -#undef STORE -#undef coef_coef - -#define IS_FLOAT32 (WITH_CR32 || WITH_CR32S) && \ - (!(WITH_CR64 || WITH_CR64S) || sizeof_real == sizeof(float)) -#define WITH_FLOAT64 WITH_CR64 || WITH_CR64S - -static void dft_stage_fn(stage_t * p, fifo_t * output_fifo) -{ - real * output, * dft_out; - int i, j, num_in = max(0, fifo_occupancy(&p->fifo)); - rate_shared_t const * s = p->shared; - dft_filter_t const * f = &s->dft_filter[p->dft_filter_num]; - int const overlap = f->num_taps - 1; - - if (p->at.integer + p->L * num_in >= f->dft_length) { - fn_t const * const RDFT_CB = p->rdft_cb; - size_t const sizeof_real = sizeof(char) << LOG2_SIZEOF_REAL(p->core_flags); - div_t divd = div(f->dft_length - overlap - p->at.integer + p->L - 1, p->L); - real const * input = fifo_read_ptr(&p->fifo); - fifo_read(&p->fifo, divd.quot, NULL); - num_in -= divd.quot; - - output = fifo_reserve(output_fifo, f->dft_length); - dft_out = (p->core_flags & CORE_SIMD_DFT)? p->dft_out : output; - - if (lsx_is_power_of_2(p->L)) { /* F-domain */ - int portion = f->dft_length / p->L; - memcpy(dft_out, input, (unsigned)portion * sizeof_real); - rdft_oforward(portion, f->dft_forward_setup, dft_out, p->dft_scratch); - if (IS_FLOAT32) { -#define dft_out ((float *)dft_out) - for (i = portion + 2; i < (portion << 1); i += 2) /* Mirror image. */ - dft_out[i] = dft_out[(portion << 1) - i], - dft_out[i+1] = -dft_out[(portion << 1) - i + 1]; - dft_out[portion] = dft_out[1]; - dft_out[portion + 1] = 0; - dft_out[1] = dft_out[0]; -#undef dft_out - } - else if (WITH_FLOAT64) { -#define dft_out ((double *)dft_out) - for (i = portion + 2; i < (portion << 1); i += 2) /* Mirror image. */ - dft_out[i] = dft_out[(portion << 1) - i], - dft_out[i+1] = -dft_out[(portion << 1) - i + 1]; - dft_out[portion] = dft_out[1]; - dft_out[portion + 1] = 0; - dft_out[1] = dft_out[0]; -#undef dft_out - } - - for (portion <<= 1; i < f->dft_length; i += portion, portion <<= 1) { - memcpy((char *)dft_out + (size_t)i * sizeof_real, dft_out, (size_t)portion * sizeof_real); - if (IS_FLOAT32) - #define dft_out ((float *)dft_out) - dft_out[i + 1] = 0; - #undef dft_out - else if (WITH_FLOAT64) - #define dft_out ((double *)dft_out) - dft_out[i + 1] = 0; - #undef dft_out - } - if (p->step.integer > 0) - rdft_reorder_back(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch); - } else { - if (p->L == 1) - memcpy(dft_out, input, (size_t)f->dft_length * sizeof_real); - else { - memset(dft_out, 0, (size_t)f->dft_length * sizeof_real); - if (IS_FLOAT32) - for (j = 0, i = p->at.integer; i < f->dft_length; ++j, i += p->L) - ((float *)dft_out)[i] = ((float *)input)[j]; - else if (WITH_FLOAT64) - for (j = 0, i = p->at.integer; i < f->dft_length; ++j, i += p->L) - ((double *)dft_out)[i] = ((double *)input)[j]; - p->at.integer = p->L - 1 - divd.rem; - } - if (p->step.integer > 0) - rdft_forward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch); - else - rdft_oforward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch); - } - - if (p->step.integer > 0) { - rdft_convolve(f->dft_length, f->dft_backward_setup, dft_out, f->coefs); - rdft_backward(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch); - if ((p->core_flags & CORE_SIMD_DFT) && p->step.integer == 1) - memcpy(output, dft_out, (size_t)f->dft_length * sizeof_real); - if (p->step.integer != 1) { - if (IS_FLOAT32) - for (j = 0, i = p->remM; i < f->dft_length - overlap; ++j, - i += p->step.integer) - ((float *)output)[j] = ((float *)dft_out)[i]; - else if (WITH_FLOAT64) - for (j = 0, i = p->remM; i < f->dft_length - overlap; ++j, - i += p->step.integer) - ((double *)output)[j] = ((double *)dft_out)[i]; - p->remM = i - (f->dft_length - overlap); - fifo_trim_by(output_fifo, f->dft_length - j); - } - else fifo_trim_by(output_fifo, overlap); - } - else { /* F-domain */ - int m = -p->step.integer; - rdft_convolve_portion(f->dft_length >> m, dft_out, f->coefs); - rdft_obackward(f->dft_length >> m, f->dft_backward_setup, dft_out, p->dft_scratch); - if (p->core_flags & CORE_SIMD_DFT) - memcpy(output, dft_out, (size_t)(f->dft_length >> m) * sizeof_real); - fifo_trim_by(output_fifo, (((1 << m) - 1) * f->dft_length + overlap) >>m); - } - (void)RDFT_CB; - } - p->input_size = (f->dft_length - p->at.integer + p->L - 1) / p->L; -} - -/* Set to 4 x nearest power of 2 or half of that */ -/* if danger of causing too many cache misses. */ -static int set_dft_length(int num_taps, int min, int large) -{ - double d = log((double)num_taps) / log(2.); - return 1 << range_limit((int)(d + 2.77), min, max((int)(d + 1.77), large)); -} - -static void dft_stage_init( - unsigned instance, double Fp, double Fs, double Fn, double att, - double phase_response, stage_t * p, int L, int M, double * multiplier, - unsigned min_dft_size, unsigned large_dft_size, core_flags_t core_flags, - fn_t const * RDFT_CB) -{ - dft_filter_t * f = &p->shared->dft_filter[instance]; - int num_taps = 0, dft_length = f->dft_length, i, offset; - bool f_domain_m = abs(3-M) == 1 && Fs <= 1; - size_t const sizeof_real = sizeof(char) << LOG2_SIZEOF_REAL(core_flags); - - if (!dft_length) { - int k = phase_response == 50 && lsx_is_power_of_2(L) && Fn == L? L << 1 : 4; - double m, * h = lsx_design_lpf(Fp, Fs, Fn, att, &num_taps, -k, -1.); - - if (phase_response != 50) - lsx_fir_to_phase(&h, &num_taps, &f->post_peak, phase_response); - else f->post_peak = num_taps / 2; - - dft_length = set_dft_length(num_taps, (int)min_dft_size, (int)large_dft_size); - f->coefs = rdft_calloc((size_t)dft_length, sizeof_real); - offset = dft_length - num_taps + 1; - m = (1. / dft_length) * rdft_multiplier() * L * *multiplier; - if (IS_FLOAT32) for (i = 0; i < num_taps; ++i) - ((float *)f->coefs)[(i + offset) & (dft_length - 1)] =(float)(h[i] * m); - else if (WITH_FLOAT64) for (i = 0; i < num_taps; ++i) - ((double *)f->coefs)[(i + offset) & (dft_length - 1)] = h[i] * m; - free(h); - } - - if (rdft_flags() & RDFT_IS_SIMD) - p->dft_out = rdft_malloc(sizeof_real * (size_t)dft_length); - if (rdft_flags() & RDFT_NEEDS_SCRATCH) - p->dft_scratch = rdft_malloc(2 * sizeof_real * (size_t)dft_length); - - if (!f->dft_length) { - void * coef_setup = rdft_forward_setup(dft_length); - int Lp = lsx_is_power_of_2(L)? L : 1; - int Mp = f_domain_m? M : 1; - f->dft_forward_setup = rdft_forward_setup(dft_length / Lp); - f->dft_backward_setup = rdft_backward_setup(dft_length / Mp); - if (Mp == 1) - rdft_forward(dft_length, coef_setup, f->coefs, p->dft_scratch); - else - rdft_oforward(dft_length, coef_setup, f->coefs, p->dft_scratch); - rdft_delete_setup(coef_setup); - f->num_taps = num_taps; - f->dft_length = dft_length; - lsx_debug("fir_len=%i dft_length=%i Fp=%g Fs=%g Fn=%g att=%g %i/%i", - num_taps, dft_length, Fp, Fs, Fn, att, L, M); - } - *multiplier = 1; - p->out_in_ratio = (double)L / M; - p->core_flags = core_flags; - p->rdft_cb = RDFT_CB; - p->fn = dft_stage_fn; - p->preload = f->post_peak / L; - p->at.integer = f->post_peak % L; - p->L = L; - p->step.integer = f_domain_m? -M/2 : M; - p->dft_filter_num = instance; - p->block_len = f->dft_length - (f->num_taps - 1); - p->phase0 = p->at.integer / p->L; - p->input_size = (f->dft_length - p->at.integer + p->L - 1) / p->L; -} - -static struct half_fir_info const * find_half_fir( - struct half_fir_info const * firs, size_t len, double att) -{ - size_t i; - for (i = 0; i + 1 < len && att > firs[i].att; ++i); - return &firs[i]; -} - -#define have_pre_stage (preM * preL != 1) -#define have_arb_stage (arbM * arbL != 1) -#define have_post_stage (postM * postL != 1) - -#include "soxr.h" - -STATIC char const * _soxr_init( - rate_t * const p, /* Per audio channel. */ - rate_shared_t * const shared, /* By channels undergoing same rate change. */ - double const io_ratio, /* Input rate divided by output rate. */ - soxr_quality_spec_t const * const q_spec, - soxr_runtime_spec_t const * const r_spec, - double multiplier, /* Linear gain to apply during conversion. */ - cr_core_t const * const core, - core_flags_t const core_flags) -{ - size_t const sizeof_real = sizeof(char) << LOG2_SIZEOF_REAL(core_flags); - double const tolerance = 1 + 1e-5; - - double bits = q_spec->precision; - rolloff_t const rolloff = (rolloff_t)(q_spec->flags & 3); - int interpolator = (int)(r_spec->flags & 3) - 1; - double const Fp0 = q_spec->passband_end, Fs0 = q_spec->stopband_begin; - double const phase_response = q_spec->phase_response, tbw0 = Fs0-Fp0; - - bool const maintain_3dB_pt = !!(q_spec->flags & SOXR_MAINTAIN_3DB_PT); - double tbw_tighten = 1, alpha; - #define tighten(x) (Fs0-(Fs0-(x))*tbw_tighten) - - double arbM = io_ratio, Fn1, Fp1 = Fp0, Fs1 = Fs0, bits1 = min(bits,33); - double att = (bits1 + 1) * linear_to_dB(2.), attArb = att; /* +1: pass+stop */ - int preL = 1, preM = 1, shr = 0, arbL = 1, postL = 1, postM = 1; - bool upsample=false, rational=false, iOpt=!(r_spec->flags&SOXR_NOSMALLINTOPT); - bool lq_bits= (q_spec->flags & SOXR_PROMOTE_TO_LQ)? bits <= 16 : bits == 16; - bool lq_Fp0 = (q_spec->flags & SOXR_PROMOTE_TO_LQ)? Fp0<=lq_bw0 : Fp0==lq_bw0; - int n = 0, i, mode = lq_bits && rolloff == rolloff_medium? io_ratio > 1 || - phase_response != 50 || !lq_Fp0 || Fs0 != 1 : ((int)ceil(bits1) - 6) / 4; - struct half_fir_info const * half_fir_info; - stage_t * s; - - if (io_ratio < 1 && Fs0 - 1 > 1 - Fp0 / tolerance) - return "imaging greater than rolloff"; - if (.002 / tolerance > tbw0 || tbw0 > .5 * tolerance) - return "transition bandwidth not in [0.2,50] % of nyquist"; - if (.5 / tolerance > Fp0 || Fs0 > 1.5 * tolerance) - return "transition band not within [50,150] % of nyquist"; - if (bits!=0 && (15 > bits || bits > 33)) - return "precision not in [15,33] bits"; - if (io_ratio <= 0) - return "resampling factor not positive"; - if (0 > phase_response || phase_response > 100) - return "phase response not in [0=min-phase,100=max-phase] %"; - - p->core = core; - p->io_ratio = io_ratio; - if (bits!=0) while (!n++) { /* Determine stages: */ - int try, L, M, x, maxL = interpolator > 0? 1 : mode? 2048 : - (int)ceil(r_spec->coef_size_kbytes * 1000. / (U100_l * (int)sizeof_real)); - double d, epsilon = 0, frac; - upsample = arbM < 1; - for (i = (int)(.5 * arbM), shr = 0; i >>= 1; arbM *= .5, ++shr); - preM = upsample || (arbM > 1.5 && arbM < 2); - postM = 1 + (arbM > 1 && preM), arbM /= postM; - preL = 1 + (!preM && arbM < 2) + (upsample && mode), arbM *= preL; - if ((frac = arbM - (int)arbM)!=0) - epsilon = fabs(floor(frac * MULT32 + .5) / (frac * MULT32) - 1); - for (i = 1, rational = frac==0; i <= maxL && !rational; ++i) { - d = frac * i, try = (int)(d + .5); - if ((rational = fabs(try / d - 1) <= epsilon)) { /* No long doubles! */ - if (try == i) - arbM = ceil(arbM), shr += x = arbM > 3, arbM /= 1 + x; - else arbM = i * (int)arbM + try, arbL = i; - } - } - L = preL * arbL, M = (int)(arbM * postM), x = (L|M)&1, L >>= !x, M >>= !x; - if (iOpt && postL == 1 && (d = preL * arbL / arbM) > 4 && d != 5) { - for (postL = 4, i = (int)(d / 16); (i >>= 1) && postL < 256; postL <<= 1); - arbM = arbM * postL / arbL / preL, arbL = 1, n = 0; - } else if (rational && (max(L, M) < 3 + 2 * iOpt || L * M < 6 * iOpt)) - preL = L, preM = M, arbM = arbL = postM = 1; - if (!mode && (!rational || !n)) - ++mode, n = 0; - } - - p->num_stages = shr + have_pre_stage + have_arb_stage + have_post_stage; - if (!p->num_stages && multiplier != 1) { - bits = arbL = 0; /* Use cubic_stage in this case. */ - ++p->num_stages; - } - p->stages = calloc((size_t)p->num_stages + 1, sizeof(*p->stages)); - if (!p->stages) - return "out of memory"; - for (i = 0; i < p->num_stages; ++i) { - p->stages[i].num = i; - p->stages[i].shared = shared; - p->stages[i].input_size = 8192; - } - p->stages[0].is_input = true; - - alpha = postM / (io_ratio * (postL << 0)); - - if ((n = p->num_stages) > 1) { /* Att. budget: */ - if (have_arb_stage) - att += linear_to_dB(2.), attArb = att, --n; - att += linear_to_dB((double)n); - } - - half_fir_info = find_half_fir(core->half_firs, core->half_firs_len, att); - for (i = 0, s = p->stages; i < shr; ++i, ++s) { - s->fn = half_fir_info->fn; - s->coefs = half_fir_info->coefs; - s->n = half_fir_info->num_coefs; - s->pre_post = 4 * s->n; - s->preload = s->pre = s->pre_post >> 1; - } - - if (have_pre_stage) { - if (maintain_3dB_pt && have_post_stage) { /* Trans. bands overlapping. */ - double x = tbw0 * lsx_inv_f_resp(-3., att); - x = -lsx_f_resp(x / (max(2 * alpha - Fs0, alpha) - Fp0), att); - if (x > .035) { - tbw_tighten = ((4.3074e-3 - 3.9121e-4 * x) * x - .040009) * x + 1.0014; - lsx_debug("tbw_tighten=%g (%gdB)", tbw_tighten, x); - } - } - Fn1 = preM? max(preL, preM) : arbM / arbL; - dft_stage_init(0, tighten(Fp1), Fs1, Fn1, att, phase_response, s++, preL, - max(preM, 1), &multiplier, r_spec->log2_min_dft_size, - r_spec->log2_large_dft_size, core_flags, core->rdft_cb); - Fp1 /= Fn1, Fs1 /= Fn1; - } - - if (bits==0 && have_arb_stage) { /* `Quick' cubic arb stage: */ - s->fn = core->cubic_stage_fn; - s->mult = multiplier, multiplier = 1; - s->step.whole = (int64_t)(arbM * MULT32 + .5); - s->pre_post = max(3, s->step.integer); - s->preload = s->pre = 1; - s->out_in_ratio = MULT32 / (double)s->step.whole; - } - else if (have_arb_stage) { /* Higher quality arb stage: */ - static const float rolloffs[] = {-.01f, -.3f, 0, -.103f}; - poly_fir_t const * f = &core->poly_firs[6*(upsample+!!preM)+mode-!upsample]; - int order, num_coefs = (int)f->interp[0].scalar, phase_bits, phases; - size_t coefs_size; - double at, Fp = Fp1, Fs, Fn, mult = upsample? 1 : arbM / arbL; - poly_fir1_t const * f1; - - if (!upsample && preM) - Fn = 2 * mult, Fs = 3 + fabs(Fs1 - 1); - else Fn = 1, Fs = 2 - (mode? Fp1 + (Fs1 - Fp1) * .7 : Fs1); - - if (mode) - Fp = Fs - (Fs - Fp) / (1 - lsx_inv_f_resp(rolloffs[rolloff], attArb)); - - i = (interpolator < 0? !rational : max(interpolator, !rational)) - 1; - do { - f1 = &f->interp[++i]; - assert(f1->fn); - if (i) - arbM /= arbL, arbL = 1, rational = false; - phase_bits = (int)ceil(f1->scalar - log(mult)/log(2.)); - phases = !rational? (1 << phase_bits) : arbL; - if (f->interp[0].scalar==0) { - int phases0 = max(phases, 19), n0 = 0; - lsx_design_lpf(Fp, Fs, -Fn, attArb, &n0, phases0, f->beta); - num_coefs = n0 / phases0 + 1, num_coefs += num_coefs & !preM; - } - if ((num_coefs & 1) && rational && (arbL & 1)) - phases <<= 1, arbL <<= 1, arbM *= 2; - at = arbL * (s->phase0 = .5 * (num_coefs & 1)); - order = i + (i && mode > 4); - coefs_size = (size_t)(num_coefs4 * phases * (order+1)) * sizeof_real; - } while (interpolator < 0 && i < 2 && f->interp[i+1].fn && - coefs_size / 1000 > r_spec->coef_size_kbytes); - - if (!s->shared->poly_fir_coefs) { - int num_taps = num_coefs * phases - 1; - double * coefs = lsx_design_lpf( - Fp, Fs, Fn, attArb, &num_taps, phases, f->beta); - s->shared->poly_fir_coefs = prepare_poly_fir_coefs( - coefs, num_coefs, phases, order, multiplier, core_flags, &core->mem); - lsx_debug("fir_len=%i phases=%i coef_interp=%i size=%.3gk", - num_coefs, phases, order, (double)coefs_size / 1000.); - free(coefs); - } - multiplier = 1; - s->fn = f1->fn; - s->pre_post = num_coefs4 - 1; - s->preload = ((num_coefs - 1) >> 1) + (num_coefs4 - num_coefs); - s->n = num_coefs4; - s->phase_bits = phase_bits; - s->L = arbL; - s->use_hi_prec_clock = - mode>1 && (q_spec->flags & SOXR_HI_PREC_CLOCK) && !rational; -#if WITH_FLOAT_STD_PREC_CLOCK - if (order && !s->use_hi_prec_clock) { - s->at.flt = at; - s->step.flt = arbM; - s->out_in_ratio = (double)(arbL / s->step.flt); - } else -#endif - { - s->at.whole = (int64_t)(at * MULT32 + .5); -#if WITH_HI_PREC_CLOCK - if (s->use_hi_prec_clock) { - double M = arbM * MULT32; - s->at.fix.ls.parts.ms = 0x80000000ul; - s->step.whole = (int64_t)M; - M -= (double)s->step.whole; - M *= MULT32 * MULT32; - s->step.fix.ls.all = (uint64_t)M; - } else -#endif - s->step.whole = (int64_t)(arbM * MULT32 + .5); - s->out_in_ratio = MULT32 * arbL / (double)s->step.whole; - } - ++s; - } - - if (have_post_stage) - dft_stage_init(1, tighten(Fp0 / (upsample? alpha : 1)), upsample? max(2 - - Fs0 / alpha, 1) : Fs0, (double)max(postL, postM), att, phase_response, - s++, postL, postM, &multiplier, r_spec->log2_min_dft_size, - r_spec->log2_large_dft_size, core_flags, core->rdft_cb); - - lsx_debug("%g: >>%i %i/%i %i/%g %i/%i (%x)", 1/io_ratio, - shr, preL, preM, arbL, arbM, postL, postM, core_flags); - - for (i = 0, s = p->stages; i < p->num_stages; ++i, ++s) { - fifo_create(&s->fifo, (int)sizeof_real); - memset(fifo_reserve(&s->fifo, s->preload), 0, - sizeof_real * (size_t)s->preload); - lsx_debug_more("%5i|%-5i preload=%i remL=%i", - s->pre, s->pre_post-s->pre, s->preload, s->at.integer); - } - fifo_create(&s->fifo, (int)sizeof_real); - return 0; -} - -static bool stage_process(stage_t * stage, bool flushing) -{ - fifo_t * fifo = &stage->fifo; - bool done = false; - int want; - while (!done && (want = stage->input_size - fifo_occupancy(fifo)) > 0) { - if (stage->is_input) { - if (flushing) - memset(fifo_reserve(fifo, want), 0, fifo->item_size * (size_t)want); - else done = true; - } - else done = stage_process(stage - 1, flushing); - } - stage->fn(stage, &stage[1].fifo); - return done && fifo_occupancy(fifo) < stage->input_size; -} - -STATIC void _soxr_process(rate_t * p, size_t olen) -{ - int const n = p->flushing? min(-(int)p->samples_out, (int)olen) : (int)olen; - stage_t * stage = &p->stages[p->num_stages]; - fifo_t * fifo = &stage->fifo; - bool done = false; - while (!done && fifo_occupancy(fifo) < (int)n) - done = stage->is_input || stage_process(stage - 1, p->flushing); -} - -STATIC real * _soxr_input(rate_t * p, real const * samples, size_t n) -{ - if (p->flushing) - return 0; - p->samples_in += (int64_t)n; - return fifo_write(&p->stages[0].fifo, (int)n, samples); -} - -STATIC real const * _soxr_output(rate_t * p, real * samples, size_t * n0) -{ - fifo_t * fifo = &p->stages[p->num_stages].fifo; - int n = p->flushing? min(-(int)p->samples_out, (int)*n0) : (int)*n0; - p->samples_out += n = min(n, fifo_occupancy(fifo)); - return fifo_read(fifo, (int)(*n0 = (size_t)n), samples); -} - -STATIC void _soxr_flush(rate_t * p) -{ - if (p->flushing) return; - p->samples_out -= (int64_t)((double)p->samples_in / p->io_ratio + .5); - p->samples_in = 0; - p->flushing = true; -} - -STATIC void _soxr_close(rate_t * p) -{ - if (p->stages) { - fn_t const * const RDFT_CB = p->core->rdft_cb; - rate_shared_t * shared = p->stages[0].shared; - int i; - - for (i = 0; i <= p->num_stages; ++i) { - stage_t * s = &p->stages[i]; - rdft_free(s->dft_scratch); - rdft_free(s->dft_out); - fifo_delete(&s->fifo); - } - if (shared) { - for (i = 0; i < 2; ++i) { - dft_filter_t * f= &shared->dft_filter[i]; - rdft_free(f->coefs); - rdft_delete_setup(f->dft_forward_setup); - rdft_delete_setup(f->dft_backward_setup); - } - p->core->mem.free(shared->poly_fir_coefs); - memset(shared, 0, sizeof(*shared)); - } - free(p->stages); - (void)RDFT_CB; - } -} - -#if defined SOXR_LIB -STATIC double _soxr_delay(rate_t * p) -{ - return (double)p->samples_in / p->io_ratio - (double)p->samples_out; -} - -STATIC void _soxr_sizes(size_t * shared, size_t * channel) -{ - *shared = sizeof(rate_shared_t); - *channel = sizeof(rate_t); -} -#endif diff --git a/soxr-sys/src/cr.h b/soxr-sys/src/cr.h deleted file mode 100644 index d6e863799..000000000 --- a/soxr-sys/src/cr.h +++ /dev/null @@ -1,178 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#if !defined soxr_cr_included -#define soxr_cr_included - -#define FIFO_SIZE_T int -#include "fifo.h" - -typedef void real; /* float or double */ -struct stage; -typedef void (* stage_fn_t)(struct stage * input, fifo_t * output); -typedef struct half_fir_info { - int num_coefs; - real const * coefs; - stage_fn_t fn, dfn; - float att; -} half_fir_info_t; -typedef struct {float scalar; stage_fn_t fn;} poly_fir1_t; -typedef struct {float beta; poly_fir1_t interp[3];} poly_fir_t; - -#define U100_l 42 -#define MULT32 (65536. * 65536.) - -/* Conceptually: coef_p is &coefs[num_phases][fir_len][interp_order+1]: */ -#define coef(coef_p, interp_order, fir_len, phase_num, coef_interp_num, fir_coef_num) (coef_p)[\ - (fir_len) * ((interp_order) + 1) * (phase_num) + \ - ((interp_order) + 1) * (fir_coef_num) + \ - ((interp_order) - (coef_interp_num))] - -/* Conceptually: coef_p is &coefs[num_phases][fir_len/4][interp_order+1][4]: */ -#define coef4(coef_p, interp_order, fir_len, phase_num, coef_interp_num, fir_coef_num) (coef_p)[\ - (fir_len) * ((interp_order) + 1) * (phase_num) + \ - ((interp_order) + 1) * ((fir_coef_num) & ~3) + \ - 4 * ((interp_order) - (coef_interp_num)) + \ - ((fir_coef_num) & 3)] - -typedef union { /* Int64 in parts */ - #if HAVE_BIGENDIAN - struct {int32_t ms; uint32_t ls;} parts; - #else - struct {uint32_t ls; int32_t ms;} parts; - #endif - int64_t all; -} int64p_t; - -typedef union { /* Uint64 in parts */ - #if HAVE_BIGENDIAN - struct {uint32_t ms, ls;} parts; - #else - struct {uint32_t ls, ms;} parts; - #endif - uint64_t all; -} uint64p_t; - -typedef struct { - int dft_length, num_taps, post_peak; - void * dft_forward_setup, * dft_backward_setup; - real * coefs; -} dft_filter_t; - -typedef struct { /* So generated filter coefs may be shared between channels */ - real * poly_fir_coefs; - dft_filter_t dft_filter[2]; -} rate_shared_t; - -typedef double float_step_t; /* Or long double or __float128. */ - -typedef union { /* Fixed point arithmetic */ - struct {uint64p_t ls; int64p_t ms;} fix; /* Hi-prec has ~96 bits. */ - float_step_t flt; -} step_t; - -#define integer fix.ms.parts.ms -#define fraction fix.ms.parts.ls -#define whole fix.ms.all - -#define CORE_DBL 1 -#define CORE_SIMD_POLY 2 -#define CORE_SIMD_HALF 4 -#define CORE_SIMD_DFT 8 -#define LOG2_SIZEOF_REAL(core_flags) (2 + ((core_flags) & 1)) - -typedef int core_flags_t; - -#if defined SOXR_LIB -#include "rdft_t.h" -#else -typedef void fn_t; -#endif - -typedef struct stage { - int num; - - /* Common to all stage types: */ - core_flags_t core_flags; - stage_fn_t fn; - fifo_t fifo; - int pre; /* Number of past samples to store */ - int pre_post; /* pre + number of future samples to store */ - int preload; /* Number of zero samples to pre-load the fifo */ - double out_in_ratio; /* For buffer management. */ - int input_size; - bool is_input; - - /* For a stage with variable (run-time generated) filter coefs: */ - fn_t const * rdft_cb; - rate_shared_t * shared; - unsigned dft_filter_num; /* Which, if any, of the 2 DFT filters to use */ - real * dft_scratch; - float * dft_out; - real const * coefs; - - /* For a stage with variable L/M: */ - step_t at, step; - bool use_hi_prec_clock; - int L, remM; - int n, phase_bits, block_len; - double mult, phase0; -} stage_t; - -#define stage_occupancy(s) max(0, fifo_occupancy(&(s)->fifo) - (s)->pre_post) -#define stage_read_p(s) ((sample_t *)fifo_read_ptr(&(s)->fifo) + (s)->pre) - -#define lq_bw0 (1385/2048.) /* ~.67625, FP exact. */ - -typedef enum {rolloff_small, rolloff_medium, rolloff_none} rolloff_t; - -typedef struct { - void * (* alloc)(size_t); - void * (* calloc)(size_t, size_t); - void (* free)(void *); -} alloc_t; - -typedef struct { - alloc_t mem; - half_fir_info_t const * half_firs; - size_t half_firs_len; - half_fir_info_t const * doub_firs; - size_t doub_firs_len; - stage_fn_t cubic_stage_fn; - poly_fir_t const * poly_firs; - fn_t * rdft_cb; -} cr_core_t; - -typedef struct rate rate_t; -struct rate { - cr_core_t const * core; - double io_ratio; - int64_t samples_in, samples_out; - int num_stages, flushing; - stage_t * stages; -}; - -#if defined SOXR_LIB - -#include "soxr.h" - -char const * _soxr_init( - rate_t * const p, /* Per audio channel. */ - rate_shared_t * const shared, /* Between channels (undergoing same rate change)*/ - double const io_ratio, /* Input rate divided by output rate. */ - soxr_quality_spec_t const * const q_spec, - soxr_runtime_spec_t const * const r_spec, - double multiplier, /* Linear gain to apply during conversion. 1 */ - cr_core_t const * const core, - core_flags_t const); - -void _soxr_process(struct rate * p, size_t olen); -real * _soxr_input(struct rate * p, real const * samples, size_t n); -real const * _soxr_output(struct rate * p, real * samples, size_t * n0); -void _soxr_flush(struct rate * p); -void _soxr_close(struct rate * p); -double _soxr_delay(struct rate * p); -void _soxr_sizes(size_t * shared, size_t * channel); -#endif - -#endif diff --git a/soxr-sys/src/cr32.c b/soxr-sys/src/cr32.c deleted file mode 100644 index b9eb264d0..000000000 --- a/soxr-sys/src/cr32.c +++ /dev/null @@ -1,8 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#define RATE_CB _soxr_rate32_cb -#define CORE_STR "cr32" - -#define CORE_TYPE 0 -#include "cr-core.c" diff --git a/soxr-sys/src/cr32s.c b/soxr-sys/src/cr32s.c deleted file mode 100644 index 5de2a4336..000000000 --- a/soxr-sys/src/cr32s.c +++ /dev/null @@ -1,8 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#define RATE_CB _soxr_rate32s_cb -#define CORE_STR "cr32s" - -#define CORE_TYPE (CORE_SIMD_POLY|CORE_SIMD_HALF|CORE_SIMD_DFT) -#include "cr-core.c" diff --git a/soxr-sys/src/cr64.c b/soxr-sys/src/cr64.c deleted file mode 100644 index 518cdd761..000000000 --- a/soxr-sys/src/cr64.c +++ /dev/null @@ -1,8 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#define RATE_CB _soxr_rate64_cb -#define CORE_STR "cr64" - -#define CORE_TYPE CORE_DBL -#include "cr-core.c" diff --git a/soxr-sys/src/cr64s.c b/soxr-sys/src/cr64s.c deleted file mode 100644 index 5dcd6f100..000000000 --- a/soxr-sys/src/cr64s.c +++ /dev/null @@ -1,8 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#define RATE_CB _soxr_rate64s_cb -#define CORE_STR "cr64s" - -#define CORE_TYPE (CORE_DBL|CORE_SIMD_POLY|CORE_SIMD_HALF|CORE_SIMD_DFT) -#include "cr-core.c" diff --git a/soxr-sys/src/data-io.c b/soxr-sys/src/data-io.c deleted file mode 100644 index 2a93fda12..000000000 --- a/soxr-sys/src/data-io.c +++ /dev/null @@ -1,223 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#include -#include -#include - -#include "data-io.h" -#include "internal.h" - - - -#define DEINTERLEAVE_FROM(T,flag) do { \ - unsigned i; \ - size_t j; \ - T const * src = *src0; \ - if (ch > 1) for (j = 0; j < n; ++j) \ - for (i = 0; i < ch; ++i) dest[i][j] = (DEINTERLEAVE_TO)*src++; \ - else if (flag) memcpy(dest[0], src, n * sizeof(T)), src = &src[n]; \ - else for (j = 0; j < n; dest[0][j++] = (DEINTERLEAVE_TO)*src++); \ - *src0 = src; \ -} while (0) - - - -#if WITH_CR64 || WITH_CR64S -void _soxr_deinterleave(double * * dest, /* Round/clipping not needed here */ - soxr_datatype_t data_type, void const * * src0, size_t n, unsigned ch) -{ -#define DEINTERLEAVE_TO double - switch (data_type & 3) { - case SOXR_FLOAT32: DEINTERLEAVE_FROM(float, 0); break; - case SOXR_FLOAT64: DEINTERLEAVE_FROM(double, 1); break; - case SOXR_INT32: DEINTERLEAVE_FROM(int32_t, 0); break; - case SOXR_INT16: DEINTERLEAVE_FROM(int16_t, 0); break; - default: break; - } -} -#endif - - - -#if WITH_CR32 || WITH_CR32S || WITH_VR32 -void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */ - soxr_datatype_t data_type, void const * * src0, size_t n, unsigned ch) -{ -#undef DEINTERLEAVE_TO -#define DEINTERLEAVE_TO float - switch (data_type & 3) { - case SOXR_FLOAT32: DEINTERLEAVE_FROM(float, 1); break; - case SOXR_FLOAT64: DEINTERLEAVE_FROM(double, 0); break; - case SOXR_INT32: DEINTERLEAVE_FROM(int32_t, 0); break; - case SOXR_INT16: DEINTERLEAVE_FROM(int16_t, 0); break; - default: break; - } -} -#endif - - - -#include "rint.h" - - - -#if defined FE_INVALID && defined FPU_RINT32 && defined __STDC_VERSION__ - #if __STDC_VERSION__ >= 199901L - #pragma STDC FENV_ACCESS ON - #endif -#endif - -#if WITH_CR64 || WITH_CR64S -#define FLOATX double - -#define LSX_RINT_CLIP_2 lsx_rint32_clip_2 -#define LSX_RINT_CLIP lsx_rint32_clip -#define RINT_CLIP rint32_clip -#define RINT rint32D -#if defined FPU_RINT32 - #define FPU_RINT -#endif -#define RINT_T int32_t -#define RINT_MAX 2147483647L -#include "rint-clip.h" - -#define LSX_RINT_CLIP_2 lsx_rint16_clip_2 -#define LSX_RINT_CLIP lsx_rint16_clip -#define RINT_CLIP rint16_clip -#define RINT rint16D -#if defined FPU_RINT16 - #define FPU_RINT -#endif -#define RINT_T int16_t -#define RINT_MAX 32767 -#include "rint-clip.h" - -#define LSX_RINT_CLIP_2 lsx_rint16_clip_2_dither -#define LSX_RINT_CLIP lsx_rint16_clip_dither -#define RINT_CLIP rint16_clip_dither -#define RINT rint16D -#if defined FPU_RINT16 - #define FPU_RINT -#endif -#define RINT_T int16_t -#define RINT_MAX 32767 -#define DITHER -#include "rint-clip.h" - -#undef FLOATX -#endif - - - -#if WITH_CR32 || WITH_CR32S || WITH_VR32 -#define FLOATX float - -#define LSX_RINT_CLIP_2 lsx_rint32_clip_2_f -#define LSX_RINT_CLIP lsx_rint32_clip_f -#define RINT_CLIP rint32_clip_f -#define RINT rint32F -#if defined FPU_RINT32 - #define FPU_RINT -#endif -#define RINT_T int32_t -#define RINT_MAX 2147483647L -#include "rint-clip.h" - -#define LSX_RINT_CLIP_2 lsx_rint16_clip_2_f -#define LSX_RINT_CLIP lsx_rint16_clip_f -#define RINT_CLIP rint16_clip_f -#define RINT rint16F -#if defined FPU_RINT16 - #define FPU_RINT -#endif -#define RINT_T int16_t -#define RINT_MAX 32767 -#include "rint-clip.h" - -#define LSX_RINT_CLIP_2 lsx_rint16_clip_2_dither_f -#define LSX_RINT_CLIP lsx_rint16_clip_dither_f -#define RINT_CLIP rint16_clip_dither_f -#define RINT rint16D -#if defined FPU_RINT16 - #define FPU_RINT -#endif -#define RINT_T int16_t -#define RINT_MAX 32767 -#define DITHER -#include "rint-clip.h" - -#undef FLOATX -#endif - -#if defined FE_INVALID && defined FPU_RINT32 && defined __STDC_VERSION__ - #if __STDC_VERSION__ >= 199901L - #pragma STDC FENV_ACCESS OFF - #endif -#endif - - - -#define INTERLEAVE_TO(T,flag) do { \ - unsigned i; \ - size_t j; \ - T * dest = *dest0; \ - if (ch > 1) \ - for (j = 0; j < n; ++j) for (i = 0; i < ch; ++i) *dest++ = (T)src[i][j]; \ - else if (flag) memcpy(dest, src[0], n * sizeof(T)), dest = &dest[n]; \ - else for (j = 0; j < n; *dest++ = (T)src[0][j++]); \ - *dest0 = dest; \ - return 0; \ -} while (0) - -#if WITH_CR64 || WITH_CR64S -size_t /* clips */ _soxr_interleave(soxr_datatype_t data_type, void * * dest0, - double const * const * src, size_t n, unsigned ch, unsigned long long * seed) -{ - switch (data_type & 3) { - case SOXR_FLOAT32: INTERLEAVE_TO(float, 0); - case SOXR_FLOAT64: INTERLEAVE_TO(double, 1); - - case SOXR_INT32: if (ch == 1) - return lsx_rint32_clip(dest0, src[0], n); - return lsx_rint32_clip_2(dest0, src, ch, n); - - case SOXR_INT16: if (seed) { - if (ch == 1) - return lsx_rint16_clip_dither(dest0, src[0], n, seed); - return lsx_rint16_clip_2_dither(dest0, src, ch, n, seed); - } - if (ch == 1) - return lsx_rint16_clip(dest0, src[0], n); - return lsx_rint16_clip_2(dest0, src, ch, n); - default: break; - } - return 0; -} -#endif - -#if WITH_CR32 || WITH_CR32S || WITH_VR32 -size_t /* clips */ _soxr_interleave_f(soxr_datatype_t data_type, void * * dest0, - float const * const * src, size_t n, unsigned ch, unsigned long long * seed) -{ - switch (data_type & 3) { - case SOXR_FLOAT32: INTERLEAVE_TO(float, 1); - case SOXR_FLOAT64: INTERLEAVE_TO(double, 0); - - case SOXR_INT32: if (ch == 1) - return lsx_rint32_clip_f(dest0, src[0], n); - return lsx_rint32_clip_2_f(dest0, src, ch, n); - - case SOXR_INT16: if (seed) { - if (ch == 1) - return lsx_rint16_clip_dither_f(dest0, src[0], n, seed); - return lsx_rint16_clip_2_dither_f(dest0, src, ch, n, seed); - } - if (ch == 1) - return lsx_rint16_clip_f(dest0, src[0], n); - return lsx_rint16_clip_2_f(dest0, src, ch, n); - default: break; - } - return 0; -} -#endif diff --git a/soxr-sys/src/data-io.h b/soxr-sys/src/data-io.h deleted file mode 100644 index 28e2d8907..000000000 --- a/soxr-sys/src/data-io.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#if !defined soxr_data_io_included -#define soxr_data_io_included - -#include "soxr.h" - -void _soxr_deinterleave( - double * * dest, - soxr_datatype_t data_type, - void const * * src0, - size_t n, - unsigned ch); - -void _soxr_deinterleave_f( - float * * dest, - soxr_datatype_t data_type, - void const * * src0, - size_t n, - unsigned ch); - -size_t /* clips */ _soxr_interleave( - soxr_datatype_t data_type, - void * * dest, - double const * const * src, - size_t n, - unsigned ch, - unsigned long long * seed); - -size_t /* clips */ _soxr_interleave_f( - soxr_datatype_t data_type, - void * * dest, - float const * const * src, - size_t n, - unsigned ch, - unsigned long long * seed); - -#endif diff --git a/soxr-sys/src/dbesi0.c b/soxr-sys/src/dbesi0.c deleted file mode 100644 index 654216eb4..000000000 --- a/soxr-sys/src/dbesi0.c +++ /dev/null @@ -1,149 +0,0 @@ -/* Copyright(C) 1996 Takuya OOURA - -You may use, copy, modify this code for any purpose and -without fee. - -Package home: http://www.kurims.kyoto-u.ac.jp/~ooura/bessel.html -*/ - -#include "filter.h" -#define dbesi0 lsx_bessel_I_0 - -/* Bessel I_0(x) function in double precision */ - -#include - -double dbesi0(double x) -{ - int k; - double w, t, y; - static double a[65] = { - 8.5246820682016865877e-11, 2.5966600546497407288e-9, - 7.9689994568640180274e-8, 1.9906710409667748239e-6, - 4.0312469446528002532e-5, 6.4499871606224265421e-4, - 0.0079012345761930579108, 0.071111111109207045212, - 0.444444444444724909, 1.7777777777777532045, - 4.0000000000000011182, 3.99999999999999998, - 1.0000000000000000001, - 1.1520919130377195927e-10, 2.2287613013610985225e-9, - 8.1903951930694585113e-8, 1.9821560631611544984e-6, - 4.0335461940910133184e-5, 6.4495330974432203401e-4, - 0.0079013012611467520626, 0.071111038160875566622, - 0.44444450319062699316, 1.7777777439146450067, - 4.0000000132337935071, 3.9999999968569015366, - 1.0000000003426703174, - 1.5476870780515238488e-10, 1.2685004214732975355e-9, - 9.2776861851114223267e-8, 1.9063070109379044378e-6, - 4.0698004389917945832e-5, 6.4370447244298070713e-4, - 0.0079044749458444976958, 0.071105052411749363882, - 0.44445280640924755082, 1.7777694934432109713, - 4.0000055808824003386, 3.9999977081165740932, - 1.0000004333949319118, - 2.0675200625006793075e-10, -6.1689554705125681442e-10, - 1.2436765915401571654e-7, 1.5830429403520613423e-6, - 4.2947227560776583326e-5, 6.3249861665073441312e-4, - 0.0079454472840953930811, 0.070994327785661860575, - 0.44467219586283000332, 1.7774588182255374745, - 4.0003038986252717972, 3.9998233869142057195, - 1.0000472932961288324, - 2.7475684794982708655e-10, -3.8991472076521332023e-9, - 1.9730170483976049388e-7, 5.9651531561967674521e-7, - 5.1992971474748995357e-5, 5.7327338675433770752e-4, - 0.0082293143836530412024, 0.069990934858728039037, - 0.44726764292723985087, 1.7726685170014087784, - 4.0062907863712704432, 3.9952750700487845355, - 1.0016354346654179322 - }; - static double b[70] = { - 6.7852367144945531383e-8, 4.6266061382821826854e-7, - 6.9703135812354071774e-6, 7.6637663462953234134e-5, - 7.9113515222612691636e-4, 0.0073401204731103808981, - 0.060677114958668837046, 0.43994941411651569622, - 2.7420017097661750609, 14.289661921740860534, - 59.820609640320710779, 188.78998681199150629, - 399.8731367825601118, 427.56411572180478514, - 1.8042097874891098754e-7, 1.2277164312044637357e-6, - 1.8484393221474274861e-5, 2.0293995900091309208e-4, - 0.0020918539850246207459, 0.019375315654033949297, - 0.15985869016767185908, 1.1565260527420641724, - 7.1896341224206072113, 37.354773811947484532, - 155.80993164266268457, 489.5211371158540918, - 1030.9147225169564806, 1093.5883545113746958, - 4.8017305613187493564e-7, 3.261317843912380074e-6, - 4.9073137508166159639e-5, 5.3806506676487583755e-4, - 0.0055387918291051866561, 0.051223717488786549025, - 0.42190298621367914765, 3.0463625987357355872, - 18.895299447327733204, 97.915189029455461554, - 407.13940115493494659, 1274.3088990480582632, - 2670.9883037012547506, 2815.7166284662544712, - 1.2789926338424623394e-6, 8.6718263067604918916e-6, - 1.3041508821299929489e-4, 0.001428224737372747892, - 0.014684070635768789378, 0.13561403190404185755, - 1.1152592585977393953, 8.0387088559465389038, - 49.761318895895479206, 257.2684232313529138, - 1066.8543146269566231, 3328.3874581009636362, - 6948.8586598121634874, 7288.4893398212481055, - 3.409350368197032893e-6, 2.3079025203103376076e-5, - 3.4691373283901830239e-4, 0.003794994977222908545, - 0.038974209677945602145, 0.3594948380414878371, - 2.9522878893539528226, 21.246564609514287056, - 131.28727387146173141, 677.38107093296675421, - 2802.3724744545046518, 8718.5731420798254081, - 18141.348781638832286, 18948.925349296308859 - }; - static double c[45] = { - 2.5568678676452702768e-15, 3.0393953792305924324e-14, - 6.3343751991094840009e-13, 1.5041298011833009649e-11, - 4.4569436918556541414e-10, 1.746393051427167951e-8, - 1.0059224011079852317e-6, 1.0729838945088577089e-4, - 0.05150322693642527738, - 5.2527963991711562216e-15, 7.202118481421005641e-15, - 7.2561421229904797156e-13, 1.482312146673104251e-11, - 4.4602670450376245434e-10, 1.7463600061788679671e-8, - 1.005922609132234756e-6, 1.0729838937545111487e-4, - 0.051503226936437300716, - 1.3365917359358069908e-14, -1.2932643065888544835e-13, - 1.7450199447905602915e-12, 1.0419051209056979788e-11, - 4.58047881980598326e-10, 1.7442405450073548966e-8, - 1.0059461453281292278e-6, 1.0729837434500161228e-4, - 0.051503226940658446941, - 5.3771611477352308649e-14, -1.1396193006413731702e-12, - 1.2858641335221653409e-11, -5.9802086004570057703e-11, - 7.3666894305929510222e-10, 1.6731837150730356448e-8, - 1.0070831435812128922e-6, 1.0729733111203704813e-4, - 0.051503227360726294675, - 3.7819492084858931093e-14, -4.8600496888588034879e-13, - 1.6898350504817224909e-12, 4.5884624327524255865e-11, - 1.2521615963377513729e-10, 1.8959658437754727957e-8, - 1.0020716710561353622e-6, 1.073037119856927559e-4, - 0.05150322383300230775 - }; - - w = fabs(x); - if (w < 8.5) { - t = w * w * 0.0625; - k = 13 * ((int) t); - y = (((((((((((a[k] * t + a[k + 1]) * t + - a[k + 2]) * t + a[k + 3]) * t + a[k + 4]) * t + - a[k + 5]) * t + a[k + 6]) * t + a[k + 7]) * t + - a[k + 8]) * t + a[k + 9]) * t + a[k + 10]) * t + - a[k + 11]) * t + a[k + 12]; - } else if (w < 12.5) { - k = (int) w; - t = w - k; - k = 14 * (k - 8); - y = ((((((((((((b[k] * t + b[k + 1]) * t + - b[k + 2]) * t + b[k + 3]) * t + b[k + 4]) * t + - b[k + 5]) * t + b[k + 6]) * t + b[k + 7]) * t + - b[k + 8]) * t + b[k + 9]) * t + b[k + 10]) * t + - b[k + 11]) * t + b[k + 12]) * t + b[k + 13]; - } else { - t = 60 / w; - k = 9 * ((int) t); - y = ((((((((c[k] * t + c[k + 1]) * t + - c[k + 2]) * t + c[k + 3]) * t + c[k + 4]) * t + - c[k + 5]) * t + c[k + 6]) * t + c[k + 7]) * t + - c[k + 8]) * sqrt(t) * exp(w); - } - return y; -} diff --git a/soxr-sys/src/dev32s.h b/soxr-sys/src/dev32s.h deleted file mode 100644 index 7edae868d..000000000 --- a/soxr-sys/src/dev32s.h +++ /dev/null @@ -1,54 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#if !defined soxr_dev32s_included -#define soxr_dev32s_included - -#if defined __GNUC__ - #define SIMD_INLINE(T) static __inline T __attribute__((always_inline)) - #define vAlign __attribute__((aligned (16))) -#elif defined _MSC_VER - #define SIMD_INLINE(T) static __forceinline T - #define vAlign __declspec(align(16)) -#endif - -#if defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86 - -#include - -#define vZero() _mm_setzero_ps() -#define vSet1(a) _mm_set_ss(a) -#define vMul(a,b) _mm_mul_ps(a,b) -#define vAdd(a,b) _mm_add_ps(a,b) -#define vMac(a,b,c) vAdd(vMul(a,b),c) -#define vLds(a) _mm_set1_ps(a) -#define vLd(a) _mm_load_ps(a) -#define vLdu(a) _mm_loadu_ps(a) - -typedef __m128 v4_t; - -SIMD_INLINE(void) vStorSum(float * a, v4_t b) { - v4_t t = vAdd(_mm_movehl_ps(b, b), b); - _mm_store_ss(a, vAdd(t, _mm_shuffle_ps(t,t,1)));} - -#elif defined __arm__ - -#include - -#define vZero() vdupq_n_f32(0) -#define vMul(a,b) vmulq_f32(a,b) -#define vAdd(a,b) vaddq_f32(a,b) -#define vMac(a,b,c) vmlaq_f32(c,a,b) -#define vLds(a) vld1q_dup_f32(&(a)) -#define vLd(a) vld1q_f32(a) -#define vLdu(a) vld1q_f32(a) - -typedef float32x4_t v4_t; - -SIMD_INLINE(void) vStorSum(float * a, v4_t b) { - float32x2_t t = vadd_f32(vget_high_f32(b), vget_low_f32(b)); - *a = vget_lane_f32(vpadd_f32(t, t), 0);} - -#endif - -#endif diff --git a/soxr-sys/src/dev64s.h b/soxr-sys/src/dev64s.h deleted file mode 100644 index 4672210d1..000000000 --- a/soxr-sys/src/dev64s.h +++ /dev/null @@ -1,42 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#if !defined soxr_dev64s_included -#define soxr_dev64s_included - -#if defined __GNUC__ - #define SIMD_INLINE(T) static __inline T __attribute__((always_inline)) - #define vAlign __attribute__((aligned (32))) -#elif defined _MSC_VER - #define SIMD_INLINE(T) static __forceinline T - #define vAlign __declspec(align(32)) -#else - #define SIMD_INLINE(T) static __inline T -#endif - -#if defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86 - -#include - -#if defined __AVX__ - -#define vZero() _mm256_setzero_pd() -#define vSet1(a) _mm256_set_pd(0,0,0,a) -#define vMul(a,b) _mm256_mul_pd(a,b) -#define vAdd(a,b) _mm256_add_pd(a,b) -#define vMac(a,b,c) vAdd(vMul(a,b),c) /* Note: gcc -mfma will `fuse' these */ -#define vLds(a) _mm256_set1_pd(a) -#define vLd(a) _mm256_load_pd(a) -#define vLdu(a) _mm256_loadu_pd(a) - -typedef __m256d v4_t; - -SIMD_INLINE(void) vStorSum(double * a, v4_t b) { - b = _mm256_hadd_pd(b, _mm256_permute2f128_pd(b,b,1)); - _mm_store_sd(a, _mm256_castpd256_pd128(_mm256_hadd_pd(b,b)));} - -#endif - -#endif - -#endif diff --git a/soxr-sys/src/fft4g.c b/soxr-sys/src/fft4g.c deleted file mode 100644 index cf6293a04..000000000 --- a/soxr-sys/src/fft4g.c +++ /dev/null @@ -1,1346 +0,0 @@ -/* Copyright Takuya OOURA, 1996-2001. - -You may use, copy, modify and distribute this code for any -purpose (include commercial use) and without fee. Please -refer to this package when you modify this code. - -Package home: http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html - -Fast Fourier/Cosine/Sine Transform - dimension :one - data length :power of 2 - decimation :frequency - radix :4, 2 - data :inplace - table :use -functions - cdft: Complex Discrete Fourier Transform - rdft: Real Discrete Fourier Transform - ddct: Discrete Cosine Transform - ddst: Discrete Sine Transform - dfct: Cosine Transform of RDFT (Real Symmetric DFT) - dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) -function prototypes - void cdft(int, int, double *, int *, double *); - void rdft(int, int, double *, int *, double *); - void ddct(int, int, double *, int *, double *); - void ddst(int, int, double *, int *, double *); - void dfct(int, double *, double *, int *, double *); - void dfst(int, double *, double *, int *, double *); - - --------- Complex DFT (Discrete Fourier Transform) -------- - [definition] - - X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k - X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k - ip[0] = 0; // first time only - cdft(2*n, 1, a, ip, w); - - ip[0] = 0; // first time only - cdft(2*n, -1, a, ip, w); - [parameters] - 2*n :data length (int) - n >= 1, n = power of 2 - a[0...2*n-1] :input/output data (double *) - input data - a[2*j] = Re(x[j]), - a[2*j+1] = Im(x[j]), 0<=j= 2+sqrt(n) - strictly, - length of ip >= - 2+(1<<(int)(log(n+0.5)/log(2))/2). - ip[0],ip[1] are pointers of the cos/sin table. - w[0...n/2-1] :cos/sin table (double *) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - cdft(2*n, -1, a, ip, w); - is - cdft(2*n, 1, a, ip, w); - for (j = 0; j <= 2 * n - 1; j++) { - a[j] *= 1.0 / n; - } - . - - --------- Real DFT / Inverse of Real DFT -------- - [definition] - RDFT - R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2 - I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0 IRDFT (excluding scale) - a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + - sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + - sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k - ip[0] = 0; // first time only - rdft(n, 1, a, ip, w); - - ip[0] = 0; // first time only - rdft(n, -1, a, ip, w); - [parameters] - n :data length (int) - n >= 2, n = power of 2 - a[0...n-1] :input/output data (double *) - - output data - a[2*k] = R[k], 0<=k - input data - a[2*j] = R[j], 0<=j= 2+sqrt(n/2) - strictly, - length of ip >= - 2+(1<<(int)(log(n/2+0.5)/log(2))/2). - ip[0],ip[1] are pointers of the cos/sin table. - w[0...n/2-1] :cos/sin table (double *) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - rdft(n, 1, a, ip, w); - is - rdft(n, -1, a, ip, w); - for (j = 0; j <= n - 1; j++) { - a[j] *= 2.0 / n; - } - . - - --------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- - [definition] - IDCT (excluding scale) - C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k DCT - C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k - ip[0] = 0; // first time only - ddct(n, 1, a, ip, w); - - ip[0] = 0; // first time only - ddct(n, -1, a, ip, w); - [parameters] - n :data length (int) - n >= 2, n = power of 2 - a[0...n-1] :input/output data (double *) - output data - a[k] = C[k], 0<=k= 2+sqrt(n/2) - strictly, - length of ip >= - 2+(1<<(int)(log(n/2+0.5)/log(2))/2). - ip[0],ip[1] are pointers of the cos/sin table. - w[0...n*5/4-1] :cos/sin table (double *) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - ddct(n, -1, a, ip, w); - is - a[0] *= 0.5; - ddct(n, 1, a, ip, w); - for (j = 0; j <= n - 1; j++) { - a[j] *= 2.0 / n; - } - . - - --------- DST (Discrete Sine Transform) / Inverse of DST -------- - [definition] - IDST (excluding scale) - S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k DST - S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0 - ip[0] = 0; // first time only - ddst(n, 1, a, ip, w); - - ip[0] = 0; // first time only - ddst(n, -1, a, ip, w); - [parameters] - n :data length (int) - n >= 2, n = power of 2 - a[0...n-1] :input/output data (double *) - - input data - a[j] = A[j], 0 - output data - a[k] = S[k], 0= 2+sqrt(n/2) - strictly, - length of ip >= - 2+(1<<(int)(log(n/2+0.5)/log(2))/2). - ip[0],ip[1] are pointers of the cos/sin table. - w[0...n*5/4-1] :cos/sin table (double *) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - ddst(n, -1, a, ip, w); - is - a[0] *= 0.5; - ddst(n, 1, a, ip, w); - for (j = 0; j <= n - 1; j++) { - a[j] *= 2.0 / n; - } - . - - --------- Cosine Transform of RDFT (Real Symmetric DFT) -------- - [definition] - C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n - [usage] - ip[0] = 0; // first time only - dfct(n, a, t, ip, w); - [parameters] - n :data length - 1 (int) - n >= 2, n = power of 2 - a[0...n] :input/output data (double *) - output data - a[k] = C[k], 0<=k<=n - t[0...n/2] :work area (double *) - ip[0...*] :work area for bit reversal (int *) - length of ip >= 2+sqrt(n/4) - strictly, - length of ip >= - 2+(1<<(int)(log(n/4+0.5)/log(2))/2). - ip[0],ip[1] are pointers of the cos/sin table. - w[0...n*5/8-1] :cos/sin table (double *) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - a[0] *= 0.5; - a[n] *= 0.5; - dfct(n, a, t, ip, w); - is - a[0] *= 0.5; - a[n] *= 0.5; - dfct(n, a, t, ip, w); - for (j = 0; j <= n; j++) { - a[j] *= 2.0 / n; - } - . - - --------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- - [definition] - S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0= 2, n = power of 2 - a[0...n-1] :input/output data (double *) - output data - a[k] = S[k], 0= 2+sqrt(n/4) - strictly, - length of ip >= - 2+(1<<(int)(log(n/4+0.5)/log(2))/2). - ip[0],ip[1] are pointers of the cos/sin table. - w[0...n*5/8-1] :cos/sin table (double *) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - dfst(n, a, t, ip, w); - is - dfst(n, a, t, ip, w); - for (j = 1; j <= n - 1; j++) { - a[j] *= 2.0 / n; - } - . - - -Appendix : - The cos/sin table is recalculated when the larger table required. - w[] and ip[] are compatible with all routines. -*/ - - -#include "math-wrap.h" -#include "fft4g.h" - -#ifdef FFT4G_FLOAT - #define double float - #define one_half 0.5f - - #define sin(x) sinf(x) - #define cos(x) cosf(x) - #define atan(x) atanf(x) - - #define cdft lsx_cdft_f - #define rdft lsx_rdft_f - #define ddct lsx_ddct_f - #define ddst lsx_ddst_f - #define dfct lsx_dfct_f - #define dfst lsx_dfst_f -#else - #define one_half 0.5 - #define cdft lsx_cdft - #define rdft lsx_rdft - #define ddct lsx_ddct - #define ddst lsx_ddst - #define dfct lsx_dfct - #define dfst lsx_dfst -#endif - -static void bitrv2conj(int n, int *ip, double *a); -static void bitrv2(int n, int *ip, double *a); -static void cft1st(int n, double *a, double const *w); -static void cftbsub(int n, double *a, double const *w); -static void cftfsub(int n, double *a, double const *w); -static void cftmdl(int n, int l, double *a, double const *w); -static void dctsub(int n, double *a, int nc, double const *c); -static void dstsub(int n, double *a, int nc, double const *c); -static void makect(int nc, int *ip, double *c); -static void makewt(int nw, int *ip, double *w); -static void rftbsub(int n, double *a, int nc, double const *c); -static void rftfsub(int n, double *a, int nc, double const *c); - - -void cdft(int n, int isgn, double *a, int *ip, double *w) -{ - if (n > (ip[0] << 2)) { - makewt(n >> 2, ip, w); - } - if (n > 4) { - if (isgn >= 0) { - bitrv2(n, ip + 2, a); - cftfsub(n, a, w); - } else { - bitrv2conj(n, ip + 2, a); - cftbsub(n, a, w); - } - } else if (n == 4) { - cftfsub(n, a, w); - } -} - - -void rdft(int n, int isgn, double *a, int *ip, double *w) -{ - int nw, nc; - double xi; - - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n > (nc << 2)) { - nc = n >> 2; - makect(nc, ip, w + nw); - } - if (isgn >= 0) { - if (n > 4) { - bitrv2(n, ip + 2, a); - cftfsub(n, a, w); - rftfsub(n, a, nc, w + nw); - } else if (n == 4) { - cftfsub(n, a, w); - } - xi = a[0] - a[1]; - a[0] += a[1]; - a[1] = xi; - } else { - a[1] = one_half * (a[0] - a[1]); - a[0] -= a[1]; - if (n > 4) { - rftbsub(n, a, nc, w + nw); - bitrv2(n, ip + 2, a); - cftbsub(n, a, w); - } else if (n == 4) { - cftfsub(n, a, w); - } - } -} - - -void ddct(int n, int isgn, double *a, int *ip, double *w) -{ - int j, nw, nc; - double xr; - - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n > nc) { - nc = n; - makect(nc, ip, w + nw); - } - if (isgn < 0) { - xr = a[n - 1]; - for (j = n - 2; j >= 2; j -= 2) { - a[j + 1] = a[j] - a[j - 1]; - a[j] += a[j - 1]; - } - a[1] = a[0] - xr; - a[0] += xr; - if (n > 4) { - rftbsub(n, a, nc, w + nw); - bitrv2(n, ip + 2, a); - cftbsub(n, a, w); - } else if (n == 4) { - cftfsub(n, a, w); - } - } - dctsub(n, a, nc, w + nw); - if (isgn >= 0) { - if (n > 4) { - bitrv2(n, ip + 2, a); - cftfsub(n, a, w); - rftfsub(n, a, nc, w + nw); - } else if (n == 4) { - cftfsub(n, a, w); - } - xr = a[0] - a[1]; - a[0] += a[1]; - for (j = 2; j < n; j += 2) { - a[j - 1] = a[j] - a[j + 1]; - a[j] += a[j + 1]; - } - a[n - 1] = xr; - } -} - - -void ddst(int n, int isgn, double *a, int *ip, double *w) -{ - int j, nw, nc; - double xr; - - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n > nc) { - nc = n; - makect(nc, ip, w + nw); - } - if (isgn < 0) { - xr = a[n - 1]; - for (j = n - 2; j >= 2; j -= 2) { - a[j + 1] = -a[j] - a[j - 1]; - a[j] -= a[j - 1]; - } - a[1] = a[0] + xr; - a[0] -= xr; - if (n > 4) { - rftbsub(n, a, nc, w + nw); - bitrv2(n, ip + 2, a); - cftbsub(n, a, w); - } else if (n == 4) { - cftfsub(n, a, w); - } - } - dstsub(n, a, nc, w + nw); - if (isgn >= 0) { - if (n > 4) { - bitrv2(n, ip + 2, a); - cftfsub(n, a, w); - rftfsub(n, a, nc, w + nw); - } else if (n == 4) { - cftfsub(n, a, w); - } - xr = a[0] - a[1]; - a[0] += a[1]; - for (j = 2; j < n; j += 2) { - a[j - 1] = -a[j] - a[j + 1]; - a[j] -= a[j + 1]; - } - a[n - 1] = -xr; - } -} - - -void dfct(int n, double *a, double *t, int *ip, double *w) -{ - int j, k, l, m, mh, nw, nc; - double xr, xi, yr, yi; - - nw = ip[0]; - if (n > (nw << 3)) { - nw = n >> 3; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n > (nc << 1)) { - nc = n >> 1; - makect(nc, ip, w + nw); - } - m = n >> 1; - yi = a[m]; - xi = a[0] + a[n]; - a[0] -= a[n]; - t[0] = xi - yi; - t[m] = xi + yi; - if (n > 2) { - mh = m >> 1; - for (j = 1; j < mh; j++) { - k = m - j; - xr = a[j] - a[n - j]; - xi = a[j] + a[n - j]; - yr = a[k] - a[n - k]; - yi = a[k] + a[n - k]; - a[j] = xr; - a[k] = yr; - t[j] = xi - yi; - t[k] = xi + yi; - } - t[mh] = a[mh] + a[n - mh]; - a[mh] -= a[n - mh]; - dctsub(m, a, nc, w + nw); - if (m > 4) { - bitrv2(m, ip + 2, a); - cftfsub(m, a, w); - rftfsub(m, a, nc, w + nw); - } else if (m == 4) { - cftfsub(m, a, w); - } - a[n - 1] = a[0] - a[1]; - a[1] = a[0] + a[1]; - for (j = m - 2; j >= 2; j -= 2) { - a[2 * j + 1] = a[j] + a[j + 1]; - a[2 * j - 1] = a[j] - a[j + 1]; - } - l = 2; - m = mh; - while (m >= 2) { - dctsub(m, t, nc, w + nw); - if (m > 4) { - bitrv2(m, ip + 2, t); - cftfsub(m, t, w); - rftfsub(m, t, nc, w + nw); - } else if (m == 4) { - cftfsub(m, t, w); - } - a[n - l] = t[0] - t[1]; - a[l] = t[0] + t[1]; - k = 0; - for (j = 2; j < m; j += 2) { - k += l << 2; - a[k - l] = t[j] - t[j + 1]; - a[k + l] = t[j] + t[j + 1]; - } - l <<= 1; - mh = m >> 1; - for (j = 0; j < mh; j++) { - k = m - j; - t[j] = t[m + k] - t[m + j]; - t[k] = t[m + k] + t[m + j]; - } - t[mh] = t[m + mh]; - m = mh; - } - a[l] = t[0]; - a[n] = t[2] - t[1]; - a[0] = t[2] + t[1]; - } else { - a[1] = a[0]; - a[2] = t[0]; - a[0] = t[1]; - } -} - - -void dfst(int n, double *a, double *t, int *ip, double *w) -{ - int j, k, l, m, mh, nw, nc; - double xr, xi, yr, yi; - - nw = ip[0]; - if (n > (nw << 3)) { - nw = n >> 3; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n > (nc << 1)) { - nc = n >> 1; - makect(nc, ip, w + nw); - } - if (n > 2) { - m = n >> 1; - mh = m >> 1; - for (j = 1; j < mh; j++) { - k = m - j; - xr = a[j] + a[n - j]; - xi = a[j] - a[n - j]; - yr = a[k] + a[n - k]; - yi = a[k] - a[n - k]; - a[j] = xr; - a[k] = yr; - t[j] = xi + yi; - t[k] = xi - yi; - } - t[0] = a[mh] - a[n - mh]; - a[mh] += a[n - mh]; - a[0] = a[m]; - dstsub(m, a, nc, w + nw); - if (m > 4) { - bitrv2(m, ip + 2, a); - cftfsub(m, a, w); - rftfsub(m, a, nc, w + nw); - } else if (m == 4) { - cftfsub(m, a, w); - } - a[n - 1] = a[1] - a[0]; - a[1] = a[0] + a[1]; - for (j = m - 2; j >= 2; j -= 2) { - a[2 * j + 1] = a[j] - a[j + 1]; - a[2 * j - 1] = -a[j] - a[j + 1]; - } - l = 2; - m = mh; - while (m >= 2) { - dstsub(m, t, nc, w + nw); - if (m > 4) { - bitrv2(m, ip + 2, t); - cftfsub(m, t, w); - rftfsub(m, t, nc, w + nw); - } else if (m == 4) { - cftfsub(m, t, w); - } - a[n - l] = t[1] - t[0]; - a[l] = t[0] + t[1]; - k = 0; - for (j = 2; j < m; j += 2) { - k += l << 2; - a[k - l] = -t[j] - t[j + 1]; - a[k + l] = t[j] - t[j + 1]; - } - l <<= 1; - mh = m >> 1; - for (j = 1; j < mh; j++) { - k = m - j; - t[j] = t[m + k] + t[m + j]; - t[k] = t[m + k] - t[m + j]; - } - t[0] = t[m + mh]; - m = mh; - } - a[l] = t[0]; - } - a[0] = 0; -} - - -/* -------- initializing routines -------- */ - - -static void makewt(int nw, int *ip, double *w) -{ - int j, nwh; - double delta, x, y; - - ip[0] = nw; - ip[1] = 1; - if (nw > 2) { - nwh = nw >> 1; - delta = atan(1.0) / (double)nwh; - w[0] = 1; - w[1] = 0; - w[nwh] = cos(delta * (double)nwh); - w[nwh + 1] = w[nwh]; - if (nwh > 2) { - for (j = 2; j < nwh; j += 2) { - x = cos(delta * (double)j); - y = sin(delta * (double)j); - w[j] = x; - w[j + 1] = y; - w[nw - j] = y; - w[nw - j + 1] = x; - } - bitrv2(nw, ip + 2, w); - } - } -} - - -static void makect(int nc, int *ip, double *c) -{ - int j, nch; - double delta; - - ip[1] = nc; - if (nc > 1) { - nch = nc >> 1; - delta = atan(1.0) / (double)nch; - c[0] = cos(delta * (double)nch); - c[nch] = one_half * c[0]; - for (j = 1; j < nch; j++) { - c[j] = one_half * cos(delta * (double)j); - c[nc - j] = one_half * sin(delta * (double)j); - } - } -} - - -/* -------- child routines -------- */ - - -static void bitrv2(int n, int *ip0, double *a) -{ - int j, j1, k, k1, l, m, m2, ip[1024]; - double xr, xi, yr, yi; - - (void)ip0; - ip[0] = 0; - l = n; - m = 1; - while ((m << 3) < l) { - l >>= 1; - for (j = 0; j < m; j++) { - ip[m + j] = ip[j] + l; - } - m <<= 1; - } - m2 = 2 * m; - if ((m << 3) == l) { - for (k = 0; k < m; k++) { - for (j = 0; j < k; j++) { - j1 = 2 * j + ip[k]; - k1 = 2 * k + ip[j]; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += m2; - k1 += 2 * m2; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += m2; - k1 -= m2; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += m2; - k1 += 2 * m2; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - } - j1 = 2 * k + m2 + ip[k]; - k1 = j1 + m2; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - } - } else { - for (k = 1; k < m; k++) { - for (j = 0; j < k; j++) { - j1 = 2 * j + ip[k]; - k1 = 2 * k + ip[j]; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += m2; - k1 += m2; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - } - } - } -} - - -static void bitrv2conj(int n, int *ip0, double *a) -{ - int j, j1, k, k1, l, m, m2, ip[512]; - double xr, xi, yr, yi; - - (void)ip0; - ip[0] = 0; - l = n; - m = 1; - while ((m << 3) < l) { - l >>= 1; - for (j = 0; j < m; j++) { - ip[m + j] = ip[j] + l; - } - m <<= 1; - } - m2 = 2 * m; - if ((m << 3) == l) { - for (k = 0; k < m; k++) { - for (j = 0; j < k; j++) { - j1 = 2 * j + ip[k]; - k1 = 2 * k + ip[j]; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += m2; - k1 += 2 * m2; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += m2; - k1 -= m2; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += m2; - k1 += 2 * m2; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - } - k1 = 2 * k + ip[k]; - a[k1 + 1] = -a[k1 + 1]; - j1 = k1 + m2; - k1 = j1 + m2; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - k1 += m2; - a[k1 + 1] = -a[k1 + 1]; - } - } else { - a[1] = -a[1]; - a[m2 + 1] = -a[m2 + 1]; - for (k = 1; k < m; k++) { - for (j = 0; j < k; j++) { - j1 = 2 * j + ip[k]; - k1 = 2 * k + ip[j]; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += m2; - k1 += m2; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - } - k1 = 2 * k + ip[k]; - a[k1 + 1] = -a[k1 + 1]; - a[k1 + m2 + 1] = -a[k1 + m2 + 1]; - } - } -} - - -static void cftfsub(int n, double *a, double const *w) -{ - int j, j1, j2, j3, l; - double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; - - l = 2; - if (n > 8) { - cft1st(n, a, w); - l = 8; - while ((l << 2) < n) { - cftmdl(n, l, a, w); - l <<= 2; - } - } - if ((l << 2) == n) { - for (j = 0; j < l; j += 2) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - x0r = a[j] + a[j1]; - x0i = a[j + 1] + a[j1 + 1]; - x1r = a[j] - a[j1]; - x1i = a[j + 1] - a[j1 + 1]; - x2r = a[j2] + a[j3]; - x2i = a[j2 + 1] + a[j3 + 1]; - x3r = a[j2] - a[j3]; - x3i = a[j2 + 1] - a[j3 + 1]; - a[j] = x0r + x2r; - a[j + 1] = x0i + x2i; - a[j2] = x0r - x2r; - a[j2 + 1] = x0i - x2i; - a[j1] = x1r - x3i; - a[j1 + 1] = x1i + x3r; - a[j3] = x1r + x3i; - a[j3 + 1] = x1i - x3r; - } - } else { - for (j = 0; j < l; j += 2) { - j1 = j + l; - x0r = a[j] - a[j1]; - x0i = a[j + 1] - a[j1 + 1]; - a[j] += a[j1]; - a[j + 1] += a[j1 + 1]; - a[j1] = x0r; - a[j1 + 1] = x0i; - } - } -} - - -static void cftbsub(int n, double *a, double const *w) -{ - int j, j1, j2, j3, l; - double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; - - l = 2; - if (n > 8) { - cft1st(n, a, w); - l = 8; - while ((l << 2) < n) { - cftmdl(n, l, a, w); - l <<= 2; - } - } - if ((l << 2) == n) { - for (j = 0; j < l; j += 2) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - x0r = a[j] + a[j1]; - x0i = -a[j + 1] - a[j1 + 1]; - x1r = a[j] - a[j1]; - x1i = -a[j + 1] + a[j1 + 1]; - x2r = a[j2] + a[j3]; - x2i = a[j2 + 1] + a[j3 + 1]; - x3r = a[j2] - a[j3]; - x3i = a[j2 + 1] - a[j3 + 1]; - a[j] = x0r + x2r; - a[j + 1] = x0i - x2i; - a[j2] = x0r - x2r; - a[j2 + 1] = x0i + x2i; - a[j1] = x1r - x3i; - a[j1 + 1] = x1i - x3r; - a[j3] = x1r + x3i; - a[j3 + 1] = x1i + x3r; - } - } else { - for (j = 0; j < l; j += 2) { - j1 = j + l; - x0r = a[j] - a[j1]; - x0i = -a[j + 1] + a[j1 + 1]; - a[j] += a[j1]; - a[j + 1] = -a[j + 1] - a[j1 + 1]; - a[j1] = x0r; - a[j1 + 1] = x0i; - } - } -} - - -static void cft1st(int n, double *a, double const *w) -{ - int j, k1, k2; - double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; - double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; - - x0r = a[0] + a[2]; - x0i = a[1] + a[3]; - x1r = a[0] - a[2]; - x1i = a[1] - a[3]; - x2r = a[4] + a[6]; - x2i = a[5] + a[7]; - x3r = a[4] - a[6]; - x3i = a[5] - a[7]; - a[0] = x0r + x2r; - a[1] = x0i + x2i; - a[4] = x0r - x2r; - a[5] = x0i - x2i; - a[2] = x1r - x3i; - a[3] = x1i + x3r; - a[6] = x1r + x3i; - a[7] = x1i - x3r; - wk1r = w[2]; - x0r = a[8] + a[10]; - x0i = a[9] + a[11]; - x1r = a[8] - a[10]; - x1i = a[9] - a[11]; - x2r = a[12] + a[14]; - x2i = a[13] + a[15]; - x3r = a[12] - a[14]; - x3i = a[13] - a[15]; - a[8] = x0r + x2r; - a[9] = x0i + x2i; - a[12] = x2i - x0i; - a[13] = x0r - x2r; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[10] = wk1r * (x0r - x0i); - a[11] = wk1r * (x0r + x0i); - x0r = x3i + x1r; - x0i = x3r - x1i; - a[14] = wk1r * (x0i - x0r); - a[15] = wk1r * (x0i + x0r); - k1 = 0; - for (j = 16; j < n; j += 16) { - k1 += 2; - k2 = 2 * k1; - wk2r = w[k1]; - wk2i = w[k1 + 1]; - wk1r = w[k2]; - wk1i = w[k2 + 1]; - wk3r = wk1r - 2 * wk2i * wk1i; - wk3i = 2 * wk2i * wk1r - wk1i; - x0r = a[j] + a[j + 2]; - x0i = a[j + 1] + a[j + 3]; - x1r = a[j] - a[j + 2]; - x1i = a[j + 1] - a[j + 3]; - x2r = a[j + 4] + a[j + 6]; - x2i = a[j + 5] + a[j + 7]; - x3r = a[j + 4] - a[j + 6]; - x3i = a[j + 5] - a[j + 7]; - a[j] = x0r + x2r; - a[j + 1] = x0i + x2i; - x0r -= x2r; - x0i -= x2i; - a[j + 4] = wk2r * x0r - wk2i * x0i; - a[j + 5] = wk2r * x0i + wk2i * x0r; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[j + 2] = wk1r * x0r - wk1i * x0i; - a[j + 3] = wk1r * x0i + wk1i * x0r; - x0r = x1r + x3i; - x0i = x1i - x3r; - a[j + 6] = wk3r * x0r - wk3i * x0i; - a[j + 7] = wk3r * x0i + wk3i * x0r; - wk1r = w[k2 + 2]; - wk1i = w[k2 + 3]; - wk3r = wk1r - 2 * wk2r * wk1i; - wk3i = 2 * wk2r * wk1r - wk1i; - x0r = a[j + 8] + a[j + 10]; - x0i = a[j + 9] + a[j + 11]; - x1r = a[j + 8] - a[j + 10]; - x1i = a[j + 9] - a[j + 11]; - x2r = a[j + 12] + a[j + 14]; - x2i = a[j + 13] + a[j + 15]; - x3r = a[j + 12] - a[j + 14]; - x3i = a[j + 13] - a[j + 15]; - a[j + 8] = x0r + x2r; - a[j + 9] = x0i + x2i; - x0r -= x2r; - x0i -= x2i; - a[j + 12] = -wk2i * x0r - wk2r * x0i; - a[j + 13] = -wk2i * x0i + wk2r * x0r; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[j + 10] = wk1r * x0r - wk1i * x0i; - a[j + 11] = wk1r * x0i + wk1i * x0r; - x0r = x1r + x3i; - x0i = x1i - x3r; - a[j + 14] = wk3r * x0r - wk3i * x0i; - a[j + 15] = wk3r * x0i + wk3i * x0r; - } -} - - -static void cftmdl(int n, int l, double *a, double const *w) -{ - int j, j1, j2, j3, k, k1, k2, m, m2; - double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; - double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; - - m = l << 2; - for (j = 0; j < l; j += 2) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - x0r = a[j] + a[j1]; - x0i = a[j + 1] + a[j1 + 1]; - x1r = a[j] - a[j1]; - x1i = a[j + 1] - a[j1 + 1]; - x2r = a[j2] + a[j3]; - x2i = a[j2 + 1] + a[j3 + 1]; - x3r = a[j2] - a[j3]; - x3i = a[j2 + 1] - a[j3 + 1]; - a[j] = x0r + x2r; - a[j + 1] = x0i + x2i; - a[j2] = x0r - x2r; - a[j2 + 1] = x0i - x2i; - a[j1] = x1r - x3i; - a[j1 + 1] = x1i + x3r; - a[j3] = x1r + x3i; - a[j3 + 1] = x1i - x3r; - } - wk1r = w[2]; - for (j = m; j < l + m; j += 2) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - x0r = a[j] + a[j1]; - x0i = a[j + 1] + a[j1 + 1]; - x1r = a[j] - a[j1]; - x1i = a[j + 1] - a[j1 + 1]; - x2r = a[j2] + a[j3]; - x2i = a[j2 + 1] + a[j3 + 1]; - x3r = a[j2] - a[j3]; - x3i = a[j2 + 1] - a[j3 + 1]; - a[j] = x0r + x2r; - a[j + 1] = x0i + x2i; - a[j2] = x2i - x0i; - a[j2 + 1] = x0r - x2r; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[j1] = wk1r * (x0r - x0i); - a[j1 + 1] = wk1r * (x0r + x0i); - x0r = x3i + x1r; - x0i = x3r - x1i; - a[j3] = wk1r * (x0i - x0r); - a[j3 + 1] = wk1r * (x0i + x0r); - } - k1 = 0; - m2 = 2 * m; - for (k = m2; k < n; k += m2) { - k1 += 2; - k2 = 2 * k1; - wk2r = w[k1]; - wk2i = w[k1 + 1]; - wk1r = w[k2]; - wk1i = w[k2 + 1]; - wk3r = wk1r - 2 * wk2i * wk1i; - wk3i = 2 * wk2i * wk1r - wk1i; - for (j = k; j < l + k; j += 2) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - x0r = a[j] + a[j1]; - x0i = a[j + 1] + a[j1 + 1]; - x1r = a[j] - a[j1]; - x1i = a[j + 1] - a[j1 + 1]; - x2r = a[j2] + a[j3]; - x2i = a[j2 + 1] + a[j3 + 1]; - x3r = a[j2] - a[j3]; - x3i = a[j2 + 1] - a[j3 + 1]; - a[j] = x0r + x2r; - a[j + 1] = x0i + x2i; - x0r -= x2r; - x0i -= x2i; - a[j2] = wk2r * x0r - wk2i * x0i; - a[j2 + 1] = wk2r * x0i + wk2i * x0r; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[j1] = wk1r * x0r - wk1i * x0i; - a[j1 + 1] = wk1r * x0i + wk1i * x0r; - x0r = x1r + x3i; - x0i = x1i - x3r; - a[j3] = wk3r * x0r - wk3i * x0i; - a[j3 + 1] = wk3r * x0i + wk3i * x0r; - } - wk1r = w[k2 + 2]; - wk1i = w[k2 + 3]; - wk3r = wk1r - 2 * wk2r * wk1i; - wk3i = 2 * wk2r * wk1r - wk1i; - for (j = k + m; j < l + (k + m); j += 2) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - x0r = a[j] + a[j1]; - x0i = a[j + 1] + a[j1 + 1]; - x1r = a[j] - a[j1]; - x1i = a[j + 1] - a[j1 + 1]; - x2r = a[j2] + a[j3]; - x2i = a[j2 + 1] + a[j3 + 1]; - x3r = a[j2] - a[j3]; - x3i = a[j2 + 1] - a[j3 + 1]; - a[j] = x0r + x2r; - a[j + 1] = x0i + x2i; - x0r -= x2r; - x0i -= x2i; - a[j2] = -wk2i * x0r - wk2r * x0i; - a[j2 + 1] = -wk2i * x0i + wk2r * x0r; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[j1] = wk1r * x0r - wk1i * x0i; - a[j1 + 1] = wk1r * x0i + wk1i * x0r; - x0r = x1r + x3i; - x0i = x1i - x3r; - a[j3] = wk3r * x0r - wk3i * x0i; - a[j3 + 1] = wk3r * x0i + wk3i * x0r; - } - } -} - - -static void rftfsub(int n, double *a, int nc, double const *c) -{ - int j, k, kk, ks, m; - double wkr, wki, xr, xi, yr, yi; - - m = n >> 1; - ks = 2 * nc / m; - kk = 0; - for (j = 2; j < m; j += 2) { - k = n - j; - kk += ks; - wkr = one_half - c[nc - kk]; - wki = c[kk]; - xr = a[j] - a[k]; - xi = a[j + 1] + a[k + 1]; - yr = wkr * xr - wki * xi; - yi = wkr * xi + wki * xr; - a[j] -= yr; - a[j + 1] -= yi; - a[k] += yr; - a[k + 1] -= yi; - } -} - - -static void rftbsub(int n, double *a, int nc, double const *c) -{ - int j, k, kk, ks, m; - double wkr, wki, xr, xi, yr, yi; - - a[1] = -a[1]; - m = n >> 1; - ks = 2 * nc / m; - kk = 0; - for (j = 2; j < m; j += 2) { - k = n - j; - kk += ks; - wkr = one_half - c[nc - kk]; - wki = c[kk]; - xr = a[j] - a[k]; - xi = a[j + 1] + a[k + 1]; - yr = wkr * xr + wki * xi; - yi = wkr * xi - wki * xr; - a[j] -= yr; - a[j + 1] = yi - a[j + 1]; - a[k] += yr; - a[k + 1] = yi - a[k + 1]; - } - a[m + 1] = -a[m + 1]; -} - - -static void dctsub(int n, double *a, int nc, double const *c) -{ - int j, k, kk, ks, m; - double wkr, wki, xr; - - m = n >> 1; - ks = nc / n; - kk = 0; - for (j = 1; j < m; j++) { - k = n - j; - kk += ks; - wkr = c[kk] - c[nc - kk]; - wki = c[kk] + c[nc - kk]; - xr = wki * a[j] - wkr * a[k]; - a[j] = wkr * a[j] + wki * a[k]; - a[k] = xr; - } - a[m] *= c[0]; -} - - -static void dstsub(int n, double *a, int nc, double const *c) -{ - int j, k, kk, ks, m; - double wkr, wki, xr; - - m = n >> 1; - ks = nc / n; - kk = 0; - for (j = 1; j < m; j++) { - k = n - j; - kk += ks; - wkr = c[kk] - c[nc - kk]; - wki = c[kk] + c[nc - kk]; - xr = wki * a[k] - wkr * a[j]; - a[k] = wkr * a[k] + wki * a[j]; - a[j] = xr; - } - a[m] *= c[0]; -} diff --git a/soxr-sys/src/fft4g.h b/soxr-sys/src/fft4g.h deleted file mode 100644 index 0f906abcf..000000000 --- a/soxr-sys/src/fft4g.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -void lsx_cdft(int, int, double *, int *, double *); -void lsx_rdft(int, int, double *, int *, double *); -void lsx_ddct(int, int, double *, int *, double *); -void lsx_ddst(int, int, double *, int *, double *); -void lsx_dfct(int, double *, double *, int *, double *); -void lsx_dfst(int, double *, double *, int *, double *); - -void lsx_cdft_f(int, int, float *, int *, float *); -void lsx_rdft_f(int, int, float *, int *, float *); -void lsx_ddct_f(int, int, float *, int *, float *); -void lsx_ddst_f(int, int, float *, int *, float *); -void lsx_dfct_f(int, float *, float *, int *, float *); -void lsx_dfst_f(int, float *, float *, int *, float *); - -#define dft_br_len(l) (2ul + (1ul << (int)(log(l / 2 + .5) / log(2.)) / 2)) -#define dft_sc_len(l) ((unsigned long)l / 2) - -/* Over-allocate h by 2 to use these macros */ -#define LSX_PACK(h, n) h[1] = h[n] -#define LSX_UNPACK(h, n) h[n] = h[1], h[n + 1] = h[1] = 0; diff --git a/soxr-sys/src/fft4g32.c b/soxr-sys/src/fft4g32.c deleted file mode 100644 index 7a31ba4bb..000000000 --- a/soxr-sys/src/fft4g32.c +++ /dev/null @@ -1,36 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#include -#include "filter.h" -#define FFT4G_FLOAT -#include "fft4g.c" -#include "soxr-config.h" - -#if WITH_CR32 -#include "rdft_t.h" -static void * null(void) {return 0;} -static void forward (int length, void * setup, double * H) {lsx_safe_rdft_f(length, 1, H); (void)setup;} -static void backward(int length, void * setup, double * H) {lsx_safe_rdft_f(length, -1, H); (void)setup;} -static int multiplier(void) {return 2;} -static void nothing(void) {} -static int flags(void) {return 0;} - -fn_t _soxr_rdft32_cb[] = { - (fn_t)null, - (fn_t)null, - (fn_t)nothing, - (fn_t)forward, - (fn_t)forward, - (fn_t)backward, - (fn_t)backward, - (fn_t)_soxr_ordered_convolve_f, - (fn_t)_soxr_ordered_partial_convolve_f, - (fn_t)multiplier, - (fn_t)nothing, - (fn_t)malloc, - (fn_t)calloc, - (fn_t)free, - (fn_t)flags, -}; -#endif diff --git a/soxr-sys/src/fft4g32s.c b/soxr-sys/src/fft4g32s.c deleted file mode 100644 index 8ce9726ef..000000000 --- a/soxr-sys/src/fft4g32s.c +++ /dev/null @@ -1,31 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#include "filter.h" -#include "util32s.h" -#include "rdft_t.h" - -static void * null(void) {return 0;} -static void nothing(void) {} -static void forward (int length, void * setup, float * H) {lsx_safe_rdft_f(length, 1, H); (void)setup;} -static void backward(int length, void * setup, float * H) {lsx_safe_rdft_f(length, -1, H); (void)setup;} -static int multiplier(void) {return 2;} -static int flags(void) {return RDFT_IS_SIMD;} - -fn_t _soxr_rdft32s_cb[] = { - (fn_t)null, - (fn_t)null, - (fn_t)nothing, - (fn_t)forward, - (fn_t)forward, - (fn_t)backward, - (fn_t)backward, - (fn_t)ORDERED_CONVOLVE_SIMD, - (fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD, - (fn_t)multiplier, - (fn_t)nothing, - (fn_t)SIMD_ALIGNED_MALLOC, - (fn_t)SIMD_ALIGNED_CALLOC, - (fn_t)SIMD_ALIGNED_FREE, - (fn_t)flags, -}; diff --git a/soxr-sys/src/fft4g64.c b/soxr-sys/src/fft4g64.c deleted file mode 100644 index 0018516a0..000000000 --- a/soxr-sys/src/fft4g64.c +++ /dev/null @@ -1,35 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#include -#include "filter.h" -#include "fft4g.c" -#include "soxr-config.h" - -#if WITH_CR64 -static void * null(void) {return 0;} -static void nothing(void) {} -static void forward (int length, void * setup, double * H) {lsx_safe_rdft(length, 1, H); (void)setup;} -static void backward(int length, void * setup, double * H) {lsx_safe_rdft(length, -1, H); (void)setup;} -static int multiplier(void) {return 2;} -static int flags(void) {return 0;} - -typedef void (* fn_t)(void); -fn_t _soxr_rdft64_cb[] = { - (fn_t)null, - (fn_t)null, - (fn_t)nothing, - (fn_t)forward, - (fn_t)forward, - (fn_t)backward, - (fn_t)backward, - (fn_t)_soxr_ordered_convolve, - (fn_t)_soxr_ordered_partial_convolve, - (fn_t)multiplier, - (fn_t)nothing, - (fn_t)malloc, - (fn_t)calloc, - (fn_t)free, - (fn_t)flags, -}; -#endif diff --git a/soxr-sys/src/fft4g_cache.h b/soxr-sys/src/fft4g_cache.h deleted file mode 100644 index d776c16c4..000000000 --- a/soxr-sys/src/fft4g_cache.h +++ /dev/null @@ -1,92 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -static int * LSX_FFT_BR; -static DFT_FLOAT * LSX_FFT_SC; -static int FFT_LEN = -1; -static ccrw2_t FFT_CACHE_CCRW; - -void LSX_INIT_FFT_CACHE(void) -{ - if (FFT_LEN >= 0) - return; - assert(LSX_FFT_BR == NULL); - assert(LSX_FFT_SC == NULL); - assert(FFT_LEN == -1); - ccrw2_init(FFT_CACHE_CCRW); - FFT_LEN = 0; -} - -void LSX_CLEAR_FFT_CACHE(void) -{ - assert(FFT_LEN >= 0); - ccrw2_clear(FFT_CACHE_CCRW); - free(LSX_FFT_BR); - free(LSX_FFT_SC); - LSX_FFT_SC = NULL; - LSX_FFT_BR = NULL; - FFT_LEN = -1; -} - -static bool UPDATE_FFT_CACHE(int len) -{ - LSX_INIT_FFT_CACHE(); - assert(lsx_is_power_of_2(len)); - assert(FFT_LEN >= 0); - ccrw2_become_reader(FFT_CACHE_CCRW); - if (len > FFT_LEN) { - ccrw2_cease_reading(FFT_CACHE_CCRW); - ccrw2_become_writer(FFT_CACHE_CCRW); - if (len > FFT_LEN) { - int old_n = FFT_LEN; - FFT_LEN = len; - LSX_FFT_BR = realloc(LSX_FFT_BR, dft_br_len(FFT_LEN) * sizeof(*LSX_FFT_BR)); - LSX_FFT_SC = realloc(LSX_FFT_SC, dft_sc_len(FFT_LEN) * sizeof(*LSX_FFT_SC)); - if (!old_n) { - LSX_FFT_BR[0] = 0; -#if SOXR_LIB - atexit(LSX_CLEAR_FFT_CACHE); -#endif - } - return true; - } - ccrw2_cease_writing(FFT_CACHE_CCRW); - ccrw2_become_reader(FFT_CACHE_CCRW); - } - return false; -} - -static void DONE_WITH_FFT_CACHE(bool is_writer) -{ - if (is_writer) - ccrw2_cease_writing(FFT_CACHE_CCRW); - else ccrw2_cease_reading(FFT_CACHE_CCRW); -} - -void LSX_SAFE_RDFT(int len, int type, DFT_FLOAT * d) -{ - bool is_writer = UPDATE_FFT_CACHE(len); - LSX_RDFT(len, type, d, LSX_FFT_BR, LSX_FFT_SC); - DONE_WITH_FFT_CACHE(is_writer); -} - -void LSX_SAFE_CDFT(int len, int type, DFT_FLOAT * d) -{ - bool is_writer = UPDATE_FFT_CACHE(len); - LSX_CDFT(len, type, d, LSX_FFT_BR, LSX_FFT_SC); - DONE_WITH_FFT_CACHE(is_writer); -} - -#undef UPDATE_FFT_CACHE -#undef LSX_SAFE_RDFT -#undef LSX_SAFE_CDFT -#undef LSX_RDFT -#undef LSX_INIT_FFT_CACHE -#undef LSX_FFT_SC -#undef LSX_FFT_BR -#undef LSX_CLEAR_FFT_CACHE -#undef LSX_CDFT -#undef FFT_LEN -#undef FFT_CACHE_CCRW -#undef DONE_WITH_FFT_CACHE -#undef DFT_FLOAT diff --git a/soxr-sys/src/fifo.h b/soxr-sys/src/fifo.h deleted file mode 100644 index 33af9fe63..000000000 --- a/soxr-sys/src/fifo.h +++ /dev/null @@ -1,125 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#ifndef soxr_fifo_included -#define soxr_fifo_included - -#if !defined FIFO_SIZE_T -#define FIFO_SIZE_T size_t -#endif - -#if !defined FIFO_REALLOC -#include - #define FIFO_REALLOC(a,b,c) realloc(a,b) - #undef FIFO_FREE - #define FIFO_FREE free - #undef FIFO_MALLOC - #define FIFO_MALLOC malloc -#endif - -typedef struct { - char * data; - size_t allocation; /* Number of bytes allocated for data. */ - size_t item_size; /* Size of each item in data */ - size_t begin; /* Offset of the first byte to read. */ - size_t end; /* 1 + Offset of the last byte byte to read. */ -} fifo_t; - -#if !defined FIFO_MIN - #define FIFO_MIN 0x4000 -#endif - -#if !defined UNUSED - #define UNUSED -#endif - -UNUSED static void fifo_clear(fifo_t * f) -{ - f->end = f->begin = 0; -} - -UNUSED static void * fifo_reserve(fifo_t * f, FIFO_SIZE_T n0) -{ - size_t n = (size_t)n0; - n *= f->item_size; - - if (f->begin == f->end) - fifo_clear(f); - - while (1) { - if (f->end + n <= f->allocation) { - void *p = f->data + f->end; - - f->end += n; - return p; - } - if (f->begin > FIFO_MIN) { - memmove(f->data, f->data + f->begin, f->end - f->begin); - f->end -= f->begin; - f->begin = 0; - continue; - } - f->data = FIFO_REALLOC(f->data, f->allocation + n, f->allocation); - f->allocation += n; - if (!f->data) - return 0; - } -} - -UNUSED static void * fifo_write(fifo_t * f, FIFO_SIZE_T n0, void const * data) -{ - size_t n = (size_t)n0; - void * s = fifo_reserve(f, n0); - if (data) - memcpy(s, data, n * f->item_size); - return s; -} - -UNUSED static void fifo_trim_to(fifo_t * f, FIFO_SIZE_T n0) -{ - size_t n = (size_t)n0; - n *= f->item_size; - f->end = f->begin + n; -} - -UNUSED static void fifo_trim_by(fifo_t * f, FIFO_SIZE_T n0) -{ - size_t n = (size_t)n0; - n *= f->item_size; - f->end -= n; -} - -UNUSED static FIFO_SIZE_T fifo_occupancy(fifo_t * f) -{ - return (FIFO_SIZE_T)((f->end - f->begin) / f->item_size); -} - -UNUSED static void * fifo_read(fifo_t * f, FIFO_SIZE_T n0, void * data) -{ - size_t n = (size_t)n0; - char * ret = f->data + f->begin; - n *= f->item_size; - if (n > (f->end - f->begin)) - return NULL; - if (data) - memcpy(data, ret, (size_t)n); - f->begin += n; - return ret; -} - -#define fifo_read_ptr(f) fifo_read(f, (FIFO_SIZE_T)0, NULL) - -UNUSED static void fifo_delete(fifo_t * f) -{ - FIFO_FREE(f->data); -} - -UNUSED static int fifo_create(fifo_t * f, FIFO_SIZE_T item_size) -{ - f->item_size = (size_t)item_size; - f->allocation = FIFO_MIN; - fifo_clear(f); - return !(f->data = FIFO_MALLOC(f->allocation)); -} - -#endif diff --git a/soxr-sys/src/filter.c b/soxr-sys/src/filter.c deleted file mode 100644 index 019d24d90..000000000 --- a/soxr-sys/src/filter.c +++ /dev/null @@ -1,277 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#include "filter.h" - -#include "math-wrap.h" -#include -#include -#include - -#include "fft4g.h" -#include "ccrw2.h" - -#if 1 || WITH_CR64 || WITH_CR64S /* Always need this, for lsx_fir_to_phase. */ -#define DFT_FLOAT double -#define DONE_WITH_FFT_CACHE done_with_fft_cache -#define FFT_CACHE_CCRW fft_cache_ccrw -#define FFT_LEN fft_len -#define LSX_CDFT lsx_cdft -#define LSX_CLEAR_FFT_CACHE lsx_clear_fft_cache -#define LSX_FFT_BR lsx_fft_br -#define LSX_FFT_SC lsx_fft_sc -#define LSX_INIT_FFT_CACHE lsx_init_fft_cache -#define LSX_RDFT lsx_rdft -#define LSX_SAFE_CDFT lsx_safe_cdft -#define LSX_SAFE_RDFT lsx_safe_rdft -#define UPDATE_FFT_CACHE update_fft_cache -#include "fft4g_cache.h" -#endif - -#if (WITH_CR32 && !AVCODEC_FOUND) || (WITH_CR32S && !AVCODEC_FOUND && !WITH_PFFFT) -#define DFT_FLOAT float -#define DONE_WITH_FFT_CACHE done_with_fft_cache_f -#define FFT_CACHE_CCRW fft_cache_ccrw_f -#define FFT_LEN fft_len_f -#define LSX_CDFT lsx_cdft_f -#define LSX_CLEAR_FFT_CACHE lsx_clear_fft_cache_f -#define LSX_FFT_BR lsx_fft_br_f -#define LSX_FFT_SC lsx_fft_sc_f -#define LSX_INIT_FFT_CACHE lsx_init_fft_cache_f -#define LSX_RDFT lsx_rdft_f -#define LSX_SAFE_CDFT lsx_safe_cdft_f -#define LSX_SAFE_RDFT lsx_safe_rdft_f -#define UPDATE_FFT_CACHE update_fft_cache_f -#include "fft4g_cache.h" -#endif - -#if WITH_CR64 || WITH_CR64S || !SOXR_LIB -#define DFT_FLOAT double -#define ORDERED_CONVOLVE lsx_ordered_convolve -#define ORDERED_PARTIAL_CONVOLVE lsx_ordered_partial_convolve -#include "rdft.h" -#endif - -#if WITH_CR32 -#define DFT_FLOAT float -#define ORDERED_CONVOLVE lsx_ordered_convolve_f -#define ORDERED_PARTIAL_CONVOLVE lsx_ordered_partial_convolve_f -#include "rdft.h" -#endif - -double lsx_kaiser_beta(double att, double tr_bw) -{ - if (att >= 60) { - static const double coefs[][4] = { - {-6.784957e-10,1.02856e-05,0.1087556,-0.8988365+.001}, - {-6.897885e-10,1.027433e-05,0.10876,-0.8994658+.002}, - {-1.000683e-09,1.030092e-05,0.1087677,-0.9007898+.003}, - {-3.654474e-10,1.040631e-05,0.1087085,-0.8977766+.006}, - {8.106988e-09,6.983091e-06,0.1091387,-0.9172048+.015}, - {9.519571e-09,7.272678e-06,0.1090068,-0.9140768+.025}, - {-5.626821e-09,1.342186e-05,0.1083999,-0.9065452+.05}, - {-9.965946e-08,5.073548e-05,0.1040967,-0.7672778+.085}, - {1.604808e-07,-5.856462e-05,0.1185998,-1.34824+.1}, - {-1.511964e-07,6.363034e-05,0.1064627,-0.9876665+.18}, - }; - double realm = log(tr_bw/.0005)/log(2.); - double const * c0 = coefs[range_limit( (int)realm, 0, (int)array_length(coefs)-1)]; - double const * c1 = coefs[range_limit(1+(int)realm, 0, (int)array_length(coefs)-1)]; - double b0 = ((c0[0]*att + c0[1])*att + c0[2])*att + c0[3]; - double b1 = ((c1[0]*att + c1[1])*att + c1[2])*att + c1[3]; - return b0 + (b1 - b0) * (realm - (int)realm); - } - if (att > 50 ) return .1102 * (att - 8.7); - if (att > 20.96) return .58417 * pow(att -20.96, .4) + .07886 * (att - 20.96); - return 0; -} - -double * lsx_make_lpf( - int num_taps, double Fc, double beta, double rho, double scale) -{ - int i, m = num_taps - 1; - double * h = malloc((size_t)num_taps * sizeof(*h)); - double mult = scale / lsx_bessel_I_0(beta), mult1 = 1 / (.5 * m + rho); - assert(Fc >= 0 && Fc <= 1); - lsx_debug("make_lpf(n=%i Fc=%.7g beta=%g rho=%g scale=%g)", - num_taps, Fc, beta, rho, scale); - - if (h) for (i = 0; i <= m / 2; ++i) { - double z = i - .5 * m, x = z * M_PI, y = z * mult1; - h[i] = x!=0? sin(Fc * x) / x : Fc; - h[i] *= lsx_bessel_I_0(beta * sqrt(1 - y * y)) * mult; - if (m - i != i) - h[m - i] = h[i]; - } - return h; -} - -void lsx_kaiser_params(double att, double Fc, double tr_bw, double * beta, int * num_taps) -{ - *beta = *beta < 0? lsx_kaiser_beta(att, tr_bw * .5 / Fc): *beta; - att = att < 60? (att - 7.95) / (2.285 * M_PI * 2) : - ((.0007528358-1.577737e-05**beta)**beta+.6248022)**beta+.06186902; - *num_taps = !*num_taps? (int)ceil(att/tr_bw + 1) : *num_taps; -} - -double * lsx_design_lpf( - double Fp, /* End of pass-band */ - double Fs, /* Start of stop-band */ - double Fn, /* Nyquist freq; e.g. 0.5, 1, PI */ - double att, /* Stop-band attenuation in dB */ - int * num_taps, /* 0: value will be estimated */ - int k, /* >0: number of phases; <0: num_taps = 1 (mod -k) */ - double beta) /* <0: value will be estimated */ -{ - int n = *num_taps, phases = max(k, 1), modulo = max(-k, 1); - double tr_bw, Fc, rho = phases == 1? .5 : att < 120? .63 : .75; - - lsx_debug_more("./sinctest %-12.7g %-12.7g %g 0 %-5g %i %i 50 %g %g -4 >1", - Fp, Fs, Fn, att, *num_taps, k, beta, rho); - - Fp /= fabs(Fn), Fs /= fabs(Fn); /* Normalise to Fn = 1 */ - tr_bw = .5 * (Fs - Fp); /* Transition band-width: 6dB to stop points */ - tr_bw /= phases, Fs /= phases; - tr_bw = min(tr_bw, .5 * Fs); - Fc = Fs - tr_bw; - assert(Fc - tr_bw >= 0); - lsx_kaiser_params(att, Fc, tr_bw, &beta, num_taps); - if (!n) - *num_taps = phases > 1? *num_taps / phases * phases + phases - 1 : - (*num_taps + modulo - 2) / modulo * modulo + 1; - return Fn < 0? 0 : lsx_make_lpf(*num_taps, Fc, beta, rho, (double)phases); -} - -static double safe_log(double x) -{ - assert(x >= 0); - if (x!=0) - return log(x); - lsx_debug("log(0)"); - return -26; -} - -void lsx_fir_to_phase(double * * h, int * len, int * post_len, double phase) -{ - double * pi_wraps, * work, phase1 = (phase > 50 ? 100 - phase : phase) / 50; - int i, work_len, begin, end, imp_peak = 0, peak = 0; - double imp_sum = 0, peak_imp_sum = 0; - double prev_angle2 = 0, cum_2pi = 0, prev_angle1 = 0, cum_1pi = 0; - - for (i = *len, work_len = 2 * 2 * 8; i > 1; work_len <<= 1, i >>= 1); - - work = calloc((size_t)work_len + 2, sizeof(*work)); /* +2: (UN)PACK */ - pi_wraps = malloc((((size_t)work_len + 2) / 2) * sizeof(*pi_wraps)); - - memcpy(work, *h, (size_t)*len * sizeof(*work)); - lsx_safe_rdft(work_len, 1, work); /* Cepstral: */ - LSX_UNPACK(work, work_len); - - for (i = 0; i <= work_len; i += 2) { - double angle = atan2(work[i + 1], work[i]); - double detect = 2 * M_PI; - double delta = angle - prev_angle2; - double adjust = detect * ((delta < -detect * .7) - (delta > detect * .7)); - prev_angle2 = angle; - cum_2pi += adjust; - angle += cum_2pi; - detect = M_PI; - delta = angle - prev_angle1; - adjust = detect * ((delta < -detect * .7) - (delta > detect * .7)); - prev_angle1 = angle; - cum_1pi += fabs(adjust); /* fabs for when 2pi and 1pi have combined */ - pi_wraps[i >> 1] = cum_1pi; - - work[i] = safe_log(sqrt(sqr(work[i]) + sqr(work[i + 1]))); - work[i + 1] = 0; - } - LSX_PACK(work, work_len); - lsx_safe_rdft(work_len, -1, work); - for (i = 0; i < work_len; ++i) work[i] *= 2. / work_len; - - for (i = 1; i < work_len / 2; ++i) { /* Window to reject acausal components */ - work[i] *= 2; - work[i + work_len / 2] = 0; - } - lsx_safe_rdft(work_len, 1, work); - - for (i = 2; i < work_len; i += 2) /* Interpolate between linear & min phase */ - work[i + 1] = phase1 * i / work_len * pi_wraps[work_len >> 1] + - (1 - phase1) * (work[i + 1] + pi_wraps[i >> 1]) - pi_wraps[i >> 1]; - - work[0] = exp(work[0]), work[1] = exp(work[1]); - for (i = 2; i < work_len; i += 2) { - double x = exp(work[i]); - work[i ] = x * cos(work[i + 1]); - work[i + 1] = x * sin(work[i + 1]); - } - - lsx_safe_rdft(work_len, -1, work); - for (i = 0; i < work_len; ++i) work[i] *= 2. / work_len; - - /* Find peak pos. */ - for (i = 0; i <= (int)(pi_wraps[work_len >> 1] / M_PI + .5); ++i) { - imp_sum += work[i]; - if (fabs(imp_sum) > fabs(peak_imp_sum)) { - peak_imp_sum = imp_sum; - peak = i; - } - if (work[i] > work[imp_peak]) /* For debug check only */ - imp_peak = i; - } - while (peak && fabs(work[peak-1]) > fabs(work[peak]) && work[peak-1] * work[peak] > 0) - --peak; - - if (phase1==0) - begin = 0; - else if (phase1 == 1) - begin = peak - *len / 2; - else { - begin = (int)((.997 - (2 - phase1) * .22) * *len + .5); - end = (int)((.997 + (0 - phase1) * .22) * *len + .5); - begin = peak - (begin & ~3); - end = peak + 1 + ((end + 3) & ~3); - *len = end - begin; - *h = realloc(*h, (size_t)*len * sizeof(**h)); - } - for (i = 0; i < *len; ++i) (*h)[i] = - work[(begin + (phase > 50 ? *len - 1 - i : i) + work_len) & (work_len - 1)]; - *post_len = phase > 50 ? peak - begin : begin + *len - (peak + 1); - - lsx_debug("nPI=%g peak-sum@%i=%g (val@%i=%g); len=%i post=%i (%g%%)", - pi_wraps[work_len >> 1] / M_PI, peak, peak_imp_sum, imp_peak, - work[imp_peak], *len, *post_len, 100 - 100. * *post_len / (*len - 1)); - free(pi_wraps), free(work); -} - -#define F_x(F,expr) static double F(double x) {return expr;} -F_x(sinePhi, ((2.0517e-07*x-1.1303e-04)*x+.023154)*x+.55924 ) -F_x(sinePsi, ((9.0667e-08*x-5.6114e-05)*x+.013658)*x+1.0977 ) -F_x(sinePow, log(.5)/log(sin(x*.5)) ) -#define dB_to_linear(x) exp((x) * (M_LN10 * 0.05)) - -double lsx_f_resp(double t, double a) -{ - double x; - if (t > (a <= 160? .8 : .82)) { - double a1 = a+15; - double p = .00035*a+.375; - double w = 1/(1-.597)*asin(pow((a1-10.6)/a1,1/p)); - double c = 1+asin(pow(1-a/a1,1/p))/w; - return a1*(pow(sin((c-t)*w),p)-1); - } - if (t > .5) - x = sinePsi(a), x = pow(sin((1-t) * x), sinePow(x)); - else - x = sinePhi(a), x = 1 - pow(sin(t * x), sinePow(x)); - return linear_to_dB(x); -} - -double lsx_inv_f_resp(double drop, double a) -{ - double x = sinePhi(a), s; - drop = dB_to_linear(drop); - s = drop > .5 ? 1 - drop : drop; - x = asin(pow(s, 1/sinePow(x))) / x; - return drop > .5? x : 1 -x; -} diff --git a/soxr-sys/src/filter.h b/soxr-sys/src/filter.h deleted file mode 100644 index ccb3ba836..000000000 --- a/soxr-sys/src/filter.h +++ /dev/null @@ -1,44 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#if !defined soxr_filter_included -#define soxr_filter_included - -#include "aliases.h" - -double lsx_bessel_I_0(double x); -void lsx_init_fft_cache(void); -void lsx_clear_fft_cache(void); -void lsx_init_fft_cache_f(void); -void lsx_clear_fft_cache_f(void); -#define lsx_is_power_of_2(x) !(x < 2 || (x & (x - 1))) -void lsx_safe_rdft(int len, int type, double * d); -void lsx_safe_cdft(int len, int type, double * d); -void lsx_safe_rdft_f(int len, int type, float * d); -void lsx_safe_cdft_f(int len, int type, float * d); -void lsx_ordered_convolve(int n, void * not_used, double * a, const double * b); -void lsx_ordered_convolve_f(int n, void * not_used, float * a, const float * b); -void lsx_ordered_partial_convolve(int n, double * a, const double * b); -void lsx_ordered_partial_convolve_f(int n, float * a, const float * b); - -double lsx_kaiser_beta(double att, double tr_bw); -double * lsx_make_lpf(int num_taps, double Fc, double beta, double rho, - double scale); -void lsx_kaiser_params(double att, double Fc, double tr_bw, double * beta, int * num_taps); -double * lsx_design_lpf( - double Fp, /* End of pass-band */ - double Fs, /* Start of stop-band */ - double Fn, /* Nyquist freq; e.g. 0.5, 1, PI; < 0: dummy run */ - double att, /* Stop-band attenuation in dB */ - int * num_taps, /* 0: value will be estimated */ - int k, /* >0: number of phases; <0: num_taps = 1 (mod -k) */ - double beta); /* <0: value will be estimated */ - -void lsx_fir_to_phase(double * * h, int * len, - int * post_len, double phase0); - -double lsx_f_resp(double t, double a); -double lsx_inv_f_resp(double drop, double a); -#define lsx_to_3dB(a) (1 - lsx_inv_f_resp(-3., a)) - -#endif diff --git a/soxr-sys/src/half-coefs.h b/soxr-sys/src/half-coefs.h deleted file mode 100644 index a5a0882bc..000000000 --- a/soxr-sys/src/half-coefs.h +++ /dev/null @@ -1,75 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#if defined __GNUC__ - #pragma GCC system_header -#elif defined __SUNPRO_C - #pragma disable_warn -#elif defined _MSC_VER - #pragma warning(push, 1) -#endif - -#if CORE_TYPE & CORE_SIMD_HALF - #define VALIGN vAlign -#else - #define VALIGN -#endif - -#if !(CORE_TYPE & CORE_SIMD_HALF) -static VALIGN const sample_t half_fir_coefs_7[] = { - 3.1062656496657370e-01, -8.4998810699955796e-02, 3.4007044621123500e-02, --1.2839903789829387e-02, 3.9899380181723145e-03, -8.9355202017945374e-04, - 1.0918292424806546e-04, -}; -#endif - -static VALIGN const sample_t half_fir_coefs_8[] = { - 3.1154652365332069e-01, -8.7344917685739543e-02, 3.6814458353637280e-02, --1.5189204581464479e-02, 5.4540855610738801e-03, -1.5643862626630416e-03, - 3.1816575906323303e-04, -3.4799449225005688e-05, -}; - -static VALIGN const sample_t half_fir_coefs_9[] = { - 3.1227034755311189e-01, -8.9221517147969526e-02, 3.9139704015071934e-02, --1.7250558515852023e-02, 6.8589440230476112e-03, -2.3045049636430419e-03, - 6.0963740543348963e-04, -1.1323803957431231e-04, 1.1197769991000046e-05, -}; - -#if CORE_TYPE & CORE_DBL -static VALIGN const sample_t half_fir_coefs_10[] = { - 3.1285456012000523e-01, -9.0756740799292787e-02, 4.1096398104193160e-02, --1.9066319572525220e-02, 8.1840569787684902e-03, -3.0766876176359834e-03, - 9.6396524429277980e-04, -2.3585679989922018e-04, 4.0252189026627833e-05, --3.6298196342497932e-06, -}; - -static VALIGN const sample_t half_fir_coefs_11[] = { - 3.1333588822574199e-01, -9.2035898673019811e-02, 4.2765169698406408e-02, --2.0673580894964429e-02, 9.4225426824512421e-03, -3.8563379950013192e-03, - 1.3634742159642453e-03, -3.9874150714431009e-04, 9.0586723632664806e-05, --1.4285617244076783e-05, 1.1834642946400529e-06, -}; - -static VALIGN const sample_t half_fir_coefs_12[] = { - 3.1373928463345568e-01, -9.3118180335301962e-02, 4.4205005881659098e-02, --2.2103860986973051e-02, 1.0574689371162864e-02, -4.6276428065385065e-03, - 1.7936153397572132e-03, -5.9617527051353237e-04, 1.6314517495669067e-04, --3.4555126770115446e-05, 5.0617615610782593e-06, -3.8768958592971409e-07, -}; - -static VALIGN const sample_t half_fir_coefs_13[] = { - 3.1408224847888910e-01, -9.4045836332667387e-02, 4.5459878763259978e-02, --2.3383369012219993e-02, 1.1644273044890753e-02, -5.3806714579057013e-03, - 2.2429072878264022e-03, -8.2204347506606424e-04, 2.5724946477840893e-04, --6.6072709864248668e-05, 1.3099163296288644e-05, -1.7907147069136000e-06, - 1.2750825595240592e-07, -}; -#endif - -#undef VALIGN - -#if defined __SUNPRO_C - #pragma enable_warn -#elif defined _MSC_VER - #pragma warning(pop) -#endif diff --git a/soxr-sys/src/half-fir.h b/soxr-sys/src/half-fir.h deleted file mode 100644 index 782be1bc7..000000000 --- a/soxr-sys/src/half-fir.h +++ /dev/null @@ -1,61 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -/* Decimate by 2 using a FIR with odd length (LEN). */ -/* Input must be preceded and followed by LEN >> 1 samples. */ - -#define COEFS ((sample_t const *)p->coefs) - -#if SIMD_SSE - #define BEGINNING v4_t sum, q1, q2, t - #define ____ \ - q1 = _mm_shuffle_ps(t=vLdu(input+2*j),vLdu(input+2*j+4),_MM_SHUFFLE(3,1,3,1)); \ - q2 = _mm_shuffle_ps(vLdu(input-2*j-4),vLdu(input-2*j-8),_MM_SHUFFLE(1,3,1,3)); \ - sum = vAdd(j? sum : vMul(vSet1(.5), t), vMul(vAdd(q1, q2), vLd(COEFS+j))); \ - j += 4; - #define __ \ - q1 = _mm_shuffle_ps(vLdu(input+2*j), vLdu(input-2*j-4), _MM_SHUFFLE(1,3,3,1)); \ - q2 = _mm_loadl_pi(q2, (__m64*)(COEFS+j)), q2 = _mm_movelh_ps(q2, q2); \ - sum = vAdd(sum, vMul(q1, q2)); \ - j += 2; - #define _ \ - q1 = _mm_add_ss(_mm_load_ss(input+2*j+1), _mm_load_ss(input-2*j-1)); \ - sum = _mm_add_ss(sum, _mm_mul_ss(q1, _mm_load_ss(COEFS+j))); \ - ++j; - #define END vStorSum(output+i, sum) -/* #elif SIMD_AVX; No good solution found. */ -/* #elif SIMD_NEON; No need: gcc -O3 does a good job by itself. */ -#else - #define BEGINNING sample_t sum = input[0] * .5f - #define ____ __ __ - #define __ _ _ - #define _ sum += (input[-(2*j +1)] + input[(2*j +1)]) * COEFS[j], ++j; - #define END output[i] = sum -#endif - - - -static void FUNCTION_H(stage_t * p, fifo_t * output_fifo) -{ - sample_t const * __restrict input = stage_read_p(p); - int num_in = min(stage_occupancy(p), p->input_size); - int i, num_out = (num_in + 1) >> 1; - sample_t * __restrict output = fifo_reserve(output_fifo, num_out); - - for (i = 0; i < num_out; ++i, input += 2) { - int j = 0; - BEGINNING; CONVOLVE; END; - } - fifo_read(&p->fifo, 2 * num_out, NULL); -} - - - -#undef _ -#undef __ -#undef ____ -#undef BEGINNING -#undef END -#undef COEFS -#undef CONVOLVE -#undef FUNCTION_H diff --git a/soxr-sys/src/internal.h b/soxr-sys/src/internal.h deleted file mode 100644 index 08924d500..000000000 --- a/soxr-sys/src/internal.h +++ /dev/null @@ -1,84 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#if !defined soxr_internal_included -#define soxr_internal_included - -#include "std-types.h" - - - -#undef min -#undef max -#define min(a, b) ((a) <= (b) ? (a) : (b)) -#define max(a, b) ((a) >= (b) ? (a) : (b)) - - - -#define range_limit(x, lower, upper) (min(max(x, lower), upper)) -#define linear_to_dB(x) (log10(x) * 20) -#define array_length(a) (sizeof(a)/sizeof(a[0])) -#if !defined AL -#define AL(a) array_length(a) -#endif -#define iAL(a) (int)AL(a) -#define sqr(a) ((a) * (a)) - - - -#if defined __GNUC__ - #define UNUSED __attribute__ ((unused)) -#else - #define UNUSED -#endif - - - -#if !WITH_DEV_TRACE - #ifdef __GNUC__ - void lsx_dummy(char const *, ...); - #else - static __inline void lsx_dummy(char const * x, ...) {} - #endif - #define lsx_debug if(0) lsx_dummy - #define lsx_debug_more lsx_debug -#else - extern int _soxr_trace_level; - void _soxr_trace(char const * fmt, ...); - #define lsx_debug if (_soxr_trace_level > 0) _soxr_trace - #define lsx_debug_more if (_soxr_trace_level > 1) _soxr_trace -#endif - - - -/* soxr_quality_spec_t.flags: */ - -#define SOXR_ROLLOFF_LSR2Q 3u /* Reserved for internal use. */ -#define SOXR_ROLLOFF_MASK 3u /* For masking these bits. */ -#define SOXR_MAINTAIN_3DB_PT 4u /* Reserved for internal use. */ -#define SOXR_PROMOTE_TO_LQ 64u /* Reserved for internal use. */ - - - -/* soxr_runtime_spec_t.flags: */ - -#define SOXR_STRICT_BUFFERING 4u /* Reserved for future use. */ -#define SOXR_NOSMALLINTOPT 8u /* For test purposes only. */ - - - -/* soxr_quality_spec recipe: */ - -#define SOXR_PRECISIONQ 11 /* Quality specified by the precision parameter. */ - -#define SOXR_PHASE_MASK 0x30 /* For masking these bits. */ - - - -/* soxr_quality_spec flags: */ - -#define RESET_ON_CLEAR (1u<<31) - - - -#endif diff --git a/soxr-sys/src/math-wrap.h b/soxr-sys/src/math-wrap.h deleted file mode 100644 index 8a526f13e..000000000 --- a/soxr-sys/src/math-wrap.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#if !defined soxr_math_wrap_included -#define soxr_math_wrap_included - -#include - -#if defined __STRICT_ANSI__ - #define sinf(x) (float)sin ((double)(x)) - #define cosf(x) (float)cos ((double)(x)) - #define atanf(x) (float)atan((double)(x)) -#endif - -#if !defined M_PI - #define M_PI 3.141592653589793238462643383279502884 -#endif - -#if !defined M_LN10 - #define M_LN10 2.302585092994045684017991454684364208 -#endif - -#if !defined M_SQRT2 - #define M_SQRT2 1.414213562373095048801688724209698079 -#endif - -#if !defined M_LN2 - #define M_LN2 0.693147180559945309417232121458176568 -#endif - -#endif diff --git a/soxr-sys/src/pffft-avx.h b/soxr-sys/src/pffft-avx.h deleted file mode 100644 index ace19b57d..000000000 --- a/soxr-sys/src/pffft-avx.h +++ /dev/null @@ -1,40 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -/* AVX support macros */ - -#if !defined soxr_avx_included -#define soxr_avx_included - -#include - -typedef __m256d v4sf; -#define VZERO() _mm256_setzero_pd() -#define VMUL(a,b) _mm256_mul_pd(a,b) -#define VADD(a,b) _mm256_add_pd(a,b) -#define VMADD(a,b,c) VADD(VMUL(a,b),c) /* Note: gcc -mfma will `fuse' these */ -#define VSUB(a,b) _mm256_sub_pd(a,b) -#define LD_PS1(p) _mm256_set1_pd(p) -#define INTERLEAVE2(in1, in2, out1, out2) {v4sf \ - t1 = _mm256_unpacklo_pd(in1, in2), \ - t2 = _mm256_unpackhi_pd(in1, in2); \ - out1 = _mm256_permute2f128_pd(t1,t2,0x20); \ - out2 = _mm256_permute2f128_pd(t1,t2,0x31); } -#define UNINTERLEAVE2(in1, in2, out1, out2) {v4sf \ - t1 = _mm256_permute2f128_pd(in1,in2,0x20), \ - t2 = _mm256_permute2f128_pd(in1,in2,0x31); \ - out1 = _mm256_unpacklo_pd(t1, t2); \ - out2 = _mm256_unpackhi_pd(t1, t2);} -#define VTRANSPOSE4(x0,x1,x2,x3) {v4sf \ - t0 = _mm256_shuffle_pd(x0,x1, 0x0), \ - t2 = _mm256_shuffle_pd(x0,x1, 0xf), \ - t1 = _mm256_shuffle_pd(x2,x3, 0x0), \ - t3 = _mm256_shuffle_pd(x2,x3, 0xf); \ - x0 = _mm256_permute2f128_pd(t0,t1, 0x20); \ - x1 = _mm256_permute2f128_pd(t2,t3, 0x20); \ - x2 = _mm256_permute2f128_pd(t0,t1, 0x31); \ - x3 = _mm256_permute2f128_pd(t2,t3, 0x31);} -#define VSWAPHL(a,b) _mm256_permute2f128_pd(b, a, 0x30) -#define VALIGNED(ptr) ((((long)(ptr)) & 0x1F) == 0) - -#endif diff --git a/soxr-sys/src/pffft-wrap.c b/soxr-sys/src/pffft-wrap.c deleted file mode 100644 index c920f06ea..000000000 --- a/soxr-sys/src/pffft-wrap.c +++ /dev/null @@ -1,110 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#if !defined PFFT_MACROS_ONLY - -#include "math-wrap.h" - -#if PFFFT_DOUBLE - #include "util64s.h" -#else - #include "util32s.h" - #define sin(x) sinf(x) - #define cos(x) cosf(x) -#endif - -#define pffft_aligned_free SIMD_ALIGNED_FREE -#define pffft_aligned_malloc SIMD_ALIGNED_MALLOC -#define pffft_aligned_calloc SIMD_ALIGNED_CALLOC - -#undef inline -#define inline __inline - -#endif - - - -#include "pffft.c" - - - -#if !defined PFFT_MACROS_ONLY - -#if !defined PFFFT_SIMD_DISABLE - -static void pffft_zconvolve(PFFFT_Setup *s, const float *a, const float *b, float *ab) { - int i, Ncvec = s->Ncvec; - const v4sf * /*RESTRICT*/ va = (const v4sf*)a; - const v4sf * RESTRICT vb = (const v4sf*)b; - v4sf * /*RESTRICT*/ vab = (v4sf*)ab; - - float ar, ai, br, bi; - -#ifdef __arm__ - __builtin_prefetch(va); - __builtin_prefetch(vb); - __builtin_prefetch(va+2); - __builtin_prefetch(vb+2); - __builtin_prefetch(va+4); - __builtin_prefetch(vb+4); - __builtin_prefetch(va+6); - __builtin_prefetch(vb+6); -#endif - - assert(VALIGNED(a) && VALIGNED(b) && VALIGNED(ab)); - ar = ((v4sf_union*)va)[0].f[0]; - ai = ((v4sf_union*)va)[1].f[0]; - br = ((v4sf_union*)vb)[0].f[0]; - bi = ((v4sf_union*)vb)[1].f[0]; - - for (i=0; i < Ncvec; i += 2) { - v4sf ar, ai, br, bi; - ar = va[2*i+0]; ai = va[2*i+1]; - br = vb[2*i+0]; bi = vb[2*i+1]; - VCPLXMUL(ar, ai, br, bi); - vab[2*i+0] = ar; - vab[2*i+1] = ai; - ar = va[2*i+2]; ai = va[2*i+3]; - br = vb[2*i+2]; bi = vb[2*i+3]; - VCPLXMUL(ar, ai, br, bi); - vab[2*i+2] = ar; - vab[2*i+3] = ai; - } - if (s->transform == PFFFT_REAL) { - ((v4sf_union*)vab)[0].f[0] = ar*br; - ((v4sf_union*)vab)[1].f[0] = ai*bi; - } -} - -#else - -static void pffft_zconvolve(PFFFT_Setup *s, const float *a, const float *b, float *ab) { - int i, Ncvec = s->Ncvec; - - if (s->transform == PFFFT_REAL) { - /* take care of the fftpack ordering */ - ab[0] = a[0]*b[0]; - ab[2*Ncvec-1] = a[2*Ncvec-1]*b[2*Ncvec-1]; - ++ab; ++a; ++b; --Ncvec; - } - for (i=0; i < Ncvec; ++i) { - float ar, ai, br, bi; - ar = a[2*i+0]; ai = a[2*i+1]; - br = b[2*i+0]; bi = b[2*i+1]; - VCPLXMUL(ar, ai, br, bi); - ab[2*i+0] = ar; - ab[2*i+1] = ai; - } -} - -#endif - -#include - -static void pffft_reorder_back(int length, void * setup, float * data, float * work) -{ - memcpy(work, data, (unsigned)length * sizeof(*work)); - pffft_zreorder(setup, work, data, PFFFT_BACKWARD); -} - -#endif diff --git a/soxr-sys/src/pffft.c b/soxr-sys/src/pffft.c deleted file mode 100644 index 46c841e74..000000000 --- a/soxr-sys/src/pffft.c +++ /dev/null @@ -1,1946 +0,0 @@ -/* https://bitbucket.org/jpommier/pffft/raw/483453d8f7661058e74aa4e7cf5c27bcd7887e7a/pffft.c - * with minor changes for libsoxr. */ - -/* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com ) - - Based on original fortran 77 code from FFTPACKv4 from NETLIB - (http://www.netlib.org/fftpack), authored by Dr Paul Swarztrauber - of NCAR, in 1985. - - As confirmed by the NCAR fftpack software curators, the following - FFTPACKv5 license applies to FFTPACKv4 sources. My changes are - released under the same terms. - - FFTPACK license: - - http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html - - Copyright (c) 2004 the University Corporation for Atmospheric - Research ("UCAR"). All rights reserved. Developed by NCAR's - Computational and Information Systems Laboratory, UCAR, - www.cisl.ucar.edu. - - Redistribution and use of the Software in source and binary forms, - with or without modification, is permitted provided that the - following conditions are met: - - - Neither the names of NCAR's Computational and Information Systems - Laboratory, the University Corporation for Atmospheric Research, - nor the names of its sponsors or contributors may be used to - endorse or promote products derived from this Software without - specific prior written permission. - - - Redistributions of source code must retain the above copyright - notices, this list of conditions, and the disclaimer below. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the disclaimer below in the - documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT - HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE - SOFTWARE. - - - PFFFT : a Pretty Fast FFT. - - This file is largerly based on the original FFTPACK implementation, modified in - order to take advantage of SIMD instructions of modern CPUs. -*/ - -/* - ChangeLog: - - 2011/10/02, version 1: This is the very first release of this file. -*/ - -#include "pffft.h" -#include -#include -#include -#include - -/* detect compiler flavour */ -#if defined(_MSC_VER) -# define COMPILER_MSVC -#elif defined(__GNUC__) -# define COMPILER_GCC -#endif - -#if defined(COMPILER_GCC) -# define ALWAYS_INLINE(return_type) inline return_type __attribute__ ((always_inline)) -# define NEVER_INLINE(return_type) return_type __attribute__ ((noinline)) -# define RESTRICT __restrict -# define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ varname__[size__]; -#elif defined(COMPILER_MSVC) -# define ALWAYS_INLINE(return_type) __forceinline return_type -# define NEVER_INLINE(return_type) __declspec(noinline) return_type -# define RESTRICT __restrict -# define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca(size__ * sizeof(type__)) -#endif - - -/* - vector support macros: the rest of the code is independant of - SSE/Altivec/NEON -- adding support for other platforms with 4-element - vectors should be limited to these macros -*/ - - -/* define PFFFT_SIMD_DISABLE if you want to use scalar code instead of simd code */ -/*#define PFFFT_SIMD_DISABLE */ - -/* - Altivec support macros -*/ -#if !defined(PFFFT_SIMD_DISABLE) && (defined(__ppc__) || defined(__ppc64__)) -typedef vector float v4sf; -# define SIMD_SZ 4 -# define VZERO() ((vector float) vec_splat_u8(0)) -# define VMUL(a,b) vec_madd(a,b, VZERO()) -# define VADD(a,b) vec_add(a,b) -# define VMADD(a,b,c) vec_madd(a,b,c) -# define VSUB(a,b) vec_sub(a,b) -inline v4sf ld_ps1(const float *p) { v4sf v=vec_lde(0,p); return vec_splat(vec_perm(v, v, vec_lvsl(0, p)), 0); } -# define LD_PS1(p) ld_ps1(&p) -# define INTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = vec_mergeh(in1, in2); out2 = vec_mergel(in1, in2); out1 = tmp__; } -# define UNINTERLEAVE2(in1, in2, out1, out2) { \ - vector unsigned char vperm1 = (vector unsigned char)(0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); \ - vector unsigned char vperm2 = (vector unsigned char)(4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); \ - v4sf tmp__ = vec_perm(in1, in2, vperm1); out2 = vec_perm(in1, in2, vperm2); out1 = tmp__; \ - } -# define VTRANSPOSE4(x0,x1,x2,x3) { \ - v4sf y0 = vec_mergeh(x0, x2); \ - v4sf y1 = vec_mergel(x0, x2); \ - v4sf y2 = vec_mergeh(x1, x3); \ - v4sf y3 = vec_mergel(x1, x3); \ - x0 = vec_mergeh(y0, y2); \ - x1 = vec_mergel(y0, y2); \ - x2 = vec_mergeh(y1, y3); \ - x3 = vec_mergel(y1, y3); \ - } -# define VSWAPHL(a,b) vec_perm(a,b, (vector unsigned char)(16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15)) -# define VALIGNED(ptr) ((((long)(ptr)) & 0xF) == 0) - -/* - SSE1 support macros -*/ -#elif !defined(PFFFT_SIMD_DISABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(_M_IX86)) - -# define SIMD_SZ 4 /* 4 floats by simd vector -- this is pretty much hardcoded in the preprocess/finalize functions anyway so you will have to work if you want to enable AVX with its 256-bit vectors. */ - -#if !PFFFT_DOUBLE -#include -typedef __m128 v4sf; -# define VZERO() _mm_setzero_ps() -# define VMUL(a,b) _mm_mul_ps(a,b) -# define VADD(a,b) _mm_add_ps(a,b) -# define VMADD(a,b,c) _mm_add_ps(_mm_mul_ps(a,b), c) -# define VSUB(a,b) _mm_sub_ps(a,b) -# define LD_PS1(p) _mm_set1_ps(p) -# define INTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = _mm_unpacklo_ps(in1, in2); out2 = _mm_unpackhi_ps(in1, in2); out1 = tmp__; } -# define UNINTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(2,0,2,0)); out2 = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(3,1,3,1)); out1 = tmp__; } -# define VTRANSPOSE4(x0,x1,x2,x3) _MM_TRANSPOSE4_PS(x0,x1,x2,x3) -# define VSWAPHL(a,b) _mm_shuffle_ps(b, a, _MM_SHUFFLE(3,2,1,0)) -# define VALIGNED(ptr) ((((long)(ptr)) & 0xF) == 0) - -#else -#include "pffft-avx.h" -#endif - -/* - ARM NEON support macros -*/ -#elif !defined(PFFFT_SIMD_DISABLE) && defined(__arm__) -# include -typedef float32x4_t v4sf; -# define SIMD_SZ 4 -# define VZERO() vdupq_n_f32(0) -# define VMUL(a,b) vmulq_f32(a,b) -# define VADD(a,b) vaddq_f32(a,b) -# define VMADD(a,b,c) vmlaq_f32(c,a,b) -# define VSUB(a,b) vsubq_f32(a,b) -# define LD_PS1(p) vld1q_dup_f32(&(p)) -# define INTERLEAVE2(in1, in2, out1, out2) { float32x4x2_t tmp__ = vzipq_f32(in1,in2); out1=tmp__.val[0]; out2=tmp__.val[1]; } -# define UNINTERLEAVE2(in1, in2, out1, out2) { float32x4x2_t tmp__ = vuzpq_f32(in1,in2); out1=tmp__.val[0]; out2=tmp__.val[1]; } -# define VTRANSPOSE4(x0,x1,x2,x3) { \ - float32x4x2_t t0_ = vzipq_f32(x0, x2); \ - float32x4x2_t t1_ = vzipq_f32(x1, x3); \ - float32x4x2_t u0_ = vzipq_f32(t0_.val[0], t1_.val[0]); \ - float32x4x2_t u1_ = vzipq_f32(t0_.val[1], t1_.val[1]); \ - x0 = u0_.val[0]; x1 = u0_.val[1]; x2 = u1_.val[0]; x3 = u1_.val[1]; \ - } -/* marginally faster version */ -/*# define VTRANSPOSE4(x0,x1,x2,x3) { asm("vtrn.32 %q0, %q1;\n vtrn.32 %q2,%q3\n vswp %f0,%e2\n vswp %f1,%e3" : "+w"(x0), "+w"(x1), "+w"(x2), "+w"(x3)::); } */ -# define VSWAPHL(a,b) vcombine_f32(vget_low_f32(b), vget_high_f32(a)) -# define VALIGNED(ptr) ((((long)(ptr)) & 0x3) == 0) -#else -# if !defined(PFFFT_SIMD_DISABLE) -# warning "building with simd disabled !\n"; -# define PFFFT_SIMD_DISABLE /* fallback to scalar code */ -# endif -#endif - -#if PFFFT_DOUBLE -#define float double -#endif - -/* fallback mode for situations where SSE/Altivec are not available, use scalar mode instead */ -#ifdef PFFFT_SIMD_DISABLE -typedef float v4sf; -# define SIMD_SZ 1 -# define VZERO() 0.f -# define VMUL(a,b) ((a)*(b)) -# define VADD(a,b) ((a)+(b)) -# define VMADD(a,b,c) ((a)*(b)+(c)) -# define VSUB(a,b) ((a)-(b)) -# define LD_PS1(p) (p) -# define VALIGNED(ptr) ((((long)(ptr)) & 0x3) == 0) -#endif - -/* shortcuts for complex multiplcations */ -#define VCPLXMUL(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VSUB(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VADD(ai,tmp); } -#define VCPLXMULCONJ(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VADD(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VSUB(ai,tmp); } -#ifndef SVMUL -/* multiply a scalar with a vector */ -#define SVMUL(f,v) VMUL(LD_PS1(f),v) -#endif - -#if !defined PFFT_MACROS_ONLY - -#if !defined(PFFFT_SIMD_DISABLE) -typedef union v4sf_union { - v4sf v; - float f[4]; -} v4sf_union; - -#if 0 -#include - -#define assertv4(v,f0,f1,f2,f3) assert(v.f[0] == (f0) && v.f[1] == (f1) && v.f[2] == (f2) && v.f[3] == (f3)) - -/* detect bugs with the vector support macros */ -void validate_pffft_simd(void); -void validate_pffft_simd(void) { - float f[16] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 }; - v4sf_union a0, a1, a2, a3, t, u; - memcpy(a0.f, f, 4*sizeof(float)); - memcpy(a1.f, f+4, 4*sizeof(float)); - memcpy(a2.f, f+8, 4*sizeof(float)); - memcpy(a3.f, f+12, 4*sizeof(float)); - - t = a0; u = a1; t.v = VZERO(); - printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 0, 0, 0, 0); - t.v = VADD(a1.v, a2.v); - printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 12, 14, 16, 18); - t.v = VMUL(a1.v, a2.v); - printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 45, 60, 77); - t.v = VMADD(a1.v, a2.v,a0.v); - printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 46, 62, 80); - INTERLEAVE2(a1.v,a2.v,t.v,u.v); - printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]); - assertv4(t, 4, 8, 5, 9); assertv4(u, 6, 10, 7, 11); - UNINTERLEAVE2(a1.v,a2.v,t.v,u.v); - printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]); - assertv4(t, 4, 6, 8, 10); assertv4(u, 5, 7, 9, 11); - - t.v=LD_PS1(f[15]); - printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); - assertv4(t, 15, 15, 15, 15); - t.v = VSWAPHL(a1.v, a2.v); - printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); - assertv4(t, 8, 9, 6, 7); - VTRANSPOSE4(a0.v, a1.v, a2.v, a3.v); - printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", - a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3], - a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]); - assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15); -} -#endif -#endif /*!PFFFT_SIMD_DISABLE */ - -#if 0 -/* SSE and co like 16-bytes aligned pointers */ -#define MALLOC_V4SF_ALIGNMENT 64 /* with a 64-byte alignment, we are even aligned on L2 cache lines... */ -void *pffft_aligned_malloc(size_t nb_bytes) { - void *p, *p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT); - if (!p0) return (void *) 0; - p = (void *) (((size_t) p0 + MALLOC_V4SF_ALIGNMENT) & (~((size_t) (MALLOC_V4SF_ALIGNMENT-1)))); - *((void **) p - 1) = p0; - return p; -} - -void pffft_aligned_free(void *p) { - if (p) free(*((void **) p - 1)); -} - -int pffft_simd_size() { return SIMD_SZ; } -#endif - -/* - passf2 and passb2 has been merged here, fsign = -1 for passf2, +1 for passb2 -*/ -static NEVER_INLINE(void) passf2_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1, float fsign) { - int k, i; - int l1ido = l1*ido; - if (ido <= 2) { - for (k=0; k < l1ido; k += ido, ch += ido, cc+= 2*ido) { - ch[0] = VADD(cc[0], cc[ido+0]); - ch[l1ido] = VSUB(cc[0], cc[ido+0]); - ch[1] = VADD(cc[1], cc[ido+1]); - ch[l1ido + 1] = VSUB(cc[1], cc[ido+1]); - } - } else { - for (k=0; k < l1ido; k += ido, ch += ido, cc += 2*ido) { - for (i=0; i 2); - for (k=0; k< l1ido; k += ido, cc+= 3*ido, ch +=ido) { - for (i=0; i 2); - for (k = 0; k < l1; ++k, cc += 5*ido, ch += ido) { - for (i = 0; i < ido-1; i += 2) { - ti5 = VSUB(cc_ref(i , 2), cc_ref(i , 5)); - ti2 = VADD(cc_ref(i , 2), cc_ref(i , 5)); - ti4 = VSUB(cc_ref(i , 3), cc_ref(i , 4)); - ti3 = VADD(cc_ref(i , 3), cc_ref(i , 4)); - tr5 = VSUB(cc_ref(i-1, 2), cc_ref(i-1, 5)); - tr2 = VADD(cc_ref(i-1, 2), cc_ref(i-1, 5)); - tr4 = VSUB(cc_ref(i-1, 3), cc_ref(i-1, 4)); - tr3 = VADD(cc_ref(i-1, 3), cc_ref(i-1, 4)); - ch_ref(i-1, 1) = VADD(cc_ref(i-1, 1), VADD(tr2, tr3)); - ch_ref(i , 1) = VADD(cc_ref(i , 1), VADD(ti2, ti3)); - cr2 = VADD(cc_ref(i-1, 1), VADD(SVMUL(tr11, tr2),SVMUL(tr12, tr3))); - ci2 = VADD(cc_ref(i , 1), VADD(SVMUL(tr11, ti2),SVMUL(tr12, ti3))); - cr3 = VADD(cc_ref(i-1, 1), VADD(SVMUL(tr12, tr2),SVMUL(tr11, tr3))); - ci3 = VADD(cc_ref(i , 1), VADD(SVMUL(tr12, ti2),SVMUL(tr11, ti3))); - cr5 = VADD(SVMUL(ti11, tr5), SVMUL(ti12, tr4)); - ci5 = VADD(SVMUL(ti11, ti5), SVMUL(ti12, ti4)); - cr4 = VSUB(SVMUL(ti12, tr5), SVMUL(ti11, tr4)); - ci4 = VSUB(SVMUL(ti12, ti5), SVMUL(ti11, ti4)); - dr3 = VSUB(cr3, ci4); - dr4 = VADD(cr3, ci4); - di3 = VADD(ci3, cr4); - di4 = VSUB(ci3, cr4); - dr5 = VADD(cr2, ci5); - dr2 = VSUB(cr2, ci5); - di5 = VSUB(ci2, cr5); - di2 = VADD(ci2, cr5); - wr1=wa1[i], wi1=fsign*wa1[i+1], wr2=wa2[i], wi2=fsign*wa2[i+1]; - wr3=wa3[i], wi3=fsign*wa3[i+1], wr4=wa4[i], wi4=fsign*wa4[i+1]; - VCPLXMUL(dr2, di2, LD_PS1(wr1), LD_PS1(wi1)); - ch_ref(i - 1, 2) = dr2; - ch_ref(i, 2) = di2; - VCPLXMUL(dr3, di3, LD_PS1(wr2), LD_PS1(wi2)); - ch_ref(i - 1, 3) = dr3; - ch_ref(i, 3) = di3; - VCPLXMUL(dr4, di4, LD_PS1(wr3), LD_PS1(wi3)); - ch_ref(i - 1, 4) = dr4; - ch_ref(i, 4) = di4; - VCPLXMUL(dr5, di5, LD_PS1(wr4), LD_PS1(wi4)); - ch_ref(i - 1, 5) = dr5; - ch_ref(i, 5) = di5; - } - } -#undef ch_ref -#undef cc_ref -} -#endif - -static NEVER_INLINE(void) radf2_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch, const float *wa1) { - static const float minus_one = -1.f; - int i, k, l1ido = l1*ido; - for (k=0; k < l1ido; k += ido) { - v4sf a = cc[k], b = cc[k + l1ido]; - ch[2*k] = VADD(a, b); - ch[2*(k+ido)-1] = VSUB(a, b); - } - if (ido < 2) return; - if (ido != 2) { - for (k=0; k < l1ido; k += ido) { - for (i=2; i 5) { - wa[i1-1] = wa[i-1]; - wa[i1] = wa[i]; - } - } - l1 = l2; - } -} /* cffti1 */ - - -static -v4sf *cfftf1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2, const float *wa, const int *ifac, int isign) { - v4sf *in = (v4sf*)input_readonly; - v4sf *out = (in == work2 ? work1 : work2); - int nf = ifac[1], k1; - int l1 = 1; - int iw = 0; - assert(in != out && work1 != work2); - for (k1=2; k1<=nf+1; k1++) { - int ip = ifac[k1]; - int l2 = ip*l1; - int ido = n / l2; - int idot = ido + ido; - switch (ip) { -#if 0 - case 5: { - int ix2 = iw + idot; - int ix3 = ix2 + idot; - int ix4 = ix3 + idot; - passf5_ps(idot, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], (float)isign); - } break; -#endif - case 4: { - int ix2 = iw + idot; - int ix3 = ix2 + idot; - passf4_ps(idot, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3], (float)isign); - } break; - case 2: { - passf2_ps(idot, l1, in, out, &wa[iw], (float)isign); - } break; -#if 0 - case 3: { - int ix2 = iw + idot; - passf3_ps(idot, l1, in, out, &wa[iw], &wa[ix2], (float)isign); - } break; -#endif - default: - assert(0); - } - l1 = l2; - iw += (ip - 1)*idot; - if (out == work2) { - out = work1; in = work2; - } else { - out = work2; in = work1; - } - } - - return in; /* this is in fact the output .. */ -} - - -struct PFFFT_Setup { - int N; - int Ncvec; /* nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if PFFFT_REAL) */ - int ifac[15]; - pffft_transform_t transform; - v4sf *data; /* allocated room for twiddle coefs */ - float *e; /* points into 'data' , N/4*3 elements */ - float *twiddle; /* points into 'data', N/4 elements */ -}; - -static -PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform) { - PFFFT_Setup *s = (PFFFT_Setup*)malloc(sizeof(PFFFT_Setup)); - int k, m; - if (!s) return s; - /* unfortunately, the fft size must be a multiple of 16 for complex FFTs - and 32 for real FFTs -- a lot of stuff would need to be rewritten to - handle other cases (or maybe just switch to a scalar fft, I don't know..) */ - if (transform == PFFFT_REAL) { assert((N%(2*SIMD_SZ*SIMD_SZ))==0 && N>0); } - if (transform == PFFFT_COMPLEX) { assert((N%(SIMD_SZ*SIMD_SZ))==0 && N>0); } - /*assert((N % 32) == 0); */ - s->N = N; - s->transform = transform; - /* nb of complex simd vectors */ - s->Ncvec = (transform == PFFFT_REAL ? N/2 : N)/SIMD_SZ; - s->data = (v4sf*)pffft_aligned_malloc(2*(size_t)s->Ncvec * sizeof(v4sf)); - if (!s->data) {free(s); return 0;} - s->e = (float*)s->data; - s->twiddle = (float*)(s->data + (2*s->Ncvec*(SIMD_SZ-1))/SIMD_SZ); - - if (transform == PFFFT_REAL) { - for (k=0; k < s->Ncvec; ++k) { - int i = k/SIMD_SZ; - int j = k%SIMD_SZ; - for (m=0; m < SIMD_SZ-1; ++m) { - float A = (float)(-2*M_PI*(m+1)*k / N); - s->e[(2*(i*3 + m) + 0) * SIMD_SZ + j] = cos(A); - s->e[(2*(i*3 + m) + 1) * SIMD_SZ + j] = sin(A); - } - } - rffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac); - } else { - for (k=0; k < s->Ncvec; ++k) { - int i = k/SIMD_SZ; - int j = k%SIMD_SZ; - for (m=0; m < SIMD_SZ-1; ++m) { - float A = (float)(-2*M_PI*(m+1)*k / N); - s->e[(2*(i*3 + m) + 0)*SIMD_SZ + j] = cos(A); - s->e[(2*(i*3 + m) + 1)*SIMD_SZ + j] = sin(A); - } - } - cffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac); - } - - /* check that N is decomposable with allowed prime factors */ - for (k=0, m=1; k < s->ifac[1]; ++k) { m *= s->ifac[2+k]; } - if (m != N/SIMD_SZ) { - pffft_destroy_setup(s); s = 0; - } - - return s; -} - - -static -void pffft_destroy_setup(PFFFT_Setup *s) { - if (!s) return; - pffft_aligned_free(s->data); - free(s); -} - -#if !defined(PFFFT_SIMD_DISABLE) - -/* [0 0 1 2 3 4 5 6 7 8] -> [0 8 7 6 5 4 3 2 1] */ -static void reversed_copy(int N, const v4sf *in, int in_stride, v4sf *out) { - v4sf g0, g1; - int k; - INTERLEAVE2(in[0], in[1], g0, g1); in += in_stride; - - *--out = VSWAPHL(g0, g1); /* [g0l, g0h], [g1l g1h] -> [g1l, g0h] */ - for (k=1; k < N; ++k) { - v4sf h0, h1; - INTERLEAVE2(in[0], in[1], h0, h1); in += in_stride; - *--out = VSWAPHL(g1, h0); - *--out = VSWAPHL(h0, h1); - g1 = h1; - } - *--out = VSWAPHL(g1, g0); -} - -static void unreversed_copy(int N, const v4sf *in, v4sf *out, int out_stride) { - v4sf g0, g1, h0, h1; - int k; - g0 = g1 = in[0]; ++in; - for (k=1; k < N; ++k) { - h0 = *in++; h1 = *in++; - g1 = VSWAPHL(g1, h0); - h0 = VSWAPHL(h0, h1); - UNINTERLEAVE2(h0, g1, out[0], out[1]); out += out_stride; - g1 = h1; - } - h0 = *in++; h1 = g0; - g1 = VSWAPHL(g1, h0); - h0 = VSWAPHL(h0, h1); - UNINTERLEAVE2(h0, g1, out[0], out[1]); -} - -static -void pffft_zreorder(PFFFT_Setup *setup, const float *in, float *out, pffft_direction_t direction) { - int k, N = setup->N, Ncvec = setup->Ncvec; - const v4sf *vin = (const v4sf*)in; - v4sf *vout = (v4sf*)out; - assert(in != out); - if (setup->transform == PFFFT_REAL) { - int k, dk = N/32; - if (direction == PFFFT_FORWARD) { - for (k=0; k < dk; ++k) { - INTERLEAVE2(vin[k*8 + 0], vin[k*8 + 1], vout[2*(0*dk + k) + 0], vout[2*(0*dk + k) + 1]); - INTERLEAVE2(vin[k*8 + 4], vin[k*8 + 5], vout[2*(2*dk + k) + 0], vout[2*(2*dk + k) + 1]); - } - reversed_copy(dk, vin+2, 8, (v4sf*)(out + N/2)); - reversed_copy(dk, vin+6, 8, (v4sf*)(out + N)); - } else { - for (k=0; k < dk; ++k) { - UNINTERLEAVE2(vin[2*(0*dk + k) + 0], vin[2*(0*dk + k) + 1], vout[k*8 + 0], vout[k*8 + 1]); - UNINTERLEAVE2(vin[2*(2*dk + k) + 0], vin[2*(2*dk + k) + 1], vout[k*8 + 4], vout[k*8 + 5]); - } - unreversed_copy(dk, (v4sf*)(in + N/4), (v4sf*)(out + N - 6*SIMD_SZ), -8); - unreversed_copy(dk, (v4sf*)(in + 3*N/4), (v4sf*)(out + N - 2*SIMD_SZ), -8); - } - } else { - if (direction == PFFFT_FORWARD) { - for (k=0; k < Ncvec; ++k) { - int kk = (k/4) + (k%4)*(Ncvec/4); - INTERLEAVE2(vin[k*2], vin[k*2+1], vout[kk*2], vout[kk*2+1]); - } - } else { - for (k=0; k < Ncvec; ++k) { - int kk = (k/4) + (k%4)*(Ncvec/4); - UNINTERLEAVE2(vin[kk*2], vin[kk*2+1], vout[k*2], vout[k*2+1]); - } - } - } -} - -static -void pffft_cplx_finalize(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { - int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */ - v4sf r0, i0, r1, i1, r2, i2, r3, i3; - v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1; - assert(in != out); - for (k=0; k < dk; ++k) { - r0 = in[8*k+0]; i0 = in[8*k+1]; - r1 = in[8*k+2]; i1 = in[8*k+3]; - r2 = in[8*k+4]; i2 = in[8*k+5]; - r3 = in[8*k+6]; i3 = in[8*k+7]; - VTRANSPOSE4(r0,r1,r2,r3); - VTRANSPOSE4(i0,i1,i2,i3); - VCPLXMUL(r1,i1,e[k*6+0],e[k*6+1]); - VCPLXMUL(r2,i2,e[k*6+2],e[k*6+3]); - VCPLXMUL(r3,i3,e[k*6+4],e[k*6+5]); - - sr0 = VADD(r0,r2); dr0 = VSUB(r0, r2); - sr1 = VADD(r1,r3); dr1 = VSUB(r1, r3); - si0 = VADD(i0,i2); di0 = VSUB(i0, i2); - si1 = VADD(i1,i3); di1 = VSUB(i1, i3); - - /* - transformation for each column is: - - [1 1 1 1 0 0 0 0] [r0] - [1 0 -1 0 0 -1 0 1] [r1] - [1 -1 1 -1 0 0 0 0] [r2] - [1 0 -1 0 0 1 0 -1] [r3] - [0 0 0 0 1 1 1 1] * [i0] - [0 1 0 -1 1 0 -1 0] [i1] - [0 0 0 0 1 -1 1 -1] [i2] - [0 -1 0 1 1 0 -1 0] [i3] - */ - - r0 = VADD(sr0, sr1); i0 = VADD(si0, si1); - r1 = VADD(dr0, di1); i1 = VSUB(di0, dr1); - r2 = VSUB(sr0, sr1); i2 = VSUB(si0, si1); - r3 = VSUB(dr0, di1); i3 = VADD(di0, dr1); - - *out++ = r0; *out++ = i0; *out++ = r1; *out++ = i1; - *out++ = r2; *out++ = i2; *out++ = r3; *out++ = i3; - } -} - -static -void pffft_cplx_preprocess(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { - int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */ - v4sf r0, i0, r1, i1, r2, i2, r3, i3; - v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1; - assert(in != out); - for (k=0; k < dk; ++k) { - r0 = in[8*k+0]; i0 = in[8*k+1]; - r1 = in[8*k+2]; i1 = in[8*k+3]; - r2 = in[8*k+4]; i2 = in[8*k+5]; - r3 = in[8*k+6]; i3 = in[8*k+7]; - - sr0 = VADD(r0,r2); dr0 = VSUB(r0, r2); - sr1 = VADD(r1,r3); dr1 = VSUB(r1, r3); - si0 = VADD(i0,i2); di0 = VSUB(i0, i2); - si1 = VADD(i1,i3); di1 = VSUB(i1, i3); - - r0 = VADD(sr0, sr1); i0 = VADD(si0, si1); - r1 = VSUB(dr0, di1); i1 = VADD(di0, dr1); - r2 = VSUB(sr0, sr1); i2 = VSUB(si0, si1); - r3 = VADD(dr0, di1); i3 = VSUB(di0, dr1); - - VCPLXMULCONJ(r1,i1,e[k*6+0],e[k*6+1]); - VCPLXMULCONJ(r2,i2,e[k*6+2],e[k*6+3]); - VCPLXMULCONJ(r3,i3,e[k*6+4],e[k*6+5]); - - VTRANSPOSE4(r0,r1,r2,r3); - VTRANSPOSE4(i0,i1,i2,i3); - - *out++ = r0; *out++ = i0; *out++ = r1; *out++ = i1; - *out++ = r2; *out++ = i2; *out++ = r3; *out++ = i3; - } -} - - -static ALWAYS_INLINE(void) pffft_real_finalize_4x4(const v4sf *in0, const v4sf *in1, const v4sf *in, - const v4sf *e, v4sf *out) { - v4sf r0, i0, r1, i1, r2, i2, r3, i3; - v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1; - r0 = *in0; i0 = *in1; - r1 = *in++; i1 = *in++; r2 = *in++; i2 = *in++; r3 = *in++; i3 = *in++; - VTRANSPOSE4(r0,r1,r2,r3); - VTRANSPOSE4(i0,i1,i2,i3); - - /* - transformation for each column is: - - [1 1 1 1 0 0 0 0] [r0] - [1 0 -1 0 0 -1 0 1] [r1] - [1 0 -1 0 0 1 0 -1] [r2] - [1 -1 1 -1 0 0 0 0] [r3] - [0 0 0 0 1 1 1 1] * [i0] - [0 -1 0 1 -1 0 1 0] [i1] - [0 -1 0 1 1 0 -1 0] [i2] - [0 0 0 0 -1 1 -1 1] [i3] - */ - - /*cerr << "matrix initial, before e , REAL:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n"; */ - /*cerr << "matrix initial, before e, IMAG :\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n"; */ - - VCPLXMUL(r1,i1,e[0],e[1]); - VCPLXMUL(r2,i2,e[2],e[3]); - VCPLXMUL(r3,i3,e[4],e[5]); - - /*cerr << "matrix initial, real part:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n"; */ - /*cerr << "matrix initial, imag part:\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n"; */ - - sr0 = VADD(r0,r2); dr0 = VSUB(r0,r2); - sr1 = VADD(r1,r3); dr1 = VSUB(r3,r1); - si0 = VADD(i0,i2); di0 = VSUB(i0,i2); - si1 = VADD(i1,i3); di1 = VSUB(i3,i1); - - r0 = VADD(sr0, sr1); - r3 = VSUB(sr0, sr1); - i0 = VADD(si0, si1); - i3 = VSUB(si1, si0); - r1 = VADD(dr0, di1); - r2 = VSUB(dr0, di1); - i1 = VSUB(dr1, di0); - i2 = VADD(dr1, di0); - - *out++ = r0; - *out++ = i0; - *out++ = r1; - *out++ = i1; - *out++ = r2; - *out++ = i2; - *out++ = r3; - *out++ = i3; - -} - -static NEVER_INLINE(void) pffft_real_finalize(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { - int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */ - /* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */ - - v4sf_union cr, ci, *uout = (v4sf_union*)out; - v4sf save = in[7], zero=VZERO(); - float xr0, xi0, xr1, xi1, xr2, xi2, xr3, xi3; - static const float s = (float)(M_SQRT2/2); - - cr.v = in[0]; ci.v = in[Ncvec*2-1]; - assert(in != out); - pffft_real_finalize_4x4(&zero, &zero, in+1, e, out); - - /* - [cr0 cr1 cr2 cr3 ci0 ci1 ci2 ci3] - - [Xr(1)] ] [1 1 1 1 0 0 0 0] - [Xr(N/4) ] [0 0 0 0 1 s 0 -s] - [Xr(N/2) ] [1 0 -1 0 0 0 0 0] - [Xr(3N/4)] [0 0 0 0 1 -s 0 s] - [Xi(1) ] [1 -1 1 -1 0 0 0 0] - [Xi(N/4) ] [0 0 0 0 0 -s -1 -s] - [Xi(N/2) ] [0 -1 0 1 0 0 0 0] - [Xi(3N/4)] [0 0 0 0 0 -s 1 -s] - */ - - xr0=(cr.f[0]+cr.f[2]) + (cr.f[1]+cr.f[3]); uout[0].f[0] = xr0; - xi0=(cr.f[0]+cr.f[2]) - (cr.f[1]+cr.f[3]); uout[1].f[0] = xi0; - xr2=(cr.f[0]-cr.f[2]); uout[4].f[0] = xr2; - xi2=(cr.f[3]-cr.f[1]); uout[5].f[0] = xi2; - xr1= ci.f[0] + s*(ci.f[1]-ci.f[3]); uout[2].f[0] = xr1; - xi1=-ci.f[2] - s*(ci.f[1]+ci.f[3]); uout[3].f[0] = xi1; - xr3= ci.f[0] - s*(ci.f[1]-ci.f[3]); uout[6].f[0] = xr3; - xi3= ci.f[2] - s*(ci.f[1]+ci.f[3]); uout[7].f[0] = xi3; - - for (k=1; k < dk; ++k) { - v4sf save_next = in[8*k+7]; - pffft_real_finalize_4x4(&save, &in[8*k+0], in + 8*k+1, - e + k*6, out + k*8); - save = save_next; - } - -} - -static ALWAYS_INLINE(void) pffft_real_preprocess_4x4(const v4sf *in, - const v4sf *e, v4sf *out, int first) { - v4sf r0=in[0], i0=in[1], r1=in[2], i1=in[3], r2=in[4], i2=in[5], r3=in[6], i3=in[7]; - /* - transformation for each column is: - - [1 1 1 1 0 0 0 0] [r0] - [1 0 0 -1 0 -1 -1 0] [r1] - [1 -1 -1 1 0 0 0 0] [r2] - [1 0 0 -1 0 1 1 0] [r3] - [0 0 0 0 1 -1 1 -1] * [i0] - [0 -1 1 0 1 0 0 1] [i1] - [0 0 0 0 1 1 -1 -1] [i2] - [0 1 -1 0 1 0 0 1] [i3] - */ - - v4sf sr0 = VADD(r0,r3), dr0 = VSUB(r0,r3); - v4sf sr1 = VADD(r1,r2), dr1 = VSUB(r1,r2); - v4sf si0 = VADD(i0,i3), di0 = VSUB(i0,i3); - v4sf si1 = VADD(i1,i2), di1 = VSUB(i1,i2); - - r0 = VADD(sr0, sr1); - r2 = VSUB(sr0, sr1); - r1 = VSUB(dr0, si1); - r3 = VADD(dr0, si1); - i0 = VSUB(di0, di1); - i2 = VADD(di0, di1); - i1 = VSUB(si0, dr1); - i3 = VADD(si0, dr1); - - VCPLXMULCONJ(r1,i1,e[0],e[1]); - VCPLXMULCONJ(r2,i2,e[2],e[3]); - VCPLXMULCONJ(r3,i3,e[4],e[5]); - - VTRANSPOSE4(r0,r1,r2,r3); - VTRANSPOSE4(i0,i1,i2,i3); - - if (!first) { - *out++ = r0; - *out++ = i0; - } - *out++ = r1; - *out++ = i1; - *out++ = r2; - *out++ = i2; - *out++ = r3; - *out++ = i3; -} - -static NEVER_INLINE(void) pffft_real_preprocess(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { - int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */ - /* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */ - - v4sf_union Xr, Xi, *uout = (v4sf_union*)out; - float cr0, ci0, cr1, ci1, cr2, ci2, cr3, ci3; - static const float s = (float)M_SQRT2; - assert(in != out); - for (k=0; k < 4; ++k) { - Xr.f[k] = ((float*)in)[8*k]; - Xi.f[k] = ((float*)in)[8*k+4]; - } - - pffft_real_preprocess_4x4(in, e, out+1, 1); /* will write only 6 values */ - - /* - [Xr0 Xr1 Xr2 Xr3 Xi0 Xi1 Xi2 Xi3] - - [cr0] [1 0 2 0 1 0 0 0] - [cr1] [1 0 0 0 -1 0 -2 0] - [cr2] [1 0 -2 0 1 0 0 0] - [cr3] [1 0 0 0 -1 0 2 0] - [ci0] [0 2 0 2 0 0 0 0] - [ci1] [0 s 0 -s 0 -s 0 -s] - [ci2] [0 0 0 0 0 -2 0 2] - [ci3] [0 -s 0 s 0 -s 0 -s] - */ - for (k=1; k < dk; ++k) { - pffft_real_preprocess_4x4(in+8*k, e + k*6, out-1+k*8, 0); - } - - cr0=(Xr.f[0]+Xi.f[0]) + 2*Xr.f[2]; uout[0].f[0] = cr0; - cr1=(Xr.f[0]-Xi.f[0]) - 2*Xi.f[2]; uout[0].f[1] = cr1; - cr2=(Xr.f[0]+Xi.f[0]) - 2*Xr.f[2]; uout[0].f[2] = cr2; - cr3=(Xr.f[0]-Xi.f[0]) + 2*Xi.f[2]; uout[0].f[3] = cr3; - ci0= 2*(Xr.f[1]+Xr.f[3]); uout[2*Ncvec-1].f[0] = ci0; - ci1= s*(Xr.f[1]-Xr.f[3]) - s*(Xi.f[1]+Xi.f[3]); uout[2*Ncvec-1].f[1] = ci1; - ci2= 2*(Xi.f[3]-Xi.f[1]); uout[2*Ncvec-1].f[2] = ci2; - ci3=-s*(Xr.f[1]-Xr.f[3]) - s*(Xi.f[1]+Xi.f[3]); uout[2*Ncvec-1].f[3] = ci3; -} - - -static -void pffft_transform_internal(PFFFT_Setup *setup, const float *finput, float *foutput, v4sf *scratch, - pffft_direction_t direction, int ordered) { - int k, Ncvec = setup->Ncvec; - int nf_odd = (setup->ifac[1] & 1); - -#if 0 - /* temporary buffer is allocated on the stack if the scratch pointer is NULL */ - int stack_allocate = (scratch == 0 ? Ncvec*2 : 1); - VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate); -#endif - - const v4sf *vinput = (const v4sf*)finput; - v4sf *voutput = (v4sf*)foutput; - v4sf *buff[2]; - int ib = (nf_odd ^ ordered ? 1 : 0); - buff[0] = voutput; buff[1] = scratch; - - assert(VALIGNED(finput) && VALIGNED(foutput)); - - /*assert(finput != foutput); */ - if (direction == PFFFT_FORWARD) { - ib = !ib; - if (setup->transform == PFFFT_REAL) { - ib = (rfftf1_ps(Ncvec*2, vinput, buff[ib], buff[!ib], - setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1); - pffft_real_finalize(Ncvec, buff[ib], buff[!ib], (v4sf*)setup->e); - } else { - v4sf *tmp = buff[ib]; - for (k=0; k < Ncvec; ++k) { - UNINTERLEAVE2(vinput[k*2], vinput[k*2+1], tmp[k*2], tmp[k*2+1]); - } - ib = (cfftf1_ps(Ncvec, buff[ib], buff[!ib], buff[ib], - setup->twiddle, &setup->ifac[0], -1) == buff[0] ? 0 : 1); - pffft_cplx_finalize(Ncvec, buff[ib], buff[!ib], (v4sf*)setup->e); - } - if (ordered) { - pffft_zreorder(setup, (float*)buff[!ib], (float*)buff[ib], PFFFT_FORWARD); - } else ib = !ib; - } else { - if (vinput == buff[ib]) { - ib = !ib; /* may happen when finput == foutput */ - } - if (ordered) { - pffft_zreorder(setup, (float*)vinput, (float*)buff[ib], PFFFT_BACKWARD); - vinput = buff[ib]; ib = !ib; - } - if (setup->transform == PFFFT_REAL) { - pffft_real_preprocess(Ncvec, vinput, buff[ib], (v4sf*)setup->e); - ib = (rfftb1_ps(Ncvec*2, buff[ib], buff[0], buff[1], - setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1); - } else { - pffft_cplx_preprocess(Ncvec, vinput, buff[ib], (v4sf*)setup->e); - ib = (cfftf1_ps(Ncvec, buff[ib], buff[0], buff[1], - setup->twiddle, &setup->ifac[0], +1) == buff[0] ? 0 : 1); - for (k=0; k < Ncvec; ++k) { - INTERLEAVE2(buff[ib][k*2], buff[ib][k*2+1], buff[ib][k*2], buff[ib][k*2+1]); - } - } - } - - if (buff[ib] != voutput) { - /* extra copy required -- this situation should only happen when finput == foutput */ - assert(finput==foutput); - for (k=0; k < Ncvec; ++k) { - v4sf a = buff[ib][2*k], b = buff[ib][2*k+1]; - voutput[2*k] = a; voutput[2*k+1] = b; - } - ib = !ib; - } - assert(buff[ib] == voutput); -} - -#if 0 -void pffft_zconvolve_accumulate(PFFFT_Setup *s, const float *a, const float *b, float *ab, float scaling) { - int Ncvec = s->Ncvec; - const v4sf * RESTRICT va = (const v4sf*)a; - const v4sf * RESTRICT vb = (const v4sf*)b; - v4sf * RESTRICT vab = (v4sf*)ab; - -#ifdef __arm__ - __builtin_prefetch(va); - __builtin_prefetch(vb); - __builtin_prefetch(vab); - __builtin_prefetch(va+2); - __builtin_prefetch(vb+2); - __builtin_prefetch(vab+2); - __builtin_prefetch(va+4); - __builtin_prefetch(vb+4); - __builtin_prefetch(vab+4); - __builtin_prefetch(va+6); - __builtin_prefetch(vb+6); - __builtin_prefetch(vab+6); -# ifndef __clang__ -# define ZCONVOLVE_USING_INLINE_NEON_ASM -# endif -#endif - - float ar, ai, br, bi, abr, abi; -#ifndef ZCONVOLVE_USING_INLINE_ASM - v4sf vscal = LD_PS1(scaling); - int i; -#endif - - assert(VALIGNED(a) && VALIGNED(b) && VALIGNED(ab)); - ar = ((v4sf_union*)va)[0].f[0]; - ai = ((v4sf_union*)va)[1].f[0]; - br = ((v4sf_union*)vb)[0].f[0]; - bi = ((v4sf_union*)vb)[1].f[0]; - abr = ((v4sf_union*)vab)[0].f[0]; - abi = ((v4sf_union*)vab)[1].f[0]; - -#ifdef ZCONVOLVE_USING_INLINE_ASM /* inline asm version, unfortunately miscompiled by clang 3.2, at least on ubuntu.. so this will be restricted to gcc */ - const float *a_ = a, *b_ = b; float *ab_ = ab; - int N = Ncvec; - asm volatile("mov r8, %2 \n" - "vdup.f32 q15, %4 \n" - "1: \n" - "pld [%0,#64] \n" - "pld [%1,#64] \n" - "pld [%2,#64] \n" - "pld [%0,#96] \n" - "pld [%1,#96] \n" - "pld [%2,#96] \n" - "vld1.f32 {q0,q1}, [%0,:128]! \n" - "vld1.f32 {q4,q5}, [%1,:128]! \n" - "vld1.f32 {q2,q3}, [%0,:128]! \n" - "vld1.f32 {q6,q7}, [%1,:128]! \n" - "vld1.f32 {q8,q9}, [r8,:128]! \n" - - "vmul.f32 q10, q0, q4 \n" - "vmul.f32 q11, q0, q5 \n" - "vmul.f32 q12, q2, q6 \n" - "vmul.f32 q13, q2, q7 \n" - "vmls.f32 q10, q1, q5 \n" - "vmla.f32 q11, q1, q4 \n" - "vld1.f32 {q0,q1}, [r8,:128]! \n" - "vmls.f32 q12, q3, q7 \n" - "vmla.f32 q13, q3, q6 \n" - "vmla.f32 q8, q10, q15 \n" - "vmla.f32 q9, q11, q15 \n" - "vmla.f32 q0, q12, q15 \n" - "vmla.f32 q1, q13, q15 \n" - "vst1.f32 {q8,q9},[%2,:128]! \n" - "vst1.f32 {q0,q1},[%2,:128]! \n" - "subs %3, #2 \n" - "bne 1b \n" - : "+r"(a_), "+r"(b_), "+r"(ab_), "+r"(N) : "r"(scaling) : "r8", "q0","q1","q2","q3","q4","q5","q6","q7","q8","q9", "q10","q11","q12","q13","q15","memory"); -#else /* default routine, works fine for non-arm cpus with current compilers */ - for (i=0; i < Ncvec; i += 2) { - v4sf ar, ai, br, bi; - ar = va[2*i+0]; ai = va[2*i+1]; - br = vb[2*i+0]; bi = vb[2*i+1]; - VCPLXMUL(ar, ai, br, bi); - vab[2*i+0] = VMADD(ar, vscal, vab[2*i+0]); - vab[2*i+1] = VMADD(ai, vscal, vab[2*i+1]); - ar = va[2*i+2]; ai = va[2*i+3]; - br = vb[2*i+2]; bi = vb[2*i+3]; - VCPLXMUL(ar, ai, br, bi); - vab[2*i+2] = VMADD(ar, vscal, vab[2*i+2]); - vab[2*i+3] = VMADD(ai, vscal, vab[2*i+3]); - } -#endif - if (s->transform == PFFFT_REAL) { - ((v4sf_union*)vab)[0].f[0] = abr + ar*br*scaling; - ((v4sf_union*)vab)[1].f[0] = abi + ai*bi*scaling; - } -} -#endif - - -#else /* defined(PFFFT_SIMD_DISABLE) */ - -/* standard routine using scalar floats, without SIMD stuff. */ - -#define pffft_zreorder_nosimd pffft_zreorder -static -void pffft_zreorder_nosimd(PFFFT_Setup *setup, const float *in, float *out, pffft_direction_t direction) { - int k, N = setup->N; - if (setup->transform == PFFFT_COMPLEX) { - for (k=0; k < 2*N; ++k) out[k] = in[k]; - return; - } - else if (direction == PFFFT_FORWARD) { - float x_N = in[N-1]; - for (k=N-1; k > 1; --k) out[k] = in[k-1]; - out[0] = in[0]; - out[1] = x_N; - } else { - float x_N = in[1]; - for (k=1; k < N-1; ++k) out[k] = in[k+1]; - out[0] = in[0]; - out[N-1] = x_N; - } -} - -#define pffft_transform_internal_nosimd pffft_transform_internal -static -void pffft_transform_internal_nosimd(PFFFT_Setup *setup, const float *input, float *output, float *scratch, - pffft_direction_t direction, int ordered) { - int Ncvec = setup->Ncvec; - int nf_odd = (setup->ifac[1] & 1); - -#if 0 - /* temporary buffer is allocated on the stack if the scratch pointer is NULL */ - int stack_allocate = (scratch == 0 ? Ncvec*2 : 1); - VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate); -#endif - float *buff[2]; - int ib; - /* if (scratch == 0) scratch = scratch_on_stack; */ - buff[0] = output; buff[1] = scratch; - - if (setup->transform == PFFFT_COMPLEX) ordered = 0; /* it is always ordered. */ - ib = (nf_odd ^ ordered ? 1 : 0); - - if (direction == PFFFT_FORWARD) { - if (setup->transform == PFFFT_REAL) { - ib = (rfftf1_ps(Ncvec*2, input, buff[ib], buff[!ib], - setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1); - } else { - ib = (cfftf1_ps(Ncvec, input, buff[ib], buff[!ib], - setup->twiddle, &setup->ifac[0], -1) == buff[0] ? 0 : 1); - } - if (ordered) { - pffft_zreorder(setup, buff[ib], buff[!ib], PFFFT_FORWARD); ib = !ib; - } - } else { - if (input == buff[ib]) { - ib = !ib; /* may happen when finput == foutput */ - } - if (ordered) { - pffft_zreorder(setup, input, buff[!ib], PFFFT_BACKWARD); - input = buff[!ib]; - } - if (setup->transform == PFFFT_REAL) { - ib = (rfftb1_ps(Ncvec*2, input, buff[ib], buff[!ib], - setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1); - } else { - ib = (cfftf1_ps(Ncvec, input, buff[ib], buff[!ib], - setup->twiddle, &setup->ifac[0], +1) == buff[0] ? 0 : 1); - } - } - if (buff[ib] != output) { - int k; - /* extra copy required -- this situation should happens only when finput == foutput */ - assert(input==output); - for (k=0; k < Ncvec; ++k) { - float a = buff[ib][2*k], b = buff[ib][2*k+1]; - output[2*k] = a; output[2*k+1] = b; - } - ib = !ib; - } - assert(buff[ib] == output); -} - -#if 0 -#define pffft_zconvolve_accumulate_nosimd pffft_zconvolve_accumulate -void pffft_zconvolve_accumulate_nosimd(PFFFT_Setup *s, const float *a, const float *b, - float *ab, float scaling) { - int i, Ncvec = s->Ncvec; - - if (s->transform == PFFFT_REAL) { - /* take care of the fftpack ordering */ - ab[0] += a[0]*b[0]*scaling; - ab[2*Ncvec-1] += a[2*Ncvec-1]*b[2*Ncvec-1]*scaling; - ++ab; ++a; ++b; --Ncvec; - } - for (i=0; i < Ncvec; ++i) { - float ar, ai, br, bi; - ar = a[2*i+0]; ai = a[2*i+1]; - br = b[2*i+0]; bi = b[2*i+1]; - VCPLXMUL(ar, ai, br, bi); - ab[2*i+0] += ar*scaling; - ab[2*i+1] += ai*scaling; - } -} -#endif - -#endif /* defined(PFFFT_SIMD_DISABLE) */ - -static -void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction) { - pffft_transform_internal(setup, input, output, (v4sf*)work, direction, 0); -} - -static -void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction) { - pffft_transform_internal(setup, input, output, (v4sf*)work, direction, 1); -} - -#endif diff --git a/soxr-sys/src/pffft.h b/soxr-sys/src/pffft.h deleted file mode 100644 index 63522cacb..000000000 --- a/soxr-sys/src/pffft.h +++ /dev/null @@ -1,197 +0,0 @@ -/* https://bitbucket.org/jpommier/pffft/raw/483453d8f7661058e74aa4e7cf5c27bcd7887e7a/pffft.h - * with minor changes for libsoxr. */ - -#if !defined PFFT_MACROS_ONLY - -/* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com ) - - Based on original fortran 77 code from FFTPACKv4 from NETLIB, - authored by Dr Paul Swarztrauber of NCAR, in 1985. - - As confirmed by the NCAR fftpack software curators, the following - FFTPACKv5 license applies to FFTPACKv4 sources. My changes are - released under the same terms. - - FFTPACK license: - - http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html - - Copyright (c) 2004 the University Corporation for Atmospheric - Research ("UCAR"). All rights reserved. Developed by NCAR's - Computational and Information Systems Laboratory, UCAR, - www.cisl.ucar.edu. - - Redistribution and use of the Software in source and binary forms, - with or without modification, is permitted provided that the - following conditions are met: - - - Neither the names of NCAR's Computational and Information Systems - Laboratory, the University Corporation for Atmospheric Research, - nor the names of its sponsors or contributors may be used to - endorse or promote products derived from this Software without - specific prior written permission. - - - Redistributions of source code must retain the above copyright - notices, this list of conditions, and the disclaimer below. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the disclaimer below in the - documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT - HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE - SOFTWARE. -*/ - -/* - PFFFT : a Pretty Fast FFT. - - This is basically an adaptation of the single precision fftpack - (v4) as found on netlib taking advantage of SIMD instruction found - on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON). - - For architectures where no SIMD instruction is available, the code - falls back to a scalar version. - - Restrictions: - - - 1D transforms only, with 32-bit single precision. - - - supports only transforms for inputs of length N of the form - N=(2^a)*(3^b)*(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128, - 144, 160, etc are all acceptable lengths). Performance is best for - 128<=N<=8192. - - - all (float*) pointers in the functions below are expected to - have an "simd-compatible" alignment, that is 16 bytes on x86 and - powerpc CPUs. - - You can allocate such buffers with the functions - pffft_aligned_malloc / pffft_aligned_free (or with stuff like - posix_memalign..) - -*/ - -#ifndef PFFFT_H -#define PFFFT_H - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#if PFFFT_DOUBLE -#define float double -#endif - - /* opaque struct holding internal stuff (precomputed twiddle factors) - this struct can be shared by many threads as it contains only - read-only data. - */ - typedef struct PFFFT_Setup PFFFT_Setup; - - /* direction of the transform */ - typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t; - - /* type of transform */ - typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t; - - /* - prepare for performing transforms of size N -- the returned - PFFFT_Setup structure is read-only so it can safely be shared by - multiple concurrent threads. - */ - static - PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform); - static - void pffft_destroy_setup(PFFFT_Setup *); - /* - Perform a Fourier transform , The z-domain data is stored in the - most efficient order for transforming it back, or using it for - convolution. If you need to have its content sorted in the - "usual" way, that is as an array of interleaved complex numbers, - either use pffft_transform_ordered , or call pffft_zreorder after - the forward fft, and before the backward fft. - - Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x. - Typically you will want to scale the backward transform by 1/N. - - The 'work' pointer should point to an area of N (2*N for complex - fft) floats, properly aligned. If 'work' is NULL, then stack will - be used instead (this is probably the best strategy for small - FFTs, say for N < 16384). - - input and output may alias. - */ - static - void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction); - - /* - Similar to pffft_transform, but makes sure that the output is - ordered as expected (interleaved complex numbers). This is - similar to calling pffft_transform and then pffft_zreorder. - - input and output may alias. - */ - static - void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction); - - /* - call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(..., - PFFFT_FORWARD) if you want to have the frequency components in - the correct "canonical" order, as interleaved complex numbers. - - (for real transforms, both 0-frequency and half frequency - components, which are real, are assembled in the first entry as - F(0)+i*F(n/2+1). Note that the original fftpack did place - F(n/2+1) at the end of the arrays). - - input and output should not alias. - */ - static - void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction); - - /* - Perform a multiplication of the frequency components of dft_a and - dft_b and accumulate them into dft_ab. The arrays should have - been obtained with pffft_transform(.., PFFFT_FORWARD) and should - *not* have been reordered with pffft_zreorder (otherwise just - perform the operation yourself as the dft coefs are stored as - interleaved complex numbers). - - the operation performed is: dft_ab += (dft_a * fdt_b)*scaling - - The dft_a, dft_b and dft_ab pointers may alias. - */ - void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling); - - /* - the float buffers must have the correct alignment (16-byte boundary - on intel and powerpc). This function may be used to obtain such - correctly aligned buffers. - */ -#if 0 - void *pffft_aligned_malloc(size_t nb_bytes); - void pffft_aligned_free(void *); - - /* return 4 or 1 wether support SSE/Altivec instructions was enable when building pffft.c */ - int pffft_simd_size(); -#endif - -#undef float - -#ifdef __cplusplus -} -#endif - -#endif - -#endif diff --git a/soxr-sys/src/pffft32.c b/soxr-sys/src/pffft32.c deleted file mode 100644 index f48080949..000000000 --- a/soxr-sys/src/pffft32.c +++ /dev/null @@ -1,39 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#define SIMD_ALIGNED_FREE free -#define SIMD_ALIGNED_MALLOC malloc -#define PFFFT_SIMD_DISABLE -#define PFFFT_DOUBLE 0 -#include "pffft-wrap.c" - -#include "filter.h" -#include "rdft_t.h" - -static void * setup(int len) {return pffft_new_setup(len, PFFFT_REAL);} -static void delete_setup(void * setup) {pffft_destroy_setup(setup);} -static void forward (int length, void * setup, float * h, float * scratch) {pffft_transform (setup, h, h, scratch, PFFFT_FORWARD); (void)length;} -static void oforward (int length, void * setup, float * h, float * scratch) {pffft_transform_ordered(setup, h, h, scratch, PFFFT_FORWARD); (void)length;} -static void backward (int length, void * setup, float * H, float * scratch) {pffft_transform (setup, H, H, scratch, PFFFT_BACKWARD);(void)length;} -static void obackward(int length, void * setup, float * H, float * scratch) {pffft_transform_ordered(setup, H, H, scratch, PFFFT_BACKWARD);(void)length;} -static void convolve(int length, void * setup, float * H, float const * with) { pffft_zconvolve(setup, H, with, H); (void)length;} -static int multiplier(void) {return 1;} -static int flags(void) {return RDFT_NEEDS_SCRATCH;} - -fn_t _soxr_rdft32_cb[] = { - (fn_t)setup, - (fn_t)setup, - (fn_t)delete_setup, - (fn_t)forward, - (fn_t)oforward, - (fn_t)backward, - (fn_t)obackward, - (fn_t)convolve, - (fn_t)_soxr_ordered_partial_convolve_f, - (fn_t)multiplier, - (fn_t)pffft_reorder_back, - (fn_t)malloc, - (fn_t)calloc, - (fn_t)free, - (fn_t)flags, -}; diff --git a/soxr-sys/src/pffft32s.c b/soxr-sys/src/pffft32s.c deleted file mode 100644 index 7798a45c0..000000000 --- a/soxr-sys/src/pffft32s.c +++ /dev/null @@ -1,34 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#define PFFFT_DOUBLE 0 -#include "pffft-wrap.c" - -#include "rdft_t.h" - -static void * setup(int len) {return pffft_new_setup(len, PFFFT_REAL);} -static void forward (int length, void * setup, float * h, float * scratch) {pffft_transform (setup, h, h, scratch, PFFFT_FORWARD); (void)length;} -static void oforward (int length, void * setup, float * h, float * scratch) {pffft_transform_ordered(setup, h, h, scratch, PFFFT_FORWARD); (void)length;} -static void backward (int length, void * setup, float * H, float * scratch) {pffft_transform (setup, H, H, scratch, PFFFT_BACKWARD);(void)length;} -static void obackward(int length, void * setup, float * H, float * scratch) {pffft_transform_ordered(setup, H, H, scratch, PFFFT_BACKWARD);(void)length;} -static void convolve(int length, void * setup, float * H, float const * with) {pffft_zconvolve(setup, H, with, H); (void)length;} -static int multiplier(void) {return 1;} -static int flags(void) {return RDFT_IS_SIMD | RDFT_NEEDS_SCRATCH;} - -fn_t _soxr_rdft32s_cb[] = { - (fn_t)setup, - (fn_t)setup, - (fn_t)pffft_destroy_setup, - (fn_t)forward, - (fn_t)oforward, - (fn_t)backward, - (fn_t)obackward, - (fn_t)convolve, - (fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD, - (fn_t)multiplier, - (fn_t)pffft_reorder_back, - (fn_t)SIMD_ALIGNED_MALLOC, - (fn_t)SIMD_ALIGNED_CALLOC, - (fn_t)SIMD_ALIGNED_FREE, - (fn_t)flags, -}; diff --git a/soxr-sys/src/pffft64s.c b/soxr-sys/src/pffft64s.c deleted file mode 100644 index 7c37c9d4d..000000000 --- a/soxr-sys/src/pffft64s.c +++ /dev/null @@ -1,34 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#define PFFFT_DOUBLE 1 -#include "pffft-wrap.c" - -#include "rdft_t.h" - -static void * setup(int len) {return pffft_new_setup(len, PFFFT_REAL);} -static void forward (int length, void * setup, double * h, double * scratch) {pffft_transform (setup, h, h, scratch, PFFFT_FORWARD); (void)length;} -static void oforward (int length, void * setup, double * h, double * scratch) {pffft_transform_ordered(setup, h, h, scratch, PFFFT_FORWARD); (void)length;} -static void backward (int length, void * setup, double * H, double * scratch) {pffft_transform (setup, H, H, scratch, PFFFT_BACKWARD);(void)length;} -static void obackward(int length, void * setup, double * H, double * scratch) {pffft_transform_ordered(setup, H, H, scratch, PFFFT_BACKWARD);(void)length;} -static void convolve(int length, void * setup, double * H, double const * with) {pffft_zconvolve(setup, H, with, H); (void)length;} -static int multiplier(void) {return 1;} -static int flags(void) {return RDFT_IS_SIMD | RDFT_NEEDS_SCRATCH;} - -fn_t _soxr_rdft64s_cb[] = { - (fn_t)setup, - (fn_t)setup, - (fn_t)pffft_destroy_setup, - (fn_t)forward, - (fn_t)oforward, - (fn_t)backward, - (fn_t)obackward, - (fn_t)convolve, - (fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD, - (fn_t)multiplier, - (fn_t)pffft_reorder_back, - (fn_t)SIMD_ALIGNED_MALLOC, - (fn_t)SIMD_ALIGNED_CALLOC, - (fn_t)SIMD_ALIGNED_FREE, - (fn_t)flags, -}; diff --git a/soxr-sys/src/poly-fir.h b/soxr-sys/src/poly-fir.h deleted file mode 100644 index d138e030f..000000000 --- a/soxr-sys/src/poly-fir.h +++ /dev/null @@ -1,150 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -/* Resample using an interpolated poly-phase FIR with length LEN. */ -/* Input must be followed by FIR_LENGTH-1 samples. */ - -#if COEF_INTERP != 1 && COEF_INTERP != 2 && COEF_INTERP != 3 - #error COEF_INTERP -#endif - -#if SIMD_AVX || SIMD_SSE || SIMD_NEON - #define N (FIR_LENGTH>>2) - - #if COEF_INTERP == 1 - #define _ sum=vMac(vMac(b,X,a),vLdu(in+j*4),sum), ++j; - #elif COEF_INTERP == 2 - #define _ sum=vMac(vMac(vMac(c,X,b),X,a),vLdu(in+j*4),sum), ++j; - #else - #define _ sum=vMac(vMac(vMac(vMac(d,X,c),X,b),X,a),vLdu(in+j*4),sum), ++j; - #endif - - #define a coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-0)] - #define b coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-1)] - #define c coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-2)] - #define d coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-3)] - - #define BEGINNING v4_t X = vLds(x), sum = vZero(); \ - v4_t const * const __restrict coefs = (v4_t *)COEFS - #define END vStorSum(output+i, sum) - #define cc(n) case n: core(n); break - #define CORE(n) switch (n) {cc(2); cc(3); cc(4); cc(5); cc(6); default: core(n);} -#else - #define N FIR_LENGTH - - #if COEF_INTERP == 1 - #define _ sum += (b*x + a)*in[j], ++j; - #elif COEF_INTERP == 2 - #define _ sum += ((c*x + b)*x + a)*in[j], ++j; - #else - #define _ sum += (((d*x + c)*x + b)*x + a)*in[j], ++j; - #endif - - #define a (coef(COEFS, COEF_INTERP, N, phase, 0,j)) - #define b (coef(COEFS, COEF_INTERP, N, phase, 1,j)) - #define c (coef(COEFS, COEF_INTERP, N, phase, 2,j)) - #define d (coef(COEFS, COEF_INTERP, N, phase, 3,j)) - - #define BEGINNING sample_t sum = 0 - #define END output[i] = sum - #define CORE(n) core(n) -#endif - - - -#define floatPrecCore(n) { \ - float_step_t at = p->at.flt; \ - for (i = 0; (int)at < num_in; ++i, at += p->step.flt) { \ - sample_t const * const __restrict in = input + (int)at; \ - float_step_t frac = at - (int)at; \ - int phase = (int)(frac * (1 << PHASE_BITS)); \ - sample_t x = (sample_t)(frac * (1 << PHASE_BITS) - phase); \ - int j = 0; \ - BEGINNING; CONVOLVE(n); END; \ - } \ - fifo_read(&p->fifo, (int)at, NULL); \ - p->at.flt = at - (int)at; } /* Could round to 1 in some cirmcumstances. */ - - - -#define highPrecCore(n) { \ - step_t at; at.fix = p->at.fix; \ - for (i = 0; at.integer < num_in; ++i, \ - at.fix.ls.all += p->step.fix.ls.all, \ - at.whole += p->step.whole + (at.fix.ls.all < p->step.fix.ls.all)) { \ - sample_t const * const __restrict in = input + at.integer; \ - uint32_t frac = at.fraction; \ - int phase = (int)(frac >> (32 - PHASE_BITS)); /* High-order bits */ \ - /* Low-order bits, scaled to [0,1): */ \ - sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32)); \ - int j = 0; \ - BEGINNING; CONVOLVE(n); END; \ - } \ - fifo_read(&p->fifo, at.integer, NULL); \ - p->at.whole = at.fraction; \ - p->at.fix.ls = at.fix.ls; } - - - -#define stdPrecCore(n) { \ - int64p_t at; at.all = p->at.whole; \ - for (i = 0; at.parts.ms < num_in; ++i, at.all += p->step.whole) { \ - sample_t const * const __restrict in = input + at.parts.ms; \ - uint32_t const frac = at.parts.ls; \ - int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */ \ - /* Low-order bits, scaled to [0,1): */ \ - sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32)); \ - int j = 0; \ - BEGINNING; CONVOLVE(n); END; \ - } \ - fifo_read(&p->fifo, at.parts.ms, NULL); \ - p->at.whole = at.parts.ls; } - - - -#if WITH_FLOAT_STD_PREC_CLOCK - #define SPCORE floatPrecCore -#else - #define SPCORE stdPrecCore -#endif - - - -#if WITH_HI_PREC_CLOCK - #define core(n) if (p->use_hi_prec_clock) highPrecCore(n) else SPCORE(n) -#else - #define core(n) SPCORE(n) -#endif - - - -static void FUNCTION(stage_t * p, fifo_t * output_fifo) -{ - sample_t const * input = stage_read_p(p); - int num_in = min(stage_occupancy(p), p->input_size); - int i, max_num_out = 1 + (int)(num_in * p->out_in_ratio); - sample_t * const __restrict output = fifo_reserve(output_fifo, max_num_out); - - CORE(N); - assert(max_num_out - i >= 0); - fifo_trim_by(output_fifo, max_num_out - i); -} - - - -#undef _ -#undef a -#undef b -#undef c -#undef d -#undef CORE -#undef cc -#undef core -#undef COEF_INTERP -#undef N -#undef BEGINNING -#undef END -#undef CONVOLVE -#undef FIR_LENGTH -#undef FUNCTION -#undef PHASE_BITS diff --git a/soxr-sys/src/poly-fir0.h b/soxr-sys/src/poly-fir0.h deleted file mode 100644 index 76fca2d6b..000000000 --- a/soxr-sys/src/poly-fir0.h +++ /dev/null @@ -1,56 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -/* Resample using a non-interpolated poly-phase FIR with length LEN. */ -/* Input must be followed by FIR_LENGTH-1 samples. */ - -#if SIMD_AVX || SIMD_SSE || SIMD_NEON - #define N (FIR_LENGTH>>2) - #define BEGINNING v4_t sum = vZero(); \ - v4_t const * const __restrict coefs = (v4_t *)COEFS + N * rem; - #define _ sum = vMac(vLdu(at+j*4), coefs[j], sum), ++j; - #define END vStorSum(output+i, sum) - #define cc(n) case n: core(n); break - #define CORE(n) switch (n) {cc(2); cc(3); cc(4); cc(5); cc(6); default: core(n);} -#else - #define N FIR_LENGTH - #define BEGINNING sample_t sum = 0; \ - sample_t const * const __restrict coefs = (sample_t *)COEFS + N * rem; - #define _ sum += coefs[j]*at[j], ++j; - #define END output[i] = sum - #define CORE(n) core(n) -#endif - -#define core(n) \ - for (i = 0; at < num_in * p->L; ++i, at += step) { \ - int const div = at / p->L, rem = at % p->L; \ - sample_t const * const __restrict at = input + div; \ - int j = 0; BEGINNING; CONVOLVE(n); END;} - -static void FUNCTION(stage_t * p, fifo_t * output_fifo) -{ - int num_in = min(stage_occupancy(p), p->input_size); - if (num_in) { - sample_t const * input = stage_read_p(p); - int at = p->at.integer, step = p->step.integer; - int i, num_out = (num_in * p->L - at + step - 1) / step; - sample_t * __restrict output = fifo_reserve(output_fifo, num_out); - - CORE(N); - assert(i == num_out); - fifo_read(&p->fifo, at / p->L, NULL); - p->at.integer = at % p->L; - } -} - -#undef _ -#undef CORE -#undef cc -#undef core -#undef N -#undef BEGINNING -#undef MIDDLE -#undef END -#undef CONVOLVE -#undef FIR_LENGTH -#undef FUNCTION diff --git a/soxr-sys/src/rdft.h b/soxr-sys/src/rdft.h deleted file mode 100644 index 59ba17417..000000000 --- a/soxr-sys/src/rdft.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -void ORDERED_CONVOLVE(int n, void * not_used, DFT_FLOAT * a, const DFT_FLOAT * b) -{ - int i; - a[0] *= b[0]; - a[1] *= b[1]; - for (i = 2; i < n; i += 2) { - DFT_FLOAT tmp = a[i]; - a[i ] = b[i ] * tmp - b[i+1] * a[i+1]; - a[i+1] = b[i+1] * tmp + b[i ] * a[i+1]; - } - (void)not_used; -} - -void ORDERED_PARTIAL_CONVOLVE(int n, DFT_FLOAT * a, const DFT_FLOAT * b) -{ - int i; - a[0] *= b[0]; - for (i = 2; i < n; i += 2) { - DFT_FLOAT tmp = a[i]; - a[i ] = b[i ] * tmp - b[i+1] * a[i+1]; - a[i+1] = b[i+1] * tmp + b[i ] * a[i+1]; - } - a[1] = b[i] * a[i] - b[i+1] * a[i+1]; -} - -#undef ORDERED_CONVOLVE -#undef ORDERED_PARTIAL_CONVOLVE -#undef DFT_FLOAT diff --git a/soxr-sys/src/rdft_t.h b/soxr-sys/src/rdft_t.h deleted file mode 100644 index 293d9c37b..000000000 --- a/soxr-sys/src/rdft_t.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -typedef void (* fn_t)(void); - -#define rdft_forward_setup (*(void * (*)(int))RDFT_CB[0]) -#define rdft_backward_setup (*(void * (*)(int))RDFT_CB[1]) -#define rdft_delete_setup (*(void (*)(void *))RDFT_CB[2]) -#define rdft_forward (*(void (*)(int, void *, void *, void *))RDFT_CB[3]) -#define rdft_oforward (*(void (*)(int, void *, void *, void *))RDFT_CB[4]) -#define rdft_backward (*(void (*)(int, void *, void *, void *))RDFT_CB[5]) -#define rdft_obackward (*(void (*)(int, void *, void *, void *))RDFT_CB[6]) -#define rdft_convolve (*(void (*)(int, void *, void *, void const *))RDFT_CB[7]) -#define rdft_convolve_portion (*(void (*)(int, void *, void const *))RDFT_CB[8]) -#define rdft_multiplier (*(int (*)(void))RDFT_CB[9]) -#define rdft_reorder_back (*(void (*)(int, void *, void *, void *))RDFT_CB[10]) -#define rdft_malloc (*(void * (*)(size_t))RDFT_CB[11]) -#define rdft_calloc (*(void * (*)(size_t, size_t))RDFT_CB[12]) -#define rdft_free (*(void (*)(void *))RDFT_CB[13]) -#define rdft_flags (*(int (*)(void))RDFT_CB[14]) - -/* Flag templates: */ -#define RDFT_IS_SIMD 1 -#define RDFT_NEEDS_SCRATCH 2 diff --git a/soxr-sys/src/rint-clip.h b/soxr-sys/src/rint-clip.h deleted file mode 100644 index 3294f4eaf..000000000 --- a/soxr-sys/src/rint-clip.h +++ /dev/null @@ -1,161 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#if defined DITHER - -#define DITHERING + (1./32)*(int)(((ran1>>=3)&31)-((ran2>>=3)&31)) -#define DITHER_RAND (seed = 1664525ULL * seed + 1013904223ULL) >> 3 -#define DITHER_VARS unsigned long long ran1 = DITHER_RAND, ran2 = DITHER_RAND -#define SEED_ARG , unsigned long long * seed0 -#define SAVE_SEED *seed0 = seed -#define COPY_SEED unsigned long long seed = *seed0; -#define COPY_SEED1 unsigned long long seed1 = seed -#define PASS_SEED1 , &seed1 -#define PASS_SEED , &seed -#define FLOATD double - -#else - -#define DITHERING -#define DITHER_VARS -#define SEED_ARG -#define SAVE_SEED -#define COPY_SEED -#define COPY_SEED1 -#define PASS_SEED1 -#define PASS_SEED -#define FLOATD FLOATX - -#endif - -#define DO_16 _;_;_;_;_;_;_;_;_;_;_;_;_;_;_;_ - - - -#if defined FE_INVALID && defined FPU_RINT -static void RINT_CLIP(RINT_T * const dest, FLOATX const * const src, - unsigned stride, size_t i, size_t const n, size_t * const clips SEED_ARG) -{ - COPY_SEED - DITHER_VARS; - for (; i < n; ++i) { - fe_clear_invalid(); - FLOATD const d = src[i] DITHERING; - RINT(dest[stride * i], d); - if (fe_test_invalid()) { - fe_clear_invalid(); - dest[stride * i] = d > 0? RINT_MAX : -RINT_MAX - 1; - ++*clips; - } - } - SAVE_SEED; -} -#endif - - - -static size_t LSX_RINT_CLIP(void * * const dest0, FLOATX const * const src, - size_t const n SEED_ARG) -{ - size_t i, clips = 0; - RINT_T * dest = *dest0; - COPY_SEED -#if defined FE_INVALID && defined FPU_RINT -#define _ RINT(dest[i], src[i] DITHERING); ++i - for (i = 0; i < (n & ~15u);) { - fe_clear_invalid(); - COPY_SEED1; - DITHER_VARS; - DO_16; - if (fe_test_invalid()) { - fe_clear_invalid(); - RINT_CLIP(dest, src, 1, i - 16, i, &clips PASS_SEED1); - } - } - RINT_CLIP(dest, src, 1, i, n, &clips PASS_SEED); -#else -#define _ d = src[i] DITHERING, dest[i++] = (RINT_T)(d > 0? \ - d+.5 >= N? ++clips, N-1 : d+.5 : d-.5 <= -N-1? ++clips, -N:d-.5) - const double N = 1. + RINT_MAX; - double d; - for (i = 0; i < (n & ~15u);) { - DITHER_VARS; - DO_16; - } - { - DITHER_VARS; - for (; i < n; _); - } -#endif - SAVE_SEED; - *dest0 = dest + n; - return clips; -} -#undef _ - - - -static size_t LSX_RINT_CLIP_2(void * * dest0, FLOATX const * const * srcs, - unsigned const stride, size_t const n SEED_ARG) -{ - unsigned j; - size_t i, clips = 0; - RINT_T * dest = *dest0; - COPY_SEED -#if defined FE_INVALID && defined FPU_RINT -#define _ RINT(dest[stride * i], src[i] DITHERING); ++i - for (j = 0; j < stride; ++j, ++dest) { - FLOATX const * const src = srcs[j]; - for (i = 0; i < (n & ~15u);) { - fe_clear_invalid(); - COPY_SEED1; - DITHER_VARS; - DO_16; - if (fe_test_invalid()) { - fe_clear_invalid(); - RINT_CLIP(dest, src, stride, i - 16, i, &clips PASS_SEED1); - } - } - RINT_CLIP(dest, src, stride, i, n, &clips PASS_SEED); - } -#else -#define _ d = src[i] DITHERING, dest[stride * i++] = (RINT_T)(d > 0? \ - d+.5 >= N? ++clips, N-1 : d+.5 : d-.5 <= -N-1? ++clips, -N:d-.5) - const double N = 1. + RINT_MAX; - double d; - for (j = 0; j < stride; ++j, ++dest) { - FLOATX const * const src = srcs[j]; - for (i = 0; i < (n & ~15u);) { - DITHER_VARS; - DO_16; - } - { - DITHER_VARS; - for (; i < n; _); - } - } -#endif - SAVE_SEED; - *dest0 = dest + stride * (n - 1); - return clips; -} -#undef _ - -#undef FLOATD -#undef PASS_SEED -#undef PASS_SEED1 -#undef COPY_SEED1 -#undef COPY_SEED -#undef SAVE_SEED -#undef SEED_ARG -#undef DITHER_VARS -#undef DITHERING -#undef DITHER - -#undef RINT_MAX -#undef RINT_T -#undef FPU_RINT -#undef RINT -#undef RINT_CLIP -#undef LSX_RINT_CLIP -#undef LSX_RINT_CLIP_2 diff --git a/soxr-sys/src/rint.h b/soxr-sys/src/rint.h deleted file mode 100644 index 2f1dfbed6..000000000 --- a/soxr-sys/src/rint.h +++ /dev/null @@ -1,102 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#if !defined soxr_rint_included -#define soxr_rint_included - -#include "std-types.h" - -/* For x86, compiler-supplied versions of these functions (where available) - * can have poor performance (e.g. mingw32), so prefer these asm versions: */ - -#if defined __GNUC__ && (defined __i386__ || defined __x86_64__) - #define FPU_RINT32 - #define FPU_RINT16 - #define rint32D(a,b) __asm__ __volatile__("fistpl %0": "=m"(a): "t"(b): "st") - #define rint16D(a,b) __asm__ __volatile__("fistps %0": "=m"(a): "t"(b): "st") - #define rint32F rint32D - #define rint16F rint16D - #define FE_INVALID 1 - static __inline int fe_test_invalid(void) { - int status_word; - __asm__ __volatile__("fnstsw %%ax": "=a"(status_word)); - return status_word & FE_INVALID; - } - static __inline int fe_clear_invalid(void) { - int32_t status[7]; - __asm__ __volatile__("fnstenv %0": "=m"(status)); - status[1] &= ~FE_INVALID; - __asm__ __volatile__("fldenv %0": : "m"(*status)); - return 0; - } -#elif defined _MSC_VER && defined _M_IX86 - #define FPU_RINT32 - #define FPU_RINT16 - #define rint_fn(N,Y,X) \ - static __inline void N(Y *y, X x) {Y t; {__asm fld x __asm fistp t} *y=t;} - rint_fn(rint32d, int32_t, double) - rint_fn(rint32f, int32_t, float ) - rint_fn(rint16d, int16_t, double) - rint_fn(rint16f, int16_t, float ) - #define rint32D(y,x) rint32d(&(y),x) - #define rint32F(y,x) rint32f(&(y),x) - #define rint16D(y,x) rint16d(&(y),x) - #define rint16F(y,x) rint16f(&(y),x) - #define FE_INVALID 1 - static __inline int fe_test_invalid(void) { - short status_word; - __asm fnstsw status_word - return status_word & FE_INVALID; - } - static __inline int fe_clear_invalid(void) { - int32_t status[7]; - __asm fnstenv status - status[1] &= ~FE_INVALID; - __asm fldenv status - return 0; - } -#elif defined _MSC_VER && defined _M_X64 - #include - #include - #define FPU_RINT32 - #define FPU_RINT16 - static __inline void rint32d(int32_t *y, double x) { - *y = _mm_cvtsd_si32(_mm_load_sd(&x));} - static __inline void rint32f(int32_t *y, float x) { - *y = _mm_cvtss_si32(_mm_load_ss(&x));} - static __inline void rint16d(int16_t *y, double x) { - x = x*65536+32738; *y = (int16_t)(_mm_cvtsd_si32(_mm_load_sd(&x)) >> 16);} - #define rint32D(y,x) rint32d(&(y),x) - #define rint32F(y,x) rint32f(&(y),x) - #define rint16D(y,x) rint16d(&(y),x) - #define rint16F(y,x) rint16d(&(y),(double)(x)) - #define FE_INVALID 1 - #define fe_test_invalid() (_statusfp() & _SW_INVALID) - #define fe_clear_invalid _clearfp /* Note: clears all. */ -#elif HAVE_LRINT && LONG_MAX == 2147483647L && HAVE_FENV_H - #include - #include - #define FPU_RINT32 - #define rint32D(y,x) ((y)=lrint(x)) - #define rint32F(y,x) ((y)=lrintf(x)) - #define fe_test_invalid() fetestexcept(FE_INVALID) - #define fe_clear_invalid() feclearexcept(FE_INVALID) -#endif - -#if !defined FPU_RINT32 - #define rint32D(y,x) ((y)=(int32_t)((x) < 0? x - .5 : x + .5)) - #define rint32F(y,x) rint32D(y,(double)(x)) -#endif - -#if !defined FPU_RINT16 - #define rint16D(y,x) ((y)=(int16_t)((x) < 0? x - .5 : x + .5)) - #define rint16F(y,x) rint16D(y,(double)(x)) -#endif - -static __inline int32_t rint32(double input) { - int32_t result; rint32D(result, input); return result;} - -static __inline int16_t rint16(double input) { - int16_t result; rint16D(result, input); return result;} - -#endif diff --git a/soxr-sys/src/samplerate.h b/soxr-sys/src/samplerate.h deleted file mode 100644 index 911cc5d0c..000000000 --- a/soxr-sys/src/samplerate.h +++ /dev/null @@ -1 +0,0 @@ -#include "soxr-lsr.h" diff --git a/soxr-sys/src/soxr-config.h b/soxr-sys/src/soxr-config.h deleted file mode 100644 index a559b5f10..000000000 --- a/soxr-sys/src/soxr-config.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - - -#if !defined soxr_config_included -#define soxr_config_included - -#define AVCODEC_FOUND 0 -#define AVUTIL_FOUND 0 -#define WITH_PFFFT 0 - -#define HAVE_FENV_H 1 -#define HAVE_STDBOOL_H 1 -#define HAVE_STDINT_H 1 -#define HAVE_LRINT 0 -#define HAVE_BIGENDIAN 0 - -#define WITH_CR32 1 -#define WITH_CR32S 0 -#define WITH_CR64 0 -#define WITH_CR64S 0 -#define WITH_VR32 1 - -#define WITH_HI_PREC_CLOCK 0 -#define WITH_FLOAT_STD_PREC_CLOCK 0 -#define WITH_DEV_TRACE 0 - -#endif diff --git a/soxr-sys/src/soxr-lsr.c b/soxr-sys/src/soxr-lsr.c deleted file mode 100644 index 58ab50a21..000000000 --- a/soxr-sys/src/soxr-lsr.c +++ /dev/null @@ -1,198 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-18 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -/* Wrapper mostly compatible with `libsamplerate'. */ - -#include -#include -#include "soxr.h" -#include "soxr-lsr.h" -#include "rint.h" - - - -SRC_STATE *src_new(SRC_SRCTYPE id, int channels, SRC_ERROR * error) -{ - return src_callback_new(0, id, channels, error, 0); -} - - - -SRC_ERROR src_process(SRC_STATE *p, SRC_DATA * io) -{ - size_t idone , odone; - - if (!p || !io) return -1; - - soxr_set_error( - p, soxr_set_io_ratio(p, 1/io->src_ratio, (size_t)io->output_frames)); - - soxr_process(p, io->data_in, /* hack: */ - (size_t)(io->end_of_input? ~io->input_frames : io->input_frames), - &idone, io->data_out, (size_t)io->output_frames, &odone); - - io->input_frames_used = (long)idone, io->output_frames_gen = (long)odone; - return -!!soxr_error(p); -} - - - -SRC_ERROR src_set_ratio(SRC_STATE * p, double oi_ratio) -{ - return -!!soxr_set_io_ratio(p, 1/oi_ratio, 0); -} - - - -SRC_ERROR src_reset(SRC_STATE * p) -{ - return -!!soxr_clear(p); -} - - - -SRC_ERROR src_error(SRC_STATE * p) -{ - return -!!soxr_error(p); -} - - - -SRC_STATE * src_delete(SRC_STATE * p) -{ - soxr_delete(p); - return 0; -} - - - -SRC_STATE *src_callback_new(src_callback_t fn, - SRC_SRCTYPE id, int channels, SRC_ERROR * error0, void * p) -{ - soxr_quality_spec_t q_spec = soxr_quality_spec(SOXR_LSR0Q + (unsigned)id, 0); - char const * e = getenv("SOXR_LSR_NUM_THREADS"); - soxr_runtime_spec_t r_spec = soxr_runtime_spec(!(e && atoi(e) != 1)); - soxr_error_t error; - soxr_t soxr = 0; - - assert (channels > 0); - soxr = soxr_create(0, 0, (unsigned)channels, &error, 0, &q_spec, &r_spec); - - if (soxr) - error = soxr_set_input_fn(soxr, (soxr_input_fn_t)fn, p, 0); - - if (error0) - *error0 = -!!error; - - return soxr; -} - - - -long src_callback_read(SRC_STATE *p, double oi_ratio, long olen, float * obuf) -{ - if (!p || olen < 0) return -1; - - soxr_set_error(p, soxr_set_io_ratio(p, 1/oi_ratio, (size_t)olen)); - return (long)soxr_output(p, obuf, (size_t)olen); -} - - - -SRC_ERROR src_simple(SRC_DATA * io, SRC_SRCTYPE id, int channels) -{ - size_t idone, odone; - soxr_error_t error; - soxr_quality_spec_t q_spec = soxr_quality_spec(SOXR_LSR0Q + (unsigned)id, 0); - char const * e = getenv("SOXR_LSR_NUM_THREADS"); - soxr_runtime_spec_t r_spec = soxr_runtime_spec(!(e && atoi(e) != 1)); - - if (!io || channels<=0 || io->input_frames<0 || io->output_frames<0) return-1; - - error = soxr_oneshot(1, io->src_ratio, (unsigned)channels, io->data_in, - (size_t)io->input_frames, &idone, io->data_out, (size_t)io->output_frames, - &odone, 0, &q_spec, &r_spec); - - io->input_frames_used = (long)idone, io->output_frames_gen = (long)odone; - - return -!!error; -} - - - -char const * src_get_name(SRC_SRCTYPE id) -{ - static char const * const names[] = { - "LSR best sinc", "LSR medium sinc", "LSR fastest sinc", - "LSR ZOH", "LSR linear", "SoX VHQ"}; - - return (unsigned)id < 5u + !getenv("SOXR_LSR_STRICT")? names[id] : 0; -} - - - -char const * src_get_description(SRC_SRCTYPE id) -{ - return src_get_name(id); -} - - - -char const * src_get_version(void) -{ - return soxr_version(); -} - - - -char const * src_strerror(SRC_ERROR error) -{ - return error == 1? "Placeholder." : error ? "soxr error" : soxr_strerror(0); -} - - - -int src_is_valid_ratio(double oi_ratio) -{ - return getenv("SOXR_LSR_STRICT")? - oi_ratio >= 1./256 && oi_ratio <= 256 : oi_ratio > 0; -} - - - -void src_short_to_float_array(short const * src, float * dest, int len) -{ - assert (src && dest); - - while (len--) dest[len] = (float)(src[len] * (1 / (1. + SHRT_MAX))); -} - - - -void src_float_to_short_array(float const * src, short * dest, int len) -{ - double d, N = 1. + SHRT_MAX; - assert (src && dest); - - while (len--) d = src[len] * N, dest[len] = - (short)(d > N - 1? (short)(N - 1) : d < -N? (short)-N : rint16(d)); -} - - - -void src_int_to_float_array(int const * src, float * dest, int len) -{ - assert (src && dest); - while (len--) dest[len] = (float)(src[len] * (1 / (32768. * 65536.))); -} - - - -void src_float_to_int_array(float const * src, int * dest, int len) -{ - double d, N = 32768. * 65536.; /* N.B. int32, not int! (Also above fn.) */ - assert (src && dest); - - while (len--) d = src[len] * N, dest[len] = - d >= N - 1? (int)(N - 1) : d < -N? (int)(-N) : rint32(d); -} diff --git a/soxr-sys/src/soxr-lsr.h b/soxr-sys/src/soxr-lsr.h deleted file mode 100644 index b1cc24706..000000000 --- a/soxr-sys/src/soxr-lsr.h +++ /dev/null @@ -1,78 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-18 robs@users.sourceforge.net - * - * This library is free software; you can redistribute it and/or modify it - * under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or (at - * your option) any later version. - * - * This library is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser - * General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -/* Wrapper compatible with `libsamplerate' (constant-rate). - * (Libsoxr's native API can be found in soxr.h). */ - -#if !defined SAMPLERATE_H -#define SAMPLERATE_H -#if defined __cplusplus - extern "C" { -#endif - -#if defined SOXR_DLL - #if defined soxr_lsr_EXPORTS - #define SOXR __declspec(dllexport) - #else - #define SOXR __declspec(dllimport) - #endif -#elif defined SOXR_VISIBILITY && defined __GNUC__ && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 1) - #define SOXR __attribute__ ((visibility("default"))) -#else - #define SOXR -#endif - -typedef float SRC_SAMPLE; -enum SRC_SRCTYPE_e {SRC_SINC_BEST_QUALITY, SRC_SINC_MEDIUM_QUALITY, - SRC_SINC_FASTEST, SRC_ZERO_ORDER_HOLD, SRC_LINEAR}; -typedef int SRC_SRCTYPE; -typedef int SRC_ERROR; -typedef long (* src_callback_t)(void *, SRC_SAMPLE * *); -typedef struct soxr SRC_STATE; -typedef struct SRC_DATA { - SRC_SAMPLE * data_in, * data_out; - long input_frames, output_frames; - long input_frames_used, output_frames_gen; - int end_of_input; - double src_ratio; -} SRC_DATA; -SOXR SRC_STATE * src_new(SRC_SRCTYPE, int num_channels, SRC_ERROR *); -SOXR SRC_ERROR src_process (SRC_STATE *, SRC_DATA *); -SOXR SRC_ERROR src_set_ratio(SRC_STATE *, double); -SOXR SRC_ERROR src_reset (SRC_STATE *); -SOXR SRC_ERROR src_error (SRC_STATE *); -SOXR SRC_STATE * src_delete (SRC_STATE *); -SOXR SRC_STATE * src_callback_new( - src_callback_t, SRC_SRCTYPE, int, SRC_ERROR *, void *); -SOXR long src_callback_read( - SRC_STATE *, double src_ratio, long, SRC_SAMPLE *); -SOXR SRC_ERROR src_simple(SRC_DATA *, SRC_SRCTYPE, int); -SOXR char const * src_get_name(SRC_SRCTYPE); -SOXR char const * src_get_description(SRC_SRCTYPE); -SOXR char const * src_get_version(void); -SOXR char const * src_strerror(SRC_ERROR); -SOXR int src_is_valid_ratio(double); -SOXR void src_short_to_float_array(short const *, float *, int); -SOXR void src_float_to_short_array(float const *, short *, int); -SOXR void src_int_to_float_array(int const *, float *, int); -SOXR void src_float_to_int_array(float const *, int *, int); - -#undef SOXR -#if defined __cplusplus - } -#endif -#endif diff --git a/soxr-sys/src/soxr.c b/soxr-sys/src/soxr.c deleted file mode 100644 index 0ece116bf..000000000 --- a/soxr-sys/src/soxr.c +++ /dev/null @@ -1,843 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-18 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#include -#include -#include -#include - -#include "soxr.h" -#include "data-io.h" -#include "internal.h" - -#if AVUTIL_FOUND - #include -#endif - - - -#if WITH_DEV_TRACE - -#include -#include - -int _soxr_trace_level; - -void _soxr_trace(char const * fmt, ...) -{ - va_list args; - va_start(args, fmt); - vfprintf(stderr, fmt, args); - fputc('\n', stderr); - va_end(args); -} - -#endif - - - -char const * soxr_version(void) -{ - return "libsoxr-" SOXR_THIS_VERSION_STR; -} - - - - -typedef void sample_t; /* float or double */ -typedef void (* fn_t)(void); -typedef fn_t control_block_t[10]; - -#define resampler_input (*(sample_t * (*)(void *, sample_t * samples, size_t n))p->control_block[0]) -#define resampler_process (*(void (*)(void *, size_t))p->control_block[1]) -#define resampler_output (*(sample_t const * (*)(void *, sample_t * samples, size_t * n))p->control_block[2]) -#define resampler_flush (*(void (*)(void *))p->control_block[3]) -#define resampler_close (*(void (*)(void *))p->control_block[4]) -#define resampler_delay (*(double (*)(void *))p->control_block[5]) -#define resampler_sizes (*(void (*)(size_t * shared, size_t * channel))p->control_block[6]) -#define resampler_create (*(char const * (*)(void * channel, void * shared, double io_ratio, soxr_quality_spec_t * q_spec, soxr_runtime_spec_t * r_spec, double scale))p->control_block[7]) -#define resampler_set_io_ratio (*(void (*)(void *, double io_ratio, size_t len))p->control_block[8]) -#define resampler_id (*(char const * (*)(void))p->control_block[9]) - -typedef void * resampler_t; /* For one channel. */ -typedef void * resampler_shared_t; /* Between channels. */ -typedef void (* deinterleave_t)(sample_t * * dest, - soxr_datatype_t data_type, void const * * src0, size_t n, unsigned ch); -typedef size_t (* interleave_t)(soxr_datatype_t data_type, void * * dest, - sample_t const * const * src, size_t, unsigned, unsigned long long *); - -struct soxr { - unsigned num_channels; - double io_ratio; - soxr_error_t error; - soxr_quality_spec_t q_spec; - soxr_io_spec_t io_spec; - soxr_runtime_spec_t runtime_spec; - - void * input_fn_state; - soxr_input_fn_t input_fn; - size_t max_ilen; - - resampler_shared_t shared; - resampler_t * resamplers; - control_block_t control_block; - deinterleave_t deinterleave; - interleave_t interleave; - - void * * channel_ptrs; - size_t clips; - unsigned long long seed; - int flushing; -}; - - - -#if WITH_CR32 || WITH_CR32S || WITH_CR64 || WITH_CR64S - #include "filter.h" -#else - #define lsx_to_3dB(x) ((x)/(x)) -#endif - - - -soxr_quality_spec_t soxr_quality_spec(unsigned long recipe, unsigned long flags) -{ - soxr_quality_spec_t spec, * p = &spec; - unsigned q = recipe & 0xf; /* TODO: move to soxr-lsr.c: */ - unsigned quality = q > SOXR_LSR2Q+2? SOXR_VHQ : q > SOXR_LSR2Q? SOXR_QQ : q; - double rej; - memset(p, 0, sizeof(*p)); - if (quality > SOXR_PRECISIONQ) { - p->e = "invalid quality type"; - return spec; - } - flags |= quality < SOXR_LSR0Q ? RESET_ON_CLEAR : 0; - p->phase_response = "\62\31\144"[(recipe & 0x30)>>4]; - p->stopband_begin = 1; - p->precision = - quality == SOXR_QQ ? 0 : - quality <= SOXR_16_BITQ ? 16 : - quality <= SOXR_32_BITQ ? 4 + quality * 4 : - quality <= SOXR_LSR2Q ? 55 - quality * 4 : /* TODO: move to soxr-lsr.c */ - 0; - rej = p->precision * linear_to_dB(2.); - p->flags = flags; - if (quality <= SOXR_32_BITQ || quality == SOXR_PRECISIONQ) { - #define LOW_Q_BW0 (1385 / 2048.) /* 0.67625 rounded to be a FP exact. */ - p->passband_end = quality == 1? LOW_Q_BW0 : 1 - .05 / lsx_to_3dB(rej); - if (quality <= 2) - p->flags &= ~SOXR_ROLLOFF_NONE, p->flags |= SOXR_ROLLOFF_MEDIUM; - } - else { /* TODO: move to soxr-lsr.c */ - static float const bw[] = {.931f, .832f, .663f}; - p->passband_end = bw[quality - SOXR_LSR0Q]; - if (quality == SOXR_LSR2Q) { - p->flags &= ~SOXR_ROLLOFF_NONE; - p->flags |= SOXR_ROLLOFF_LSR2Q | SOXR_PROMOTE_TO_LQ; - } - } - if (recipe & SOXR_STEEP_FILTER) - p->passband_end = 1 - .01 / lsx_to_3dB(rej); - return spec; -} - - - -char const * soxr_engine(soxr_t p) -{ - return resampler_id(); -} - - - -size_t * soxr_num_clips(soxr_t p) -{ - return &p->clips; -} - - - -soxr_error_t soxr_error(soxr_t p) -{ - return p->error; -} - - - -soxr_runtime_spec_t soxr_runtime_spec(unsigned num_threads) -{ - soxr_runtime_spec_t spec, * p = &spec; - memset(p, 0, sizeof(*p)); - p->log2_min_dft_size = 10; - p->log2_large_dft_size = 17; - p->coef_size_kbytes = 400; - p->num_threads = num_threads; - return spec; -} - - - -soxr_io_spec_t soxr_io_spec( - soxr_datatype_t itype, - soxr_datatype_t otype) -{ - soxr_io_spec_t spec, * p = &spec; - memset(p, 0, sizeof(*p)); - if ((itype | otype) >= SOXR_SPLIT * 2) - p->e = "invalid io datatype(s)"; - else { - p->itype = itype; - p->otype = otype; - p->scale = 1; - } - return spec; -} - - - -#if (WITH_CR32S && WITH_CR32) || (WITH_CR64S && WITH_CR64) - #if defined __GNUC__ && defined __x86_64__ - #define CPUID(type, eax_, ebx_, ecx_, edx_) \ - __asm__ __volatile__ ( \ - "cpuid \n\t" \ - : "=a" (eax_), "=b" (ebx_), "=c" (ecx_), "=d" (edx_) \ - : "a" (type), "c" (0)); - #elif defined __GNUC__ && defined __i386__ - #define CPUID(type, eax_, ebx_, ecx_, edx_) \ - __asm__ __volatile__ ( \ - "mov %%ebx, %%edi \n\t" \ - "cpuid \n\t" \ - "xchg %%edi, %%ebx \n\t" \ - : "=a" (eax_), "=D" (ebx_), "=c" (ecx_), "=d" (edx_) \ - : "a" (type), "c" (0)); - #elif defined _M_X64 && defined _MSC_VER && _MSC_VER > 1500 - void __cpuidex(int CPUInfo[4], int info_type, int ecxvalue); - #pragma intrinsic(__cpuidex) - #define CPUID(type, eax_, ebx_, ecx_, edx_) do { \ - int regs[4]; \ - __cpuidex(regs, type, 0); \ - eax_ = regs[0], ebx_ = regs[1], ecx_ = regs[2], edx_ = regs[3]; \ - } while(0) - #elif defined _M_X64 && defined _MSC_VER - void __cpuidex(int CPUInfo[4], int info_type); - #pragma intrinsic(__cpuidex) - #define CPUID(type, eax_, ebx_, ecx_, edx_) do { \ - int regs[4]; \ - __cpuidex(regs, type); \ - eax_ = regs[0], ebx_ = regs[1], ecx_ = regs[2], edx_ = regs[3]; \ - } while(0) - #elif defined _M_IX86 && defined _MSC_VER - #define CPUID(type, eax_, ebx_, ecx_, edx_) \ - __asm pushad \ - __asm mov eax, type \ - __asm xor ecx, ecx \ - __asm cpuid \ - __asm mov eax_, eax \ - __asm mov ebx_, ebx \ - __asm mov ecx_, ecx \ - __asm mov edx_, edx \ - __asm popad - #endif -#endif - - - -#if WITH_CR32S && WITH_CR32 - static bool cpu_has_simd32(void) - { - #if defined __x86_64__ || defined _M_X64 - return true; - #elif defined __i386__ || defined _M_IX86 - enum {SSE = 1 << 25, SSE2 = 1 << 26}; - unsigned eax_, ebx_, ecx_, edx_; - CPUID(1, eax_, ebx_, ecx_, edx_); - return (edx_ & (SSE|SSE2)) != 0; - #elif defined AV_CPU_FLAG_NEON - return !!(av_get_cpu_flags() & AV_CPU_FLAG_NEON); - #else - return false; - #endif - } - - static bool should_use_simd32(void) - { - char const * e; - return ((e = getenv("SOXR_USE_SIMD" )))? !!atoi(e) : - ((e = getenv("SOXR_USE_SIMD32")))? !!atoi(e) : cpu_has_simd32(); - } -#else - #define should_use_simd32() true -#endif - - - -#if WITH_CR64S && WITH_CR64 - #if defined __GNUC__ - #define XGETBV(type, eax_, edx_) \ - __asm__ __volatile__ ( \ - ".byte 0x0f, 0x01, 0xd0\n" \ - : "=a"(eax_), "=d"(edx_) : "c" (type)); - #elif defined _M_X64 && defined _MSC_FULL_VER && _MSC_FULL_VER >= 160040219 - #include - #define XGETBV(type, eax_, edx_) do { \ - union {uint64_t x; uint32_t y[2];} a = {_xgetbv(0)}; \ - eax_ = a.y[0], edx_ = a.y[1]; \ - } while(0) - #elif defined _M_IX86 && defined _MSC_VER - #define XGETBV(type, eax_, edx_) \ - __asm pushad \ - __asm mov ecx, type \ - __asm _emit 0x0f \ - __asm _emit 0x01 \ - __asm _emit 0xd0 \ - __asm mov eax_, eax \ - __asm mov edx_, edx \ - __asm popad - #else - #define XGETBV(type, eax_, edx_) eax_ = edx_ = 0 - #endif - - static bool cpu_has_simd64(void) - { - enum {OSXSAVE = 1 << 27, AVX = 1 << 28}; - unsigned eax_, ebx_, ecx_, edx_; - CPUID(1, eax_, ebx_, ecx_, edx_); - if ((ecx_ & (OSXSAVE|AVX)) == (OSXSAVE|AVX)) { - XGETBV(0, eax_, edx_); - return (eax_ & 6) == 6; - } - return false; - } - - static bool should_use_simd64(void) - { - char const * e; - return ((e = getenv("SOXR_USE_SIMD" )))? !!atoi(e) : - ((e = getenv("SOXR_USE_SIMD64")))? !!atoi(e) : cpu_has_simd64(); - } -#else - #define should_use_simd64() true -#endif - - - -extern control_block_t - _soxr_rate32_cb, - _soxr_rate32s_cb, - _soxr_rate64_cb, - _soxr_rate64s_cb, - _soxr_vr32_cb; - - - -static void runtime_num(char const * env_name, - int min, int max, unsigned * field) -{ - char const * e = getenv(env_name); - if (e) { - int i = atoi(e); - if (i >= min && i <= max) - *field = (unsigned)i; - } -} - - - -static void runtime_flag(char const * env_name, - unsigned n_bits, unsigned n_shift, unsigned long * flags) -{ - char const * e = getenv(env_name); - if (e) { - int i = atoi(e); - unsigned long mask = (1UL << n_bits) - 1; - if (i >= 0 && i <= (int)mask) - *flags &= ~(mask << n_shift), *flags |= ((unsigned long)i << n_shift); - } -} - - - -soxr_t soxr_create( - double input_rate, double output_rate, - unsigned num_channels, - soxr_error_t * error0, - soxr_io_spec_t const * io_spec, - soxr_quality_spec_t const * q_spec, - soxr_runtime_spec_t const * runtime_spec) -{ - double io_ratio = output_rate!=0? input_rate!=0? - input_rate / output_rate : -1 : input_rate!=0? -1 : 0; - static const float datatype_full_scale[] = {1, 1, 65536.*32768, 32768}; - soxr_t p = 0; - soxr_error_t error = 0; - -#if WITH_DEV_TRACE -#define _(x) (char)(sizeof(x)>=10? 'a'+(char)(sizeof(x)-10):'0'+(char)sizeof(x)) - char const * e = getenv("SOXR_TRACE"); - _soxr_trace_level = e? atoi(e) : 0; - { - static char const arch[] = {_(char), _(short), _(int), _(long), _(long long) - , ' ', _(float), _(double), _(long double) - , ' ', _(int *), _(int (*)(int)) - , ' ', HAVE_BIGENDIAN ? 'B' : 'L' -#if defined _OPENMP - , ' ', 'O', 'M', 'P' -#endif - , 0}; -#undef _ - lsx_debug("arch: %s", arch); - } -#endif - - if (q_spec && q_spec->e) error = q_spec->e; - else if (io_spec && (io_spec->itype | io_spec->otype) >= SOXR_SPLIT * 2) - error = "invalid io datatype(s)"; - - if (!error && !(p = calloc(sizeof(*p), 1))) error = "malloc failed"; - - if (p) { - control_block_t * control_block; - - p->q_spec = q_spec? *q_spec : soxr_quality_spec(SOXR_HQ, 0); - - if (q_spec) { /* Backwards compatibility with original API: */ - if (p->q_spec.passband_end > 2) - p->q_spec.passband_end /= 100; - if (p->q_spec.stopband_begin > 2) - p->q_spec.stopband_begin = 2 - p->q_spec.stopband_begin / 100; - } - - p->io_ratio = io_ratio; - p->num_channels = num_channels; - if (io_spec) - p->io_spec = *io_spec; - else - p->io_spec.scale = 1; - - p->runtime_spec = runtime_spec? *runtime_spec : soxr_runtime_spec(1); - - runtime_num("SOXR_MIN_DFT_SIZE", 8, 15, &p->runtime_spec.log2_min_dft_size); - runtime_num("SOXR_LARGE_DFT_SIZE", 8, 20, &p->runtime_spec.log2_large_dft_size); - runtime_num("SOXR_COEFS_SIZE", 100, 800, &p->runtime_spec.coef_size_kbytes); - runtime_num("SOXR_NUM_THREADS", 0, 64, &p->runtime_spec.num_threads); - runtime_flag("SOXR_COEF_INTERP", 2, 0, &p->runtime_spec.flags); - - runtime_flag("SOXR_STRICT_BUF", 1, 2, &p->runtime_spec.flags); - runtime_flag("SOXR_NOSMALLINTOPT", 1, 3, &p->runtime_spec.flags); - - p->io_spec.scale *= datatype_full_scale[p->io_spec.otype & 3] / - datatype_full_scale[p->io_spec.itype & 3]; - - //p->seed = (unsigned long)time(0) ^ (unsigned long)(size_t)p; - p->seed = 0xc2ec33ef97a5ULL; /* Fixed dithering seed for deterministic int16 output */ - -#if WITH_CR32 || WITH_CR32S || WITH_VR32 - if (0 -#if WITH_VR32 - || ((!WITH_CR32 && !WITH_CR32S) || (p->q_spec.flags & SOXR_VR)) -#endif -#if WITH_CR32 || WITH_CR32S - || !(WITH_CR64 || WITH_CR64S) || (p->q_spec.precision <= 20 && !(p->q_spec.flags & SOXR_DOUBLE_PRECISION)) -#endif - ) { - p->deinterleave = (deinterleave_t)_soxr_deinterleave_f; - p->interleave = (interleave_t)_soxr_interleave_f; - control_block = -#if WITH_VR32 - ((!WITH_CR32 && !WITH_CR32S) || (p->q_spec.flags & SOXR_VR))? &_soxr_vr32_cb : -#endif -#if WITH_CR32S - !WITH_CR32 || should_use_simd32()? &_soxr_rate32s_cb : -#endif - &_soxr_rate32_cb; - } -#if WITH_CR64 || WITH_CR64S - else -#endif -#endif -#if WITH_CR64 || WITH_CR64S - { - p->deinterleave = (deinterleave_t)_soxr_deinterleave; - p->interleave = (interleave_t)_soxr_interleave; - control_block = -#if WITH_CR64S - !WITH_CR64 || should_use_simd64()? &_soxr_rate64s_cb : -#endif - &_soxr_rate64_cb; - } -#endif - memcpy(&p->control_block, control_block, sizeof(p->control_block)); - - if (p->num_channels && io_ratio!=0) - error = soxr_set_io_ratio(p, io_ratio, 0); - } - if (error) - soxr_delete(p), p = 0; - if (error0) - *error0 = error; - return p; -} - - - -soxr_error_t soxr_set_input_fn(soxr_t p, - soxr_input_fn_t input_fn, void * input_fn_state, size_t max_ilen) -{ - p->input_fn_state = input_fn_state; - p->input_fn = input_fn; - p->max_ilen = max_ilen? max_ilen : (size_t)-1; - return 0; -} - - - -static void soxr_delete0(soxr_t p) -{ - unsigned i; - - if (p->resamplers) for (i = 0; i < p->num_channels; ++i) { - if (p->resamplers[i]) - resampler_close(p->resamplers[i]); - free(p->resamplers[i]); - } - free(p->resamplers); - free(p->channel_ptrs); - free(p->shared); - - memset(p, 0, sizeof(*p)); -} - - - -double soxr_delay(soxr_t p) -{ - return - (p && !p->error && p->resamplers)? resampler_delay(p->resamplers[0]) : 0; -} - - - -static soxr_error_t fatal_error(soxr_t p, soxr_error_t error) -{ - soxr_delete0(p); - return p->error = error; -} - - - -static soxr_error_t initialise(soxr_t p) -{ - unsigned i; - size_t shared_size, channel_size; - - resampler_sizes(&shared_size, &channel_size); - p->channel_ptrs = calloc(sizeof(*p->channel_ptrs), p->num_channels); - p->shared = calloc(shared_size, 1); - p->resamplers = calloc(sizeof(*p->resamplers), p->num_channels); - if (!p->shared || !p->channel_ptrs || !p->resamplers) - return fatal_error(p, "malloc failed"); - - for (i = 0; i < p->num_channels; ++i) { - soxr_error_t error; - if (!(p->resamplers[i] = calloc(channel_size, 1))) - return fatal_error(p, "malloc failed"); - error = resampler_create( - p->resamplers[i], - p->shared, - p->io_ratio, - &p->q_spec, - &p->runtime_spec, - p->io_spec.scale); - if (error) - return fatal_error(p, error); - } - return 0; -} - - - -soxr_error_t soxr_set_num_channels(soxr_t p, unsigned num_channels) -{ - if (!p) return "invalid soxr_t pointer"; - if (num_channels == p->num_channels) return p->error; - if (!num_channels) return "invalid # of channels"; - if (p->resamplers) return "# of channels can't be changed"; - p->num_channels = num_channels; - return soxr_set_io_ratio(p, p->io_ratio, 0); -} - - - -soxr_error_t soxr_set_io_ratio(soxr_t p, double io_ratio, size_t slew_len) -{ - unsigned i; - soxr_error_t error; - if (!p) return "invalid soxr_t pointer"; - if ((error = p->error)) return error; - if (!p->num_channels) return "must set # channels before O/I ratio"; - if (io_ratio <= 0) return "I/O ratio out-of-range"; - if (!p->channel_ptrs) { - p->io_ratio = io_ratio; - return initialise(p); - } - if (p->control_block[8]) { - for (i = 0; !error && i < p->num_channels; ++i) - resampler_set_io_ratio(p->resamplers[i], io_ratio, slew_len); - return error; - } - return fabs(p->io_ratio - io_ratio) < 1e-15? 0 : - "varying O/I ratio is not supported with this quality level"; -} - - - -void soxr_delete(soxr_t p) -{ - if (p) - soxr_delete0(p), free(p); -} - - - -soxr_error_t soxr_clear(soxr_t p) /* TODO: this, properly. */ -{ - if (p) { - struct soxr tmp = *p; - soxr_delete0(p); - memset(p, 0, sizeof(*p)); - p->input_fn = tmp.input_fn; - p->runtime_spec = tmp.runtime_spec; - p->q_spec = tmp.q_spec; - p->io_spec = tmp.io_spec; - p->num_channels = tmp.num_channels; - p->input_fn_state = tmp.input_fn_state; - memcpy(p->control_block, tmp.control_block, sizeof(p->control_block)); - p->deinterleave = tmp.deinterleave; - p->interleave = tmp.interleave; - return (p->q_spec.flags & RESET_ON_CLEAR)? - soxr_set_io_ratio(p, tmp.io_ratio, 0) : 0; - } - return "invalid soxr_t pointer"; -} - - - -static void soxr_input_1ch(soxr_t p, unsigned i, soxr_cbuf_t src, size_t len) -{ - sample_t * dest = resampler_input(p->resamplers[i], NULL, len); - (*p->deinterleave)(&dest, p->io_spec.itype, &src, len, 1); -} - - - -static size_t soxr_input(soxr_t p, void const * in, size_t len) -{ - bool separated = !!(p->io_spec.itype & SOXR_SPLIT); - unsigned i; - if (!p || p->error) return 0; - if (!in && len) {p->error = "null input buffer pointer"; return 0;} - if (!len) { - p->flushing = true; - return 0; - } - if (separated) - for (i = 0; i < p->num_channels; ++i) - soxr_input_1ch(p, i, ((soxr_cbufs_t)in)[i], len); - else { - for (i = 0; i < p->num_channels; ++i) - p->channel_ptrs[i] = resampler_input(p->resamplers[i], NULL, len); - (*p->deinterleave)( - (sample_t **)p->channel_ptrs, p->io_spec.itype, &in, len, p->num_channels); - } - return len; -} - - - -static size_t soxr_output_1ch(soxr_t p, unsigned i, soxr_buf_t dest, size_t len, bool separated) -{ - sample_t const * src; - if (p->flushing) - resampler_flush(p->resamplers[i]); - resampler_process(p->resamplers[i], len); - src = resampler_output(p->resamplers[i], NULL, &len); - if (separated) - p->clips += (p->interleave)(p->io_spec.otype, &dest, &src, - len, 1, (p->io_spec.flags & SOXR_NO_DITHER)? 0 : &p->seed); - else p->channel_ptrs[i] = (void /* const */ *)src; - return len; -} - - - -static size_t soxr_output_no_callback(soxr_t p, soxr_buf_t out, size_t len) -{ - unsigned u; - size_t done = 0; - bool separated = !!(p->io_spec.otype & SOXR_SPLIT); -#if defined _OPENMP - int i; - if (!p->runtime_spec.num_threads && p->num_channels > 1) -#pragma omp parallel for - for (i = 0; i < (int)p->num_channels; ++i) { - size_t done1; - done1 = soxr_output_1ch(p, (unsigned)i, ((soxr_bufs_t)out)[i], len, separated); - if (!i) - done = done1; - } else -#endif - for (u = 0; u < p->num_channels; ++u) - done = soxr_output_1ch(p, u, ((soxr_bufs_t)out)[u], len, separated); - - if (!separated) - p->clips += (p->interleave)(p->io_spec.otype, &out, (sample_t const * const *)p->channel_ptrs, - done, p->num_channels, (p->io_spec.flags & SOXR_NO_DITHER)? 0 : &p->seed); - return done; -} - - - -size_t soxr_output(soxr_t p, void * out, size_t len0) -{ - size_t odone, odone0 = 0, olen = len0, osize, idone; - size_t ilen = min(p->max_ilen, (size_t)ceil((double)olen *p->io_ratio)); - void const * in = out; /* Set to !=0, so that caller may leave unset. */ - bool was_flushing; - - if (!p || p->error) return 0; - if (!out && len0) {p->error = "null output buffer pointer"; return 0;} - - do { - odone = soxr_output_no_callback(p, out, olen); - odone0 += odone; - if (odone0 == len0 || !p->input_fn || p->flushing) - break; - - osize = soxr_datatype_size(p->io_spec.otype) * p->num_channels; - out = (char *)out + osize * odone; - olen -= odone; - idone = p->input_fn(p->input_fn_state, &in, ilen); - was_flushing = p->flushing; - if (!in) - p->error = "input function reported failure"; - else soxr_input(p, in, idone); - } while (odone || idone || (!was_flushing && p->flushing)); - return odone0; -} - - - -static size_t soxr_i_for_o(soxr_t p, size_t olen, size_t ilen) -{ - size_t result; -#if 0 - if (p->runtime_spec.flags & SOXR_STRICT_BUFFERING) - result = rate_i_for_o(p->resamplers[0], olen); - else -#endif - result = (size_t)ceil((double)olen * p->io_ratio); - return min(result, ilen); -} - - - -#if 0 -static size_t soxr_o_for_i(soxr_t p, size_t ilen, size_t olen) -{ - size_t result = (size_t)ceil((double)ilen / p->io_ratio); - return min(result, olen); -} -#endif - - - -soxr_error_t soxr_process(soxr_t p, - void const * in , size_t ilen0, size_t * idone0, - void * out, size_t olen , size_t * odone0) -{ - size_t ilen, idone, odone = 0; - unsigned u; - bool flush_requested = false; - - if (!p) return "null pointer"; - - if (!in) - flush_requested = true, ilen = ilen0 = 0; - else { - if ((ptrdiff_t)ilen0 < 0) - flush_requested = true, ilen0 = ~ilen0; - if (idone0 && (1 || flush_requested)) - ilen = soxr_i_for_o(p, olen, ilen0); - else - ilen = ilen0/*, olen = soxr_o_for_i(p, ilen, olen)*/; - } - p->flushing |= ilen == ilen0 && flush_requested; - - if (!out && !in) - idone = ilen; - else if (p->io_spec.itype & p->io_spec.otype & SOXR_SPLIT) { /* Both i & o */ -#if defined _OPENMP - int i; - if (!p->runtime_spec.num_threads && p->num_channels > 1) -#pragma omp parallel for - for (i = 0; i < (int)p->num_channels; ++i) { - size_t done; - if (in) - soxr_input_1ch(p, (unsigned)i, ((soxr_cbufs_t)in)[i], ilen); - done = soxr_output_1ch(p, (unsigned)i, ((soxr_bufs_t)out)[i], olen, true); - if (!i) - odone = done; - } else -#endif - for (u = 0; u < p->num_channels; ++u) { - if (in) - soxr_input_1ch(p, u, ((soxr_cbufs_t)in)[u], ilen); - odone = soxr_output_1ch(p, u, ((soxr_bufs_t)out)[u], olen, true); - } - idone = ilen; - } - else { - idone = ilen? soxr_input (p, in , ilen) : 0; - odone = soxr_output(p, out, olen); - } - if (idone0) *idone0 = idone; - if (odone0) *odone0 = odone; - return p->error; -} - - - -soxr_error_t soxr_oneshot( - double irate, double orate, - unsigned num_channels, - void const * in , size_t ilen, size_t * idone, - void * out, size_t olen, size_t * odone, - soxr_io_spec_t const * io_spec, - soxr_quality_spec_t const * q_spec, - soxr_runtime_spec_t const * runtime_spec) -{ - soxr_t resampler; - soxr_error_t error = q_spec? q_spec->e : 0; - if (!error) { - soxr_quality_spec_t q_spec1; - if (!q_spec) - q_spec1 = soxr_quality_spec(SOXR_LQ, 0), q_spec = &q_spec1; - resampler = soxr_create(irate, orate, num_channels, - &error, io_spec, q_spec, runtime_spec); - } - if (!error) { - error = soxr_process(resampler, in, ~ilen, idone, out, olen, odone); - soxr_delete(resampler); - } - return error; -} - - - -soxr_error_t soxr_set_error(soxr_t p, soxr_error_t error) -{ - if (!p) return "null pointer"; - if (!p->error && p->error != error) return p->error; - p->error = error; - return 0; -} diff --git a/soxr-sys/src/soxr.h b/soxr-sys/src/soxr.h deleted file mode 100644 index 09ec7c466..000000000 --- a/soxr-sys/src/soxr.h +++ /dev/null @@ -1,344 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-18 robs@users.sourceforge.net - * - * This library is free software; you can redistribute it and/or modify it - * under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or (at - * your option) any later version. - * - * This library is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser - * General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - - - -/* -------------------------------- Gubbins --------------------------------- */ - -#if !defined soxr_included -#define soxr_included - - -#if defined __cplusplus - #include - extern "C" { -#else - #include -#endif - -#if defined SOXR_DLL - #if defined soxr_EXPORTS - #define SOXR __declspec(dllexport) - #else - #define SOXR __declspec(dllimport) - #endif -#elif defined SOXR_VISIBILITY && defined __GNUC__ && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 1) - #define SOXR __attribute__ ((visibility("default"))) -#else - #define SOXR -#endif - -typedef struct soxr_io_spec soxr_io_spec_t; -typedef struct soxr_quality_spec soxr_quality_spec_t; -typedef struct soxr_runtime_spec soxr_runtime_spec_t; - - - -/* ---------------------------- API conventions -------------------------------- - -Buffer lengths (and occupancies) are expressed as the number of contained -samples per channel. - -Parameter names for buffer lengths have the suffix `len'. - -A single-character `i' or 'o' is often used in names to give context as -input or output (e.g. ilen, olen). */ - - - -/* --------------------------- Version management --------------------------- */ - -/* E.g. #if SOXR_THIS_VERSION >= SOXR_VERSION(0,1,1) ... */ - -#define SOXR_VERSION(x,y,z) (((x)<<16)|((y)<<8)|(z)) -#define SOXR_THIS_VERSION SOXR_VERSION(0,1,3) -#define SOXR_THIS_VERSION_STR "0.1.3" - - - -/* --------------------------- Type declarations ---------------------------- */ - -typedef struct soxr * soxr_t; /* A resampler for 1 or more channels. */ -typedef char const * soxr_error_t; /* 0:no-error; non-0:error. */ - -typedef void * soxr_buf_t; /* 1 buffer of channel-interleaved samples. */ -typedef void const * soxr_cbuf_t; /* Ditto; read-only. */ - -typedef soxr_buf_t const * soxr_bufs_t;/* Or, a separate buffer for each ch. */ -typedef soxr_cbuf_t const * soxr_cbufs_t; /* Ditto; read-only. */ - -typedef void const * soxr_in_t; /* Either a soxr_cbuf_t or soxr_cbufs_t, - depending on itype in soxr_io_spec_t. */ -typedef void * soxr_out_t; /* Either a soxr_buf_t or soxr_bufs_t, - depending on otype in soxr_io_spec_t. */ - - - -/* --------------------------- API main functions --------------------------- */ - -SOXR char const * soxr_version(void); /* Query library version: "libsoxr-x.y.z" */ - -#define soxr_strerror(e) /* Soxr counterpart to strerror. */ \ - ((e)?(e):"no error") - - -/* Create a stream resampler: */ - -SOXR soxr_t soxr_create( - double input_rate, /* Input sample-rate. */ - double output_rate, /* Output sample-rate. */ - unsigned num_channels, /* Number of channels to be used. */ - /* All following arguments are optional (may be set to NULL). */ - soxr_error_t *, /* To report any error during creation. */ - soxr_io_spec_t const *, /* To specify non-default I/O formats. */ - soxr_quality_spec_t const *, /* To specify non-default resampling quality.*/ - soxr_runtime_spec_t const *);/* To specify non-default runtime resources. - - Default io_spec is per soxr_io_spec(SOXR_FLOAT32_I, SOXR_FLOAT32_I) - Default quality_spec is per soxr_quality_spec(SOXR_HQ, 0) - Default runtime_spec is per soxr_runtime_spec(1) */ - - - -/* If not using an app-supplied input function, after creating a stream - * resampler, repeatedly call: */ - -SOXR soxr_error_t soxr_process( - soxr_t resampler, /* As returned by soxr_create. */ - /* Input (to be resampled): */ - soxr_in_t in, /* Input buffer(s); may be NULL (see below). */ - size_t ilen, /* Input buf. length (samples per channel). */ - size_t * idone, /* To return actual # samples used (<= ilen). */ - /* Output (resampled): */ - soxr_out_t out, /* Output buffer(s).*/ - size_t olen, /* Output buf. length (samples per channel). */ - size_t * odone); /* To return actual # samples out (<= olen). - - Note that no special meaning is associated with ilen or olen equal to - zero. End-of-input (i.e. no data is available nor shall be available) - may be indicated by seting `in' to NULL. */ - - - -/* If using an app-supplied input function, it must look and behave like this:*/ - -typedef size_t /* data_len */ - (* soxr_input_fn_t)( /* Supply data to be resampled. */ - void * input_fn_state, /* As given to soxr_set_input_fn (below). */ - soxr_in_t * data, /* Returned data; see below. N.B. ptr to ptr(s)*/ - size_t requested_len); /* Samples per channel, >= returned data_len. - - data_len *data Indicates Meaning - ------- ------- ------------ ------------------------- - !=0 !=0 Success *data contains data to be - input to the resampler. - 0 !=0 (or End-of-input No data is available nor - not set) shall be available. - 0 0 Failure An error occurred whilst trying to - source data to be input to the resampler. */ - -/* and be registered with a previously created stream resampler using: */ - -SOXR soxr_error_t soxr_set_input_fn(/* Set (or reset) an input function.*/ - soxr_t resampler, /* As returned by soxr_create. */ - soxr_input_fn_t, /* Function to supply data to be resampled.*/ - void * input_fn_state, /* If needed by the input function. */ - size_t max_ilen); /* Maximum value for input fn. requested_len.*/ - -/* then repeatedly call: */ - -SOXR size_t /*odone*/ soxr_output(/* Resample and output a block of data.*/ - soxr_t resampler, /* As returned by soxr_create. */ - soxr_out_t data, /* App-supplied buffer(s) for resampled data.*/ - size_t olen); /* Amount of data to output; >= odone. */ - - - -/* Common stream resampler operations: */ - -SOXR soxr_error_t soxr_error(soxr_t); /* Query error status. */ -SOXR size_t * soxr_num_clips(soxr_t); /* Query int. clip counter (for R/W). */ -SOXR double soxr_delay(soxr_t); /* Query current delay in output samples.*/ -SOXR char const * soxr_engine(soxr_t); /* Query resampling engine name. */ - -SOXR soxr_error_t soxr_clear(soxr_t); /* Ready for fresh signal, same config. */ -SOXR void soxr_delete(soxr_t); /* Free resources. */ - - - -/* `Short-cut', single call to resample a (probably short) signal held entirely - * in memory. See soxr_create and soxr_process above for parameter details. - * Note that unlike soxr_create however, the default quality spec. for - * soxr_oneshot is per soxr_quality_spec(SOXR_LQ, 0). */ - -SOXR soxr_error_t soxr_oneshot( - double input_rate, - double output_rate, - unsigned num_channels, - soxr_in_t in , size_t ilen, size_t * idone, - soxr_out_t out, size_t olen, size_t * odone, - soxr_io_spec_t const *, - soxr_quality_spec_t const *, - soxr_runtime_spec_t const *); - - - -/* For variable-rate resampling. See example # 5 for how to create a - * variable-rate resampler and how to use this function. */ - -SOXR soxr_error_t soxr_set_io_ratio(soxr_t, double io_ratio, size_t slew_len); - - - -/* -------------------------- API type definitions -------------------------- */ - -typedef enum { /* Datatypes supported for I/O to/from the resampler: */ - /* Internal; do not use: */ - SOXR_FLOAT32, SOXR_FLOAT64, SOXR_INT32, SOXR_INT16, SOXR_SPLIT = 4, - - /* Use for interleaved channels: */ - SOXR_FLOAT32_I = SOXR_FLOAT32, SOXR_FLOAT64_I, SOXR_INT32_I, SOXR_INT16_I, - - /* Use for split channels: */ - SOXR_FLOAT32_S = SOXR_SPLIT , SOXR_FLOAT64_S, SOXR_INT32_S, SOXR_INT16_S - -} soxr_datatype_t; - -#define soxr_datatype_size(x) /* Returns `sizeof' a soxr_datatype_t sample. */\ - ((unsigned char *)"\4\10\4\2")[(x)&3] - - - -struct soxr_io_spec { /* Typically */ - soxr_datatype_t itype; /* Input datatype. SOXR_FLOAT32_I */ - soxr_datatype_t otype; /* Output datatype. SOXR_FLOAT32_I */ - double scale; /* Linear gain to apply during resampling. 1 */ - void * e; /* Reserved for internal use 0 */ - unsigned long flags; /* Per the following #defines. 0 */ -}; - -#define SOXR_TPDF 0 /* Applicable only if otype is INT16. */ -#define SOXR_NO_DITHER 8u /* Disable the above. */ - - - -struct soxr_quality_spec { /* Typically */ - double precision; /* Conversion precision (in bits). 20 */ - double phase_response; /* 0=minimum, ... 50=linear, ... 100=maximum 50 */ - double passband_end; /* 0dB pt. bandwidth to preserve; nyquist=1 0.913*/ - double stopband_begin; /* Aliasing/imaging control; > passband_end 1 */ - void * e; /* Reserved for internal use. 0 */ - unsigned long flags; /* Per the following #defines. 0 */ -}; - -#define SOXR_ROLLOFF_SMALL 0u /* <= 0.01 dB */ -#define SOXR_ROLLOFF_MEDIUM 1u /* <= 0.35 dB */ -#define SOXR_ROLLOFF_NONE 2u /* For Chebyshev bandwidth. */ - -#define SOXR_HI_PREC_CLOCK 8u /* Increase `irrational' ratio accuracy. */ -#define SOXR_DOUBLE_PRECISION 16u /* Use D.P. calcs even if precision <= 20. */ -#define SOXR_VR 32u /* Variable-rate resampling. */ - - - -struct soxr_runtime_spec { /* Typically */ - unsigned log2_min_dft_size; /* For DFT efficiency. [8,15] 10 */ - unsigned log2_large_dft_size; /* For DFT efficiency. [8,20] 17 */ - unsigned coef_size_kbytes; /* For SOXR_COEF_INTERP_AUTO (below). 400 */ - unsigned num_threads; /* 0: per OMP_NUM_THREADS; 1: 1 thread. 1 */ - void * e; /* Reserved for internal use. 0 */ - unsigned long flags; /* Per the following #defines. 0 */ -}; - /* For `irrational' ratios only: */ -#define SOXR_COEF_INTERP_AUTO 0u /* Auto select coef. interpolation. */ -#define SOXR_COEF_INTERP_LOW 2u /* Man. select: less CPU, more memory. */ -#define SOXR_COEF_INTERP_HIGH 3u /* Man. select: more CPU, less memory. */ - - - -/* -------------------------- API type constructors ------------------------- */ - -/* These functions allow setting of the most commonly-used structure - * parameters, with other parameters being given default values. The default - * values may then be overridden, directly in the structure, if needed. */ - -SOXR soxr_quality_spec_t soxr_quality_spec( - unsigned long recipe, /* Per the #defines immediately below. */ - unsigned long flags); /* As soxr_quality_spec_t.flags. */ - - /* The 5 standard qualities found in SoX: */ -#define SOXR_QQ 0 /* 'Quick' cubic interpolation. */ -#define SOXR_LQ 1 /* 'Low' 16-bit with larger rolloff. */ -#define SOXR_MQ 2 /* 'Medium' 16-bit with medium rolloff. */ -#define SOXR_HQ SOXR_20_BITQ /* 'High quality'. */ -#define SOXR_VHQ SOXR_28_BITQ /* 'Very high quality'. */ - -#define SOXR_16_BITQ 3 -#define SOXR_20_BITQ 4 -#define SOXR_24_BITQ 5 -#define SOXR_28_BITQ 6 -#define SOXR_32_BITQ 7 - /* Reserved for internal use (to be removed): */ -#define SOXR_LSR0Q 8 /* 'Best sinc'. */ -#define SOXR_LSR1Q 9 /* 'Medium sinc'. */ -#define SOXR_LSR2Q 10 /* 'Fast sinc'. */ - -#define SOXR_LINEAR_PHASE 0x00 -#define SOXR_INTERMEDIATE_PHASE 0x10 -#define SOXR_MINIMUM_PHASE 0x30 - -#define SOXR_STEEP_FILTER 0x40 - - - -SOXR soxr_runtime_spec_t soxr_runtime_spec( - unsigned num_threads); - - - -SOXR soxr_io_spec_t soxr_io_spec( - soxr_datatype_t itype, - soxr_datatype_t otype); - - - -/* --------------------------- Advanced use only ---------------------------- */ - -/* For new designs, the following functions/usage will probably not be needed. - * They might be useful when adding soxr into an existing design where values - * for the resampling-rate and/or number-of-channels parameters to soxr_create - * are not available when that function will be called. In such cases, the - * relevant soxr_create parameter(s) can be given as 0, then one or both of the - * following (as appropriate) later invoked (but prior to calling soxr_process - * or soxr_output): - * - * soxr_set_error(soxr, soxr_set_io_ratio(soxr, io_ratio, 0)); - * soxr_set_error(soxr, soxr_set_num_channels(soxr, num_channels)); - */ - -SOXR soxr_error_t soxr_set_error(soxr_t, soxr_error_t); -SOXR soxr_error_t soxr_set_num_channels(soxr_t, unsigned); - - - -#undef SOXR - -#if defined __cplusplus -} -#endif - -#endif diff --git a/soxr-sys/src/soxr.rs b/soxr-sys/src/soxr.rs index ec2a0fd72..8e8085933 100644 --- a/soxr-sys/src/soxr.rs +++ b/soxr-sys/src/soxr.rs @@ -1,4 +1,4 @@ -/* automatically generated by rust-bindgen 0.69.4 */ +/* automatically generated by rust-bindgen 0.71.1 */ pub const SOXR_THIS_VERSION_STR: &[u8; 6] = b"0.1.3\0"; pub const SOXR_TPDF: u32 = 0; @@ -45,10 +45,10 @@ pub type soxr_bufs_t = *const soxr_buf_t; pub type soxr_cbufs_t = *const soxr_cbuf_t; pub type soxr_in_t = *const ::std::os::raw::c_void; pub type soxr_out_t = *mut ::std::os::raw::c_void; -extern "C" { +unsafe extern "C" { pub fn soxr_version() -> *const ::std::os::raw::c_char; } -extern "C" { +unsafe extern "C" { pub fn soxr_create( input_rate: f64, output_rate: f64, @@ -59,7 +59,7 @@ extern "C" { arg4: *const soxr_runtime_spec_t, ) -> soxr_t; } -extern "C" { +unsafe extern "C" { pub fn soxr_process( resampler: soxr_t, in_: soxr_in_t, @@ -77,7 +77,7 @@ pub type soxr_input_fn_t = ::std::option::Option< requested_len: usize, ) -> usize, >; -extern "C" { +unsafe extern "C" { pub fn soxr_set_input_fn( resampler: soxr_t, arg1: soxr_input_fn_t, @@ -85,28 +85,28 @@ extern "C" { max_ilen: usize, ) -> soxr_error_t; } -extern "C" { +unsafe extern "C" { pub fn soxr_output(resampler: soxr_t, data: soxr_out_t, olen: usize) -> usize; } -extern "C" { +unsafe extern "C" { pub fn soxr_error(arg1: soxr_t) -> soxr_error_t; } -extern "C" { +unsafe extern "C" { pub fn soxr_num_clips(arg1: soxr_t) -> *mut usize; } -extern "C" { +unsafe extern "C" { pub fn soxr_delay(arg1: soxr_t) -> f64; } -extern "C" { +unsafe extern "C" { pub fn soxr_engine(arg1: soxr_t) -> *const ::std::os::raw::c_char; } -extern "C" { +unsafe extern "C" { pub fn soxr_clear(arg1: soxr_t) -> soxr_error_t; } -extern "C" { +unsafe extern "C" { pub fn soxr_delete(arg1: soxr_t); } -extern "C" { +unsafe extern "C" { pub fn soxr_oneshot( input_rate: f64, output_rate: f64, @@ -122,7 +122,7 @@ extern "C" { arg3: *const soxr_runtime_spec_t, ) -> soxr_error_t; } -extern "C" { +unsafe extern "C" { pub fn soxr_set_io_ratio(arg1: soxr_t, io_ratio: f64, slew_len: usize) -> soxr_error_t; } pub const soxr_datatype_t_SOXR_FLOAT32: soxr_datatype_t = 0; @@ -148,46 +148,16 @@ pub struct soxr_io_spec { pub e: *mut ::std::os::raw::c_void, pub flags: ::std::os::raw::c_ulong, } -#[test] -fn bindgen_test_layout_soxr_io_spec() { - const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); - let ptr = UNINIT.as_ptr(); - assert_eq!( - ::std::mem::size_of::(), - 32usize, - concat!("Size of: ", stringify!(soxr_io_spec)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(soxr_io_spec)) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).itype) as usize - ptr as usize }, - 0usize, - concat!("Offset of field: ", stringify!(soxr_io_spec), "::", stringify!(itype)) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).otype) as usize - ptr as usize }, - 4usize, - concat!("Offset of field: ", stringify!(soxr_io_spec), "::", stringify!(otype)) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).scale) as usize - ptr as usize }, - 8usize, - concat!("Offset of field: ", stringify!(soxr_io_spec), "::", stringify!(scale)) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).e) as usize - ptr as usize }, - 16usize, - concat!("Offset of field: ", stringify!(soxr_io_spec), "::", stringify!(e)) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).flags) as usize - ptr as usize }, - 24usize, - concat!("Offset of field: ", stringify!(soxr_io_spec), "::", stringify!(flags)) - ); -} +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of soxr_io_spec"][::std::mem::size_of::() - 32usize]; + ["Alignment of soxr_io_spec"][::std::mem::align_of::() - 8usize]; + ["Offset of field: soxr_io_spec::itype"][::std::mem::offset_of!(soxr_io_spec, itype) - 0usize]; + ["Offset of field: soxr_io_spec::otype"][::std::mem::offset_of!(soxr_io_spec, otype) - 4usize]; + ["Offset of field: soxr_io_spec::scale"][::std::mem::offset_of!(soxr_io_spec, scale) - 8usize]; + ["Offset of field: soxr_io_spec::e"][::std::mem::offset_of!(soxr_io_spec, e) - 16usize]; + ["Offset of field: soxr_io_spec::flags"][::std::mem::offset_of!(soxr_io_spec, flags) - 24usize]; +}; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct soxr_quality_spec { @@ -198,61 +168,23 @@ pub struct soxr_quality_spec { pub e: *mut ::std::os::raw::c_void, pub flags: ::std::os::raw::c_ulong, } -#[test] -fn bindgen_test_layout_soxr_quality_spec() { - const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); - let ptr = UNINIT.as_ptr(); - assert_eq!( - ::std::mem::size_of::(), - 48usize, - concat!("Size of: ", stringify!(soxr_quality_spec)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(soxr_quality_spec)) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).precision) as usize - ptr as usize }, - 0usize, - concat!("Offset of field: ", stringify!(soxr_quality_spec), "::", stringify!(precision)) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).phase_response) as usize - ptr as usize }, - 8usize, - concat!( - "Offset of field: ", - stringify!(soxr_quality_spec), - "::", - stringify!(phase_response) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).passband_end) as usize - ptr as usize }, - 16usize, - concat!("Offset of field: ", stringify!(soxr_quality_spec), "::", stringify!(passband_end)) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).stopband_begin) as usize - ptr as usize }, - 24usize, - concat!( - "Offset of field: ", - stringify!(soxr_quality_spec), - "::", - stringify!(stopband_begin) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).e) as usize - ptr as usize }, - 32usize, - concat!("Offset of field: ", stringify!(soxr_quality_spec), "::", stringify!(e)) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).flags) as usize - ptr as usize }, - 40usize, - concat!("Offset of field: ", stringify!(soxr_quality_spec), "::", stringify!(flags)) - ); -} +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of soxr_quality_spec"][::std::mem::size_of::() - 48usize]; + ["Alignment of soxr_quality_spec"][::std::mem::align_of::() - 8usize]; + ["Offset of field: soxr_quality_spec::precision"] + [::std::mem::offset_of!(soxr_quality_spec, precision) - 0usize]; + ["Offset of field: soxr_quality_spec::phase_response"] + [::std::mem::offset_of!(soxr_quality_spec, phase_response) - 8usize]; + ["Offset of field: soxr_quality_spec::passband_end"] + [::std::mem::offset_of!(soxr_quality_spec, passband_end) - 16usize]; + ["Offset of field: soxr_quality_spec::stopband_begin"] + [::std::mem::offset_of!(soxr_quality_spec, stopband_begin) - 24usize]; + ["Offset of field: soxr_quality_spec::e"] + [::std::mem::offset_of!(soxr_quality_spec, e) - 32usize]; + ["Offset of field: soxr_quality_spec::flags"] + [::std::mem::offset_of!(soxr_quality_spec, flags) - 40usize]; +}; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct soxr_runtime_spec { @@ -263,81 +195,38 @@ pub struct soxr_runtime_spec { pub e: *mut ::std::os::raw::c_void, pub flags: ::std::os::raw::c_ulong, } -#[test] -fn bindgen_test_layout_soxr_runtime_spec() { - const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); - let ptr = UNINIT.as_ptr(); - assert_eq!( - ::std::mem::size_of::(), - 32usize, - concat!("Size of: ", stringify!(soxr_runtime_spec)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(soxr_runtime_spec)) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).log2_min_dft_size) as usize - ptr as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(soxr_runtime_spec), - "::", - stringify!(log2_min_dft_size) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).log2_large_dft_size) as usize - ptr as usize }, - 4usize, - concat!( - "Offset of field: ", - stringify!(soxr_runtime_spec), - "::", - stringify!(log2_large_dft_size) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).coef_size_kbytes) as usize - ptr as usize }, - 8usize, - concat!( - "Offset of field: ", - stringify!(soxr_runtime_spec), - "::", - stringify!(coef_size_kbytes) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).num_threads) as usize - ptr as usize }, - 12usize, - concat!("Offset of field: ", stringify!(soxr_runtime_spec), "::", stringify!(num_threads)) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).e) as usize - ptr as usize }, - 16usize, - concat!("Offset of field: ", stringify!(soxr_runtime_spec), "::", stringify!(e)) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).flags) as usize - ptr as usize }, - 24usize, - concat!("Offset of field: ", stringify!(soxr_runtime_spec), "::", stringify!(flags)) - ); -} -extern "C" { +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of soxr_runtime_spec"][::std::mem::size_of::() - 32usize]; + ["Alignment of soxr_runtime_spec"][::std::mem::align_of::() - 8usize]; + ["Offset of field: soxr_runtime_spec::log2_min_dft_size"] + [::std::mem::offset_of!(soxr_runtime_spec, log2_min_dft_size) - 0usize]; + ["Offset of field: soxr_runtime_spec::log2_large_dft_size"] + [::std::mem::offset_of!(soxr_runtime_spec, log2_large_dft_size) - 4usize]; + ["Offset of field: soxr_runtime_spec::coef_size_kbytes"] + [::std::mem::offset_of!(soxr_runtime_spec, coef_size_kbytes) - 8usize]; + ["Offset of field: soxr_runtime_spec::num_threads"] + [::std::mem::offset_of!(soxr_runtime_spec, num_threads) - 12usize]; + ["Offset of field: soxr_runtime_spec::e"] + [::std::mem::offset_of!(soxr_runtime_spec, e) - 16usize]; + ["Offset of field: soxr_runtime_spec::flags"] + [::std::mem::offset_of!(soxr_runtime_spec, flags) - 24usize]; +}; +unsafe extern "C" { pub fn soxr_quality_spec( recipe: ::std::os::raw::c_ulong, flags: ::std::os::raw::c_ulong, ) -> soxr_quality_spec_t; } -extern "C" { +unsafe extern "C" { pub fn soxr_runtime_spec(num_threads: ::std::os::raw::c_uint) -> soxr_runtime_spec_t; } -extern "C" { +unsafe extern "C" { pub fn soxr_io_spec(itype: soxr_datatype_t, otype: soxr_datatype_t) -> soxr_io_spec_t; } -extern "C" { +unsafe extern "C" { pub fn soxr_set_error(arg1: soxr_t, arg2: soxr_error_t) -> soxr_error_t; } -extern "C" { +unsafe extern "C" { pub fn soxr_set_num_channels(arg1: soxr_t, arg2: ::std::os::raw::c_uint) -> soxr_error_t; } diff --git a/soxr-sys/src/std-types.h b/soxr-sys/src/std-types.h deleted file mode 100644 index c5e8636ac..000000000 --- a/soxr-sys/src/std-types.h +++ /dev/null @@ -1,48 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#if !defined soxr_std_types_included -#define soxr_std_types_included - -#include "soxr-config.h" - -#include - -#if HAVE_STDBOOL_H - #include -#else - #undef bool - #undef false - #undef true - #define bool int - #define false 0 - #define true 1 -#endif - -#if HAVE_STDINT_H - #include -#else - #undef int16_t - #undef int32_t - #undef int64_t - #undef uint32_t - #undef uint64_t - #define int16_t short - #if LONG_MAX > 2147483647L - #define int32_t int - #define int64_t long - #elif LONG_MAX < 2147483647L - #error this library requires that 'long int' has at least 32-bits - #else - #define int32_t long - #if defined _MSC_VER - #define int64_t __int64 - #else - #define int64_t long long - #endif - #endif - #define uint32_t unsigned int32_t - #define uint64_t unsigned int64_t -#endif - -#endif diff --git a/soxr-sys/src/util-simd.c b/soxr-sys/src/util-simd.c deleted file mode 100644 index ec548fdee..000000000 --- a/soxr-sys/src/util-simd.c +++ /dev/null @@ -1,89 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#include -#include -#include - -#include "soxr-config.h" - -#define SIMD_ALIGNMENT (sizeof(float) * (1 + (PFFFT_DOUBLE|AVCODEC_FOUND)) * 4) - -void * SIMD_ALIGNED_MALLOC(size_t size) -{ - char * p1 = 0, * p = malloc(size + SIMD_ALIGNMENT); - if (p) { - p1 = (char *)((size_t)(p + SIMD_ALIGNMENT) & ~(SIMD_ALIGNMENT - 1)); - *((void * *)p1 - 1) = p; - } - return p1; -} - - - -void * SIMD_ALIGNED_CALLOC(size_t nmemb, size_t size) -{ - void * p = SIMD_ALIGNED_MALLOC(nmemb * size); - if (p) - memset(p, 0, nmemb * size); - return p; -} - - - -void SIMD_ALIGNED_FREE(void * p1) -{ - if (p1) - free(*((void * *)p1 - 1)); -} - - - -#define PFFT_MACROS_ONLY -#include "pffft.c" - - - -void ORDERED_CONVOLVE_SIMD(int n, void * not_used, float * a, float const * b) -{ - int i; - float ab0, ab1; - v4sf * RESTRICT va = (v4sf *)a; - v4sf const * RESTRICT vb = (v4sf const *)b; - assert(VALIGNED(a) && VALIGNED(b)); - ab0 = a[0] * b[0], ab1 = a[1] * b[1]; - for (i = 0; i < n / 4; i += 2) { - v4sf a1r = va[i+0], a1i = va[i+1]; - v4sf b1r = vb[i+0], b1i = vb[i+1]; - UNINTERLEAVE2(a1r, a1i, a1r, a1i); - UNINTERLEAVE2(b1r, b1i, b1r, b1i); - VCPLXMUL(a1r, a1i, b1r, b1i); - INTERLEAVE2(a1r, a1i, a1r, a1i); - va[i+0] = a1r, va[i+1] = a1i; - } - a[0] = ab0, a[1] = ab1; - (void)not_used; -} - - - -void ORDERED_PARTIAL_CONVOLVE_SIMD(int n, float * a, float const * b) -{ - int i; - float ab0; - v4sf * RESTRICT va = (v4sf *)a; - v4sf const * RESTRICT vb = (v4sf const *)b; - assert(VALIGNED(a) && VALIGNED(b)); - ab0 = a[0] * b[0]; - for (i = 0; i < n / 4; i += 2) { - v4sf a1r = va[i+0], a1i = va[i+1]; - v4sf b1r = vb[i+0], b1i = vb[i+1]; - UNINTERLEAVE2(a1r, a1i, a1r, a1i); - UNINTERLEAVE2(b1r, b1i, b1r, b1i); - VCPLXMUL(a1r, a1i, b1r, b1i); - INTERLEAVE2(a1r, a1i, a1r, a1i); - va[i+0] = a1r, va[i+1] = a1i; - } - a[0] = ab0; - a[1] = b[n] * a[n] - b[n+1] * a[n+1]; -} diff --git a/soxr-sys/src/util32s.c b/soxr-sys/src/util32s.c deleted file mode 100644 index b9c9e08bd..000000000 --- a/soxr-sys/src/util32s.c +++ /dev/null @@ -1,8 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#define PFFFT_DOUBLE 0 - -#include "util32s.h" - -#include "util-simd.c" diff --git a/soxr-sys/src/util32s.h b/soxr-sys/src/util32s.h deleted file mode 100644 index 12226e501..000000000 --- a/soxr-sys/src/util32s.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#if !defined soxr_util32s_included -#define soxr_util32s_included - -#include - -void * _soxr_simd32_aligned_malloc(size_t); -void * _soxr_simd32_aligned_calloc(size_t, size_t); -void _soxr_simd32_aligned_free(void *); - -#define SIMD_ALIGNED_MALLOC _soxr_simd32_aligned_malloc -#define SIMD_ALIGNED_CALLOC _soxr_simd32_aligned_calloc -#define SIMD_ALIGNED_FREE _soxr_simd32_aligned_free - -void _soxr_ordered_convolve_simd32(int n, void * not_used, float * a, float const * b); -void _soxr_ordered_partial_convolve_simd32(int n, float * a, float const * b); - -#define ORDERED_CONVOLVE_SIMD _soxr_ordered_convolve_simd32 -#define ORDERED_PARTIAL_CONVOLVE_SIMD _soxr_ordered_partial_convolve_simd32 - -#endif diff --git a/soxr-sys/src/util64s.c b/soxr-sys/src/util64s.c deleted file mode 100644 index 0faa9e9ef..000000000 --- a/soxr-sys/src/util64s.c +++ /dev/null @@ -1,8 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#define PFFFT_DOUBLE 1 - -#include "util64s.h" - -#include "util-simd.c" diff --git a/soxr-sys/src/util64s.h b/soxr-sys/src/util64s.h deleted file mode 100644 index 7beeb8991..000000000 --- a/soxr-sys/src/util64s.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#if !defined soxr_util64s_included -#define soxr_util64s_included - -#include - -void * _soxr_simd64_aligned_malloc(size_t); -void * _soxr_simd64_aligned_calloc(size_t, size_t); -void _soxr_simd64_aligned_free(void *); - -#define SIMD_ALIGNED_MALLOC _soxr_simd64_aligned_malloc -#define SIMD_ALIGNED_CALLOC _soxr_simd64_aligned_calloc -#define SIMD_ALIGNED_FREE _soxr_simd64_aligned_free - -void _soxr_ordered_convolve_simd64(int n, void * not_used, double * a, double const * b); -void _soxr_ordered_partial_convolve_simd64(int n, double * a, double const * b); - -#define ORDERED_CONVOLVE_SIMD _soxr_ordered_convolve_simd64 -#define ORDERED_PARTIAL_CONVOLVE_SIMD _soxr_ordered_partial_convolve_simd64 - -#endif diff --git a/soxr-sys/src/vr-coefs.c b/soxr-sys/src/vr-coefs.c deleted file mode 100644 index a57bec8c2..000000000 --- a/soxr-sys/src/vr-coefs.c +++ /dev/null @@ -1,115 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2013 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -/* Generate the filter coefficients for variable-rate resampling. */ - -#include -#include -#include -#define PI 3.14159265358979323846 /* Since M_PI can't be relied on */ - -static void print(double * h, int m, double l, char const * name) -{ /* Print out a filter: */ - int i, N = l? (int)(l*m)-(l>1) : m, R=(N+1)/2; - int a = !l||l>1? 0:N-R, b = l>1? R:N; - printf("static float const %s[] = {\n", name); - if (l>1) printf(" 0.f,"); else if (!l) l=1; - for (i=a; h && i 0 && x E[i] >= x E[i z 1]) -#define PEAK do {if (k0)-(E[i]<0);} while (0) - -typedef struct {double x, beta, gamma;} coef_t; - -static double amp_response(coef_t * coef, int R, double f, int i) -{ - double n = 0, d = 0, x = cos(PI*f), t; - for (; i < R; d += t = coef[i].beta / t, n += coef[i].gamma * t, ++i) - if (fabs(t = x - coef[i].x) < 1e-9) return coef[i].gamma; - return n/d; -} - -static void fir(int m, double l, double Fp0, double Fs0, - double weight0, int density, char const * name) -{ - double Fp=Fp0/l, Fs=Fs0/l, weight=1/weight0, inc[2], Ws=1-Fs; - int N = (int)(l*m)-(l>1), R=(N+1)/2, NP=R+1, grid_size=1+density*R+1, pass=0; - int n1 = Ws>=(2*R-1)*Fp? 1:(int)(R*Fp/(Fp+Ws)+.5), n2=NP-n1, _1, i, j, k; - int * peak = calloc(sizeof(*peak), (size_t)(NP+1)), * P=peak, end[2]; - coef_t * coef = calloc(sizeof(*coef), (size_t)(NP)); - float * E = calloc(sizeof(*E ), (size_t)(grid_size)); - double d, n, e, f, mult, delta, sum, hi, lo, * A = (double*)E, *h=0; - - if (!P || !coef || !E) goto END; - end[0] = n1 * density, end[1] = grid_size-1; /* Create prototype peaks: */ - inc[0] = Fp/end[0], inc[1] = n2==1? 0 : Ws / ((n2-1)*density); - for (i=0; iE[i+1]) || (EE(-,-) && E[i] 1) goto END; /* Too many/few? */ - P = peak + k * (fabs(E[peak[0]]) < fabs(E[peak[NP]])); /* rm 1st? */ - - for (lo = hi = fabs(E[P[0]]), i=1; ihi? e:hi; - } while ((hi-lo)/hi > .001 && ++pass < 20); - /* Create impulse response from final amp. resp. coefs: */ - if (!(h = malloc(sizeof(*h)*(size_t)N))) goto END; - for (i = 0; i < R; f = 2.*i/N, A[i++] = amp_response(coef,R,f,0)*even_adj(f)); - for (i = 0; i < R; h[N-1-i] = h[i] = sum/N, ++i) - for (sum=*A, j=1; j -#include "math-wrap.h" -#include -#include -#include "internal.h" -#define FIFO_SIZE_T int -#define FIFO_MIN 0x8000 -#include "fifo.h" -#include "vr-coefs.h" - -#define FADE_LEN_BITS 9 -#define PHASE_BITS_D 10 -#define PHASE_BITS_U 9 - -#define PHASES0_D 12 -#define POLY_FIR_LEN_D 20 -#define PHASES0_U 6 -#define POLY_FIR_LEN_U 12 - -#define MULT32 (65536. * 65536.) -#define PHASES_D (1 << PHASE_BITS_D) -#define PHASES_U (1 << PHASE_BITS_U) - -#define CONVOLVE \ - _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \ - _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \ - _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - -#define HALF_FIR_LEN_2 (iAL(half_fir_coefs) - 1) -#define HALF_FIR_LEN_4 (HALF_FIR_LEN_2 / 2) - -#define _ sum += (input[-i] + input[i]) * half_fir_coefs[i], ++i; -static float half_fir(float const * input) -{ - long i = 1; - float sum = input[0] * half_fir_coefs[0]; - CONVOLVE CONVOLVE - assert(i == HALF_FIR_LEN_2 + 1); - return (float)sum; -} -#undef _ - -#define _ sum += (input[-i] + input[i]) * half_fir_coefs[2*i], ++i; -static float double_fir0(float const * input) -{ - int i = 1; - float sum = input[0] * half_fir_coefs[0]; - CONVOLVE - assert(i == HALF_FIR_LEN_4 + 1); - return (float)(sum * 2); -} -#undef _ - -#define _ sum += (input[-i] + input[1+i]) * half_fir_coefs[2*i+1], ++i; -static float double_fir1(float const * input) -{ - int i = 0; - float sum = 0; - CONVOLVE - assert(i == HALF_FIR_LEN_4 + 0); - return (float)(sum * 2); -} -#undef _ - -static float fast_half_fir(float const * input) -{ - int i = 0; - float sum = input[0] * .5f; -#define _ sum += (input[-(2*i+1)] + input[2*i+1]) * fast_half_fir_coefs[i], ++i; - _ _ _ _ _ _ -#undef _ - return (float)sum; -} - -#define IIR_FILTER _ _ _ _ _ _ _ -#define _ in1=(in1-p->y[i])*iir_coefs[i]+tmp1;tmp1=p->y[i],p->y[i]=in1;++i;\ - in0=(in0-p->y[i])*iir_coefs[i]+tmp0;tmp0=p->y[i],p->y[i]=in0;++i; - -typedef struct {float x[2], y[AL(iir_coefs)];} half_iir_t; - -static float half_iir1(half_iir_t * p, float in0, float in1) -{ - int i = 0; - float tmp0, tmp1; - tmp0 = p->x[0], p->x[0] = in0; - tmp1 = p->x[1], p->x[1] = in1; - IIR_FILTER - p->y[i] = in1 = (in1 - p->y[i]) * iir_coefs[i] + tmp1; - return in1 + in0; -} -#undef _ - -static void half_iir(half_iir_t * p, float * obuf, float const * ibuf, int olen) -{ - int i; - for (i=0; i < olen; obuf[i] = (float)half_iir1(p, ibuf[i*2], ibuf[i*2+1]),++i); -} - -static void half_phase(half_iir_t * p, float * buf, int len) -{ - float const small_normal = 1/MULT32/MULT32; /* To quash denormals on path 0.*/ - int i; - for (i = 0; i < len; buf[i] = (float)half_iir1(p, buf[i], 0), ++i); -#define _ p->y[i] += small_normal, i += 2; - i = 0, _ IIR_FILTER -#undef _ -#define _ p->y[i] -= small_normal, i += 2; - i = 0, _ IIR_FILTER -#undef _ -} - -#define coef(coef_p, interp_order, fir_len, phase_num, coef_interp_num, \ - fir_coef_num) coef_p[(fir_len) * ((interp_order) + 1) * (phase_num) + \ - ((interp_order) + 1) * (fir_coef_num) + (interp_order - coef_interp_num)] - -#define COEF(h,l,i) ((i)<0||(i)>=(l)?0:(h)[(i)>(l)/2?(l)-(i):(i)]) -static void prepare_coefs(float * coefs, int n, int phases0, int phases, - float const * coefs0, double multiplier) -{ - double k[6]; - int length0 = n * phases0, length = n * phases, K0 = iAL(k)/2 - 1, i, j, pos; - float * coefs1 = malloc(((size_t)length / 2 + 1) * sizeof(*coefs1)); - float * p = coefs1, f0, f1 = 0; - - for (j = 0; j < iAL(k); k[j] = COEF(coefs0, length0, j - K0), ++j); - for (pos = i = 0; i < length0 / 2; ++i) { - double b=(1/24.)*(k[0]+k[4]+6*k[2]-4*(k[1]+k[3])),d=.5*(k[1]+k[3])-k[2]-b; - double a=(1/120.)*(k[5]-k[2]-9*(9*b+d)+2.5*(k[3]-k[1])-2*(k[4]-k[0])); - double c=(1/12.)*(k[4]-k[0]-2*(k[3]-k[1])-60*a),e=.5*(k[3]-k[1])-a-c; - for (; pos / phases == i; pos += phases0) { - double x = (double)(pos % phases) / phases; - *p++ = (float)(k[K0] + ((((a*x + b)*x + c)*x + d)*x + e)*x); - } - for (j = 0; j < iAL(k) - 1; k[j] = k[j + 1], ++j); - k[j] = COEF(coefs0, length0, i + iAL(k) / 2 + 1); - } - if (!(length & 1)) - *p++ = (float)k[K0]; - assert(p - coefs1 == length / 2 + 1); - - for (i = 0; i < n; ++i) for (j = phases - 1; j >= 0; --j, f1 = f0) { - pos = (n - 1 - i) * phases + j; - f0 = COEF(coefs1, length, pos) * (float)multiplier; - coef(coefs, 1, n, j, 0, i) = (float)f0; - coef(coefs, 1, n, j, 1, i) = (float)(f1 - f0); - } - free(coefs1); -} - -#define _ sum += (b *x + a)*input[i], ++i; -#define a (coef(poly_fir_coefs_d, 1, POLY_FIR_LEN_D, phase, 0,i)) -#define b (coef(poly_fir_coefs_d, 1, POLY_FIR_LEN_D, phase, 1,i)) -static float poly_fir_coefs_d[POLY_FIR_LEN_D * PHASES_D * 2]; - -static float poly_fir1_d(float const * input, uint32_t frac) -{ - int i = 0, phase = (int)(frac >> (32 - PHASE_BITS_D)); - float sum = 0, x = (float)(frac << PHASE_BITS_D) * (float)(1 / MULT32); - _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - assert(i == POLY_FIR_LEN_D); - return (float)sum; -} -#undef a -#undef b -#define a (coef(poly_fir_coefs_u, 1, POLY_FIR_LEN_U, phase, 0,i)) -#define b (coef(poly_fir_coefs_u, 1, POLY_FIR_LEN_U, phase, 1,i)) -static float poly_fir_coefs_u[POLY_FIR_LEN_U * PHASES_U * 2]; - -static float poly_fir1_u(float const * input, uint32_t frac) -{ - int i = 0, phase = (int)(frac >> (32 - PHASE_BITS_U)); - float sum = 0, x = (float)(frac << PHASE_BITS_U) * (float)(1 / MULT32); - _ _ _ _ _ _ _ _ _ _ _ _ - assert(i == POLY_FIR_LEN_U); - return (float)sum; -} -#undef a -#undef b -#undef _ - -#define ADD_TO(x,y) x.all += y.all -#define SUBTRACT_FROM(x,y) x.all -= y.all -#define FRAC(x) x.part.frac -#define INT(x) x.part.integer - -typedef struct { - union { - int64_t all; -#if HAVE_BIGENDIAN - struct {int32_t integer; uint32_t frac;} part; -#else - struct {uint32_t frac; int32_t integer;} part; -#endif - } at, step, step_step; - float const * input; - int len, stage_num; - bool is_d; /* true: downsampling at x2 rate; false: upsampling at 1x rate. */ - double step_mult; -} stream_t; - -static int poly_fir_d(stream_t * s, float * output, int olen) -{ - int i; - float const * input = s->input - POLY_FIR_LEN_D / 2 + 1; - for (i = 0; i < olen && INT(s->at) < s->len; ++i) { - output[i] = poly_fir1_d(input + INT(s->at), FRAC(s->at)); - ADD_TO(s->at, s->step); - if (!(INT(s->at) < s->len)) { - SUBTRACT_FROM(s->at, s->step); - break; - } - output[++i] = poly_fir1_d(input + INT(s->at), FRAC(s->at)); - ADD_TO(s->at, s->step); - ADD_TO(s->step, s->step_step); - } - return i; -} - -static int poly_fir_fade_d( - stream_t * s, float const * vol, int step, float * output, int olen) -{ - int i; - float const * input = s->input - POLY_FIR_LEN_D / 2 + 1; - for (i = 0; i < olen && INT(s->at) < s->len; ++i, vol += step) { - output[i] += *vol * poly_fir1_d(input + INT(s->at), FRAC(s->at)); - ADD_TO(s->at, s->step); - if (!(INT(s->at) < s->len)) { - SUBTRACT_FROM(s->at, s->step); - break; - } - output[++i] += *(vol += step) * poly_fir1_d(input + INT(s->at),FRAC(s->at)); - ADD_TO(s->at, s->step); - ADD_TO(s->step, s->step_step); - } - return i; -} - -static int poly_fir_u(stream_t * s, float * output, int olen) -{ - int i; - float const * input = s->input - POLY_FIR_LEN_U / 2 + 1; - for (i = 0; i < olen && INT(s->at) < s->len; ++i) { - output[i] = poly_fir1_u(input + INT(s->at), FRAC(s->at)); - ADD_TO(s->at, s->step); - ADD_TO(s->step, s->step_step); - } - return i; -} - -static int poly_fir_fade_u( - stream_t * s, float const * vol, int step, float * output, int olen) -{ - int i; - float const * input = s->input - POLY_FIR_LEN_U / 2 + 1; - for (i = 0; i < olen && INT(s->at) < s->len; i += 2, vol += step) { - output[i] += *vol * poly_fir1_u(input + INT(s->at), FRAC(s->at)); - ADD_TO(s->at, s->step); - ADD_TO(s->step, s->step_step); - } - return i; -} - -#define shiftr(x,by) ((by) < 0? (x) << (-(by)) : (x) >> (by)) -#define shiftl(x,by) shiftr(x,-(by)) -#define stage_occupancy(s) (fifo_occupancy(&(s)->fifo) - 4*HALF_FIR_LEN_2) -#define stage_read_p(s) ((float *)fifo_read_ptr(&(s)->fifo) + 2*HALF_FIR_LEN_2) -#define stage_preload(s) memset(fifo_reserve(&(s)->fifo, (s)->preload), \ - 0, sizeof(float) * (size_t)(s)->preload); - -typedef struct { - fifo_t fifo; - double step_mult; - int is_fast, x_fade_len, preload; -} stage_t; - -typedef struct { - int num_stages0, num_stages, flushing; - int fade_len, slew_len, xfade, stage_inc, switch_stage_num; - double new_io_ratio, default_io_ratio; - stage_t * stages; - fifo_t output_fifo; - half_iir_t halfer; - stream_t current, fadeout; /* Current/fade-in, fadeout streams. */ -} rate_t; - -static float fade_coefs[(2 << FADE_LEN_BITS) + 1]; - -static void vr_init(rate_t * p, double default_io_ratio, int num_stages, double mult) -{ - int i; - assert(num_stages >= 0); - memset(p, 0, sizeof(*p)); - - p->num_stages0 = num_stages; - p->num_stages = num_stages = max(num_stages, 1); - p->stages = (stage_t *)calloc((unsigned)num_stages + 1, sizeof(*p->stages)) + 1; - for (i = -1; i < p->num_stages; ++i) { - stage_t * s = &p->stages[i]; - fifo_create(&s->fifo, sizeof(float)); - s->step_mult = 2 * MULT32 / shiftl(2, i); - s->preload = i < 0? 0 : i == 0? 2 * HALF_FIR_LEN_2 : 3 * HALF_FIR_LEN_2 / 2; - stage_preload(s); - s->is_fast = true; - lsx_debug("%-3i preload=%i", i, s->preload); - } - fifo_create(&p->output_fifo, sizeof(float)); - p->default_io_ratio = default_io_ratio; - if (fade_coefs[0]==0) { - for (i = 0; i < iAL(fade_coefs); ++i) - fade_coefs[i] = (float)(.5 * (1 + cos(M_PI * i / (AL(fade_coefs) - 1)))); - prepare_coefs(poly_fir_coefs_u, POLY_FIR_LEN_U, PHASES0_U, PHASES_U, coefs0_u, mult); - prepare_coefs(poly_fir_coefs_d, POLY_FIR_LEN_D, PHASES0_D, PHASES_D, coefs0_d, mult *.5); - } - assert(fade_coefs[0]); -} - -static void enter_new_stage(rate_t * p, int occupancy0) -{ - p->current.len = shiftr(occupancy0, p->current.stage_num); - p->current.input = stage_read_p(&p->stages[p->current.stage_num]); - - p->current.step_mult = p->stages[p->current.stage_num].step_mult; - p->current.is_d = p->current.stage_num >= 0; - if (p->current.is_d) - p->current.step_mult *= .5; -} - -static void set_step(stream_t * p, double io_ratio) -{ - p->step.all = (int64_t)(io_ratio * p->step_mult + .5); -} - -static bool set_step_step(stream_t * p, double io_ratio, int slew_len) -{ - int64_t dif; - int difi; - stream_t tmp = *p; - set_step(&tmp, io_ratio); - dif = tmp.step.all - p->step.all; - dif = dif < 0? dif - (slew_len >> 1) : dif + (slew_len >> 1); - difi = (int)dif; /* Try to avoid int64_t div. */ - p->step_step.all = difi == dif? difi / slew_len : dif / slew_len; - return p->step_step.all != 0; -} - -static void vr_set_io_ratio(rate_t * p, double io_ratio, size_t slew_len) -{ - assert(io_ratio > 0); - if (slew_len) { - if (!set_step_step(&p->current, io_ratio, p->slew_len = (int)slew_len)) - p->slew_len = 0, p->new_io_ratio = 0, p->fadeout.step_step.all = 0; - else { - p->new_io_ratio = io_ratio; - if (p->fade_len) - set_step_step(&p->fadeout, io_ratio, p->slew_len); - } - } - else { - if (p->default_io_ratio!=0) { /* Then this is the first call to this fn. */ - int octave = (int)floor(log(io_ratio) / M_LN2); - p->current.stage_num = octave < 0? -1 : min(octave, p->num_stages0-1); - enter_new_stage(p, 0); - } - else if (p->fade_len) - set_step(&p->fadeout, io_ratio); - set_step(&p->current, io_ratio); - if (p->default_io_ratio!=0) FRAC(p->current.at) = FRAC(p->current.step) >> 1; - p->default_io_ratio = 0; - } -} - -static bool do_input_stage(rate_t * p, int stage_num, int sign, int min_stage_num) -{ - int i = 0; - float * dest; - stage_t * s = &p->stages[stage_num]; - stage_t * s1 = &p->stages[stage_num - sign]; - float const * src = (float *)fifo_read_ptr(&s1->fifo) + HALF_FIR_LEN_2; - int len = shiftr(fifo_occupancy(&s1->fifo) - HALF_FIR_LEN_2 * 2, sign); - int already_done = fifo_occupancy(&s->fifo) - s->preload; - if ((len -= already_done) <= 0) - return false; - src += shiftl(already_done, sign); - - dest = fifo_reserve(&s->fifo, len); - if (stage_num < 0) for (; i < len; ++src) - dest[i++] = double_fir0(src), dest[i++] = double_fir1(src); - else { - bool should_be_fast = p->stage_inc; - if (!s->x_fade_len && stage_num == p->switch_stage_num) { - p->switch_stage_num = 0; - if (s->is_fast != should_be_fast) { - s->x_fade_len = 1 << FADE_LEN_BITS, s->is_fast = should_be_fast, ++p->xfade; - lsx_debug("xfade level %i, inc?=%i", stage_num, p->stage_inc); - } - } - if (s->x_fade_len) { - float const * vol1 = fade_coefs + (s->x_fade_len << 1); - float const * vol2 = fade_coefs + (((1 << FADE_LEN_BITS) - s->x_fade_len) << 1); - int n = min(len, s->x_fade_len); - /*lsx_debug("xfade level %i, inc?=%i len=%i n=%i", stage_num, p->stage_inc, s->x_fade_len, n);*/ - if (should_be_fast) - for (; i < n; vol2 += 2, vol1 -= 2, src += 2) - dest[i++] = *vol1 * fast_half_fir(src) + *vol2 * half_fir(src); - else for (; i < n; vol2 += 2, vol1 -= 2, src += 2) - dest[i++] = *vol2 * fast_half_fir(src) + *vol1 * half_fir(src); - s->x_fade_len -= n; - p->xfade -= !s->x_fade_len; - } - if (stage_num < min_stage_num) - for (; i < len; dest[i++] = fast_half_fir(src), src += 2); - else for (; i < len; dest[i++] = half_fir(src), src += 2); - } - if (p->flushing > 0) - stage_preload(s); - return true; -} - -static int vr_process(rate_t * p, int olen0) -{ - assert(p->num_stages > 0); - if (p->default_io_ratio!=0) - vr_set_io_ratio(p, p->default_io_ratio, 0); - { - float * output = fifo_reserve(&p->output_fifo, olen0); - int j, odone0 = 0, min_stage_num = p->current.stage_num; - int occupancy0, max_stage_num = min_stage_num; - if (p->fade_len) { - min_stage_num = min(min_stage_num, p->fadeout.stage_num); - max_stage_num = max(max_stage_num, p->fadeout.stage_num); - } - - for (j = min(min_stage_num, 0); j <= max_stage_num; ++j) - if (j && !do_input_stage(p, j, j < 0? -1 : 1, min_stage_num)) - break; - if (p->flushing > 0) - p->flushing = -1; - - occupancy0 = shiftl(max(0,stage_occupancy(&p->stages[max_stage_num])), max_stage_num); - p->current.len = shiftr(occupancy0, p->current.stage_num); - p->current.input = stage_read_p(&p->stages[p->current.stage_num]); - if (p->fade_len) { - p->fadeout.len = shiftr(occupancy0, p->fadeout.stage_num); - p->fadeout.input = stage_read_p(&p->stages[p->fadeout.stage_num]); - } - - while (odone0 < olen0) { - int odone, odone2, olen = olen0 - odone0, stage_dif = 0, shift; - float buf[64 << 1]; - - olen = min(olen, (int)(AL(buf) >> 1)); - if (p->slew_len) - olen = min(olen, p->slew_len); - else if (p->new_io_ratio!=0) { - set_step(&p->current, p->new_io_ratio); - set_step(&p->fadeout, p->new_io_ratio); - p->fadeout.step_step.all = p->current.step_step.all = 0; - p->new_io_ratio = 0; - } - if (!p->flushing && !p->fade_len && !p->xfade) { - if (p->current.is_d) { - if (INT(p->current.step) && FRAC(p->current.step)) - stage_dif = 1, ++max_stage_num; - else if (!INT(p->current.step) && FRAC(p->current.step) < (1u << 31)) - stage_dif = -1, --min_stage_num; - } else if (INT(p->current.step) > 1 && FRAC(p->current.step)) - stage_dif = 1, ++max_stage_num; - } - if (stage_dif) { - int n = p->current.stage_num + stage_dif; - if (n >= p->num_stages) - --max_stage_num; - else { - p->stage_inc = stage_dif > 0; - p->fadeout = p->current; - p->current.stage_num += stage_dif; - if (!p->stage_inc) - p->switch_stage_num = p->current.stage_num; - if ((p->current.stage_num < 0 && stage_dif < 0) || - (p->current.stage_num > 0 && stage_dif > 0)) { - stage_t * s = &p->stages[p->current.stage_num]; - fifo_clear(&s->fifo); - stage_preload(s); - s->is_fast = false; - do_input_stage(p, p->current.stage_num, stage_dif, p->current.stage_num); - } - if (p->current.stage_num > 0 && stage_dif < 0) { - int idone = INT(p->current.at); - stage_t * s = &p->stages[p->current.stage_num]; - fifo_trim_to(&s->fifo, 2 * HALF_FIR_LEN_2 + idone + (POLY_FIR_LEN_D >> 1)); - do_input_stage(p, p->current.stage_num, 1, p->current.stage_num); - } - enter_new_stage(p, occupancy0); - shift = -stage_dif; -#define lshift(x,by) (x)=(by)>0?(x)<<(by):(x)>>-(by) - lshift(p->current.at.all, shift); - shift += p->fadeout.is_d - p->current.is_d; - lshift(p->current.step.all, shift); - lshift(p->current.step_step.all, shift); - p->fade_len = AL(fade_coefs) - 1; - lsx_debug("switch from stage %i to %i, x2 from %i to %i", p->fadeout.stage_num, p->current.stage_num, p->fadeout.is_d, p->current.is_d); - } - } - - if (p->fade_len) { - float const * vol1 = fade_coefs + p->fade_len; - float const * vol2 = fade_coefs + (iAL(fade_coefs) - 1 - p->fade_len); - int olen2 = (olen = min(olen, p->fade_len >> 1)) << 1; - - /* x2 is more fine-grained so may fail to produce a pair of samples - * where x1 would not (the x1 second sample is a zero so is always - * available). So do x2 first, then feed odone to the second one. */ - memset(buf, 0, sizeof(*buf) * (size_t)olen2); - if (p->current.is_d && p->fadeout.is_d) { - odone = poly_fir_fade_d(&p->current, vol1,-1, buf, olen2); - odone2 = poly_fir_fade_d(&p->fadeout, vol2, 1, buf, odone); - } else if (p->current.is_d) { - odone = poly_fir_fade_d(&p->current, vol1,-1, buf, olen2); - odone2 = poly_fir_fade_u(&p->fadeout, vol2, 2, buf, odone); - } else { - assert(p->fadeout.is_d); - odone = poly_fir_fade_d(&p->fadeout, vol2, 1, buf, olen2); - odone2 = poly_fir_fade_u(&p->current, vol1,-2, buf, odone); - } - assert(odone == odone2); - (void)odone2; - p->fade_len -= odone; - if (!p->fade_len) { - if (p->stage_inc) - p->switch_stage_num = min_stage_num++; - else - --max_stage_num; - } - half_iir(&p->halfer, &output[odone0], buf, odone >>= 1); - } - else if (p->current.is_d) { - odone = poly_fir_d(&p->current, buf, olen << 1) >> 1; - half_iir(&p->halfer, &output[odone0], buf, odone); - } - else { - odone = poly_fir_u(&p->current, &output[odone0], olen); - if (p->num_stages0) - half_phase(&p->halfer, &output[odone0], odone); - } - odone0 += odone; - if (p->slew_len) - p->slew_len -= odone; - if (odone != olen) - break; /* Need more input. */ - } { - int from = max(0, max_stage_num), to = min(0, min_stage_num); - int i, idone = shiftr(INT(p->current.at), from - p->current.stage_num); - INT(p->current.at) -= shiftl(idone, from - p->current.stage_num); - if (p->fade_len) - INT(p->fadeout.at) -= shiftl(idone, from - p->fadeout.stage_num); - for (i = from; i >= to; --i, idone <<= 1) - fifo_read(&p->stages[i].fifo, idone, NULL); - } - fifo_trim_by(&p->output_fifo, olen0 - odone0); - return odone0; - } -} - -static float * vr_input(rate_t * p, float const * input, size_t n) -{ - return fifo_write(&p->stages[0].fifo, (int)n, input); -} - -static float const * vr_output(rate_t * p, float * output, size_t * n) -{ - fifo_t * fifo = &p->output_fifo; - if (1 || !p->num_stages0) - return fifo_read(fifo, (int)(*n = min(*n, (size_t)fifo_occupancy(fifo))), output); - else { /* Ignore this complication for now. */ - int const IIR_DELAY = 2; - float * ptr = fifo_read_ptr(fifo); - int olen = min((int)*n, max(0, fifo_occupancy(fifo) - IIR_DELAY)); - *n = (size_t)olen; - if (output) - memcpy(output, ptr + IIR_DELAY, *n * sizeof(*output)); - fifo_read(fifo, olen, NULL); - return ptr + IIR_DELAY; - } -} - -static void vr_flush(rate_t * p) -{ - if (!p->flushing) { - stage_preload(&p->stages[0]); - ++p->flushing; - } -} - -static void vr_close(rate_t * p) -{ - int i; - - fifo_delete(&p->output_fifo); - for (i = -1; i < p->num_stages; ++i) { - stage_t * s = &p->stages[i]; - fifo_delete(&s->fifo); - } - free(p->stages - 1); -} - -static double vr_delay(rate_t * p) -{ - return 100; /* TODO */ - (void)p; -} - -static void vr_sizes(size_t * shared, size_t * channel) -{ - *shared = 0; - *channel = sizeof(rate_t); -} - -static char const * vr_create(void * channel, void * shared,double max_io_ratio, - void * q_spec, void * r_spec, double scale) -{ - double x = max_io_ratio; - int n; - for (n = 0; x > 1; x *= .5, ++n); - vr_init(channel, max_io_ratio, n, scale); - return 0; - (void)shared, (void)q_spec, (void)r_spec; -} - -static char const * vr_id(void) -{ - return "vr32"; -} - -typedef void (* fn_t)(void); -fn_t _soxr_vr32_cb[] = { - (fn_t)vr_input, - (fn_t)vr_process, - (fn_t)vr_output, - (fn_t)vr_flush, - (fn_t)vr_close, - (fn_t)vr_delay, - (fn_t)vr_sizes, - (fn_t)vr_create, - (fn_t)vr_set_io_ratio, - (fn_t)vr_id, -}; From 37bebf39844073dac55f2cf231cef61eb8330a36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 21 Mar 2025 22:26:19 +0100 Subject: [PATCH 6/8] test --- livekit-ffi/src/server/resampler.rs | 57 ++++++++++++++++++----------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/livekit-ffi/src/server/resampler.rs b/livekit-ffi/src/server/resampler.rs index 6faa68b02..a7a7a7d8e 100644 --- a/livekit-ffi/src/server/resampler.rs +++ b/livekit-ffi/src/server/resampler.rs @@ -32,74 +32,84 @@ pub struct SoxResampler { unsafe impl Send for SoxResampler {} impl SoxResampler { + /// Creates a new SoxResampler using soxr's default quality and runtime options. + /// The provided `QualitySpec` and `RuntimeSpec` are ignored and null pointers are passed + /// to `soxr_create` to let soxr choose its defaults. pub fn new( input_rate: f64, output_rate: f64, num_channels: u32, io_spec: IOSpec, - quality_spec: QualitySpec, - runtime_spec: RuntimeSpec, + _quality_spec: QualitySpec, // ignored – using default soxr options + _runtime_spec: RuntimeSpec, // ignored – using default soxr options ) -> Result { - let error: *mut *const c_char = std::ptr::null_mut(); + let mut err: *const c_char = std::ptr::null(); let soxr_ptr = unsafe { + // Create io_spec from our types. let io_spec = soxr_sys::soxr_io_spec( to_soxr_datatype(io_spec.input_type), to_soxr_datatype(io_spec.output_type), ); - let quality_spec = soxr_sys::soxr_quality_spec( - quality_spec.quality as c_ulong, - quality_spec.flags as c_ulong, - ); - - let runtime_spec = soxr_sys::soxr_runtime_spec(runtime_spec.num_threads); - + // Pass null pointers for quality and runtime specs so that + // soxr will use its internal default options. soxr_sys::soxr_create( input_rate, output_rate, num_channels, - error, - &io_spec, - &quality_spec, - &runtime_spec, + &mut err, + std::ptr::null(), // default io_spec + std::ptr::null(), // default quality + std::ptr::null(), // default runtime ) }; - if !error.is_null() { - let error_msg = unsafe { std::ffi::CStr::from_ptr(*error) }; + if !err.is_null() || soxr_ptr.is_null() { + let error_msg = unsafe { std::ffi::CStr::from_ptr(err) }; return Err(error_msg.to_string_lossy().to_string()); } Ok(Self { soxr_ptr, out_buf: Vec::new(), input_rate, output_rate, num_channels }) } + /// Processes the input buffer and returns the resampled output. + /// This version verifies that the input length is a multiple of the number of channels + /// and uses valid pointers for tracking the number of frames consumed and produced. pub fn push(&mut self, input: &[i16]) -> Result<&[i16], String> { + // Ensure the input length is a multiple of the channel count. + if input.len() % self.num_channels as usize != 0 { + return Err("Input length must be a multiple of num_channels".to_string()); + } + let input_length = input.len() / self.num_channels as usize; let ratio = self.output_rate / self.input_rate; - let soxr_delay = unsafe { soxr_sys::soxr_delay(self.soxr_ptr) }; + let delay = unsafe { soxr_sys::soxr_delay(self.soxr_ptr) }; + // Estimate maximum output frames: processed frames + delay + an extra frame. let max_out_len = - ((input_length as f64 * ratio).ceil() as usize) + (soxr_delay.ceil() as usize) + 1; + (input_length as f64 * ratio).ceil() as usize + (delay.ceil() as usize) + 1; let required_output_size = max_out_len * self.num_channels as usize; if self.out_buf.len() < required_output_size { self.out_buf.resize(required_output_size, 0); } - let mut idone: usize = 0; + // Using valid pointers for both consumed input (idone) and produced output (odone) let mut odone: usize = 0; + let error = unsafe { soxr_sys::soxr_process( self.soxr_ptr, input.as_ptr() as *const c_void, input_length, - &mut idone, + std::ptr::null_mut(), self.out_buf.as_mut_ptr() as *mut c_void, max_out_len, &mut odone, ) }; + if !error.is_null() { let error_msg = unsafe { std::ffi::CStr::from_ptr(error) }; return Err(error_msg.to_string_lossy().to_string()); @@ -109,19 +119,22 @@ impl SoxResampler { Ok(&self.out_buf[..output_samples]) } + /// Flushes the internal state, processing any remaining data. + /// Passes null for the input pointer and for the idone parameter (since it is not needed). pub fn flush(&mut self) -> Result<&[i16], String> { let mut odone: usize = 0; let error = unsafe { soxr_sys::soxr_process( self.soxr_ptr, - std::ptr::null(), + std::ptr::null(), // no more input 0, - std::ptr::null_mut(), + std::ptr::null_mut(), // no need to know how many were consumed self.out_buf.as_mut_ptr() as *mut c_void, self.out_buf.len(), &mut odone, ) }; + if !error.is_null() { let error_msg = unsafe { std::ffi::CStr::from_ptr(error) }; return Err(error_msg.to_string_lossy().to_string()); From e814b1ab8b99be148937ed29e781d323922ad739 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 21 Mar 2025 22:35:49 +0100 Subject: [PATCH 7/8] Update resampler.rs --- livekit-ffi/src/server/resampler.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/livekit-ffi/src/server/resampler.rs b/livekit-ffi/src/server/resampler.rs index a7a7a7d8e..908343a64 100644 --- a/livekit-ffi/src/server/resampler.rs +++ b/livekit-ffi/src/server/resampler.rs @@ -59,7 +59,7 @@ impl SoxResampler { output_rate, num_channels, &mut err, - std::ptr::null(), // default io_spec + &io_spec, std::ptr::null(), // default quality std::ptr::null(), // default runtime ) From 46877b974484ac06256d381ccdb4ea0c57e34191 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81o=20Monnom?= Date: Fri, 21 Mar 2025 22:54:19 +0100 Subject: [PATCH 8/8] test --- livekit-ffi/src/server/resampler.rs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/livekit-ffi/src/server/resampler.rs b/livekit-ffi/src/server/resampler.rs index 908343a64..113054d56 100644 --- a/livekit-ffi/src/server/resampler.rs +++ b/livekit-ffi/src/server/resampler.rs @@ -43,7 +43,7 @@ impl SoxResampler { _quality_spec: QualitySpec, // ignored – using default soxr options _runtime_spec: RuntimeSpec, // ignored – using default soxr options ) -> Result { - let mut err: *const c_char = std::ptr::null(); + let mut err: *mut *const c_char = std::ptr::null_mut(); let soxr_ptr = unsafe { // Create io_spec from our types. @@ -58,7 +58,7 @@ impl SoxResampler { input_rate, output_rate, num_channels, - &mut err, + err, &io_spec, std::ptr::null(), // default quality std::ptr::null(), // default runtime @@ -66,7 +66,7 @@ impl SoxResampler { }; if !err.is_null() || soxr_ptr.is_null() { - let error_msg = unsafe { std::ffi::CStr::from_ptr(err) }; + let error_msg = unsafe { std::ffi::CStr::from_ptr(*err) }; return Err(error_msg.to_string_lossy().to_string()); } @@ -77,11 +77,6 @@ impl SoxResampler { /// This version verifies that the input length is a multiple of the number of channels /// and uses valid pointers for tracking the number of frames consumed and produced. pub fn push(&mut self, input: &[i16]) -> Result<&[i16], String> { - // Ensure the input length is a multiple of the channel count. - if input.len() % self.num_channels as usize != 0 { - return Err("Input length must be a multiple of num_channels".to_string()); - } - let input_length = input.len() / self.num_channels as usize; let ratio = self.output_rate / self.input_rate; let delay = unsafe { soxr_sys::soxr_delay(self.soxr_ptr) }; @@ -96,6 +91,7 @@ impl SoxResampler { } // Using valid pointers for both consumed input (idone) and produced output (odone) + let mut idone: usize = 0; let mut odone: usize = 0; let error = unsafe { @@ -103,7 +99,7 @@ impl SoxResampler { self.soxr_ptr, input.as_ptr() as *const c_void, input_length, - std::ptr::null_mut(), + &mut idone, self.out_buf.as_mut_ptr() as *mut c_void, max_out_len, &mut odone,