From 3230a6c01aa5a98500e2d043fa687635c35a7b9c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?The=CC=81o=20Monnom?= <theo.monnom@outlook.com>
Date: Mon, 10 Mar 2025 23:17:33 +0100
Subject: [PATCH 1/8] bump manylinux to 2_34 and build soxr with -std=gnu99

---
 .github/workflows/ffi-builds.yml | 4 ++--
 soxr-sys/build.rs                | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ffi-builds.yml b/.github/workflows/ffi-builds.yml
index 491a721cc..1e1b9311c 100644
--- a/.github/workflows/ffi-builds.yml
+++ b/.github/workflows/ffi-builds.yml
@@ -68,13 +68,13 @@ jobs:
             buildargs: --no-default-features --features "rustls-tls-webpki-roots"
           - os: ubuntu-latest
             platform: linux
-            build_image: quay.io/pypa/manylinux_2_28_x86_64
+            build_image: quay.io/pypa/manylinux_2_34_x86_64
             dylib: liblivekit_ffi.so
             target: x86_64-unknown-linux-gnu
             name: ffi-linux-x86_64
           - os: ubuntu-24.04-arm
             platform: linux
-            build_image: quay.io/pypa/manylinux_2_28_aarch64
+            build_image: quay.io/pypa/manylinux_2_34_aarch64
             dylib: liblivekit_ffi.so
             target: aarch64-unknown-linux-gnu
             name: ffi-linux-arm64
diff --git a/soxr-sys/build.rs b/soxr-sys/build.rs
index 17fef5ace..e214a0c60 100644
--- a/soxr-sys/build.rs
+++ b/soxr-sys/build.rs
@@ -7,7 +7,7 @@ fn main() {
     build.define("SOXR_LIB", "0");
 
     build
-        .flag_if_supported("-std=gnu89")
+        .flag_if_supported("-std=gnu99")
         .flag_if_supported("-Wnested-externs")
         .flag_if_supported("-Wmissing-prototypes")
         .flag_if_supported("-Wstrict-prototypes")

From af03d0018a09be9a56eeffd3c10678cb063a8019 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?The=CC=81o=20Monnom?= <theo.monnom@outlook.com>
Date: Tue, 11 Mar 2025 00:19:47 +0100
Subject: [PATCH 2/8] rint-clip.h: call fe_clear_invalid() at the start of the rint-clip loops

---
 soxr-sys/src/rint-clip.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/soxr-sys/src/rint-clip.h b/soxr-sys/src/rint-clip.h
index bfb645847..32a03ea84 100644
--- a/soxr-sys/src/rint-clip.h
+++ b/soxr-sys/src/rint-clip.h
@@ -39,6 +39,7 @@ static void RINT_CLIP(RINT_T * const dest, FLOATX const * const src,
   COPY_SEED
   DITHER_VARS;
   for (; i < n; ++i) {
+    fe_clear_invalid();
     FLOATD const d = src[i] DITHERING;
     RINT(dest[stride * i], d);
     if (fe_test_invalid()) {
@@ -62,6 +63,7 @@ static size_t LSX_RINT_CLIP(void * * const dest0, FLOATX const * const src,
 #if defined FE_INVALID && defined FPU_RINT
 #define _ RINT(dest[i], src[i] DITHERING); ++i
   for (i = 0; i < (n & ~15u);) {
+    fe_clear_invalid();
     COPY_SEED1;
     DITHER_VARS;
     DO_16;
@@ -105,6 +107,7 @@ static size_t LSX_RINT_CLIP_2(void * * dest0, FLOATX const * const * srcs,
   for (j = 0; j < stride; ++j, ++dest) {
     FLOATX const * const src = srcs[j];
     for (i = 0; i < (n & ~15u);) {
+      fe_clear_invalid();
       COPY_SEED1;
       DITHER_VARS;
       DO_16;

From e1c7543962807ce757e924c5768718ff0a68cd14 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?The=CC=81o=20Monnom?= <theo.8bits@gmail.com>
Date: Fri, 21 Mar 2025 19:46:09 +0100
Subject: [PATCH 3/8] soxr: widen dither seed to unsigned long long and use a fixed seed

---
 soxr-sys/src/data-io.c   |  4 ++--
 soxr-sys/src/data-io.h   |  4 ++--
 soxr-sys/src/rint-clip.h | 10 +++++-----
 soxr-sys/src/soxr.c      |  7 ++++---
 4 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/soxr-sys/src/data-io.c b/soxr-sys/src/data-io.c
index fb6167583..2a93fda12 100644
--- a/soxr-sys/src/data-io.c
+++ b/soxr-sys/src/data-io.c
@@ -172,7 +172,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */
 
 #if WITH_CR64 || WITH_CR64S
 size_t /* clips */ _soxr_interleave(soxr_datatype_t data_type, void * * dest0,
-  double const * const * src, size_t n, unsigned ch, unsigned long * seed)
+  double const * const * src, size_t n, unsigned ch, unsigned long long * seed)
 {
   switch (data_type & 3) {
     case SOXR_FLOAT32: INTERLEAVE_TO(float, 0);
@@ -198,7 +198,7 @@ size_t /* clips */ _soxr_interleave(soxr_datatype_t data_type, void * * dest0,
 
 #if WITH_CR32 || WITH_CR32S || WITH_VR32
 size_t /* clips */ _soxr_interleave_f(soxr_datatype_t data_type, void * * dest0,
-  float const * const * src, size_t n, unsigned ch, unsigned long * seed)
+  float const * const * src, size_t n, unsigned ch, unsigned long long * seed)
 {
   switch (data_type & 3) {
     case SOXR_FLOAT32: INTERLEAVE_TO(float, 1);
diff --git a/soxr-sys/src/data-io.h b/soxr-sys/src/data-io.h
index 83a0a133d..28e2d8907 100644
--- a/soxr-sys/src/data-io.h
+++ b/soxr-sys/src/data-io.h
@@ -26,7 +26,7 @@ size_t /* clips */ _soxr_interleave(
     double const * const * src,
     size_t n,
     unsigned ch,
-    unsigned long * seed);
+    unsigned long long * seed);
 
 size_t /* clips */ _soxr_interleave_f(
     soxr_datatype_t data_type,
@@ -34,6 +34,6 @@ size_t /* clips */ _soxr_interleave_f(
     float const * const * src,
     size_t n,
     unsigned ch,
-    unsigned long * seed);
+    unsigned long long * seed);
 
 #endif
diff --git a/soxr-sys/src/rint-clip.h b/soxr-sys/src/rint-clip.h
index 32a03ea84..3294f4eaf 100644
--- a/soxr-sys/src/rint-clip.h
+++ b/soxr-sys/src/rint-clip.h
@@ -4,12 +4,12 @@
 #if defined DITHER
 
 #define DITHERING + (1./32)*(int)(((ran1>>=3)&31)-((ran2>>=3)&31))
-#define DITHER_RAND (seed = 1664525UL * seed + 1013904223UL) >> 3
-#define DITHER_VARS unsigned long ran1 = DITHER_RAND, ran2 = DITHER_RAND
-#define SEED_ARG , unsigned long * seed0
+#define DITHER_RAND (seed = 1664525ULL * seed + 1013904223ULL) >> 3
+#define DITHER_VARS unsigned long long ran1 = DITHER_RAND, ran2 = DITHER_RAND
+#define SEED_ARG , unsigned long long * seed0
 #define SAVE_SEED *seed0 = seed
-#define COPY_SEED unsigned long seed = *seed0;
-#define COPY_SEED1 unsigned long seed1 = seed
+#define COPY_SEED unsigned long long seed = *seed0;
+#define COPY_SEED1 unsigned long long seed1 = seed
 #define PASS_SEED1 , &seed1
 #define PASS_SEED  , &seed
 #define FLOATD double
diff --git a/soxr-sys/src/soxr.c b/soxr-sys/src/soxr.c
index c2861ac7c..0ece116bf 100644
--- a/soxr-sys/src/soxr.c
+++ b/soxr-sys/src/soxr.c
@@ -64,7 +64,7 @@ typedef void * resampler_shared_t; /* Between channels. */
 typedef void (* deinterleave_t)(sample_t * * dest,
     soxr_datatype_t data_type, void const * * src0, size_t n, unsigned ch);
 typedef size_t (* interleave_t)(soxr_datatype_t data_type, void * * dest,
-    sample_t const * const * src, size_t, unsigned, unsigned long *);
+    sample_t const * const * src, size_t, unsigned, unsigned long long *);
 
 struct soxr {
   unsigned num_channels;
@@ -86,7 +86,7 @@ struct soxr {
 
   void * * channel_ptrs;
   size_t clips;
-  unsigned long seed;
+  unsigned long long seed;
   int flushing;
 };
 
@@ -428,7 +428,8 @@ soxr_t soxr_create(
     p->io_spec.scale *= datatype_full_scale[p->io_spec.otype & 3] /
                         datatype_full_scale[p->io_spec.itype & 3];
 
-    p->seed = (unsigned long)time(0) ^ (unsigned long)(size_t)p;
+    //p->seed = (unsigned long)time(0) ^ (unsigned long)(size_t)p;
+    p->seed = 0xc2ec33ef97a5ULL; /* Fixed dithering seed for deterministic int16 output */
 
 #if WITH_CR32 || WITH_CR32S || WITH_VR32
     if (0

From d4580101631188b7518d85b99b03ad2c2a0b40b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?The=CC=81o=20Monnom?= <theo.8bits@gmail.com>
Date: Fri, 21 Mar 2025 20:10:38 +0100
Subject: [PATCH 4/8] soxr-sys: revert to -std=gnu89 and drop -Wno-long-long

---
 soxr-sys/build.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/soxr-sys/build.rs b/soxr-sys/build.rs
index e214a0c60..7a8266294 100644
--- a/soxr-sys/build.rs
+++ b/soxr-sys/build.rs
@@ -7,7 +7,7 @@ fn main() {
     build.define("SOXR_LIB", "0");
 
     build
-        .flag_if_supported("-std=gnu99")
+        .flag_if_supported("-std=gnu89")
         .flag_if_supported("-Wnested-externs")
         .flag_if_supported("-Wmissing-prototypes")
         .flag_if_supported("-Wstrict-prototypes")
@@ -16,8 +16,8 @@ fn main() {
         .flag_if_supported("-Wextra")
         .flag_if_supported("-pedantic")
         .flag_if_supported("-Wundef")
-        .flag_if_supported("-Wpointer-arith")
-        .flag_if_supported("-Wno-long-long");
+        .flag_if_supported("-Wpointer-arith");
+    //.flag_if_supported("-Wno-long-long");
 
     // TODO(theomonnom): Add SIMD support
     let sources = [

From 0779fb0670c3c3ee0eb73582aca73d3debedfc61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?The=CC=81o=20Monnom?= <theo.8bits@gmail.com>
Date: Fri, 21 Mar 2025 21:35:40 +0100
Subject: [PATCH 5/8] soxr-sys: replace vendored soxr sources with a cmake-built submodule

---
 .gitmodules                   |    3 +
 Cargo.lock                    |   21 +-
 soxr-sys/Cargo.toml           |    4 +-
 soxr-sys/build.rs             |   53 +-
 soxr-sys/generate_bindings.sh |    2 +-
 soxr-sys/soxr                 |    1 +
 soxr-sys/src/LICENCE          |   23 -
 soxr-sys/src/aliases.h        |   39 -
 soxr-sys/src/avfft32.c        |   33 -
 soxr-sys/src/avfft32s.c       |   32 -
 soxr-sys/src/ccrw2.h          |   75 --
 soxr-sys/src/cr-core.c        |  314 ------
 soxr-sys/src/cr.c             |  588 ----------
 soxr-sys/src/cr.h             |  178 ---
 soxr-sys/src/cr32.c           |    8 -
 soxr-sys/src/cr32s.c          |    8 -
 soxr-sys/src/cr64.c           |    8 -
 soxr-sys/src/cr64s.c          |    8 -
 soxr-sys/src/data-io.c        |  223 ----
 soxr-sys/src/data-io.h        |   39 -
 soxr-sys/src/dbesi0.c         |  149 ---
 soxr-sys/src/dev32s.h         |   54 -
 soxr-sys/src/dev64s.h         |   42 -
 soxr-sys/src/fft4g.c          | 1346 -----------------------
 soxr-sys/src/fft4g.h          |   23 -
 soxr-sys/src/fft4g32.c        |   36 -
 soxr-sys/src/fft4g32s.c       |   31 -
 soxr-sys/src/fft4g64.c        |   35 -
 soxr-sys/src/fft4g_cache.h    |   92 --
 soxr-sys/src/fifo.h           |  125 ---
 soxr-sys/src/filter.c         |  277 -----
 soxr-sys/src/filter.h         |   44 -
 soxr-sys/src/half-coefs.h     |   75 --
 soxr-sys/src/half-fir.h       |   61 --
 soxr-sys/src/internal.h       |   84 --
 soxr-sys/src/math-wrap.h      |   31 -
 soxr-sys/src/pffft-avx.h      |   40 -
 soxr-sys/src/pffft-wrap.c     |  110 --
 soxr-sys/src/pffft.c          | 1946 ---------------------------------
 soxr-sys/src/pffft.h          |  197 ----
 soxr-sys/src/pffft32.c        |   39 -
 soxr-sys/src/pffft32s.c       |   34 -
 soxr-sys/src/pffft64s.c       |   34 -
 soxr-sys/src/poly-fir.h       |  150 ---
 soxr-sys/src/poly-fir0.h      |   56 -
 soxr-sys/src/rdft.h           |   31 -
 soxr-sys/src/rdft_t.h         |   24 -
 soxr-sys/src/rint-clip.h      |  161 ---
 soxr-sys/src/rint.h           |  102 --
 soxr-sys/src/samplerate.h     |    1 -
 soxr-sys/src/soxr-config.h    |   28 -
 soxr-sys/src/soxr-lsr.c       |  198 ----
 soxr-sys/src/soxr-lsr.h       |   78 --
 soxr-sys/src/soxr.c           |  843 --------------
 soxr-sys/src/soxr.h           |  344 ------
 soxr-sys/src/soxr.rs          |  237 ++--
 soxr-sys/src/std-types.h      |   48 -
 soxr-sys/src/util-simd.c      |   89 --
 soxr-sys/src/util32s.c        |    8 -
 soxr-sys/src/util32s.h        |   23 -
 soxr-sys/src/util64s.c        |    8 -
 soxr-sys/src/util64s.h        |   23 -
 soxr-sys/src/vr-coefs.c       |  115 --
 soxr-sys/src/vr-coefs.h       |   94 --
 soxr-sys/src/vr32.c           |  651 -----------
 65 files changed, 96 insertions(+), 9781 deletions(-)
 create mode 160000 soxr-sys/soxr
 delete mode 100644 soxr-sys/src/LICENCE
 delete mode 100644 soxr-sys/src/aliases.h
 delete mode 100644 soxr-sys/src/avfft32.c
 delete mode 100644 soxr-sys/src/avfft32s.c
 delete mode 100644 soxr-sys/src/ccrw2.h
 delete mode 100644 soxr-sys/src/cr-core.c
 delete mode 100644 soxr-sys/src/cr.c
 delete mode 100644 soxr-sys/src/cr.h
 delete mode 100644 soxr-sys/src/cr32.c
 delete mode 100644 soxr-sys/src/cr32s.c
 delete mode 100644 soxr-sys/src/cr64.c
 delete mode 100644 soxr-sys/src/cr64s.c
 delete mode 100644 soxr-sys/src/data-io.c
 delete mode 100644 soxr-sys/src/data-io.h
 delete mode 100644 soxr-sys/src/dbesi0.c
 delete mode 100644 soxr-sys/src/dev32s.h
 delete mode 100644 soxr-sys/src/dev64s.h
 delete mode 100644 soxr-sys/src/fft4g.c
 delete mode 100644 soxr-sys/src/fft4g.h
 delete mode 100644 soxr-sys/src/fft4g32.c
 delete mode 100644 soxr-sys/src/fft4g32s.c
 delete mode 100644 soxr-sys/src/fft4g64.c
 delete mode 100644 soxr-sys/src/fft4g_cache.h
 delete mode 100644 soxr-sys/src/fifo.h
 delete mode 100644 soxr-sys/src/filter.c
 delete mode 100644 soxr-sys/src/filter.h
 delete mode 100644 soxr-sys/src/half-coefs.h
 delete mode 100644 soxr-sys/src/half-fir.h
 delete mode 100644 soxr-sys/src/internal.h
 delete mode 100644 soxr-sys/src/math-wrap.h
 delete mode 100644 soxr-sys/src/pffft-avx.h
 delete mode 100644 soxr-sys/src/pffft-wrap.c
 delete mode 100644 soxr-sys/src/pffft.c
 delete mode 100644 soxr-sys/src/pffft.h
 delete mode 100644 soxr-sys/src/pffft32.c
 delete mode 100644 soxr-sys/src/pffft32s.c
 delete mode 100644 soxr-sys/src/pffft64s.c
 delete mode 100644 soxr-sys/src/poly-fir.h
 delete mode 100644 soxr-sys/src/poly-fir0.h
 delete mode 100644 soxr-sys/src/rdft.h
 delete mode 100644 soxr-sys/src/rdft_t.h
 delete mode 100644 soxr-sys/src/rint-clip.h
 delete mode 100644 soxr-sys/src/rint.h
 delete mode 100644 soxr-sys/src/samplerate.h
 delete mode 100644 soxr-sys/src/soxr-config.h
 delete mode 100644 soxr-sys/src/soxr-lsr.c
 delete mode 100644 soxr-sys/src/soxr-lsr.h
 delete mode 100644 soxr-sys/src/soxr.c
 delete mode 100644 soxr-sys/src/soxr.h
 delete mode 100644 soxr-sys/src/std-types.h
 delete mode 100644 soxr-sys/src/util-simd.c
 delete mode 100644 soxr-sys/src/util32s.c
 delete mode 100644 soxr-sys/src/util32s.h
 delete mode 100644 soxr-sys/src/util64s.c
 delete mode 100644 soxr-sys/src/util64s.h
 delete mode 100644 soxr-sys/src/vr-coefs.c
 delete mode 100644 soxr-sys/src/vr-coefs.h
 delete mode 100644 soxr-sys/src/vr32.c

diff --git a/.gitmodules b/.gitmodules
index b70842c2a..bdbb4f029 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,3 +4,6 @@
 [submodule "yuv-sys/libyuv"]
 	path = yuv-sys/libyuv
 	url = https://chromium.googlesource.com/libyuv/libyuv
+[submodule "soxr-sys/soxr"]
+	path = soxr-sys/soxr
+	url = https://github.com/dofuuz/soxr/
diff --git a/Cargo.lock b/Cargo.lock
index f8a5eac4c..f21db717e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -431,9 +431,9 @@ checksum = "a2698f953def977c68f935bb0dfa959375ad4638570e969e2f1e9f433cbf1af6"
 
 [[package]]
 name = "cc"
-version = "1.0.83"
+version = "1.1.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
+checksum = "e9e8aabfac534be767c909e0690571677d49f41bd8465ae876fe043d52ba5292"
 dependencies = [
  "jobserver",
  "libc",
@@ -495,6 +495,15 @@ dependencies = [
  "libloading",
 ]
 
+[[package]]
+name = "cmake"
+version = "0.1.54"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0"
+dependencies = [
+ "cc",
+]
+
 [[package]]
 name = "codespan-reporting"
 version = "0.11.1"
@@ -1481,9 +1490,9 @@ checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
 
 [[package]]
 name = "jobserver"
-version = "0.1.27"
+version = "0.1.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d"
+checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0"
 dependencies = [
  "libc",
 ]
@@ -1655,7 +1664,7 @@ dependencies = [
 
 [[package]]
 name = "livekit-ffi"
-version = "0.12.15"
+version = "0.12.16"
 dependencies = [
  "console-subscriber",
  "dashmap",
@@ -2713,7 +2722,7 @@ dependencies = [
 name = "soxr-sys"
 version = "0.1.0"
 dependencies = [
- "cc",
+ "cmake",
  "hound",
 ]
 
diff --git a/soxr-sys/Cargo.toml b/soxr-sys/Cargo.toml
index fde7a4c42..bd95141a5 100644
--- a/soxr-sys/Cargo.toml
+++ b/soxr-sys/Cargo.toml
@@ -5,11 +5,9 @@ authors = ["Theo Monnom <theo.8bits@gmail.com"]
 edition = "2021"
 license = "Apache-2.0"
 
-[dependencies]
-
 
 [build-dependencies]
-cc = "1.0"
+cmake = "0.1"
 
 [dev-dependencies]
 hound = "3.4"
diff --git a/soxr-sys/build.rs b/soxr-sys/build.rs
index 7a8266294..1e2163527 100644
--- a/soxr-sys/build.rs
+++ b/soxr-sys/build.rs
@@ -1,46 +1,17 @@
-use std::env;
+use cmake::Config;
 
 fn main() {
-    let mut build = cc::Build::new();
+    let dst = Config::new("soxr")
+        .define("BUILD_TESTS", "OFF")
+        .define("WITH_OPENMP", "OFF")
+        .define("WITH_LSR_BINDINGS", "OFF")
+        .define("BUILD_SHARED_LIBS", "OFF")
+        .define("WITH_VR32", "OFF")
+        .define("CMAKE_POSITION_INDEPENDENT_CODE", "ON")
+        .build();
 
-    build.include("src");
-    build.define("SOXR_LIB", "0");
+    let lib_dir = dst.join("lib");
 
-    build
-        .flag_if_supported("-std=gnu89")
-        .flag_if_supported("-Wnested-externs")
-        .flag_if_supported("-Wmissing-prototypes")
-        .flag_if_supported("-Wstrict-prototypes")
-        .flag_if_supported("-Wconversion")
-        .flag_if_supported("-Wall")
-        .flag_if_supported("-Wextra")
-        .flag_if_supported("-pedantic")
-        .flag_if_supported("-Wundef")
-        .flag_if_supported("-Wpointer-arith");
-    //.flag_if_supported("-Wno-long-long");
-
-    // TODO(theomonnom): Add SIMD support
-    let sources = [
-        "src/soxr.c",
-        "src/data-io.c",
-        "src/dbesi0.c",
-        "src/filter.c",
-        "src/cr.c",
-        "src/cr32.c",
-        "src/fft4g32.c",
-        "src/fft4g.c",
-        "src/fft4g64.c",
-        "src/vr32.c",
-    ];
-
-    for source in &sources {
-        build.file(source);
-    }
-
-    build.compile("libsoxr.a");
-
-    let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap();
-    if target_os.as_str() != "windows" {
-        println!("cargo:rustc-link-lib=m");
-    }
+    println!("cargo:rustc-link-search=native={}", lib_dir.display());
+    println!("cargo:rustc-link-lib=static=soxr");
 }
diff --git a/soxr-sys/generate_bindings.sh b/soxr-sys/generate_bindings.sh
index 62c7525bc..96c15dfb8 100755
--- a/soxr-sys/generate_bindings.sh
+++ b/soxr-sys/generate_bindings.sh
@@ -1 +1 @@
-bindgen src/soxr.h -o src/soxr.rs
+bindgen soxr/src/soxr.h -o src/soxr.rs
diff --git a/soxr-sys/soxr b/soxr-sys/soxr
new file mode 160000
index 000000000..a66f3eeee
--- /dev/null
+++ b/soxr-sys/soxr
@@ -0,0 +1 @@
+Subproject commit a66f3eeeeb62a32403ff143b756eed92b1ec6b62
diff --git a/soxr-sys/src/LICENCE b/soxr-sys/src/LICENCE
deleted file mode 100644
index 43e5a7165..000000000
--- a/soxr-sys/src/LICENCE
+++ /dev/null
@@ -1,23 +0,0 @@
-SoX Resampler Library       Copyright (c) 2007-18 robs@users.sourceforge.net
-
-This library is free software; you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 2.1 of the License, or (at
-your option) any later version.
-
-This library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
-General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with this library; if not, see <https://www.gnu.org/licenses/>.
-
-
-Notes
-
-1. Re software in the `examples' directory: works that are not resampling
-examples but are based on the given examples -- for example, applications using
-the library -- shall not be considered to be derivative works of the examples.
-
-2. If building with pffft.c, see the licence embedded in that file.
diff --git a/soxr-sys/src/aliases.h b/soxr-sys/src/aliases.h
deleted file mode 100644
index d1a392f6e..000000000
--- a/soxr-sys/src/aliases.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#if defined SOXR_LIB
-
-#define lsx_bessel_I_0                 _soxr_bessel_I_0
-#define lsx_cdft_f                     _soxr_cdft_f
-#define lsx_cdft                       _soxr_cdft
-#define lsx_clear_fft_cache_f          _soxr_clear_fft_cache_f
-#define lsx_clear_fft_cache            _soxr_clear_fft_cache
-#define lsx_ddct_f                     _soxr_ddct_f
-#define lsx_ddct                       _soxr_ddct
-#define lsx_ddst_f                     _soxr_ddst_f
-#define lsx_ddst                       _soxr_ddst
-#define lsx_design_lpf                 _soxr_design_lpf
-#define lsx_dfct_f                     _soxr_dfct_f
-#define lsx_dfct                       _soxr_dfct
-#define lsx_dfst_f                     _soxr_dfst_f
-#define lsx_dfst                       _soxr_dfst
-#define lsx_fir_to_phase               _soxr_fir_to_phase
-#define lsx_f_resp                     _soxr_f_resp
-#define lsx_init_fft_cache_f           _soxr_init_fft_cache_f
-#define lsx_init_fft_cache             _soxr_init_fft_cache
-#define lsx_inv_f_resp                 _soxr_inv_f_resp
-#define lsx_kaiser_beta                _soxr_kaiser_beta
-#define lsx_kaiser_params              _soxr_kaiser_params
-#define lsx_make_lpf                   _soxr_make_lpf
-#define lsx_ordered_convolve_f         _soxr_ordered_convolve_f
-#define lsx_ordered_convolve           _soxr_ordered_convolve
-#define lsx_ordered_partial_convolve_f _soxr_ordered_partial_convolve_f
-#define lsx_ordered_partial_convolve   _soxr_ordered_partial_convolve
-#define lsx_rdft_f                     _soxr_rdft_f
-#define lsx_rdft                       _soxr_rdft
-#define lsx_safe_cdft_f                _soxr_safe_cdft_f
-#define lsx_safe_cdft                  _soxr_safe_cdft
-#define lsx_safe_rdft_f                _soxr_safe_rdft_f
-#define lsx_safe_rdft                  _soxr_safe_rdft
-
-#endif
diff --git a/soxr-sys/src/avfft32.c b/soxr-sys/src/avfft32.c
deleted file mode 100644
index fe651f5db..000000000
--- a/soxr-sys/src/avfft32.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#include <stdlib.h>
-#include <math.h>
-#include <libavcodec/avfft.h>
-#include "filter.h"
-#include "rdft_t.h"
-
-static void * forward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),DFT_R2C);}
-static void * backward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),IDFT_C2R);}
-static void rdft(int length, void * setup, float * h) {av_rdft_calc(setup, h); (void)length;}
-static int multiplier(void) {return 2;}
-static void nothing(void) {}
-static int flags(void) {return 0;}
-
-fn_t _soxr_rdft32_cb[] = {
-  (fn_t)forward_setup,
-  (fn_t)backward_setup,
-  (fn_t)av_rdft_end,
-  (fn_t)rdft,
-  (fn_t)rdft,
-  (fn_t)rdft,
-  (fn_t)rdft,
-  (fn_t)_soxr_ordered_convolve_f,
-  (fn_t)_soxr_ordered_partial_convolve_f,
-  (fn_t)multiplier,
-  (fn_t)nothing,
-  (fn_t)malloc,
-  (fn_t)calloc,
-  (fn_t)free,
-  (fn_t)flags,
-};
diff --git a/soxr-sys/src/avfft32s.c b/soxr-sys/src/avfft32s.c
deleted file mode 100644
index 5a7e62db2..000000000
--- a/soxr-sys/src/avfft32s.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#include <math.h>
-#include <libavcodec/avfft.h>
-#include "util32s.h"
-#include "rdft_t.h"
-
-static void * forward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),DFT_R2C);}
-static void * backward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),IDFT_C2R);}
-static void rdft(int length, void * setup, float * h) {av_rdft_calc(setup, h); (void)length;}
-static int multiplier(void) {return 2;}
-static void nothing(void) {}
-static int flags(void) {return RDFT_IS_SIMD;}
-
-fn_t _soxr_rdft32s_cb[] = {
-  (fn_t)forward_setup,
-  (fn_t)backward_setup,
-  (fn_t)av_rdft_end,
-  (fn_t)rdft,
-  (fn_t)rdft,
-  (fn_t)rdft,
-  (fn_t)rdft,
-  (fn_t)ORDERED_CONVOLVE_SIMD,
-  (fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD,
-  (fn_t)multiplier,
-  (fn_t)nothing,
-  (fn_t)SIMD_ALIGNED_MALLOC,
-  (fn_t)SIMD_ALIGNED_CALLOC,
-  (fn_t)SIMD_ALIGNED_FREE,
-  (fn_t)flags,
-};
diff --git a/soxr-sys/src/ccrw2.h b/soxr-sys/src/ccrw2.h
deleted file mode 100644
index 09331a4b1..000000000
--- a/soxr-sys/src/ccrw2.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-/* Concurrent Control with "Readers" and "Writers", P.J. Courtois et al, 1971 */
-
-#if !defined soxr_ccrw2_included
-#define soxr_ccrw2_included
-
-#if defined SOXR_LIB
-#include "internal.h"
-#endif
-
-#if defined _OPENMP
-
-#include <omp.h>
-
-typedef struct {
-  int readcount, writecount; /* initial value = 0 */
-  omp_lock_t mutex_1, mutex_2, mutex_3, w, r; /* initial value = 1 */
-} ccrw2_t; /* Problem #2: `writers-preference' */
-
-#define ccrw2_become_reader(p) do {\
-  omp_set_lock(&p.mutex_3);\
-    omp_set_lock(&p.r);\
-      omp_set_lock(&p.mutex_1);\
-        if (++p.readcount == 1) omp_set_lock(&p.w);\
-      omp_unset_lock(&p.mutex_1);\
-    omp_unset_lock(&p.r);\
-  omp_unset_lock(&p.mutex_3);\
-} while (0)
-#define ccrw2_cease_reading(p) do {\
-  omp_set_lock(&p.mutex_1);\
-    if (!--p.readcount) omp_unset_lock(&p.w);\
-  omp_unset_lock(&p.mutex_1);\
-} while (0)
-#define ccrw2_become_writer(p) do {\
-  omp_set_lock(&p.mutex_2);\
-    if (++p.writecount == 1) omp_set_lock(&p.r);\
-  omp_unset_lock(&p.mutex_2);\
-  omp_set_lock(&p.w);\
-} while (0)
-#define ccrw2_cease_writing(p) do {\
-  omp_unset_lock(&p.w);\
-  omp_set_lock(&p.mutex_2);\
-    if (!--p.writecount) omp_unset_lock(&p.r);\
-  omp_unset_lock(&p.mutex_2);\
-} while (0)
-#define ccrw2_init(p) do {\
-  omp_init_lock(&p.mutex_1);\
-  omp_init_lock(&p.mutex_2);\
-  omp_init_lock(&p.mutex_3);\
-  omp_init_lock(&p.w);\
-  omp_init_lock(&p.r);\
-} while (0)
-#define ccrw2_clear(p) do {\
-  omp_destroy_lock(&p.r);\
-  omp_destroy_lock(&p.w);\
-  omp_destroy_lock(&p.mutex_3);\
-  omp_destroy_lock(&p.mutex_2);\
-  omp_destroy_lock(&p.mutex_1);\
-} while (0)
-
-#else
-
-typedef int ccrw2_t;
-#define ccrw2_become_reader(x) (void)(x)
-#define ccrw2_cease_reading(x) (void)(x)
-#define ccrw2_become_writer(x) (void)(x)
-#define ccrw2_cease_writing(x) (void)(x)
-#define ccrw2_init(x) (void)(x)
-#define ccrw2_clear(x) (void)(x)
-
-#endif /* _OPENMP */
-
-#endif
diff --git a/soxr-sys/src/cr-core.c b/soxr-sys/src/cr-core.c
deleted file mode 100644
index 159a5d976..000000000
--- a/soxr-sys/src/cr-core.c
+++ /dev/null
@@ -1,314 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-18 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details.
- *
- * Constant-rate resampling engine-specific code. */
-
-#include <math.h>
-#include <assert.h>
-#include <string.h>
-#include <stdlib.h>
-
-#include "filter.h"
-
-#if defined SOXR_LIB
-  #include "internal.h"
-  #include "cr.h"
-  #if CORE_TYPE & CORE_DBL
-    typedef double sample_t;
-    #if CORE_TYPE & CORE_SIMD_DFT
-      #define RDFT_CB    _soxr_rdft64s_cb
-    #else
-      #define RDFT_CB    _soxr_rdft64_cb
-    #endif
-  #else
-    typedef float sample_t;
-    #if CORE_TYPE & CORE_SIMD_DFT
-      #define RDFT_CB    _soxr_rdft32s_cb
-    #else
-      #define RDFT_CB    _soxr_rdft32_cb
-    #endif
-  #endif
-
-  #if CORE_TYPE & (CORE_SIMD_POLY|CORE_SIMD_HALF|CORE_SIMD_DFT)
-    #if CORE_TYPE & CORE_DBL
-      #include "util64s.h"
-      #include "dev64s.h"
-    #else
-      #include "util32s.h"
-      #include "dev32s.h"
-    #endif
-  #endif
-
-  extern fn_t RDFT_CB[];
-#else
-  #define RDFT_CB 0
-#endif
-
-
-
-static void cubic_stage_fn(stage_t * p, fifo_t * output_fifo)
-{
-  sample_t const * input = stage_read_p(p);
-  int num_in = min(stage_occupancy(p), p->input_size);
-  int i, max_num_out = 1 + (int)(num_in * p->out_in_ratio);
-  sample_t * output = fifo_reserve(output_fifo, max_num_out);
-
-  for (i = 0; p->at.integer < num_in; ++i, p->at.whole += p->step.whole) {
-    sample_t const * s = input + p->at.integer;
-    double x = p->at.fraction * (1 / MULT32);
-    double b = .5*(s[1]+s[-1])-*s, a = (1/6.)*(s[2]-s[1]+s[-1]-*s-4*b);
-    double c = s[1]-*s-a-b;
-    output[i] = (sample_t)(p->mult * (((a*x + b)*x + c)*x + *s));
-  }
-  assert(max_num_out - i >= 0);
-  fifo_trim_by(output_fifo, max_num_out - i);
-  fifo_read(&p->fifo, p->at.integer, NULL);
-  p->at.integer = 0;
-}
-
-
-
-#if defined __AVX__
-  #define DEFINED_AVX 1
-#else
-  #define DEFINED_AVX 0
-#endif
-
-#if defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86
-  #define DEFINED_X86 1
-#else
-  #define DEFINED_X86 0
-#endif
-
-#if defined __arm__
-  #define DEFINED_ARM 1
-#else
-  #define DEFINED_ARM 0
-#endif
-
-
-
-#if CORE_TYPE & CORE_DBL
-  #define SIMD_AVX ((CORE_TYPE & CORE_SIMD_HALF) && DEFINED_AVX)
-  #define SIMD_SSE 0
-#else
-  #define SIMD_SSE ((CORE_TYPE & CORE_SIMD_HALF) && DEFINED_X86)
-  #define SIMD_AVX 0
-#endif
-
-#define SIMD_NEON ((CORE_TYPE & CORE_SIMD_HALF) && DEFINED_ARM)
-
-
-
-#include "half-coefs.h"
-
-#if !(CORE_TYPE & CORE_SIMD_HALF)
-#define FUNCTION_H h7
-#define CONVOLVE ____ __ _
-#include "half-fir.h"
-#endif
-
-#define FUNCTION_H h8
-#define CONVOLVE ____ ____
-#include "half-fir.h"
-
-#define FUNCTION_H h9
-#define CONVOLVE ____ ____ _
-#include "half-fir.h"
-
-#if CORE_TYPE & CORE_DBL
-  #define FUNCTION_H h10
-  #define CONVOLVE ____ ____ __
-  #include "half-fir.h"
-
-  #define FUNCTION_H h11
-  #define CONVOLVE ____ ____ __ _
-  #include "half-fir.h"
-
-  #define FUNCTION_H h12
-  #define CONVOLVE ____ ____ ____
-  #include "half-fir.h"
-
-  #define FUNCTION_H h13
-  #define CONVOLVE ____ ____ ____ _
-  #include "half-fir.h"
-#endif
-
-static half_fir_info_t const half_firs[] = {
-#if !(CORE_TYPE & CORE_SIMD_HALF)
-  { 7, half_fir_coefs_7 , h7 , 0  , 120.65f},
-#endif
-  { 8, half_fir_coefs_8 , h8 , 0  , 136.51f},
-  { 9, half_fir_coefs_9 , h9 , 0  , 152.32f},
-#if CORE_TYPE & CORE_DBL
-  {10, half_fir_coefs_10, h10, 0  , 168.08f},
-  {11, half_fir_coefs_11, h11, 0  , 183.79f},
-  {12, half_fir_coefs_12, h12, 0  , 199.46f},
-  {13, half_fir_coefs_13, h13, 0  , 215.12f},
-#endif
-};
-
-#undef SIMD_AVX
-#undef SIMD_NEON
-#undef SIMD_SSE
-
-
-
-#if CORE_TYPE & CORE_DBL
-  #define SIMD_AVX ((CORE_TYPE & CORE_SIMD_POLY) && DEFINED_AVX)
-  #define SIMD_SSE 0
-#else
-  #define SIMD_SSE ((CORE_TYPE & CORE_SIMD_POLY) && DEFINED_X86)
-  #define SIMD_AVX 0
-#endif
-
-#define SIMD_NEON ((CORE_TYPE & CORE_SIMD_POLY) && DEFINED_ARM)
-
-
-
-#define COEFS (sample_t * __restrict)p->shared->poly_fir_coefs
-#define VAR_LENGTH p->n
-#define VAR_CONVOLVE(n) while (j < (n)) _
-#define VAR_POLY_PHASE_BITS p->phase_bits
-
-
-
-#define FUNCTION vpoly0
-#define FIR_LENGTH VAR_LENGTH
-#define CONVOLVE(n) VAR_CONVOLVE(n)
-#include "poly-fir0.h"
-
-#define FUNCTION vpoly1
-#define COEF_INTERP 1
-#define PHASE_BITS VAR_POLY_PHASE_BITS
-#define FIR_LENGTH VAR_LENGTH
-#define CONVOLVE(n) VAR_CONVOLVE(n)
-#include "poly-fir.h"
-
-#define FUNCTION vpoly2
-#define COEF_INTERP 2
-#define PHASE_BITS VAR_POLY_PHASE_BITS
-#define FIR_LENGTH VAR_LENGTH
-#define CONVOLVE(n) VAR_CONVOLVE(n)
-#include "poly-fir.h"
-
-#define FUNCTION vpoly3
-#define COEF_INTERP 3
-#define PHASE_BITS VAR_POLY_PHASE_BITS
-#define FIR_LENGTH VAR_LENGTH
-#define CONVOLVE(n) VAR_CONVOLVE(n)
-#include "poly-fir.h"
-
-
-
-#if !(CORE_TYPE & CORE_SIMD_POLY)
-
-#define poly_fir_convolve_U100 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
-#define FUNCTION U100_0
-#define FIR_LENGTH U100_l
-#define CONVOLVE(n) poly_fir_convolve_U100
-#include "poly-fir0.h"
-
-#define u100_l 11
-#define poly_fir_convolve_u100 _ _ _ _ _ _ _ _ _ _ _
-#define FUNCTION u100_0
-#define FIR_LENGTH u100_l
-#define CONVOLVE(n) poly_fir_convolve_u100
-#include "poly-fir0.h"
-
-#define FUNCTION u100_1
-#define COEF_INTERP 1
-#define PHASE_BITS 8
-#define FIR_LENGTH u100_l
-#define CONVOLVE(n) poly_fir_convolve_u100
-#include "poly-fir.h"
-
-#define FUNCTION u100_2
-#define COEF_INTERP 2
-#define PHASE_BITS 6
-#define FIR_LENGTH u100_l
-#define CONVOLVE(n) poly_fir_convolve_u100
-#include "poly-fir.h"
-
-#endif
-
-#define u100_1_b 8
-#define u100_2_b 6
-
-
-
-static poly_fir_t const poly_firs[] = {
-  {-1, {{0, vpoly0}, { 7.2f, vpoly1}, {5.0f, vpoly2}}},
-  {-1, {{0, vpoly0}, { 9.4f, vpoly1}, {6.7f, vpoly2}}},
-  {-1, {{0, vpoly0}, {12.4f, vpoly1}, {7.8f, vpoly2}}},
-  {-1, {{0, vpoly0}, {13.6f, vpoly1}, {9.3f, vpoly2}}},
-  {-1, {{0, vpoly0}, {10.5f, vpoly2}, {8.4f, vpoly3}}},
-  {-1, {{0, vpoly0}, {11.85f,vpoly2}, {9.0f, vpoly3}}},
-
-  {-1, {{0, vpoly0}, { 8.0f, vpoly1}, {5.3f, vpoly2}}},
-  {-1, {{0, vpoly0}, { 8.6f, vpoly1}, {5.7f, vpoly2}}},
-  {-1, {{0, vpoly0}, {10.6f, vpoly1}, {6.75f,vpoly2}}},
-  {-1, {{0, vpoly0}, {12.6f, vpoly1}, {8.6f, vpoly2}}},
-  {-1, {{0, vpoly0}, { 9.6f, vpoly2}, {7.6f, vpoly3}}},
-  {-1, {{0, vpoly0}, {11.4f, vpoly2}, {8.65f,vpoly3}}},
-
-#if CORE_TYPE & CORE_SIMD_POLY
-  {10.62f, {{0, vpoly0}, {0, 0}, {0, 0}}},
-  {-1, {{0, vpoly0}, {u100_1_b, vpoly1}, {u100_2_b, vpoly2}}},
-#else
-  {10.62f, {{U100_l, U100_0}, {0, 0}, {0, 0}}},
-  {11.28f, {{u100_l, u100_0}, {u100_1_b, u100_1}, {u100_2_b, u100_2}}},
-#endif
-  {-1, {{0, vpoly0}, {   9, vpoly1}, {  6, vpoly2}}},
-  {-1, {{0, vpoly0}, {  11, vpoly1}, {  7, vpoly2}}},
-  {-1, {{0, vpoly0}, {  13, vpoly1}, {  8, vpoly2}}},
-  {-1, {{0, vpoly0}, {  10, vpoly2}, {  8, vpoly3}}},
-  {-1, {{0, vpoly0}, {  12, vpoly2}, {  9, vpoly3}}},
-};
-
-
-
-static cr_core_t const cr_core = {
-
-#if CORE_TYPE & CORE_SIMD_POLY
-  {SIMD_ALIGNED_MALLOC, SIMD_ALIGNED_CALLOC, SIMD_ALIGNED_FREE},
-#else
-  {malloc, calloc, free},
-#endif
-  half_firs, array_length(half_firs),
-  0, 0,
-  cubic_stage_fn,
-  poly_firs, RDFT_CB
-};
-
-
-
-#if defined SOXR_LIB
-
-#include "soxr.h"
-
-static char const * rate_create(void * channel, void * shared, double io_ratio,
-    soxr_quality_spec_t * q_spec, soxr_runtime_spec_t * r_spec, double scale)
-{
-  return _soxr_init(channel, shared, io_ratio, q_spec, r_spec, scale,
-      &cr_core, CORE_TYPE);
-}
-
-
-
-static char const * id(void) {return CORE_STR;}
-
-fn_t RATE_CB[] = {
-  (fn_t)_soxr_input,
-  (fn_t)_soxr_process,
-  (fn_t)_soxr_output,
-  (fn_t)_soxr_flush,
-  (fn_t)_soxr_close,
-  (fn_t)_soxr_delay,
-  (fn_t)_soxr_sizes,
-  (fn_t)rate_create,
-  (fn_t)0,
-  (fn_t)id,
-};
-
-#endif
diff --git a/soxr-sys/src/cr.c b/soxr-sys/src/cr.c
deleted file mode 100644
index 4122db3ce..000000000
--- a/soxr-sys/src/cr.c
+++ /dev/null
@@ -1,588 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details.
- *
- * Constant-rate resampling common code. */
-
-#include <math.h>
-#include <assert.h>
-#include <string.h>
-#include <stdlib.h>
-
-#include "filter.h"
-
-#if defined SOXR_LIB
-  #include "internal.h"
-  #define STATIC
-#endif
-
-#include "cr.h"
-
-#define num_coefs4 ((core_flags&CORE_SIMD_POLY)? ((num_coefs+3)&~3) : num_coefs)
-
-#define coef_coef(C,T,x) \
-  C((T*)result, interp_order, num_coefs4, j, x, num_coefs4 - 1 - i)
-
-#define STORE(C,T) { \
-  if (interp_order > 2) coef_coef(C,T,3) = (T)d; \
-  if (interp_order > 1) coef_coef(C,T,2) = (T)c; \
-  if (interp_order > 0) coef_coef(C,T,1) = (T)b; \
-  coef_coef(C,T,0) = (T)f0;}
-
-static real * prepare_poly_fir_coefs(double const * coefs, int num_coefs,
-    int num_phases, int interp_order, double multiplier,
-    core_flags_t core_flags, alloc_t const * mem)
-{
-  int i, j, length = num_coefs4 * num_phases * (interp_order + 1);
-  real * result = mem->calloc(1,(size_t)length << LOG2_SIZEOF_REAL(core_flags));
-  double fm1 = coefs[0], f1 = 0, f2 = 0;
-
-  for (i = num_coefs - 1; i >= 0; --i)
-    for (j = num_phases - 1; j >= 0; --j) {
-      double f0 = fm1, b = 0, c = 0, d = 0; /* = 0 to kill compiler warning */
-      int pos = i * num_phases + j - 1;
-      fm1 = pos > 0 ? coefs[pos - 1] * multiplier : 0;
-      switch (interp_order) {
-        case 1: b = f1 - f0; break;
-        case 2: b = f1 - (.5 * (f2+f0) - f1) - f0; c = .5 * (f2+f0) - f1; break;
-        case 3: c=.5*(f1+fm1)-f0;d=(1/6.)*(f2-f1+fm1-f0-4*c);b=f1-f0-d-c; break;
-        default: assert(!interp_order);
-      }
-      switch (core_flags & 3) {
-        case 0: if (WITH_CR32 ) STORE(coef , float ); break;
-        case 1: if (WITH_CR64 ) STORE(coef , double); break;
-        case 2: if (WITH_CR32S) STORE(coef4, float ); break;
-        default:if (WITH_CR64S) STORE(coef4, double); break;
-      }
-      f2 = f1, f1 = f0;
-    }
-  return result;
-}
-
-#undef STORE
-#undef coef_coef
-
-#define IS_FLOAT32 (WITH_CR32 || WITH_CR32S) && \
-    (!(WITH_CR64 || WITH_CR64S) || sizeof_real == sizeof(float))
-#define WITH_FLOAT64 WITH_CR64 || WITH_CR64S
-
-static void dft_stage_fn(stage_t * p, fifo_t * output_fifo)
-{
-  real * output, * dft_out;
-  int i, j, num_in = max(0, fifo_occupancy(&p->fifo));
-  rate_shared_t const * s = p->shared;
-  dft_filter_t const * f = &s->dft_filter[p->dft_filter_num];
-  int const overlap = f->num_taps - 1;
-
-  if (p->at.integer + p->L * num_in >= f->dft_length) {
-    fn_t const * const RDFT_CB = p->rdft_cb;
-    size_t const sizeof_real = sizeof(char) << LOG2_SIZEOF_REAL(p->core_flags);
-    div_t divd = div(f->dft_length - overlap - p->at.integer + p->L - 1, p->L);
-    real const * input = fifo_read_ptr(&p->fifo);
-    fifo_read(&p->fifo, divd.quot, NULL);
-    num_in -= divd.quot;
-
-    output = fifo_reserve(output_fifo, f->dft_length);
-    dft_out = (p->core_flags & CORE_SIMD_DFT)? p->dft_out : output;
-
-    if (lsx_is_power_of_2(p->L)) { /* F-domain */
-      int portion = f->dft_length / p->L;
-      memcpy(dft_out, input, (unsigned)portion * sizeof_real);
-      rdft_oforward(portion, f->dft_forward_setup, dft_out, p->dft_scratch);
-      if (IS_FLOAT32) {
-#define dft_out ((float *)dft_out)
-        for (i = portion + 2; i < (portion << 1); i += 2) /* Mirror image. */
-          dft_out[i] = dft_out[(portion << 1) - i],
-            dft_out[i+1] = -dft_out[(portion << 1) - i + 1];
-        dft_out[portion] = dft_out[1];
-        dft_out[portion + 1] = 0;
-        dft_out[1] = dft_out[0];
-#undef dft_out
-      }
-      else if (WITH_FLOAT64) {
-#define dft_out ((double *)dft_out)
-        for (i = portion + 2; i < (portion << 1); i += 2) /* Mirror image. */
-          dft_out[i] = dft_out[(portion << 1) - i],
-            dft_out[i+1] = -dft_out[(portion << 1) - i + 1];
-        dft_out[portion] = dft_out[1];
-        dft_out[portion + 1] = 0;
-        dft_out[1] = dft_out[0];
-#undef dft_out
-      }
-
-      for (portion <<= 1; i < f->dft_length; i += portion, portion <<= 1) {
-        memcpy((char *)dft_out + (size_t)i * sizeof_real, dft_out, (size_t)portion * sizeof_real);
-        if (IS_FLOAT32)
-        #define dft_out ((float *)dft_out)
-          dft_out[i + 1] = 0;
-        #undef dft_out
-        else if (WITH_FLOAT64)
-        #define dft_out ((double *)dft_out)
-          dft_out[i + 1] = 0;
-        #undef dft_out
-      }
-      if (p->step.integer > 0)
-        rdft_reorder_back(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch);
-    } else {
-      if (p->L == 1)
-        memcpy(dft_out, input, (size_t)f->dft_length * sizeof_real);
-      else {
-        memset(dft_out, 0, (size_t)f->dft_length * sizeof_real);
-        if (IS_FLOAT32)
-          for (j = 0, i = p->at.integer; i < f->dft_length; ++j, i += p->L)
-            ((float *)dft_out)[i] = ((float *)input)[j];
-        else if (WITH_FLOAT64)
-          for (j = 0, i = p->at.integer; i < f->dft_length; ++j, i += p->L)
-            ((double *)dft_out)[i] = ((double *)input)[j];
-        p->at.integer = p->L - 1 - divd.rem;
-      }
-      if (p->step.integer > 0)
-        rdft_forward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch);
-      else
-        rdft_oforward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch);
-    }
-
-    if (p->step.integer > 0) {
-      rdft_convolve(f->dft_length, f->dft_backward_setup, dft_out, f->coefs);
-      rdft_backward(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch);
-      if ((p->core_flags & CORE_SIMD_DFT) && p->step.integer == 1)
-        memcpy(output, dft_out, (size_t)f->dft_length * sizeof_real);
-      if (p->step.integer != 1) {
-        if (IS_FLOAT32)
-          for (j = 0, i = p->remM; i < f->dft_length - overlap; ++j,
-              i += p->step.integer)
-            ((float *)output)[j] = ((float *)dft_out)[i];
-        else if (WITH_FLOAT64)
-          for (j = 0, i = p->remM; i < f->dft_length - overlap; ++j,
-              i += p->step.integer)
-            ((double *)output)[j] = ((double *)dft_out)[i];
-        p->remM = i - (f->dft_length - overlap);
-        fifo_trim_by(output_fifo, f->dft_length - j);
-      }
-      else fifo_trim_by(output_fifo, overlap);
-    }
-    else { /* F-domain */
-      int m = -p->step.integer;
-      rdft_convolve_portion(f->dft_length >> m, dft_out, f->coefs);
-      rdft_obackward(f->dft_length >> m, f->dft_backward_setup, dft_out, p->dft_scratch);
-      if (p->core_flags & CORE_SIMD_DFT)
-        memcpy(output, dft_out, (size_t)(f->dft_length >> m) * sizeof_real);
-      fifo_trim_by(output_fifo, (((1 << m) - 1) * f->dft_length + overlap) >>m);
-    }
-    (void)RDFT_CB;
-  }
-  p->input_size = (f->dft_length - p->at.integer + p->L - 1) / p->L;
-}
-
-/* Set to 4 x nearest power of 2 or half of that */
-/* if danger of causing too many cache misses. */
-static int set_dft_length(int num_taps, int min, int large)
-{
-  double d = log((double)num_taps) / log(2.);
-  return 1 << range_limit((int)(d + 2.77), min, max((int)(d + 1.77), large));
-}
-
-static void dft_stage_init(
-    unsigned instance, double Fp, double Fs, double Fn, double att,
-    double phase_response, stage_t * p, int L, int M, double * multiplier,
-    unsigned min_dft_size, unsigned large_dft_size, core_flags_t core_flags,
-    fn_t const * RDFT_CB)
-{
-  dft_filter_t * f = &p->shared->dft_filter[instance];
-  int num_taps = 0, dft_length = f->dft_length, i, offset;
-  bool f_domain_m = abs(3-M) == 1 && Fs <= 1;
-  size_t const sizeof_real = sizeof(char) << LOG2_SIZEOF_REAL(core_flags);
-
-  if (!dft_length) {
-    int k = phase_response == 50 && lsx_is_power_of_2(L) && Fn == L? L << 1 : 4;
-    double m, * h = lsx_design_lpf(Fp, Fs, Fn, att, &num_taps, -k, -1.);
-
-    if (phase_response != 50)
-      lsx_fir_to_phase(&h, &num_taps, &f->post_peak, phase_response);
-    else f->post_peak = num_taps / 2;
-
-    dft_length = set_dft_length(num_taps, (int)min_dft_size, (int)large_dft_size);
-    f->coefs = rdft_calloc((size_t)dft_length, sizeof_real);
-    offset = dft_length - num_taps + 1;
-    m = (1. / dft_length) * rdft_multiplier() * L * *multiplier;
-    if (IS_FLOAT32) for (i = 0; i < num_taps; ++i)
-        ((float *)f->coefs)[(i + offset) & (dft_length - 1)] =(float)(h[i] * m);
-    else if (WITH_FLOAT64) for (i = 0; i < num_taps; ++i)
-        ((double *)f->coefs)[(i + offset) & (dft_length - 1)] = h[i] * m;
-    free(h);
-  }
-
-  if (rdft_flags() & RDFT_IS_SIMD)
-    p->dft_out = rdft_malloc(sizeof_real * (size_t)dft_length);
-  if (rdft_flags() & RDFT_NEEDS_SCRATCH)
-    p->dft_scratch = rdft_malloc(2 * sizeof_real * (size_t)dft_length);
-
-  if (!f->dft_length) {
-    void * coef_setup = rdft_forward_setup(dft_length);
-    int Lp = lsx_is_power_of_2(L)? L : 1;
-    int Mp = f_domain_m? M : 1;
-    f->dft_forward_setup = rdft_forward_setup(dft_length / Lp);
-    f->dft_backward_setup = rdft_backward_setup(dft_length / Mp);
-    if (Mp == 1)
-      rdft_forward(dft_length, coef_setup, f->coefs, p->dft_scratch);
-    else
-      rdft_oforward(dft_length, coef_setup, f->coefs, p->dft_scratch);
-    rdft_delete_setup(coef_setup);
-    f->num_taps = num_taps;
-    f->dft_length = dft_length;
-    lsx_debug("fir_len=%i dft_length=%i Fp=%g Fs=%g Fn=%g att=%g %i/%i",
-        num_taps, dft_length, Fp, Fs, Fn, att, L, M);
-  }
-  *multiplier = 1;
-  p->out_in_ratio = (double)L / M;
-  p->core_flags = core_flags;
-  p->rdft_cb = RDFT_CB;
-  p->fn = dft_stage_fn;
-  p->preload = f->post_peak / L;
-  p->at.integer = f->post_peak % L;
-  p->L = L;
-  p->step.integer = f_domain_m? -M/2 : M;
-  p->dft_filter_num = instance;
-  p->block_len = f->dft_length - (f->num_taps - 1);
-  p->phase0 = p->at.integer / p->L;
-  p->input_size = (f->dft_length - p->at.integer + p->L - 1) / p->L;
-}
-
-static struct half_fir_info const * find_half_fir(
-    struct half_fir_info const * firs, size_t len, double att)
-{
-  size_t i;
-  for (i = 0; i + 1 < len && att > firs[i].att; ++i);
-  return &firs[i];
-}
-
-#define have_pre_stage  (preM  * preL  != 1)
-#define have_arb_stage  (arbM  * arbL  != 1)
-#define have_post_stage (postM * postL != 1)
-
-#include "soxr.h"
-
-STATIC char const * _soxr_init(
-  rate_t * const p,             /* Per audio channel. */
-  rate_shared_t * const shared, /* By channels undergoing same rate change. */
-  double const io_ratio,        /* Input rate divided by output rate. */
-  soxr_quality_spec_t const * const q_spec,
-  soxr_runtime_spec_t const * const r_spec,
-  double multiplier,            /* Linear gain to apply during conversion. */
-  cr_core_t const * const core,
-  core_flags_t const core_flags)
-{
-  size_t const sizeof_real = sizeof(char) << LOG2_SIZEOF_REAL(core_flags);
-  double const tolerance = 1 + 1e-5;
-
-  double       bits = q_spec->precision;
-  rolloff_t const rolloff = (rolloff_t)(q_spec->flags & 3);
-  int interpolator = (int)(r_spec->flags & 3) - 1;
-  double const Fp0 = q_spec->passband_end, Fs0 = q_spec->stopband_begin;
-  double const phase_response = q_spec->phase_response, tbw0 = Fs0-Fp0;
-
-  bool const maintain_3dB_pt = !!(q_spec->flags & SOXR_MAINTAIN_3DB_PT);
-  double tbw_tighten = 1, alpha;
-  #define tighten(x) (Fs0-(Fs0-(x))*tbw_tighten)
-
-  double arbM = io_ratio, Fn1, Fp1 = Fp0, Fs1 = Fs0, bits1 = min(bits,33);
-  double att = (bits1 + 1) * linear_to_dB(2.), attArb = att; /* +1: pass+stop */
-  int preL = 1, preM = 1, shr = 0, arbL = 1, postL = 1, postM = 1;
-  bool upsample=false, rational=false, iOpt=!(r_spec->flags&SOXR_NOSMALLINTOPT);
-  bool lq_bits= (q_spec->flags & SOXR_PROMOTE_TO_LQ)? bits <= 16 : bits == 16;
-  bool lq_Fp0 = (q_spec->flags & SOXR_PROMOTE_TO_LQ)? Fp0<=lq_bw0 : Fp0==lq_bw0;
-  int n = 0, i, mode = lq_bits && rolloff == rolloff_medium? io_ratio > 1 ||
-    phase_response != 50 || !lq_Fp0 || Fs0 != 1 : ((int)ceil(bits1) - 6) / 4;
-  struct half_fir_info const * half_fir_info;
-  stage_t * s;
-
-  if (io_ratio < 1 && Fs0 - 1 > 1 - Fp0 / tolerance)
-    return "imaging greater than rolloff";
-  if (.002 / tolerance > tbw0 || tbw0 > .5 * tolerance)
-    return "transition bandwidth not in [0.2,50] % of nyquist";
-  if (.5 / tolerance > Fp0 || Fs0 > 1.5 * tolerance)
-    return "transition band not within [50,150] % of nyquist";
-  if (bits!=0 && (15 > bits || bits > 33))
-    return "precision not in [15,33] bits";
-  if (io_ratio <= 0)
-    return "resampling factor not positive";
-  if (0 > phase_response || phase_response > 100)
-    return "phase response not in [0=min-phase,100=max-phase] %";
-
-  p->core = core;
-  p->io_ratio = io_ratio;
-  if (bits!=0) while (!n++) {                            /* Determine stages: */
-    int try, L, M, x, maxL = interpolator > 0? 1 : mode? 2048 :
-      (int)ceil(r_spec->coef_size_kbytes * 1000. / (U100_l * (int)sizeof_real));
-    double d, epsilon = 0, frac;
-    upsample = arbM < 1;
-    for (i = (int)(.5 * arbM), shr = 0; i >>= 1; arbM *= .5, ++shr);
-    preM = upsample || (arbM > 1.5 && arbM < 2);
-    postM = 1 + (arbM > 1 && preM), arbM /= postM;
-    preL = 1 + (!preM && arbM < 2) + (upsample && mode), arbM *= preL;
-    if ((frac = arbM - (int)arbM)!=0)
-      epsilon = fabs(floor(frac * MULT32 + .5) / (frac * MULT32) - 1);
-    for (i = 1, rational = frac==0; i <= maxL && !rational; ++i) {
-      d = frac * i, try = (int)(d + .5);
-      if ((rational = fabs(try / d - 1) <= epsilon)) {    /* No long doubles! */
-        if (try == i)
-          arbM = ceil(arbM), shr += x = arbM > 3, arbM /= 1 + x;
-        else arbM = i * (int)arbM + try, arbL = i;
-      }
-    }
-    L = preL * arbL, M = (int)(arbM * postM), x = (L|M)&1, L >>= !x, M >>= !x;
-    if (iOpt && postL == 1 && (d = preL * arbL / arbM) > 4 && d != 5) {
-      for (postL = 4, i = (int)(d / 16); (i >>= 1) && postL < 256; postL <<= 1);
-      arbM = arbM * postL / arbL / preL, arbL = 1, n = 0;
-    } else if (rational && (max(L, M) < 3 + 2 * iOpt || L * M < 6 * iOpt))
-      preL = L, preM = M, arbM = arbL = postM = 1;
-    if (!mode && (!rational || !n))
-      ++mode, n = 0;
-  }
-
-  p->num_stages = shr + have_pre_stage + have_arb_stage + have_post_stage;
-  if (!p->num_stages && multiplier != 1) {
-    bits = arbL = 0;                         /* Use cubic_stage in this case. */
-    ++p->num_stages;
-  }
-  p->stages = calloc((size_t)p->num_stages + 1, sizeof(*p->stages));
-  if (!p->stages)
-    return "out of memory";
-  for (i = 0; i < p->num_stages; ++i) {
-    p->stages[i].num = i;
-    p->stages[i].shared = shared;
-    p->stages[i].input_size = 8192;
-  }
-  p->stages[0].is_input = true;
-
-  alpha = postM / (io_ratio * (postL << 0));
-
-  if ((n = p->num_stages) > 1) {                              /* Att. budget: */
-    if (have_arb_stage)
-      att += linear_to_dB(2.), attArb = att, --n;
-    att += linear_to_dB((double)n);
-  }
-
-  half_fir_info = find_half_fir(core->half_firs, core->half_firs_len, att);
-  for (i = 0, s = p->stages; i < shr; ++i, ++s) {
-    s->fn = half_fir_info->fn;
-    s->coefs = half_fir_info->coefs;
-    s->n = half_fir_info->num_coefs;
-    s->pre_post = 4 * s->n;
-    s->preload = s->pre = s->pre_post >> 1;
-  }
-
-  if (have_pre_stage) {
-    if (maintain_3dB_pt && have_post_stage) {    /* Trans. bands overlapping. */
-      double x = tbw0 * lsx_inv_f_resp(-3., att);
-      x = -lsx_f_resp(x / (max(2 * alpha - Fs0, alpha) - Fp0), att);
-      if (x > .035) {
-        tbw_tighten = ((4.3074e-3 - 3.9121e-4 * x) * x - .040009) * x + 1.0014;
-        lsx_debug("tbw_tighten=%g (%gdB)", tbw_tighten, x);
-      }
-    }
-    Fn1 = preM? max(preL, preM) : arbM / arbL;
-    dft_stage_init(0, tighten(Fp1), Fs1, Fn1, att, phase_response, s++, preL,
-        max(preM, 1), &multiplier, r_spec->log2_min_dft_size,
-        r_spec->log2_large_dft_size, core_flags, core->rdft_cb);
-    Fp1 /= Fn1, Fs1 /= Fn1;
-  }
-
-  if (bits==0 && have_arb_stage) {                /* `Quick' cubic arb stage: */
-    s->fn = core->cubic_stage_fn;
-    s->mult = multiplier, multiplier = 1;
-    s->step.whole = (int64_t)(arbM * MULT32 + .5);
-    s->pre_post = max(3, s->step.integer);
-    s->preload = s->pre = 1;
-    s->out_in_ratio = MULT32 / (double)s->step.whole;
-  }
-  else if (have_arb_stage) {                     /* Higher quality arb stage: */
-    static const float rolloffs[] = {-.01f, -.3f, 0, -.103f};
-    poly_fir_t const * f = &core->poly_firs[6*(upsample+!!preM)+mode-!upsample];
-    int order, num_coefs = (int)f->interp[0].scalar, phase_bits, phases;
-    size_t coefs_size;
-    double at, Fp = Fp1, Fs, Fn, mult = upsample? 1 : arbM / arbL;
-    poly_fir1_t const * f1;
-
-    if (!upsample && preM)
-      Fn = 2 * mult, Fs = 3 + fabs(Fs1 - 1);
-    else Fn = 1, Fs = 2 - (mode? Fp1 + (Fs1 - Fp1) * .7 : Fs1);
-
-    if (mode)
-      Fp = Fs - (Fs - Fp) / (1 - lsx_inv_f_resp(rolloffs[rolloff], attArb));
-
-    i = (interpolator < 0? !rational : max(interpolator, !rational)) - 1;
-    do {
-      f1 = &f->interp[++i];
-      assert(f1->fn);
-      if (i)
-        arbM /= arbL, arbL = 1, rational = false;
-      phase_bits = (int)ceil(f1->scalar - log(mult)/log(2.));
-      phases = !rational? (1 << phase_bits) : arbL;
-      if (f->interp[0].scalar==0) {
-        int phases0 = max(phases, 19), n0 = 0;
-        lsx_design_lpf(Fp, Fs, -Fn, attArb, &n0, phases0, f->beta);
-        num_coefs = n0 / phases0 + 1, num_coefs += num_coefs & !preM;
-      }
-      if ((num_coefs & 1) && rational && (arbL & 1))
-        phases <<= 1, arbL <<= 1, arbM *= 2;
-      at = arbL * (s->phase0 = .5 * (num_coefs & 1));
-      order = i + (i && mode > 4);
-      coefs_size = (size_t)(num_coefs4 * phases * (order+1)) * sizeof_real;
-    } while (interpolator < 0 && i < 2 && f->interp[i+1].fn &&
-        coefs_size / 1000 > r_spec->coef_size_kbytes);
-
-    if (!s->shared->poly_fir_coefs) {
-      int num_taps = num_coefs * phases - 1;
-      double * coefs = lsx_design_lpf(
-          Fp, Fs, Fn, attArb, &num_taps, phases, f->beta);
-      s->shared->poly_fir_coefs = prepare_poly_fir_coefs(
-          coefs, num_coefs, phases, order, multiplier, core_flags, &core->mem);
-      lsx_debug("fir_len=%i phases=%i coef_interp=%i size=%.3gk",
-          num_coefs, phases, order, (double)coefs_size / 1000.);
-      free(coefs);
-    }
-    multiplier = 1;
-    s->fn = f1->fn;
-    s->pre_post = num_coefs4 - 1;
-    s->preload = ((num_coefs - 1) >> 1) + (num_coefs4 - num_coefs);
-    s->n = num_coefs4;
-    s->phase_bits = phase_bits;
-    s->L = arbL;
-    s->use_hi_prec_clock =
-      mode>1 && (q_spec->flags & SOXR_HI_PREC_CLOCK) && !rational;
-#if WITH_FLOAT_STD_PREC_CLOCK
-    if (order && !s->use_hi_prec_clock) {
-      s->at.flt = at;
-      s->step.flt = arbM;
-      s->out_in_ratio = (double)(arbL / s->step.flt);
-    } else
-#endif
-    {
-      s->at.whole = (int64_t)(at * MULT32 + .5);
-#if WITH_HI_PREC_CLOCK
-      if (s->use_hi_prec_clock) {
-        double M = arbM * MULT32;
-        s->at.fix.ls.parts.ms = 0x80000000ul;
-        s->step.whole = (int64_t)M;
-        M -= (double)s->step.whole;
-        M *= MULT32 * MULT32;
-        s->step.fix.ls.all = (uint64_t)M;
-      } else
-#endif
-        s->step.whole = (int64_t)(arbM * MULT32 + .5);
-      s->out_in_ratio = MULT32 * arbL / (double)s->step.whole;
-    }
-    ++s;
-  }
-
-  if (have_post_stage)
-    dft_stage_init(1, tighten(Fp0 / (upsample? alpha : 1)), upsample? max(2 -
-        Fs0 / alpha, 1) : Fs0, (double)max(postL, postM), att, phase_response,
-        s++, postL, postM, &multiplier, r_spec->log2_min_dft_size,
-        r_spec->log2_large_dft_size, core_flags, core->rdft_cb);
-
-  lsx_debug("%g: >>%i %i/%i %i/%g %i/%i (%x)", 1/io_ratio,
-      shr, preL, preM, arbL, arbM, postL, postM, core_flags);
-
-  for (i = 0, s = p->stages; i < p->num_stages; ++i, ++s) {
-    fifo_create(&s->fifo, (int)sizeof_real);
-    memset(fifo_reserve(&s->fifo, s->preload), 0,
-        sizeof_real * (size_t)s->preload);
-    lsx_debug_more("%5i|%-5i preload=%i remL=%i",
-        s->pre, s->pre_post-s->pre, s->preload, s->at.integer);
-  }
-  fifo_create(&s->fifo, (int)sizeof_real);
-  return 0;
-}
-
-static bool stage_process(stage_t * stage, bool flushing)
-{
-  fifo_t * fifo = &stage->fifo;
-  bool done = false;
-  int want;
-  while (!done && (want = stage->input_size - fifo_occupancy(fifo)) > 0) {
-    if (stage->is_input) {
-      if (flushing)
-        memset(fifo_reserve(fifo, want), 0, fifo->item_size * (size_t)want);
-      else done = true;
-    }
-    else done = stage_process(stage - 1, flushing);
-  }
-  stage->fn(stage, &stage[1].fifo);
-  return done && fifo_occupancy(fifo) < stage->input_size;
-}
-
-STATIC void _soxr_process(rate_t * p, size_t olen)
-{
-  int const n = p->flushing? min(-(int)p->samples_out, (int)olen) : (int)olen;
-  stage_t * stage = &p->stages[p->num_stages];
-  fifo_t * fifo = &stage->fifo;
-  bool done = false;
-  while (!done && fifo_occupancy(fifo) < (int)n)
-    done = stage->is_input || stage_process(stage - 1, p->flushing);
-}
-
-STATIC real * _soxr_input(rate_t * p, real const * samples, size_t n)
-{
-  if (p->flushing)
-    return 0;
-  p->samples_in += (int64_t)n;
-  return fifo_write(&p->stages[0].fifo, (int)n, samples);
-}
-
-STATIC real const * _soxr_output(rate_t * p, real * samples, size_t * n0)
-{
-  fifo_t * fifo = &p->stages[p->num_stages].fifo;
-  int n = p->flushing? min(-(int)p->samples_out, (int)*n0) : (int)*n0;
-  p->samples_out += n = min(n, fifo_occupancy(fifo));
-  return fifo_read(fifo, (int)(*n0 = (size_t)n), samples);
-}
-
-STATIC void _soxr_flush(rate_t * p)
-{
-  if (p->flushing) return;
-  p->samples_out -= (int64_t)((double)p->samples_in / p->io_ratio + .5);
-  p->samples_in = 0;
-  p->flushing = true;
-}
-
-STATIC void _soxr_close(rate_t * p)
-{
-  if (p->stages) {
-    fn_t const * const RDFT_CB = p->core->rdft_cb;
-    rate_shared_t * shared = p->stages[0].shared;
-    int i;
-
-    for (i = 0; i <= p->num_stages; ++i) {
-      stage_t * s = &p->stages[i];
-      rdft_free(s->dft_scratch);
-      rdft_free(s->dft_out);
-      fifo_delete(&s->fifo);
-    }
-    if (shared) {
-      for (i = 0; i < 2; ++i) {
-        dft_filter_t * f= &shared->dft_filter[i];
-        rdft_free(f->coefs);
-        rdft_delete_setup(f->dft_forward_setup);
-        rdft_delete_setup(f->dft_backward_setup);
-      }
-      p->core->mem.free(shared->poly_fir_coefs);
-      memset(shared, 0, sizeof(*shared));
-    }
-    free(p->stages);
-    (void)RDFT_CB;
-  }
-}
-
-#if defined SOXR_LIB
-STATIC double _soxr_delay(rate_t * p)
-{
-  return (double)p->samples_in / p->io_ratio - (double)p->samples_out;
-}
-
-STATIC void _soxr_sizes(size_t * shared, size_t * channel)
-{
-  *shared = sizeof(rate_shared_t);
-  *channel = sizeof(rate_t);
-}
-#endif
diff --git a/soxr-sys/src/cr.h b/soxr-sys/src/cr.h
deleted file mode 100644
index d6e863799..000000000
--- a/soxr-sys/src/cr.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#if !defined soxr_cr_included
-#define soxr_cr_included
-
-#define  FIFO_SIZE_T int
-#include "fifo.h"
-
-typedef void real; /* float or double */
-struct stage;
-typedef void (* stage_fn_t)(struct stage * input, fifo_t * output);
-typedef struct half_fir_info {
-  int num_coefs;
-  real const * coefs;
-  stage_fn_t fn, dfn;
-  float att;
-} half_fir_info_t;
-typedef struct {float scalar; stage_fn_t fn;} poly_fir1_t;
-typedef struct {float beta; poly_fir1_t interp[3];} poly_fir_t;
-
-#define U100_l 42
-#define MULT32 (65536. * 65536.)
-
-/* Conceptually: coef_p is &coefs[num_phases][fir_len][interp_order+1]: */
-#define coef(coef_p, interp_order, fir_len, phase_num, coef_interp_num, fir_coef_num) (coef_p)[\
-  (fir_len) * ((interp_order) + 1) * (phase_num) + \
-  ((interp_order) + 1) * (fir_coef_num) + \
-  ((interp_order) - (coef_interp_num))]
-
-/* Conceptually: coef_p is &coefs[num_phases][fir_len/4][interp_order+1][4]: */
-#define coef4(coef_p, interp_order, fir_len, phase_num, coef_interp_num, fir_coef_num) (coef_p)[\
-  (fir_len) * ((interp_order) + 1) * (phase_num) + \
-  ((interp_order) + 1) * ((fir_coef_num) & ~3) + \
-  4 * ((interp_order) - (coef_interp_num)) + \
-  ((fir_coef_num) & 3)]
-
-typedef union { /* Int64 in parts */
-  #if HAVE_BIGENDIAN
-  struct {int32_t ms; uint32_t ls;} parts;
-  #else
-  struct {uint32_t ls; int32_t ms;} parts;
-  #endif
-  int64_t all;
-} int64p_t;
-
-typedef union { /* Uint64 in parts */
-  #if HAVE_BIGENDIAN
-  struct {uint32_t ms, ls;} parts;
-  #else
-  struct {uint32_t ls, ms;} parts;
-  #endif
-  uint64_t all;
-} uint64p_t;
-
-typedef struct {
-  int        dft_length, num_taps, post_peak;
-  void       * dft_forward_setup, * dft_backward_setup;
-  real   * coefs;
-} dft_filter_t;
-
-typedef struct { /* So generated filter coefs may be shared between channels */
-  real   * poly_fir_coefs;
-  dft_filter_t dft_filter[2];
-} rate_shared_t;
-
-typedef double float_step_t; /* Or long double or __float128. */
-
-typedef union { /* Fixed point arithmetic */
-  struct {uint64p_t ls; int64p_t ms;} fix;  /* Hi-prec has ~96 bits. */
-  float_step_t flt;
-} step_t;
-
-#define integer  fix.ms.parts.ms
-#define fraction fix.ms.parts.ls
-#define whole    fix.ms.all
-
-#define CORE_DBL       1
-#define CORE_SIMD_POLY 2
-#define CORE_SIMD_HALF 4
-#define CORE_SIMD_DFT  8
-#define LOG2_SIZEOF_REAL(core_flags) (2 + ((core_flags) & 1))
-
-typedef int core_flags_t;
-
-#if defined SOXR_LIB
-#include "rdft_t.h"
-#else
-typedef void fn_t;
-#endif
-
-typedef struct stage {
-  int        num;
-
-  /* Common to all stage types: */
-  core_flags_t   core_flags;
-  stage_fn_t fn;
-  fifo_t     fifo;
-  int        pre;       /* Number of past samples to store */
-  int        pre_post;  /* pre + number of future samples to store */
-  int        preload;   /* Number of zero samples to pre-load the fifo */
-  double     out_in_ratio; /* For buffer management. */
-  int        input_size;
-  bool       is_input;
-
-  /* For a stage with variable (run-time generated) filter coefs: */
-  fn_t const * rdft_cb;
-  rate_shared_t * shared;
-  unsigned   dft_filter_num; /* Which, if any, of the 2 DFT filters to use */
-  real       * dft_scratch;
-  float      * dft_out;
-  real const * coefs;
-
-  /* For a stage with variable L/M: */
-  step_t     at, step;
-  bool       use_hi_prec_clock;
-  int        L, remM;
-  int        n, phase_bits, block_len;
-  double     mult, phase0;
-} stage_t;
-
-#define stage_occupancy(s) max(0, fifo_occupancy(&(s)->fifo) - (s)->pre_post)
-#define stage_read_p(s) ((sample_t *)fifo_read_ptr(&(s)->fifo) + (s)->pre)
-
-#define lq_bw0  (1385/2048.) /* ~.67625, FP exact. */
-
-typedef enum {rolloff_small, rolloff_medium, rolloff_none} rolloff_t;
-
-typedef struct {
-  void * (* alloc)(size_t);
-  void * (* calloc)(size_t, size_t);
-  void (* free)(void *);
-} alloc_t;
-
-typedef struct {
-  alloc_t mem;
-  half_fir_info_t  const * half_firs;
-  size_t half_firs_len;
-  half_fir_info_t  const * doub_firs;
-  size_t doub_firs_len;
-  stage_fn_t cubic_stage_fn;
-  poly_fir_t const * poly_firs;
-  fn_t * rdft_cb;
-} cr_core_t;
-
-typedef struct rate rate_t;
-struct rate {
-  cr_core_t const * core;
-  double     io_ratio;
-  int64_t    samples_in, samples_out;
-  int        num_stages, flushing;
-  stage_t    * stages;
-};
-
-#if defined SOXR_LIB
-
-#include "soxr.h"
-
-char const * _soxr_init(
-  rate_t * const p,                /* Per audio channel.                            */
-  rate_shared_t * const shared,    /* Between channels (undergoing same rate change)*/
-  double const io_ratio,           /* Input rate divided by output rate.            */
-  soxr_quality_spec_t const * const q_spec,
-  soxr_runtime_spec_t const * const r_spec,
-  double multiplier,               /* Linear gain to apply during conversion.   1   */
-  cr_core_t const * const core,
-  core_flags_t const);
-
-void _soxr_process(struct rate * p, size_t olen);
-real * _soxr_input(struct rate * p, real const * samples, size_t n);
-real const * _soxr_output(struct rate * p, real * samples, size_t * n0);
-void _soxr_flush(struct rate * p);
-void _soxr_close(struct rate * p);
-double _soxr_delay(struct rate * p);
-void _soxr_sizes(size_t * shared, size_t * channel);
-#endif
-
-#endif
diff --git a/soxr-sys/src/cr32.c b/soxr-sys/src/cr32.c
deleted file mode 100644
index b9eb264d0..000000000
--- a/soxr-sys/src/cr32.c
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#define RATE_CB    _soxr_rate32_cb
-#define CORE_STR   "cr32"
-
-#define CORE_TYPE  0
-#include "cr-core.c"
diff --git a/soxr-sys/src/cr32s.c b/soxr-sys/src/cr32s.c
deleted file mode 100644
index 5de2a4336..000000000
--- a/soxr-sys/src/cr32s.c
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#define RATE_CB    _soxr_rate32s_cb
-#define CORE_STR   "cr32s"
-
-#define CORE_TYPE  (CORE_SIMD_POLY|CORE_SIMD_HALF|CORE_SIMD_DFT)
-#include "cr-core.c"
diff --git a/soxr-sys/src/cr64.c b/soxr-sys/src/cr64.c
deleted file mode 100644
index 518cdd761..000000000
--- a/soxr-sys/src/cr64.c
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#define RATE_CB    _soxr_rate64_cb
-#define CORE_STR   "cr64"
-
-#define CORE_TYPE  CORE_DBL
-#include "cr-core.c"
diff --git a/soxr-sys/src/cr64s.c b/soxr-sys/src/cr64s.c
deleted file mode 100644
index 5dcd6f100..000000000
--- a/soxr-sys/src/cr64s.c
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#define RATE_CB    _soxr_rate64s_cb
-#define CORE_STR   "cr64s"
-
-#define CORE_TYPE  (CORE_DBL|CORE_SIMD_POLY|CORE_SIMD_HALF|CORE_SIMD_DFT)
-#include "cr-core.c"
diff --git a/soxr-sys/src/data-io.c b/soxr-sys/src/data-io.c
deleted file mode 100644
index 2a93fda12..000000000
--- a/soxr-sys/src/data-io.c
+++ /dev/null
@@ -1,223 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#include <limits.h>
-#include <math.h>
-#include <string.h>
-
-#include "data-io.h"
-#include "internal.h"
-
-
-
-#define DEINTERLEAVE_FROM(T,flag) do { \
-  unsigned i; \
-  size_t j; \
-  T const * src = *src0; \
-  if (ch > 1) for (j = 0; j < n; ++j) \
-    for (i = 0; i < ch; ++i) dest[i][j] = (DEINTERLEAVE_TO)*src++; \
-  else if (flag) memcpy(dest[0], src, n * sizeof(T)), src = &src[n]; \
-  else for (j = 0; j < n; dest[0][j++] = (DEINTERLEAVE_TO)*src++); \
-  *src0 = src; \
-} while (0)
-
-
-
-#if WITH_CR64 || WITH_CR64S
-void _soxr_deinterleave(double * * dest, /* Round/clipping not needed here */
-    soxr_datatype_t data_type, void const * * src0, size_t n, unsigned ch)
-{
-#define DEINTERLEAVE_TO double
-  switch (data_type & 3) {
-    case SOXR_FLOAT32: DEINTERLEAVE_FROM(float, 0); break;
-    case SOXR_FLOAT64: DEINTERLEAVE_FROM(double, 1); break;
-    case SOXR_INT32:   DEINTERLEAVE_FROM(int32_t, 0); break;
-    case SOXR_INT16:   DEINTERLEAVE_FROM(int16_t, 0); break;
-    default: break;
-  }
-}
-#endif
-
-
-
-#if WITH_CR32 || WITH_CR32S || WITH_VR32
-void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */
-    soxr_datatype_t data_type, void const * * src0, size_t n, unsigned ch)
-{
-#undef DEINTERLEAVE_TO
-#define DEINTERLEAVE_TO float
-  switch (data_type & 3) {
-    case SOXR_FLOAT32: DEINTERLEAVE_FROM(float, 1); break;
-    case SOXR_FLOAT64: DEINTERLEAVE_FROM(double, 0); break;
-    case SOXR_INT32:   DEINTERLEAVE_FROM(int32_t, 0); break;
-    case SOXR_INT16:   DEINTERLEAVE_FROM(int16_t, 0); break;
-    default: break;
-  }
-}
-#endif
-
-
-
-#include "rint.h"
-
-
-
-#if defined FE_INVALID && defined FPU_RINT32 && defined __STDC_VERSION__
-  #if __STDC_VERSION__ >= 199901L
-    #pragma STDC FENV_ACCESS ON
-  #endif
-#endif
-
-#if WITH_CR64 || WITH_CR64S
-#define FLOATX double
-
-#define LSX_RINT_CLIP_2 lsx_rint32_clip_2
-#define LSX_RINT_CLIP lsx_rint32_clip
-#define RINT_CLIP rint32_clip
-#define RINT rint32D
-#if defined FPU_RINT32
-  #define FPU_RINT
-#endif
-#define RINT_T int32_t
-#define RINT_MAX 2147483647L
-#include "rint-clip.h"
-
-#define LSX_RINT_CLIP_2 lsx_rint16_clip_2
-#define LSX_RINT_CLIP lsx_rint16_clip
-#define RINT_CLIP rint16_clip
-#define RINT rint16D
-#if defined FPU_RINT16
-  #define FPU_RINT
-#endif
-#define RINT_T int16_t
-#define RINT_MAX 32767
-#include "rint-clip.h"
-
-#define LSX_RINT_CLIP_2 lsx_rint16_clip_2_dither
-#define LSX_RINT_CLIP lsx_rint16_clip_dither
-#define RINT_CLIP rint16_clip_dither
-#define RINT rint16D
-#if defined FPU_RINT16
-  #define FPU_RINT
-#endif
-#define RINT_T int16_t
-#define RINT_MAX 32767
-#define DITHER
-#include "rint-clip.h"
-
-#undef FLOATX
-#endif
-
-
-
-#if WITH_CR32 || WITH_CR32S || WITH_VR32
-#define FLOATX float
-
-#define LSX_RINT_CLIP_2 lsx_rint32_clip_2_f
-#define LSX_RINT_CLIP lsx_rint32_clip_f
-#define RINT_CLIP rint32_clip_f
-#define RINT rint32F
-#if defined FPU_RINT32
-  #define FPU_RINT
-#endif
-#define RINT_T int32_t
-#define RINT_MAX 2147483647L
-#include "rint-clip.h"
-
-#define LSX_RINT_CLIP_2 lsx_rint16_clip_2_f
-#define LSX_RINT_CLIP lsx_rint16_clip_f
-#define RINT_CLIP rint16_clip_f
-#define RINT rint16F
-#if defined FPU_RINT16
-  #define FPU_RINT
-#endif
-#define RINT_T int16_t
-#define RINT_MAX 32767
-#include "rint-clip.h"
-
-#define LSX_RINT_CLIP_2 lsx_rint16_clip_2_dither_f
-#define LSX_RINT_CLIP lsx_rint16_clip_dither_f
-#define RINT_CLIP rint16_clip_dither_f
-#define RINT rint16D
-#if defined FPU_RINT16
-  #define FPU_RINT
-#endif
-#define RINT_T int16_t
-#define RINT_MAX 32767
-#define DITHER
-#include "rint-clip.h"
-
-#undef FLOATX
-#endif
-
-#if defined FE_INVALID && defined FPU_RINT32 && defined __STDC_VERSION__
-  #if __STDC_VERSION__ >= 199901L
-    #pragma STDC FENV_ACCESS OFF
-  #endif
-#endif
-
-
-
-#define INTERLEAVE_TO(T,flag) do { \
-  unsigned i; \
-  size_t j; \
-  T * dest = *dest0; \
-  if (ch > 1) \
-  for (j = 0; j < n; ++j) for (i = 0; i < ch; ++i) *dest++ = (T)src[i][j]; \
-  else if (flag) memcpy(dest, src[0], n * sizeof(T)), dest = &dest[n]; \
-  else for (j = 0; j < n; *dest++ = (T)src[0][j++]); \
-  *dest0 = dest; \
-  return 0; \
-} while (0)
-
-#if WITH_CR64 || WITH_CR64S
-size_t /* clips */ _soxr_interleave(soxr_datatype_t data_type, void * * dest0,
-  double const * const * src, size_t n, unsigned ch, unsigned long long * seed)
-{
-  switch (data_type & 3) {
-    case SOXR_FLOAT32: INTERLEAVE_TO(float, 0);
-    case SOXR_FLOAT64: INTERLEAVE_TO(double, 1);
-
-    case SOXR_INT32: if (ch == 1)
-        return lsx_rint32_clip(dest0, src[0], n);
-      return lsx_rint32_clip_2(dest0, src, ch, n);
-
-    case SOXR_INT16: if (seed) {
-      if (ch == 1)
-        return lsx_rint16_clip_dither(dest0, src[0], n, seed);
-      return lsx_rint16_clip_2_dither(dest0, src, ch, n, seed);
-    }
-    if (ch == 1)
-        return lsx_rint16_clip(dest0, src[0], n);
-      return lsx_rint16_clip_2(dest0, src, ch, n);
-    default: break;
-  }
-  return 0;
-}
-#endif
-
-#if WITH_CR32 || WITH_CR32S || WITH_VR32
-size_t /* clips */ _soxr_interleave_f(soxr_datatype_t data_type, void * * dest0,
-  float const * const * src, size_t n, unsigned ch, unsigned long long * seed)
-{
-  switch (data_type & 3) {
-    case SOXR_FLOAT32: INTERLEAVE_TO(float, 1);
-    case SOXR_FLOAT64: INTERLEAVE_TO(double, 0);
-
-    case SOXR_INT32: if (ch == 1)
-        return lsx_rint32_clip_f(dest0, src[0], n);
-      return lsx_rint32_clip_2_f(dest0, src, ch, n);
-
-    case SOXR_INT16: if (seed) {
-      if (ch == 1)
-        return lsx_rint16_clip_dither_f(dest0, src[0], n, seed);
-      return lsx_rint16_clip_2_dither_f(dest0, src, ch, n, seed);
-    }
-    if (ch == 1)
-        return lsx_rint16_clip_f(dest0, src[0], n);
-      return lsx_rint16_clip_2_f(dest0, src, ch, n);
-    default: break;
-  }
-  return 0;
-}
-#endif
diff --git a/soxr-sys/src/data-io.h b/soxr-sys/src/data-io.h
deleted file mode 100644
index 28e2d8907..000000000
--- a/soxr-sys/src/data-io.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#if !defined soxr_data_io_included
-#define soxr_data_io_included
-
-#include "soxr.h"
-
-void _soxr_deinterleave(
-    double * * dest,
-    soxr_datatype_t data_type,
-    void const * * src0,
-    size_t n,
-    unsigned ch);
-
-void _soxr_deinterleave_f(
-    float * * dest,
-    soxr_datatype_t data_type,
-    void const * * src0,
-    size_t n,
-    unsigned ch);
-
-size_t /* clips */ _soxr_interleave(
-    soxr_datatype_t data_type,
-    void * * dest,
-    double const * const * src,
-    size_t n,
-    unsigned ch,
-    unsigned long long * seed);
-
-size_t /* clips */ _soxr_interleave_f(
-    soxr_datatype_t data_type,
-    void * * dest,
-    float const * const * src,
-    size_t n,
-    unsigned ch,
-    unsigned long long * seed);
-
-#endif
diff --git a/soxr-sys/src/dbesi0.c b/soxr-sys/src/dbesi0.c
deleted file mode 100644
index 654216eb4..000000000
--- a/soxr-sys/src/dbesi0.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*  Copyright(C) 1996 Takuya OOURA
-
-You may use, copy, modify this code for any purpose and
-without fee.
-
-Package home:  http://www.kurims.kyoto-u.ac.jp/~ooura/bessel.html
-*/
-
-#include "filter.h"
-#define dbesi0 lsx_bessel_I_0
-
-/* Bessel I_0(x) function in double precision */
-
-#include <math.h>
-
-double dbesi0(double x)
-{
-    int k;
-    double w, t, y;
-    static double a[65] = {
-        8.5246820682016865877e-11, 2.5966600546497407288e-9,
-        7.9689994568640180274e-8, 1.9906710409667748239e-6,
-        4.0312469446528002532e-5, 6.4499871606224265421e-4,
-        0.0079012345761930579108, 0.071111111109207045212,
-        0.444444444444724909, 1.7777777777777532045,
-        4.0000000000000011182, 3.99999999999999998,
-        1.0000000000000000001,
-        1.1520919130377195927e-10, 2.2287613013610985225e-9,
-        8.1903951930694585113e-8, 1.9821560631611544984e-6,
-        4.0335461940910133184e-5, 6.4495330974432203401e-4,
-        0.0079013012611467520626, 0.071111038160875566622,
-        0.44444450319062699316, 1.7777777439146450067,
-        4.0000000132337935071, 3.9999999968569015366,
-        1.0000000003426703174,
-        1.5476870780515238488e-10, 1.2685004214732975355e-9,
-        9.2776861851114223267e-8, 1.9063070109379044378e-6,
-        4.0698004389917945832e-5, 6.4370447244298070713e-4,
-        0.0079044749458444976958, 0.071105052411749363882,
-        0.44445280640924755082, 1.7777694934432109713,
-        4.0000055808824003386, 3.9999977081165740932,
-        1.0000004333949319118,
-        2.0675200625006793075e-10, -6.1689554705125681442e-10,
-        1.2436765915401571654e-7, 1.5830429403520613423e-6,
-        4.2947227560776583326e-5, 6.3249861665073441312e-4,
-        0.0079454472840953930811, 0.070994327785661860575,
-        0.44467219586283000332, 1.7774588182255374745,
-        4.0003038986252717972, 3.9998233869142057195,
-        1.0000472932961288324,
-        2.7475684794982708655e-10, -3.8991472076521332023e-9,
-        1.9730170483976049388e-7, 5.9651531561967674521e-7,
-        5.1992971474748995357e-5, 5.7327338675433770752e-4,
-        0.0082293143836530412024, 0.069990934858728039037,
-        0.44726764292723985087, 1.7726685170014087784,
-        4.0062907863712704432, 3.9952750700487845355,
-        1.0016354346654179322
-    };
-    static double b[70] = {
-        6.7852367144945531383e-8, 4.6266061382821826854e-7,
-        6.9703135812354071774e-6, 7.6637663462953234134e-5,
-        7.9113515222612691636e-4, 0.0073401204731103808981,
-        0.060677114958668837046, 0.43994941411651569622,
-        2.7420017097661750609, 14.289661921740860534,
-        59.820609640320710779, 188.78998681199150629,
-        399.8731367825601118, 427.56411572180478514,
-        1.8042097874891098754e-7, 1.2277164312044637357e-6,
-        1.8484393221474274861e-5, 2.0293995900091309208e-4,
-        0.0020918539850246207459, 0.019375315654033949297,
-        0.15985869016767185908, 1.1565260527420641724,
-        7.1896341224206072113, 37.354773811947484532,
-        155.80993164266268457, 489.5211371158540918,
-        1030.9147225169564806, 1093.5883545113746958,
-        4.8017305613187493564e-7, 3.261317843912380074e-6,
-        4.9073137508166159639e-5, 5.3806506676487583755e-4,
-        0.0055387918291051866561, 0.051223717488786549025,
-        0.42190298621367914765, 3.0463625987357355872,
-        18.895299447327733204, 97.915189029455461554,
-        407.13940115493494659, 1274.3088990480582632,
-        2670.9883037012547506, 2815.7166284662544712,
-        1.2789926338424623394e-6, 8.6718263067604918916e-6,
-        1.3041508821299929489e-4, 0.001428224737372747892,
-        0.014684070635768789378, 0.13561403190404185755,
-        1.1152592585977393953, 8.0387088559465389038,
-        49.761318895895479206, 257.2684232313529138,
-        1066.8543146269566231, 3328.3874581009636362,
-        6948.8586598121634874, 7288.4893398212481055,
-        3.409350368197032893e-6, 2.3079025203103376076e-5,
-        3.4691373283901830239e-4, 0.003794994977222908545,
-        0.038974209677945602145, 0.3594948380414878371,
-        2.9522878893539528226, 21.246564609514287056,
-        131.28727387146173141, 677.38107093296675421,
-        2802.3724744545046518, 8718.5731420798254081,
-        18141.348781638832286, 18948.925349296308859
-    };
-    static double c[45] = {
-        2.5568678676452702768e-15, 3.0393953792305924324e-14,
-        6.3343751991094840009e-13, 1.5041298011833009649e-11,
-        4.4569436918556541414e-10, 1.746393051427167951e-8,
-        1.0059224011079852317e-6, 1.0729838945088577089e-4,
-        0.05150322693642527738,
-        5.2527963991711562216e-15, 7.202118481421005641e-15,
-        7.2561421229904797156e-13, 1.482312146673104251e-11,
-        4.4602670450376245434e-10, 1.7463600061788679671e-8,
-        1.005922609132234756e-6, 1.0729838937545111487e-4,
-        0.051503226936437300716,
-        1.3365917359358069908e-14, -1.2932643065888544835e-13,
-        1.7450199447905602915e-12, 1.0419051209056979788e-11,
-        4.58047881980598326e-10, 1.7442405450073548966e-8,
-        1.0059461453281292278e-6, 1.0729837434500161228e-4,
-        0.051503226940658446941,
-        5.3771611477352308649e-14, -1.1396193006413731702e-12,
-        1.2858641335221653409e-11, -5.9802086004570057703e-11,
-        7.3666894305929510222e-10, 1.6731837150730356448e-8,
-        1.0070831435812128922e-6, 1.0729733111203704813e-4,
-        0.051503227360726294675,
-        3.7819492084858931093e-14, -4.8600496888588034879e-13,
-        1.6898350504817224909e-12, 4.5884624327524255865e-11,
-        1.2521615963377513729e-10, 1.8959658437754727957e-8,
-        1.0020716710561353622e-6, 1.073037119856927559e-4,
-        0.05150322383300230775
-    };
-
-    w = fabs(x);
-    if (w < 8.5) {
-        t = w * w * 0.0625;
-        k = 13 * ((int) t);
-        y = (((((((((((a[k] * t + a[k + 1]) * t +
-            a[k + 2]) * t + a[k + 3]) * t + a[k + 4]) * t +
-            a[k + 5]) * t + a[k + 6]) * t + a[k + 7]) * t +
-            a[k + 8]) * t + a[k + 9]) * t + a[k + 10]) * t +
-            a[k + 11]) * t + a[k + 12];
-    } else if (w < 12.5) {
-        k = (int) w;
-        t = w - k;
-        k = 14 * (k - 8);
-        y = ((((((((((((b[k] * t + b[k + 1]) * t +
-            b[k + 2]) * t + b[k + 3]) * t + b[k + 4]) * t +
-            b[k + 5]) * t + b[k + 6]) * t + b[k + 7]) * t +
-            b[k + 8]) * t + b[k + 9]) * t + b[k + 10]) * t +
-            b[k + 11]) * t + b[k + 12]) * t + b[k + 13];
-    } else {
-        t = 60 / w;
-        k = 9 * ((int) t);
-        y = ((((((((c[k] * t + c[k + 1]) * t +
-            c[k + 2]) * t + c[k + 3]) * t + c[k + 4]) * t +
-            c[k + 5]) * t + c[k + 6]) * t + c[k + 7]) * t +
-            c[k + 8]) * sqrt(t) * exp(w);
-    }
-    return y;
-}
diff --git a/soxr-sys/src/dev32s.h b/soxr-sys/src/dev32s.h
deleted file mode 100644
index 7edae868d..000000000
--- a/soxr-sys/src/dev32s.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#if !defined soxr_dev32s_included
-#define soxr_dev32s_included
-
-#if defined __GNUC__
-  #define SIMD_INLINE(T) static __inline T __attribute__((always_inline))
-  #define vAlign __attribute__((aligned (16)))
-#elif defined _MSC_VER
-  #define SIMD_INLINE(T) static __forceinline T
-  #define vAlign __declspec(align(16))
-#endif
-
-#if defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86
-
-#include <xmmintrin.h>
-
-#define vZero()      _mm_setzero_ps()
-#define vSet1(a)     _mm_set_ss(a)
-#define vMul(a,b)    _mm_mul_ps(a,b)
-#define vAdd(a,b)    _mm_add_ps(a,b)
-#define vMac(a,b,c)  vAdd(vMul(a,b),c)
-#define vLds(a)      _mm_set1_ps(a)
-#define vLd(a)       _mm_load_ps(a)
-#define vLdu(a)      _mm_loadu_ps(a)
-
-typedef __m128 v4_t;
-
-SIMD_INLINE(void) vStorSum(float * a, v4_t b) {
-  v4_t t = vAdd(_mm_movehl_ps(b, b), b);
-  _mm_store_ss(a, vAdd(t, _mm_shuffle_ps(t,t,1)));}
-
-#elif defined __arm__
-
-#include <arm_neon.h>
-
-#define vZero()      vdupq_n_f32(0)
-#define vMul(a,b)    vmulq_f32(a,b)
-#define vAdd(a,b)    vaddq_f32(a,b)
-#define vMac(a,b,c)  vmlaq_f32(c,a,b)
-#define vLds(a)      vld1q_dup_f32(&(a))
-#define vLd(a)       vld1q_f32(a)
-#define vLdu(a)      vld1q_f32(a)
-
-typedef float32x4_t v4_t;
-
-SIMD_INLINE(void) vStorSum(float * a, v4_t b) {
-  float32x2_t t = vadd_f32(vget_high_f32(b), vget_low_f32(b));
-  *a = vget_lane_f32(vpadd_f32(t, t), 0);}
-
-#endif
-
-#endif
diff --git a/soxr-sys/src/dev64s.h b/soxr-sys/src/dev64s.h
deleted file mode 100644
index 4672210d1..000000000
--- a/soxr-sys/src/dev64s.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#if !defined soxr_dev64s_included
-#define soxr_dev64s_included
-
-#if defined __GNUC__
-  #define SIMD_INLINE(T) static __inline T __attribute__((always_inline))
-  #define vAlign __attribute__((aligned (32)))
-#elif defined _MSC_VER
-  #define SIMD_INLINE(T) static __forceinline T
-  #define vAlign __declspec(align(32))
-#else
-  #define SIMD_INLINE(T) static __inline T
-#endif
-
-#if defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86
-
-#include <immintrin.h>
-
-#if defined __AVX__
-
-#define vZero()      _mm256_setzero_pd()
-#define vSet1(a)     _mm256_set_pd(0,0,0,a)
-#define vMul(a,b)    _mm256_mul_pd(a,b)
-#define vAdd(a,b)    _mm256_add_pd(a,b)
-#define vMac(a,b,c)  vAdd(vMul(a,b),c) /* Note: gcc -mfma will `fuse' these */
-#define vLds(a)      _mm256_set1_pd(a)
-#define vLd(a)       _mm256_load_pd(a)
-#define vLdu(a)      _mm256_loadu_pd(a)
-
-typedef __m256d v4_t;
-
-SIMD_INLINE(void) vStorSum(double * a, v4_t b) {
-  b = _mm256_hadd_pd(b, _mm256_permute2f128_pd(b,b,1));
-  _mm_store_sd(a, _mm256_castpd256_pd128(_mm256_hadd_pd(b,b)));}
-
-#endif
-
-#endif
-
-#endif
diff --git a/soxr-sys/src/fft4g.c b/soxr-sys/src/fft4g.c
deleted file mode 100644
index cf6293a04..000000000
--- a/soxr-sys/src/fft4g.c
+++ /dev/null
@@ -1,1346 +0,0 @@
-/* Copyright Takuya OOURA, 1996-2001.
-
-You may use, copy, modify and distribute this code for any
-purpose (include commercial use) and without fee.  Please
-refer to this package when you modify this code.
-
-Package home:  http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
-
-Fast Fourier/Cosine/Sine Transform
-    dimension   :one
-    data length :power of 2
-    decimation  :frequency
-    radix       :4, 2
-    data        :inplace
-    table       :use
-functions
-    cdft: Complex Discrete Fourier Transform
-    rdft: Real Discrete Fourier Transform
-    ddct: Discrete Cosine Transform
-    ddst: Discrete Sine Transform
-    dfct: Cosine Transform of RDFT (Real Symmetric DFT)
-    dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
-function prototypes
-    void cdft(int, int, double *, int *, double *);
-    void rdft(int, int, double *, int *, double *);
-    void ddct(int, int, double *, int *, double *);
-    void ddst(int, int, double *, int *, double *);
-    void dfct(int, double *, double *, int *, double *);
-    void dfst(int, double *, double *, int *, double *);
-
-
--------- Complex DFT (Discrete Fourier Transform) --------
-    [definition]
-        <case1>
-            X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
-        <case2>
-            X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
-        (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
-    [usage]
-        <case1>
-            ip[0] = 0; // first time only
-            cdft(2*n, 1, a, ip, w);
-        <case2>
-            ip[0] = 0; // first time only
-            cdft(2*n, -1, a, ip, w);
-    [parameters]
-        2*n            :data length (int)
-                        n >= 1, n = power of 2
-        a[0...2*n-1]   :input/output data (double *)
-                        input data
-                            a[2*j] = Re(x[j]),
-                            a[2*j+1] = Im(x[j]), 0<=j<n
-                        output data
-                            a[2*k] = Re(X[k]),
-                            a[2*k+1] = Im(X[k]), 0<=k<n
-        ip[0...*]      :work area for bit reversal (int *)
-                        length of ip >= 2+sqrt(n)
-                        strictly,
-                        length of ip >=
-                            2+(1<<(int)(log(n+0.5)/log(2))/2).
-                        ip[0],ip[1] are pointers of the cos/sin table.
-        w[0...n/2-1]   :cos/sin table (double *)
-                        w[],ip[] are initialized if ip[0] == 0.
-    [remark]
-        Inverse of
-            cdft(2*n, -1, a, ip, w);
-        is
-            cdft(2*n, 1, a, ip, w);
-            for (j = 0; j <= 2 * n - 1; j++) {
-                a[j] *= 1.0 / n;
-            }
-        .
-
-
--------- Real DFT / Inverse of Real DFT --------
-    [definition]
-        <case1> RDFT
-            R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
-            I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
-        <case2> IRDFT (excluding scale)
-            a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
-                   sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
-                   sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
-    [usage]
-        <case1>
-            ip[0] = 0; // first time only
-            rdft(n, 1, a, ip, w);
-        <case2>
-            ip[0] = 0; // first time only
-            rdft(n, -1, a, ip, w);
-    [parameters]
-        n              :data length (int)
-                        n >= 2, n = power of 2
-        a[0...n-1]     :input/output data (double *)
-                        <case1>
-                            output data
-                                a[2*k] = R[k], 0<=k<n/2
-                                a[2*k+1] = I[k], 0<k<n/2
-                                a[1] = R[n/2]
-                        <case2>
-                            input data
-                                a[2*j] = R[j], 0<=j<n/2
-                                a[2*j+1] = I[j], 0<j<n/2
-                                a[1] = R[n/2]
-        ip[0...*]      :work area for bit reversal (int *)
-                        length of ip >= 2+sqrt(n/2)
-                        strictly,
-                        length of ip >=
-                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
-                        ip[0],ip[1] are pointers of the cos/sin table.
-        w[0...n/2-1]   :cos/sin table (double *)
-                        w[],ip[] are initialized if ip[0] == 0.
-    [remark]
-        Inverse of
-            rdft(n, 1, a, ip, w);
-        is
-            rdft(n, -1, a, ip, w);
-            for (j = 0; j <= n - 1; j++) {
-                a[j] *= 2.0 / n;
-            }
-        .
-
-
--------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
-    [definition]
-        <case1> IDCT (excluding scale)
-            C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
-        <case2> DCT
-            C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
-    [usage]
-        <case1>
-            ip[0] = 0; // first time only
-            ddct(n, 1, a, ip, w);
-        <case2>
-            ip[0] = 0; // first time only
-            ddct(n, -1, a, ip, w);
-    [parameters]
-        n              :data length (int)
-                        n >= 2, n = power of 2
-        a[0...n-1]     :input/output data (double *)
-                        output data
-                            a[k] = C[k], 0<=k<n
-        ip[0...*]      :work area for bit reversal (int *)
-                        length of ip >= 2+sqrt(n/2)
-                        strictly,
-                        length of ip >=
-                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
-                        ip[0],ip[1] are pointers of the cos/sin table.
-        w[0...n*5/4-1] :cos/sin table (double *)
-                        w[],ip[] are initialized if ip[0] == 0.
-    [remark]
-        Inverse of
-            ddct(n, -1, a, ip, w);
-        is
-            a[0] *= 0.5;
-            ddct(n, 1, a, ip, w);
-            for (j = 0; j <= n - 1; j++) {
-                a[j] *= 2.0 / n;
-            }
-        .
-
-
--------- DST (Discrete Sine Transform) / Inverse of DST --------
-    [definition]
-        <case1> IDST (excluding scale)
-            S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
-        <case2> DST
-            S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
-    [usage]
-        <case1>
-            ip[0] = 0; // first time only
-            ddst(n, 1, a, ip, w);
-        <case2>
-            ip[0] = 0; // first time only
-            ddst(n, -1, a, ip, w);
-    [parameters]
-        n              :data length (int)
-                        n >= 2, n = power of 2
-        a[0...n-1]     :input/output data (double *)
-                        <case1>
-                            input data
-                                a[j] = A[j], 0<j<n
-                                a[0] = A[n]
-                            output data
-                                a[k] = S[k], 0<=k<n
-                        <case2>
-                            output data
-                                a[k] = S[k], 0<k<n
-                                a[0] = S[n]
-        ip[0...*]      :work area for bit reversal (int *)
-                        length of ip >= 2+sqrt(n/2)
-                        strictly,
-                        length of ip >=
-                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
-                        ip[0],ip[1] are pointers of the cos/sin table.
-        w[0...n*5/4-1] :cos/sin table (double *)
-                        w[],ip[] are initialized if ip[0] == 0.
-    [remark]
-        Inverse of
-            ddst(n, -1, a, ip, w);
-        is
-            a[0] *= 0.5;
-            ddst(n, 1, a, ip, w);
-            for (j = 0; j <= n - 1; j++) {
-                a[j] *= 2.0 / n;
-            }
-        .
-
-
--------- Cosine Transform of RDFT (Real Symmetric DFT) --------
-    [definition]
-        C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
-    [usage]
-        ip[0] = 0; // first time only
-        dfct(n, a, t, ip, w);
-    [parameters]
-        n              :data length - 1 (int)
-                        n >= 2, n = power of 2
-        a[0...n]       :input/output data (double *)
-                        output data
-                            a[k] = C[k], 0<=k<=n
-        t[0...n/2]     :work area (double *)
-        ip[0...*]      :work area for bit reversal (int *)
-                        length of ip >= 2+sqrt(n/4)
-                        strictly,
-                        length of ip >=
-                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
-                        ip[0],ip[1] are pointers of the cos/sin table.
-        w[0...n*5/8-1] :cos/sin table (double *)
-                        w[],ip[] are initialized if ip[0] == 0.
-    [remark]
-        Inverse of
-            a[0] *= 0.5;
-            a[n] *= 0.5;
-            dfct(n, a, t, ip, w);
-        is
-            a[0] *= 0.5;
-            a[n] *= 0.5;
-            dfct(n, a, t, ip, w);
-            for (j = 0; j <= n; j++) {
-                a[j] *= 2.0 / n;
-            }
-        .
-
-
--------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
-    [definition]
-        S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
-    [usage]
-        ip[0] = 0; // first time only
-        dfst(n, a, t, ip, w);
-    [parameters]
-        n              :data length + 1 (int)
-                        n >= 2, n = power of 2
-        a[0...n-1]     :input/output data (double *)
-                        output data
-                            a[k] = S[k], 0<k<n
-                        (a[0] is used for work area)
-        t[0...n/2-1]   :work area (double *)
-        ip[0...*]      :work area for bit reversal (int *)
-                        length of ip >= 2+sqrt(n/4)
-                        strictly,
-                        length of ip >=
-                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
-                        ip[0],ip[1] are pointers of the cos/sin table.
-        w[0...n*5/8-1] :cos/sin table (double *)
-                        w[],ip[] are initialized if ip[0] == 0.
-    [remark]
-        Inverse of
-            dfst(n, a, t, ip, w);
-        is
-            dfst(n, a, t, ip, w);
-            for (j = 1; j <= n - 1; j++) {
-                a[j] *= 2.0 / n;
-            }
-        .
-
-
-Appendix :
-    The cos/sin table is recalculated when the larger table required.
-    w[] and ip[] are compatible with all routines.
-*/
-
-
-#include "math-wrap.h"
-#include "fft4g.h"
-
-#ifdef FFT4G_FLOAT
-  #define double float
-  #define one_half 0.5f
-
-  #define sin(x)   sinf(x)
-  #define cos(x)   cosf(x)
-  #define atan(x)  atanf(x)
-
-  #define cdft  lsx_cdft_f
-  #define rdft  lsx_rdft_f
-  #define ddct  lsx_ddct_f
-  #define ddst  lsx_ddst_f
-  #define dfct  lsx_dfct_f
-  #define dfst  lsx_dfst_f
-#else
-  #define one_half 0.5
-  #define cdft  lsx_cdft
-  #define rdft  lsx_rdft
-  #define ddct  lsx_ddct
-  #define ddst  lsx_ddst
-  #define dfct  lsx_dfct
-  #define dfst  lsx_dfst
-#endif
-
-static void bitrv2conj(int n, int *ip, double *a);
-static void bitrv2(int n, int *ip, double *a);
-static void cft1st(int n, double *a, double const *w);
-static void cftbsub(int n, double *a, double const *w);
-static void cftfsub(int n, double *a, double const *w);
-static void cftmdl(int n, int l, double *a, double const *w);
-static void dctsub(int n, double *a, int nc, double const *c);
-static void dstsub(int n, double *a, int nc, double const *c);
-static void makect(int nc, int *ip, double *c);
-static void makewt(int nw, int *ip, double *w);
-static void rftbsub(int n, double *a, int nc, double const *c);
-static void rftfsub(int n, double *a, int nc, double const *c);
-
-
-void cdft(int n, int isgn, double *a, int *ip, double *w)
-{
-    if (n > (ip[0] << 2)) {
-        makewt(n >> 2, ip, w);
-    }
-    if (n > 4) {
-        if (isgn >= 0) {
-            bitrv2(n, ip + 2, a);
-            cftfsub(n, a, w);
-        } else {
-            bitrv2conj(n, ip + 2, a);
-            cftbsub(n, a, w);
-        }
-    } else if (n == 4) {
-        cftfsub(n, a, w);
-    }
-}
-
-
-void rdft(int n, int isgn, double *a, int *ip, double *w)
-{
-    int nw, nc;
-    double xi;
-
-    nw = ip[0];
-    if (n > (nw << 2)) {
-        nw = n >> 2;
-        makewt(nw, ip, w);
-    }
-    nc = ip[1];
-    if (n > (nc << 2)) {
-        nc = n >> 2;
-        makect(nc, ip, w + nw);
-    }
-    if (isgn >= 0) {
-        if (n > 4) {
-            bitrv2(n, ip + 2, a);
-            cftfsub(n, a, w);
-            rftfsub(n, a, nc, w + nw);
-        } else if (n == 4) {
-            cftfsub(n, a, w);
-        }
-        xi = a[0] - a[1];
-        a[0] += a[1];
-        a[1] = xi;
-    } else {
-        a[1] = one_half * (a[0] - a[1]);
-        a[0] -= a[1];
-        if (n > 4) {
-            rftbsub(n, a, nc, w + nw);
-            bitrv2(n, ip + 2, a);
-            cftbsub(n, a, w);
-        } else if (n == 4) {
-            cftfsub(n, a, w);
-        }
-    }
-}
-
-
-void ddct(int n, int isgn, double *a, int *ip, double *w)
-{
-    int j, nw, nc;
-    double xr;
-
-    nw = ip[0];
-    if (n > (nw << 2)) {
-        nw = n >> 2;
-        makewt(nw, ip, w);
-    }
-    nc = ip[1];
-    if (n > nc) {
-        nc = n;
-        makect(nc, ip, w + nw);
-    }
-    if (isgn < 0) {
-        xr = a[n - 1];
-        for (j = n - 2; j >= 2; j -= 2) {
-            a[j + 1] = a[j] - a[j - 1];
-            a[j] += a[j - 1];
-        }
-        a[1] = a[0] - xr;
-        a[0] += xr;
-        if (n > 4) {
-            rftbsub(n, a, nc, w + nw);
-            bitrv2(n, ip + 2, a);
-            cftbsub(n, a, w);
-        } else if (n == 4) {
-            cftfsub(n, a, w);
-        }
-    }
-    dctsub(n, a, nc, w + nw);
-    if (isgn >= 0) {
-        if (n > 4) {
-            bitrv2(n, ip + 2, a);
-            cftfsub(n, a, w);
-            rftfsub(n, a, nc, w + nw);
-        } else if (n == 4) {
-            cftfsub(n, a, w);
-        }
-        xr = a[0] - a[1];
-        a[0] += a[1];
-        for (j = 2; j < n; j += 2) {
-            a[j - 1] = a[j] - a[j + 1];
-            a[j] += a[j + 1];
-        }
-        a[n - 1] = xr;
-    }
-}
-
-
-void ddst(int n, int isgn, double *a, int *ip, double *w)
-{
-    int j, nw, nc;
-    double xr;
-
-    nw = ip[0];
-    if (n > (nw << 2)) {
-        nw = n >> 2;
-        makewt(nw, ip, w);
-    }
-    nc = ip[1];
-    if (n > nc) {
-        nc = n;
-        makect(nc, ip, w + nw);
-    }
-    if (isgn < 0) {
-        xr = a[n - 1];
-        for (j = n - 2; j >= 2; j -= 2) {
-            a[j + 1] = -a[j] - a[j - 1];
-            a[j] -= a[j - 1];
-        }
-        a[1] = a[0] + xr;
-        a[0] -= xr;
-        if (n > 4) {
-            rftbsub(n, a, nc, w + nw);
-            bitrv2(n, ip + 2, a);
-            cftbsub(n, a, w);
-        } else if (n == 4) {
-            cftfsub(n, a, w);
-        }
-    }
-    dstsub(n, a, nc, w + nw);
-    if (isgn >= 0) {
-        if (n > 4) {
-            bitrv2(n, ip + 2, a);
-            cftfsub(n, a, w);
-            rftfsub(n, a, nc, w + nw);
-        } else if (n == 4) {
-            cftfsub(n, a, w);
-        }
-        xr = a[0] - a[1];
-        a[0] += a[1];
-        for (j = 2; j < n; j += 2) {
-            a[j - 1] = -a[j] - a[j + 1];
-            a[j] -= a[j + 1];
-        }
-        a[n - 1] = -xr;
-    }
-}
-
-
-void dfct(int n, double *a, double *t, int *ip, double *w)
-{
-    int j, k, l, m, mh, nw, nc;
-    double xr, xi, yr, yi;
-
-    nw = ip[0];
-    if (n > (nw << 3)) {
-        nw = n >> 3;
-        makewt(nw, ip, w);
-    }
-    nc = ip[1];
-    if (n > (nc << 1)) {
-        nc = n >> 1;
-        makect(nc, ip, w + nw);
-    }
-    m = n >> 1;
-    yi = a[m];
-    xi = a[0] + a[n];
-    a[0] -= a[n];
-    t[0] = xi - yi;
-    t[m] = xi + yi;
-    if (n > 2) {
-        mh = m >> 1;
-        for (j = 1; j < mh; j++) {
-            k = m - j;
-            xr = a[j] - a[n - j];
-            xi = a[j] + a[n - j];
-            yr = a[k] - a[n - k];
-            yi = a[k] + a[n - k];
-            a[j] = xr;
-            a[k] = yr;
-            t[j] = xi - yi;
-            t[k] = xi + yi;
-        }
-        t[mh] = a[mh] + a[n - mh];
-        a[mh] -= a[n - mh];
-        dctsub(m, a, nc, w + nw);
-        if (m > 4) {
-            bitrv2(m, ip + 2, a);
-            cftfsub(m, a, w);
-            rftfsub(m, a, nc, w + nw);
-        } else if (m == 4) {
-            cftfsub(m, a, w);
-        }
-        a[n - 1] = a[0] - a[1];
-        a[1] = a[0] + a[1];
-        for (j = m - 2; j >= 2; j -= 2) {
-            a[2 * j + 1] = a[j] + a[j + 1];
-            a[2 * j - 1] = a[j] - a[j + 1];
-        }
-        l = 2;
-        m = mh;
-        while (m >= 2) {
-            dctsub(m, t, nc, w + nw);
-            if (m > 4) {
-                bitrv2(m, ip + 2, t);
-                cftfsub(m, t, w);
-                rftfsub(m, t, nc, w + nw);
-            } else if (m == 4) {
-                cftfsub(m, t, w);
-            }
-            a[n - l] = t[0] - t[1];
-            a[l] = t[0] + t[1];
-            k = 0;
-            for (j = 2; j < m; j += 2) {
-                k += l << 2;
-                a[k - l] = t[j] - t[j + 1];
-                a[k + l] = t[j] + t[j + 1];
-            }
-            l <<= 1;
-            mh = m >> 1;
-            for (j = 0; j < mh; j++) {
-                k = m - j;
-                t[j] = t[m + k] - t[m + j];
-                t[k] = t[m + k] + t[m + j];
-            }
-            t[mh] = t[m + mh];
-            m = mh;
-        }
-        a[l] = t[0];
-        a[n] = t[2] - t[1];
-        a[0] = t[2] + t[1];
-    } else {
-        a[1] = a[0];
-        a[2] = t[0];
-        a[0] = t[1];
-    }
-}
-
-
-void dfst(int n, double *a, double *t, int *ip, double *w)
-{
-    int j, k, l, m, mh, nw, nc;
-    double xr, xi, yr, yi;
-
-    nw = ip[0];
-    if (n > (nw << 3)) {
-        nw = n >> 3;
-        makewt(nw, ip, w);
-    }
-    nc = ip[1];
-    if (n > (nc << 1)) {
-        nc = n >> 1;
-        makect(nc, ip, w + nw);
-    }
-    if (n > 2) {
-        m = n >> 1;
-        mh = m >> 1;
-        for (j = 1; j < mh; j++) {
-            k = m - j;
-            xr = a[j] + a[n - j];
-            xi = a[j] - a[n - j];
-            yr = a[k] + a[n - k];
-            yi = a[k] - a[n - k];
-            a[j] = xr;
-            a[k] = yr;
-            t[j] = xi + yi;
-            t[k] = xi - yi;
-        }
-        t[0] = a[mh] - a[n - mh];
-        a[mh] += a[n - mh];
-        a[0] = a[m];
-        dstsub(m, a, nc, w + nw);
-        if (m > 4) {
-            bitrv2(m, ip + 2, a);
-            cftfsub(m, a, w);
-            rftfsub(m, a, nc, w + nw);
-        } else if (m == 4) {
-            cftfsub(m, a, w);
-        }
-        a[n - 1] = a[1] - a[0];
-        a[1] = a[0] + a[1];
-        for (j = m - 2; j >= 2; j -= 2) {
-            a[2 * j + 1] = a[j] - a[j + 1];
-            a[2 * j - 1] = -a[j] - a[j + 1];
-        }
-        l = 2;
-        m = mh;
-        while (m >= 2) {
-            dstsub(m, t, nc, w + nw);
-            if (m > 4) {
-                bitrv2(m, ip + 2, t);
-                cftfsub(m, t, w);
-                rftfsub(m, t, nc, w + nw);
-            } else if (m == 4) {
-                cftfsub(m, t, w);
-            }
-            a[n - l] = t[1] - t[0];
-            a[l] = t[0] + t[1];
-            k = 0;
-            for (j = 2; j < m; j += 2) {
-                k += l << 2;
-                a[k - l] = -t[j] - t[j + 1];
-                a[k + l] = t[j] - t[j + 1];
-            }
-            l <<= 1;
-            mh = m >> 1;
-            for (j = 1; j < mh; j++) {
-                k = m - j;
-                t[j] = t[m + k] + t[m + j];
-                t[k] = t[m + k] - t[m + j];
-            }
-            t[0] = t[m + mh];
-            m = mh;
-        }
-        a[l] = t[0];
-    }
-    a[0] = 0;
-}
-
-
-/* -------- initializing routines -------- */
-
-
-static void makewt(int nw, int *ip, double *w)
-{
-    int j, nwh;
-    double delta, x, y;
-
-    ip[0] = nw;
-    ip[1] = 1;
-    if (nw > 2) {
-        nwh = nw >> 1;
-        delta = atan(1.0) / (double)nwh;
-        w[0] = 1;
-        w[1] = 0;
-        w[nwh] = cos(delta * (double)nwh);
-        w[nwh + 1] = w[nwh];
-        if (nwh > 2) {
-            for (j = 2; j < nwh; j += 2) {
-                x = cos(delta * (double)j);
-                y = sin(delta * (double)j);
-                w[j] = x;
-                w[j + 1] = y;
-                w[nw - j] = y;
-                w[nw - j + 1] = x;
-            }
-            bitrv2(nw, ip + 2, w);
-        }
-    }
-}
-
-
-static void makect(int nc, int *ip, double *c)
-{
-    int j, nch;
-    double delta;
-
-    ip[1] = nc;
-    if (nc > 1) {
-        nch = nc >> 1;
-        delta = atan(1.0) / (double)nch;
-        c[0] = cos(delta * (double)nch);
-        c[nch] = one_half * c[0];
-        for (j = 1; j < nch; j++) {
-            c[j] = one_half * cos(delta * (double)j);
-            c[nc - j] = one_half * sin(delta * (double)j);
-        }
-    }
-}
-
-
-/* -------- child routines -------- */
-
-
-static void bitrv2(int n, int *ip0, double *a)
-{
-    int j, j1, k, k1, l, m, m2, ip[1024];
-    double xr, xi, yr, yi;
-
-    (void)ip0;
-    ip[0] = 0;
-    l = n;
-    m = 1;
-    while ((m << 3) < l) {
-        l >>= 1;
-        for (j = 0; j < m; j++) {
-            ip[m + j] = ip[j] + l;
-        }
-        m <<= 1;
-    }
-    m2 = 2 * m;
-    if ((m << 3) == l) {
-        for (k = 0; k < m; k++) {
-            for (j = 0; j < k; j++) {
-                j1 = 2 * j + ip[k];
-                k1 = 2 * k + ip[j];
-                xr = a[j1];
-                xi = a[j1 + 1];
-                yr = a[k1];
-                yi = a[k1 + 1];
-                a[j1] = yr;
-                a[j1 + 1] = yi;
-                a[k1] = xr;
-                a[k1 + 1] = xi;
-                j1 += m2;
-                k1 += 2 * m2;
-                xr = a[j1];
-                xi = a[j1 + 1];
-                yr = a[k1];
-                yi = a[k1 + 1];
-                a[j1] = yr;
-                a[j1 + 1] = yi;
-                a[k1] = xr;
-                a[k1 + 1] = xi;
-                j1 += m2;
-                k1 -= m2;
-                xr = a[j1];
-                xi = a[j1 + 1];
-                yr = a[k1];
-                yi = a[k1 + 1];
-                a[j1] = yr;
-                a[j1 + 1] = yi;
-                a[k1] = xr;
-                a[k1 + 1] = xi;
-                j1 += m2;
-                k1 += 2 * m2;
-                xr = a[j1];
-                xi = a[j1 + 1];
-                yr = a[k1];
-                yi = a[k1 + 1];
-                a[j1] = yr;
-                a[j1 + 1] = yi;
-                a[k1] = xr;
-                a[k1 + 1] = xi;
-            }
-            j1 = 2 * k + m2 + ip[k];
-            k1 = j1 + m2;
-            xr = a[j1];
-            xi = a[j1 + 1];
-            yr = a[k1];
-            yi = a[k1 + 1];
-            a[j1] = yr;
-            a[j1 + 1] = yi;
-            a[k1] = xr;
-            a[k1 + 1] = xi;
-        }
-    } else {
-        for (k = 1; k < m; k++) {
-            for (j = 0; j < k; j++) {
-                j1 = 2 * j + ip[k];
-                k1 = 2 * k + ip[j];
-                xr = a[j1];
-                xi = a[j1 + 1];
-                yr = a[k1];
-                yi = a[k1 + 1];
-                a[j1] = yr;
-                a[j1 + 1] = yi;
-                a[k1] = xr;
-                a[k1 + 1] = xi;
-                j1 += m2;
-                k1 += m2;
-                xr = a[j1];
-                xi = a[j1 + 1];
-                yr = a[k1];
-                yi = a[k1 + 1];
-                a[j1] = yr;
-                a[j1 + 1] = yi;
-                a[k1] = xr;
-                a[k1 + 1] = xi;
-            }
-        }
-    }
-}
-
-
-static void bitrv2conj(int n, int *ip0, double *a)
-{
-    int j, j1, k, k1, l, m, m2, ip[512];
-    double xr, xi, yr, yi;
-
-    (void)ip0;
-    ip[0] = 0;
-    l = n;
-    m = 1;
-    while ((m << 3) < l) {
-        l >>= 1;
-        for (j = 0; j < m; j++) {
-            ip[m + j] = ip[j] + l;
-        }
-        m <<= 1;
-    }
-    m2 = 2 * m;
-    if ((m << 3) == l) {
-        for (k = 0; k < m; k++) {
-            for (j = 0; j < k; j++) {
-                j1 = 2 * j + ip[k];
-                k1 = 2 * k + ip[j];
-                xr = a[j1];
-                xi = -a[j1 + 1];
-                yr = a[k1];
-                yi = -a[k1 + 1];
-                a[j1] = yr;
-                a[j1 + 1] = yi;
-                a[k1] = xr;
-                a[k1 + 1] = xi;
-                j1 += m2;
-                k1 += 2 * m2;
-                xr = a[j1];
-                xi = -a[j1 + 1];
-                yr = a[k1];
-                yi = -a[k1 + 1];
-                a[j1] = yr;
-                a[j1 + 1] = yi;
-                a[k1] = xr;
-                a[k1 + 1] = xi;
-                j1 += m2;
-                k1 -= m2;
-                xr = a[j1];
-                xi = -a[j1 + 1];
-                yr = a[k1];
-                yi = -a[k1 + 1];
-                a[j1] = yr;
-                a[j1 + 1] = yi;
-                a[k1] = xr;
-                a[k1 + 1] = xi;
-                j1 += m2;
-                k1 += 2 * m2;
-                xr = a[j1];
-                xi = -a[j1 + 1];
-                yr = a[k1];
-                yi = -a[k1 + 1];
-                a[j1] = yr;
-                a[j1 + 1] = yi;
-                a[k1] = xr;
-                a[k1 + 1] = xi;
-            }
-            k1 = 2 * k + ip[k];
-            a[k1 + 1] = -a[k1 + 1];
-            j1 = k1 + m2;
-            k1 = j1 + m2;
-            xr = a[j1];
-            xi = -a[j1 + 1];
-            yr = a[k1];
-            yi = -a[k1 + 1];
-            a[j1] = yr;
-            a[j1 + 1] = yi;
-            a[k1] = xr;
-            a[k1 + 1] = xi;
-            k1 += m2;
-            a[k1 + 1] = -a[k1 + 1];
-        }
-    } else {
-        a[1] = -a[1];
-        a[m2 + 1] = -a[m2 + 1];
-        for (k = 1; k < m; k++) {
-            for (j = 0; j < k; j++) {
-                j1 = 2 * j + ip[k];
-                k1 = 2 * k + ip[j];
-                xr = a[j1];
-                xi = -a[j1 + 1];
-                yr = a[k1];
-                yi = -a[k1 + 1];
-                a[j1] = yr;
-                a[j1 + 1] = yi;
-                a[k1] = xr;
-                a[k1 + 1] = xi;
-                j1 += m2;
-                k1 += m2;
-                xr = a[j1];
-                xi = -a[j1 + 1];
-                yr = a[k1];
-                yi = -a[k1 + 1];
-                a[j1] = yr;
-                a[j1 + 1] = yi;
-                a[k1] = xr;
-                a[k1 + 1] = xi;
-            }
-            k1 = 2 * k + ip[k];
-            a[k1 + 1] = -a[k1 + 1];
-            a[k1 + m2 + 1] = -a[k1 + m2 + 1];
-        }
-    }
-}
-
-
-static void cftfsub(int n, double *a, double const *w)
-{
-    int j, j1, j2, j3, l;
-    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
-
-    l = 2;
-    if (n > 8) {
-        cft1st(n, a, w);
-        l = 8;
-        while ((l << 2) < n) {
-            cftmdl(n, l, a, w);
-            l <<= 2;
-        }
-    }
-    if ((l << 2) == n) {
-        for (j = 0; j < l; j += 2) {
-            j1 = j + l;
-            j2 = j1 + l;
-            j3 = j2 + l;
-            x0r = a[j] + a[j1];
-            x0i = a[j + 1] + a[j1 + 1];
-            x1r = a[j] - a[j1];
-            x1i = a[j + 1] - a[j1 + 1];
-            x2r = a[j2] + a[j3];
-            x2i = a[j2 + 1] + a[j3 + 1];
-            x3r = a[j2] - a[j3];
-            x3i = a[j2 + 1] - a[j3 + 1];
-            a[j] = x0r + x2r;
-            a[j + 1] = x0i + x2i;
-            a[j2] = x0r - x2r;
-            a[j2 + 1] = x0i - x2i;
-            a[j1] = x1r - x3i;
-            a[j1 + 1] = x1i + x3r;
-            a[j3] = x1r + x3i;
-            a[j3 + 1] = x1i - x3r;
-        }
-    } else {
-        for (j = 0; j < l; j += 2) {
-            j1 = j + l;
-            x0r = a[j] - a[j1];
-            x0i = a[j + 1] - a[j1 + 1];
-            a[j] += a[j1];
-            a[j + 1] += a[j1 + 1];
-            a[j1] = x0r;
-            a[j1 + 1] = x0i;
-        }
-    }
-}
-
-
-static void cftbsub(int n, double *a, double const *w)
-{
-    int j, j1, j2, j3, l;
-    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
-
-    l = 2;
-    if (n > 8) {
-        cft1st(n, a, w);
-        l = 8;
-        while ((l << 2) < n) {
-            cftmdl(n, l, a, w);
-            l <<= 2;
-        }
-    }
-    if ((l << 2) == n) {
-        for (j = 0; j < l; j += 2) {
-            j1 = j + l;
-            j2 = j1 + l;
-            j3 = j2 + l;
-            x0r = a[j] + a[j1];
-            x0i = -a[j + 1] - a[j1 + 1];
-            x1r = a[j] - a[j1];
-            x1i = -a[j + 1] + a[j1 + 1];
-            x2r = a[j2] + a[j3];
-            x2i = a[j2 + 1] + a[j3 + 1];
-            x3r = a[j2] - a[j3];
-            x3i = a[j2 + 1] - a[j3 + 1];
-            a[j] = x0r + x2r;
-            a[j + 1] = x0i - x2i;
-            a[j2] = x0r - x2r;
-            a[j2 + 1] = x0i + x2i;
-            a[j1] = x1r - x3i;
-            a[j1 + 1] = x1i - x3r;
-            a[j3] = x1r + x3i;
-            a[j3 + 1] = x1i + x3r;
-        }
-    } else {
-        for (j = 0; j < l; j += 2) {
-            j1 = j + l;
-            x0r = a[j] - a[j1];
-            x0i = -a[j + 1] + a[j1 + 1];
-            a[j] += a[j1];
-            a[j + 1] = -a[j + 1] - a[j1 + 1];
-            a[j1] = x0r;
-            a[j1 + 1] = x0i;
-        }
-    }
-}
-
-
-static void cft1st(int n, double *a, double const *w)
-{
-    int j, k1, k2;
-    double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
-    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
-
-    x0r = a[0] + a[2];
-    x0i = a[1] + a[3];
-    x1r = a[0] - a[2];
-    x1i = a[1] - a[3];
-    x2r = a[4] + a[6];
-    x2i = a[5] + a[7];
-    x3r = a[4] - a[6];
-    x3i = a[5] - a[7];
-    a[0] = x0r + x2r;
-    a[1] = x0i + x2i;
-    a[4] = x0r - x2r;
-    a[5] = x0i - x2i;
-    a[2] = x1r - x3i;
-    a[3] = x1i + x3r;
-    a[6] = x1r + x3i;
-    a[7] = x1i - x3r;
-    wk1r = w[2];
-    x0r = a[8] + a[10];
-    x0i = a[9] + a[11];
-    x1r = a[8] - a[10];
-    x1i = a[9] - a[11];
-    x2r = a[12] + a[14];
-    x2i = a[13] + a[15];
-    x3r = a[12] - a[14];
-    x3i = a[13] - a[15];
-    a[8] = x0r + x2r;
-    a[9] = x0i + x2i;
-    a[12] = x2i - x0i;
-    a[13] = x0r - x2r;
-    x0r = x1r - x3i;
-    x0i = x1i + x3r;
-    a[10] = wk1r * (x0r - x0i);
-    a[11] = wk1r * (x0r + x0i);
-    x0r = x3i + x1r;
-    x0i = x3r - x1i;
-    a[14] = wk1r * (x0i - x0r);
-    a[15] = wk1r * (x0i + x0r);
-    k1 = 0;
-    for (j = 16; j < n; j += 16) {
-        k1 += 2;
-        k2 = 2 * k1;
-        wk2r = w[k1];
-        wk2i = w[k1 + 1];
-        wk1r = w[k2];
-        wk1i = w[k2 + 1];
-        wk3r = wk1r - 2 * wk2i * wk1i;
-        wk3i = 2 * wk2i * wk1r - wk1i;
-        x0r = a[j] + a[j + 2];
-        x0i = a[j + 1] + a[j + 3];
-        x1r = a[j] - a[j + 2];
-        x1i = a[j + 1] - a[j + 3];
-        x2r = a[j + 4] + a[j + 6];
-        x2i = a[j + 5] + a[j + 7];
-        x3r = a[j + 4] - a[j + 6];
-        x3i = a[j + 5] - a[j + 7];
-        a[j] = x0r + x2r;
-        a[j + 1] = x0i + x2i;
-        x0r -= x2r;
-        x0i -= x2i;
-        a[j + 4] = wk2r * x0r - wk2i * x0i;
-        a[j + 5] = wk2r * x0i + wk2i * x0r;
-        x0r = x1r - x3i;
-        x0i = x1i + x3r;
-        a[j + 2] = wk1r * x0r - wk1i * x0i;
-        a[j + 3] = wk1r * x0i + wk1i * x0r;
-        x0r = x1r + x3i;
-        x0i = x1i - x3r;
-        a[j + 6] = wk3r * x0r - wk3i * x0i;
-        a[j + 7] = wk3r * x0i + wk3i * x0r;
-        wk1r = w[k2 + 2];
-        wk1i = w[k2 + 3];
-        wk3r = wk1r - 2 * wk2r * wk1i;
-        wk3i = 2 * wk2r * wk1r - wk1i;
-        x0r = a[j + 8] + a[j + 10];
-        x0i = a[j + 9] + a[j + 11];
-        x1r = a[j + 8] - a[j + 10];
-        x1i = a[j + 9] - a[j + 11];
-        x2r = a[j + 12] + a[j + 14];
-        x2i = a[j + 13] + a[j + 15];
-        x3r = a[j + 12] - a[j + 14];
-        x3i = a[j + 13] - a[j + 15];
-        a[j + 8] = x0r + x2r;
-        a[j + 9] = x0i + x2i;
-        x0r -= x2r;
-        x0i -= x2i;
-        a[j + 12] = -wk2i * x0r - wk2r * x0i;
-        a[j + 13] = -wk2i * x0i + wk2r * x0r;
-        x0r = x1r - x3i;
-        x0i = x1i + x3r;
-        a[j + 10] = wk1r * x0r - wk1i * x0i;
-        a[j + 11] = wk1r * x0i + wk1i * x0r;
-        x0r = x1r + x3i;
-        x0i = x1i - x3r;
-        a[j + 14] = wk3r * x0r - wk3i * x0i;
-        a[j + 15] = wk3r * x0i + wk3i * x0r;
-    }
-}
-
-
-static void cftmdl(int n, int l, double *a, double const *w)
-{
-    int j, j1, j2, j3, k, k1, k2, m, m2;
-    double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
-    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
-
-    m = l << 2;
-    for (j = 0; j < l; j += 2) {
-        j1 = j + l;
-        j2 = j1 + l;
-        j3 = j2 + l;
-        x0r = a[j] + a[j1];
-        x0i = a[j + 1] + a[j1 + 1];
-        x1r = a[j] - a[j1];
-        x1i = a[j + 1] - a[j1 + 1];
-        x2r = a[j2] + a[j3];
-        x2i = a[j2 + 1] + a[j3 + 1];
-        x3r = a[j2] - a[j3];
-        x3i = a[j2 + 1] - a[j3 + 1];
-        a[j] = x0r + x2r;
-        a[j + 1] = x0i + x2i;
-        a[j2] = x0r - x2r;
-        a[j2 + 1] = x0i - x2i;
-        a[j1] = x1r - x3i;
-        a[j1 + 1] = x1i + x3r;
-        a[j3] = x1r + x3i;
-        a[j3 + 1] = x1i - x3r;
-    }
-    wk1r = w[2];
-    for (j = m; j < l + m; j += 2) {
-        j1 = j + l;
-        j2 = j1 + l;
-        j3 = j2 + l;
-        x0r = a[j] + a[j1];
-        x0i = a[j + 1] + a[j1 + 1];
-        x1r = a[j] - a[j1];
-        x1i = a[j + 1] - a[j1 + 1];
-        x2r = a[j2] + a[j3];
-        x2i = a[j2 + 1] + a[j3 + 1];
-        x3r = a[j2] - a[j3];
-        x3i = a[j2 + 1] - a[j3 + 1];
-        a[j] = x0r + x2r;
-        a[j + 1] = x0i + x2i;
-        a[j2] = x2i - x0i;
-        a[j2 + 1] = x0r - x2r;
-        x0r = x1r - x3i;
-        x0i = x1i + x3r;
-        a[j1] = wk1r * (x0r - x0i);
-        a[j1 + 1] = wk1r * (x0r + x0i);
-        x0r = x3i + x1r;
-        x0i = x3r - x1i;
-        a[j3] = wk1r * (x0i - x0r);
-        a[j3 + 1] = wk1r * (x0i + x0r);
-    }
-    k1 = 0;
-    m2 = 2 * m;
-    for (k = m2; k < n; k += m2) {
-        k1 += 2;
-        k2 = 2 * k1;
-        wk2r = w[k1];
-        wk2i = w[k1 + 1];
-        wk1r = w[k2];
-        wk1i = w[k2 + 1];
-        wk3r = wk1r - 2 * wk2i * wk1i;
-        wk3i = 2 * wk2i * wk1r - wk1i;
-        for (j = k; j < l + k; j += 2) {
-            j1 = j + l;
-            j2 = j1 + l;
-            j3 = j2 + l;
-            x0r = a[j] + a[j1];
-            x0i = a[j + 1] + a[j1 + 1];
-            x1r = a[j] - a[j1];
-            x1i = a[j + 1] - a[j1 + 1];
-            x2r = a[j2] + a[j3];
-            x2i = a[j2 + 1] + a[j3 + 1];
-            x3r = a[j2] - a[j3];
-            x3i = a[j2 + 1] - a[j3 + 1];
-            a[j] = x0r + x2r;
-            a[j + 1] = x0i + x2i;
-            x0r -= x2r;
-            x0i -= x2i;
-            a[j2] = wk2r * x0r - wk2i * x0i;
-            a[j2 + 1] = wk2r * x0i + wk2i * x0r;
-            x0r = x1r - x3i;
-            x0i = x1i + x3r;
-            a[j1] = wk1r * x0r - wk1i * x0i;
-            a[j1 + 1] = wk1r * x0i + wk1i * x0r;
-            x0r = x1r + x3i;
-            x0i = x1i - x3r;
-            a[j3] = wk3r * x0r - wk3i * x0i;
-            a[j3 + 1] = wk3r * x0i + wk3i * x0r;
-        }
-        wk1r = w[k2 + 2];
-        wk1i = w[k2 + 3];
-        wk3r = wk1r - 2 * wk2r * wk1i;
-        wk3i = 2 * wk2r * wk1r - wk1i;
-        for (j = k + m; j < l + (k + m); j += 2) {
-            j1 = j + l;
-            j2 = j1 + l;
-            j3 = j2 + l;
-            x0r = a[j] + a[j1];
-            x0i = a[j + 1] + a[j1 + 1];
-            x1r = a[j] - a[j1];
-            x1i = a[j + 1] - a[j1 + 1];
-            x2r = a[j2] + a[j3];
-            x2i = a[j2 + 1] + a[j3 + 1];
-            x3r = a[j2] - a[j3];
-            x3i = a[j2 + 1] - a[j3 + 1];
-            a[j] = x0r + x2r;
-            a[j + 1] = x0i + x2i;
-            x0r -= x2r;
-            x0i -= x2i;
-            a[j2] = -wk2i * x0r - wk2r * x0i;
-            a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
-            x0r = x1r - x3i;
-            x0i = x1i + x3r;
-            a[j1] = wk1r * x0r - wk1i * x0i;
-            a[j1 + 1] = wk1r * x0i + wk1i * x0r;
-            x0r = x1r + x3i;
-            x0i = x1i - x3r;
-            a[j3] = wk3r * x0r - wk3i * x0i;
-            a[j3 + 1] = wk3r * x0i + wk3i * x0r;
-        }
-    }
-}
-
-
-static void rftfsub(int n, double *a, int nc, double const *c)
-{
-    int j, k, kk, ks, m;
-    double wkr, wki, xr, xi, yr, yi;
-
-    m = n >> 1;
-    ks = 2 * nc / m;
-    kk = 0;
-    for (j = 2; j < m; j += 2) {
-        k = n - j;
-        kk += ks;
-        wkr = one_half - c[nc - kk];
-        wki = c[kk];
-        xr = a[j] - a[k];
-        xi = a[j + 1] + a[k + 1];
-        yr = wkr * xr - wki * xi;
-        yi = wkr * xi + wki * xr;
-        a[j] -= yr;
-        a[j + 1] -= yi;
-        a[k] += yr;
-        a[k + 1] -= yi;
-    }
-}
-
-
-static void rftbsub(int n, double *a, int nc, double const *c)
-{
-    int j, k, kk, ks, m;
-    double wkr, wki, xr, xi, yr, yi;
-
-    a[1] = -a[1];
-    m = n >> 1;
-    ks = 2 * nc / m;
-    kk = 0;
-    for (j = 2; j < m; j += 2) {
-        k = n - j;
-        kk += ks;
-        wkr = one_half - c[nc - kk];
-        wki = c[kk];
-        xr = a[j] - a[k];
-        xi = a[j + 1] + a[k + 1];
-        yr = wkr * xr + wki * xi;
-        yi = wkr * xi - wki * xr;
-        a[j] -= yr;
-        a[j + 1] = yi - a[j + 1];
-        a[k] += yr;
-        a[k + 1] = yi - a[k + 1];
-    }
-    a[m + 1] = -a[m + 1];
-}
-
-
-static void dctsub(int n, double *a, int nc, double const *c)
-{
-    int j, k, kk, ks, m;
-    double wkr, wki, xr;
-
-    m = n >> 1;
-    ks = nc / n;
-    kk = 0;
-    for (j = 1; j < m; j++) {
-        k = n - j;
-        kk += ks;
-        wkr = c[kk] - c[nc - kk];
-        wki = c[kk] + c[nc - kk];
-        xr = wki * a[j] - wkr * a[k];
-        a[j] = wkr * a[j] + wki * a[k];
-        a[k] = xr;
-    }
-    a[m] *= c[0];
-}
-
-
-static void dstsub(int n, double *a, int nc, double const *c)
-{
-    int j, k, kk, ks, m;
-    double wkr, wki, xr;
-
-    m = n >> 1;
-    ks = nc / n;
-    kk = 0;
-    for (j = 1; j < m; j++) {
-        k = n - j;
-        kk += ks;
-        wkr = c[kk] - c[nc - kk];
-        wki = c[kk] + c[nc - kk];
-        xr = wki * a[k] - wkr * a[j];
-        a[k] = wkr * a[k] + wki * a[j];
-        a[j] = xr;
-    }
-    a[m] *= c[0];
-}
diff --git a/soxr-sys/src/fft4g.h b/soxr-sys/src/fft4g.h
deleted file mode 100644
index 0f906abcf..000000000
--- a/soxr-sys/src/fft4g.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-void lsx_cdft(int, int, double *, int *, double *);
-void lsx_rdft(int, int, double *, int *, double *);
-void lsx_ddct(int, int, double *, int *, double *);
-void lsx_ddst(int, int, double *, int *, double *);
-void lsx_dfct(int, double *, double *, int *, double *);
-void lsx_dfst(int, double *, double *, int *, double *);
-
-void lsx_cdft_f(int, int, float *, int *, float *);
-void lsx_rdft_f(int, int, float *, int *, float *);
-void lsx_ddct_f(int, int, float *, int *, float *);
-void lsx_ddst_f(int, int, float *, int *, float *);
-void lsx_dfct_f(int, float *, float *, int *, float *);
-void lsx_dfst_f(int, float *, float *, int *, float *);
-
-#define dft_br_len(l) (2ul + (1ul << (int)(log(l / 2 + .5) / log(2.)) / 2))
-#define dft_sc_len(l) ((unsigned long)l / 2)
-
-/* Over-allocate h by 2 to use these macros */
-#define LSX_PACK(h, n)   h[1] = h[n]
-#define LSX_UNPACK(h, n) h[n] = h[1], h[n + 1] = h[1] = 0;
diff --git a/soxr-sys/src/fft4g32.c b/soxr-sys/src/fft4g32.c
deleted file mode 100644
index 7a31ba4bb..000000000
--- a/soxr-sys/src/fft4g32.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#include <stdlib.h>
-#include "filter.h"
-#define FFT4G_FLOAT
-#include "fft4g.c"
-#include "soxr-config.h"
-
-#if WITH_CR32
-#include "rdft_t.h"
-static void * null(void) {return 0;}
-static void forward (int length, void * setup, double * H) {lsx_safe_rdft_f(length,  1, H); (void)setup;}
-static void backward(int length, void * setup, double * H) {lsx_safe_rdft_f(length, -1, H); (void)setup;}
-static int multiplier(void) {return 2;}
-static void nothing(void) {}
-static int flags(void) {return 0;}
-
-fn_t _soxr_rdft32_cb[] = {
-  (fn_t)null,
-  (fn_t)null,
-  (fn_t)nothing,
-  (fn_t)forward,
-  (fn_t)forward,
-  (fn_t)backward,
-  (fn_t)backward,
-  (fn_t)_soxr_ordered_convolve_f,
-  (fn_t)_soxr_ordered_partial_convolve_f,
-  (fn_t)multiplier,
-  (fn_t)nothing,
-  (fn_t)malloc,
-  (fn_t)calloc,
-  (fn_t)free,
-  (fn_t)flags,
-};
-#endif
diff --git a/soxr-sys/src/fft4g32s.c b/soxr-sys/src/fft4g32s.c
deleted file mode 100644
index 8ce9726ef..000000000
--- a/soxr-sys/src/fft4g32s.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#include "filter.h"
-#include "util32s.h"
-#include "rdft_t.h"
-
-static void * null(void) {return 0;}
-static void nothing(void) {}
-static void forward (int length, void * setup, float * H) {lsx_safe_rdft_f(length,  1, H); (void)setup;}
-static void backward(int length, void * setup, float * H) {lsx_safe_rdft_f(length, -1, H); (void)setup;}
-static int multiplier(void) {return 2;}
-static int flags(void) {return RDFT_IS_SIMD;}
-
-fn_t _soxr_rdft32s_cb[] = {
-  (fn_t)null,
-  (fn_t)null,
-  (fn_t)nothing,
-  (fn_t)forward,
-  (fn_t)forward,
-  (fn_t)backward,
-  (fn_t)backward,
-  (fn_t)ORDERED_CONVOLVE_SIMD,
-  (fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD,
-  (fn_t)multiplier,
-  (fn_t)nothing,
-  (fn_t)SIMD_ALIGNED_MALLOC,
-  (fn_t)SIMD_ALIGNED_CALLOC,
-  (fn_t)SIMD_ALIGNED_FREE,
-  (fn_t)flags,
-};
diff --git a/soxr-sys/src/fft4g64.c b/soxr-sys/src/fft4g64.c
deleted file mode 100644
index 0018516a0..000000000
--- a/soxr-sys/src/fft4g64.c
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#include <stdlib.h>
-#include "filter.h"
-#include "fft4g.c"
-#include "soxr-config.h"
-
-#if WITH_CR64
-static void * null(void) {return 0;}
-static void nothing(void) {}
-static void forward (int length, void * setup, double * H) {lsx_safe_rdft(length,  1, H); (void)setup;}
-static void backward(int length, void * setup, double * H) {lsx_safe_rdft(length, -1, H); (void)setup;}
-static int multiplier(void) {return 2;}
-static int flags(void) {return 0;}
-
-typedef void (* fn_t)(void);
-fn_t _soxr_rdft64_cb[] = {
-  (fn_t)null,
-  (fn_t)null,
-  (fn_t)nothing,
-  (fn_t)forward,
-  (fn_t)forward,
-  (fn_t)backward,
-  (fn_t)backward,
-  (fn_t)_soxr_ordered_convolve,
-  (fn_t)_soxr_ordered_partial_convolve,
-  (fn_t)multiplier,
-  (fn_t)nothing,
-  (fn_t)malloc,
-  (fn_t)calloc,
-  (fn_t)free,
-  (fn_t)flags,
-};
-#endif
diff --git a/soxr-sys/src/fft4g_cache.h b/soxr-sys/src/fft4g_cache.h
deleted file mode 100644
index d776c16c4..000000000
--- a/soxr-sys/src/fft4g_cache.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-static int * LSX_FFT_BR;
-static DFT_FLOAT * LSX_FFT_SC;
-static int FFT_LEN = -1;
-static ccrw2_t FFT_CACHE_CCRW;
-
-void LSX_INIT_FFT_CACHE(void)
-{
-  if (FFT_LEN >= 0)
-    return;
-  assert(LSX_FFT_BR == NULL);
-  assert(LSX_FFT_SC == NULL);
-  assert(FFT_LEN == -1);
-  ccrw2_init(FFT_CACHE_CCRW);
-  FFT_LEN = 0;
-}
-
-void LSX_CLEAR_FFT_CACHE(void)
-{
-  assert(FFT_LEN >= 0);
-  ccrw2_clear(FFT_CACHE_CCRW);
-  free(LSX_FFT_BR);
-  free(LSX_FFT_SC);
-  LSX_FFT_SC = NULL;
-  LSX_FFT_BR = NULL;
-  FFT_LEN = -1;
-}
-
-static bool UPDATE_FFT_CACHE(int len)
-{
-  LSX_INIT_FFT_CACHE();
-  assert(lsx_is_power_of_2(len));
-  assert(FFT_LEN >= 0);
-  ccrw2_become_reader(FFT_CACHE_CCRW);
-  if (len > FFT_LEN) {
-    ccrw2_cease_reading(FFT_CACHE_CCRW);
-    ccrw2_become_writer(FFT_CACHE_CCRW);
-    if (len > FFT_LEN) {
-      int old_n = FFT_LEN;
-      FFT_LEN = len;
-      LSX_FFT_BR = realloc(LSX_FFT_BR, dft_br_len(FFT_LEN) * sizeof(*LSX_FFT_BR));
-      LSX_FFT_SC = realloc(LSX_FFT_SC, dft_sc_len(FFT_LEN) * sizeof(*LSX_FFT_SC));
-      if (!old_n) {
-        LSX_FFT_BR[0] = 0;
-#if SOXR_LIB
-        atexit(LSX_CLEAR_FFT_CACHE);
-#endif
-      }
-      return true;
-    }
-    ccrw2_cease_writing(FFT_CACHE_CCRW);
-    ccrw2_become_reader(FFT_CACHE_CCRW);
-  }
-  return false;
-}
-
-static void DONE_WITH_FFT_CACHE(bool is_writer)
-{
-  if (is_writer)
-    ccrw2_cease_writing(FFT_CACHE_CCRW);
-  else ccrw2_cease_reading(FFT_CACHE_CCRW);
-}
-
-void LSX_SAFE_RDFT(int len, int type, DFT_FLOAT * d)
-{
-  bool is_writer = UPDATE_FFT_CACHE(len);
-  LSX_RDFT(len, type, d, LSX_FFT_BR, LSX_FFT_SC);
-  DONE_WITH_FFT_CACHE(is_writer);
-}
-
-void LSX_SAFE_CDFT(int len, int type, DFT_FLOAT * d)
-{
-  bool is_writer = UPDATE_FFT_CACHE(len);
-  LSX_CDFT(len, type, d, LSX_FFT_BR, LSX_FFT_SC);
-  DONE_WITH_FFT_CACHE(is_writer);
-}
-
-#undef UPDATE_FFT_CACHE
-#undef LSX_SAFE_RDFT
-#undef LSX_SAFE_CDFT
-#undef LSX_RDFT
-#undef LSX_INIT_FFT_CACHE
-#undef LSX_FFT_SC
-#undef LSX_FFT_BR
-#undef LSX_CLEAR_FFT_CACHE
-#undef LSX_CDFT
-#undef FFT_LEN
-#undef FFT_CACHE_CCRW
-#undef DONE_WITH_FFT_CACHE
-#undef DFT_FLOAT
diff --git a/soxr-sys/src/fifo.h b/soxr-sys/src/fifo.h
deleted file mode 100644
index 33af9fe63..000000000
--- a/soxr-sys/src/fifo.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#ifndef soxr_fifo_included
-#define soxr_fifo_included
-
-#if !defined FIFO_SIZE_T
-#define FIFO_SIZE_T size_t
-#endif
-
-#if !defined FIFO_REALLOC
-#include <stdlib.h>
-  #define FIFO_REALLOC(a,b,c) realloc(a,b)
-  #undef FIFO_FREE
-  #define FIFO_FREE free
-  #undef FIFO_MALLOC
-  #define FIFO_MALLOC malloc
-#endif
-
-typedef struct {
-  char * data;
-  size_t allocation;   /* Number of bytes allocated for data. */
-  size_t item_size;    /* Size of each item in data */
-  size_t begin;        /* Offset of the first byte to read. */
-  size_t end;          /* 1 + Offset of the last byte byte to read. */
-} fifo_t;
-
-#if !defined FIFO_MIN
-  #define FIFO_MIN 0x4000
-#endif
-
-#if !defined UNUSED
-  #define UNUSED
-#endif
-
-UNUSED static void fifo_clear(fifo_t * f)
-{
-  f->end = f->begin = 0;
-}
-
-UNUSED static void * fifo_reserve(fifo_t * f, FIFO_SIZE_T n0)
-{
-  size_t n = (size_t)n0;
-  n *= f->item_size;
-
-  if (f->begin == f->end)
-    fifo_clear(f);
-
-  while (1) {
-    if (f->end + n <= f->allocation) {
-      void *p = f->data + f->end;
-
-      f->end += n;
-      return p;
-    }
-    if (f->begin > FIFO_MIN) {
-      memmove(f->data, f->data + f->begin, f->end - f->begin);
-      f->end -= f->begin;
-      f->begin = 0;
-      continue;
-    }
-    f->data = FIFO_REALLOC(f->data, f->allocation + n, f->allocation);
-    f->allocation += n;
-    if (!f->data)
-      return 0;
-  }
-}
-
-UNUSED static void * fifo_write(fifo_t * f, FIFO_SIZE_T n0, void const * data)
-{
-  size_t n = (size_t)n0;
-  void * s = fifo_reserve(f, n0);
-  if (data)
-    memcpy(s, data, n * f->item_size);
-  return s;
-}
-
-UNUSED static void fifo_trim_to(fifo_t * f, FIFO_SIZE_T n0)
-{
-  size_t n = (size_t)n0;
-  n *= f->item_size;
-  f->end = f->begin + n;
-}
-
-UNUSED static void fifo_trim_by(fifo_t * f, FIFO_SIZE_T n0)
-{
-  size_t n = (size_t)n0;
-  n *= f->item_size;
-  f->end -= n;
-}
-
-UNUSED static FIFO_SIZE_T fifo_occupancy(fifo_t * f)
-{
-  return (FIFO_SIZE_T)((f->end - f->begin) / f->item_size);
-}
-
-UNUSED static void * fifo_read(fifo_t * f, FIFO_SIZE_T n0, void * data)
-{
-  size_t n = (size_t)n0;
-  char * ret = f->data + f->begin;
-  n *= f->item_size;
-  if (n > (f->end - f->begin))
-    return NULL;
-  if (data)
-    memcpy(data, ret, (size_t)n);
-  f->begin += n;
-  return ret;
-}
-
-#define fifo_read_ptr(f) fifo_read(f, (FIFO_SIZE_T)0, NULL)
-
-UNUSED static void fifo_delete(fifo_t * f)
-{
-  FIFO_FREE(f->data);
-}
-
-UNUSED static int fifo_create(fifo_t * f, FIFO_SIZE_T item_size)
-{
-  f->item_size = (size_t)item_size;
-  f->allocation = FIFO_MIN;
-  fifo_clear(f);
-  return !(f->data = FIFO_MALLOC(f->allocation));
-}
-
-#endif
diff --git a/soxr-sys/src/filter.c b/soxr-sys/src/filter.c
deleted file mode 100644
index 019d24d90..000000000
--- a/soxr-sys/src/filter.c
+++ /dev/null
@@ -1,277 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#include "filter.h"
-
-#include "math-wrap.h"
-#include <assert.h>
-#include <string.h>
-#include <stdlib.h>
-
-#include "fft4g.h"
-#include "ccrw2.h"
-
-#if 1 || WITH_CR64 || WITH_CR64S /* Always need this, for lsx_fir_to_phase. */
-#define DFT_FLOAT double
-#define DONE_WITH_FFT_CACHE done_with_fft_cache
-#define FFT_CACHE_CCRW fft_cache_ccrw
-#define FFT_LEN fft_len
-#define LSX_CDFT lsx_cdft
-#define LSX_CLEAR_FFT_CACHE lsx_clear_fft_cache
-#define LSX_FFT_BR lsx_fft_br
-#define LSX_FFT_SC lsx_fft_sc
-#define LSX_INIT_FFT_CACHE lsx_init_fft_cache
-#define LSX_RDFT lsx_rdft
-#define LSX_SAFE_CDFT lsx_safe_cdft
-#define LSX_SAFE_RDFT lsx_safe_rdft
-#define UPDATE_FFT_CACHE update_fft_cache
-#include "fft4g_cache.h"
-#endif
-
-#if (WITH_CR32 && !AVCODEC_FOUND) || (WITH_CR32S && !AVCODEC_FOUND && !WITH_PFFFT)
-#define DFT_FLOAT float
-#define DONE_WITH_FFT_CACHE done_with_fft_cache_f
-#define FFT_CACHE_CCRW fft_cache_ccrw_f
-#define FFT_LEN fft_len_f
-#define LSX_CDFT lsx_cdft_f
-#define LSX_CLEAR_FFT_CACHE lsx_clear_fft_cache_f
-#define LSX_FFT_BR lsx_fft_br_f
-#define LSX_FFT_SC lsx_fft_sc_f
-#define LSX_INIT_FFT_CACHE lsx_init_fft_cache_f
-#define LSX_RDFT lsx_rdft_f
-#define LSX_SAFE_CDFT lsx_safe_cdft_f
-#define LSX_SAFE_RDFT lsx_safe_rdft_f
-#define UPDATE_FFT_CACHE update_fft_cache_f
-#include "fft4g_cache.h"
-#endif
-
-#if WITH_CR64 || WITH_CR64S || !SOXR_LIB
-#define DFT_FLOAT double
-#define ORDERED_CONVOLVE lsx_ordered_convolve
-#define ORDERED_PARTIAL_CONVOLVE lsx_ordered_partial_convolve
-#include "rdft.h"
-#endif
-
-#if WITH_CR32
-#define DFT_FLOAT float
-#define ORDERED_CONVOLVE lsx_ordered_convolve_f
-#define ORDERED_PARTIAL_CONVOLVE lsx_ordered_partial_convolve_f
-#include "rdft.h"
-#endif
-
-double lsx_kaiser_beta(double att, double tr_bw)
-{
-  if (att >= 60) {
-    static const double coefs[][4] = {
-      {-6.784957e-10,1.02856e-05,0.1087556,-0.8988365+.001},
-      {-6.897885e-10,1.027433e-05,0.10876,-0.8994658+.002},
-      {-1.000683e-09,1.030092e-05,0.1087677,-0.9007898+.003},
-      {-3.654474e-10,1.040631e-05,0.1087085,-0.8977766+.006},
-      {8.106988e-09,6.983091e-06,0.1091387,-0.9172048+.015},
-      {9.519571e-09,7.272678e-06,0.1090068,-0.9140768+.025},
-      {-5.626821e-09,1.342186e-05,0.1083999,-0.9065452+.05},
-      {-9.965946e-08,5.073548e-05,0.1040967,-0.7672778+.085},
-      {1.604808e-07,-5.856462e-05,0.1185998,-1.34824+.1},
-      {-1.511964e-07,6.363034e-05,0.1064627,-0.9876665+.18},
-    };
-    double realm = log(tr_bw/.0005)/log(2.);
-    double const * c0 = coefs[range_limit(  (int)realm, 0, (int)array_length(coefs)-1)];
-    double const * c1 = coefs[range_limit(1+(int)realm, 0, (int)array_length(coefs)-1)];
-    double b0 = ((c0[0]*att + c0[1])*att + c0[2])*att + c0[3];
-    double b1 = ((c1[0]*att + c1[1])*att + c1[2])*att + c1[3];
-    return b0 + (b1 - b0) * (realm - (int)realm);
-  }
-  if (att > 50   ) return .1102 * (att - 8.7);
-  if (att > 20.96) return .58417 * pow(att -20.96, .4) + .07886 * (att - 20.96);
-  return 0;
-}
-
-double * lsx_make_lpf(
-    int num_taps, double Fc, double beta, double rho, double scale)
-{
-  int i, m = num_taps - 1;
-  double * h = malloc((size_t)num_taps * sizeof(*h));
-  double mult = scale / lsx_bessel_I_0(beta), mult1 = 1 / (.5 * m + rho);
-  assert(Fc >= 0 && Fc <= 1);
-  lsx_debug("make_lpf(n=%i Fc=%.7g beta=%g rho=%g scale=%g)",
-      num_taps, Fc, beta, rho, scale);
-
-  if (h) for (i = 0; i <= m / 2; ++i) {
-    double z = i - .5 * m, x = z * M_PI, y = z * mult1;
-    h[i] = x!=0? sin(Fc * x) / x : Fc;
-    h[i] *= lsx_bessel_I_0(beta * sqrt(1 - y * y)) * mult;
-    if (m - i != i)
-      h[m - i] = h[i];
-  }
-  return h;
-}
-
-void lsx_kaiser_params(double att, double Fc, double tr_bw, double * beta, int * num_taps)
-{
-  *beta = *beta < 0? lsx_kaiser_beta(att, tr_bw * .5 / Fc): *beta;
-  att = att < 60? (att - 7.95) / (2.285 * M_PI * 2) :
-    ((.0007528358-1.577737e-05**beta)**beta+.6248022)**beta+.06186902;
-  *num_taps = !*num_taps? (int)ceil(att/tr_bw + 1) : *num_taps;
-}
-
-double * lsx_design_lpf(
-    double Fp,      /* End of pass-band */
-    double Fs,      /* Start of stop-band */
-    double Fn,      /* Nyquist freq; e.g. 0.5, 1, PI */
-    double att,     /* Stop-band attenuation in dB */
-    int * num_taps, /* 0: value will be estimated */
-    int k,          /* >0: number of phases; <0: num_taps = 1 (mod -k) */
-    double beta)    /* <0: value will be estimated */
-{
-  int n = *num_taps, phases = max(k, 1), modulo = max(-k, 1);
-  double tr_bw, Fc, rho = phases == 1? .5 : att < 120? .63 : .75;
-
-  lsx_debug_more("./sinctest %-12.7g %-12.7g %g 0 %-5g %i %i 50 %g %g -4 >1",
-      Fp, Fs, Fn, att, *num_taps, k, beta, rho);
-
-  Fp /= fabs(Fn), Fs /= fabs(Fn);        /* Normalise to Fn = 1 */
-  tr_bw = .5 * (Fs - Fp); /* Transition band-width: 6dB to stop points */
-  tr_bw /= phases, Fs /= phases;
-  tr_bw = min(tr_bw, .5 * Fs);
-  Fc = Fs - tr_bw;
-  assert(Fc - tr_bw >= 0);
-  lsx_kaiser_params(att, Fc, tr_bw, &beta, num_taps);
-  if (!n)
-    *num_taps = phases > 1? *num_taps / phases * phases + phases - 1 :
-      (*num_taps + modulo - 2) / modulo * modulo + 1;
-  return Fn < 0? 0 : lsx_make_lpf(*num_taps, Fc, beta, rho, (double)phases);
-}
-
-static double safe_log(double x)
-{
-  assert(x >= 0);
-  if (x!=0)
-    return log(x);
-  lsx_debug("log(0)");
-  return -26;
-}
-
-void lsx_fir_to_phase(double * * h, int * len, int * post_len, double phase)
-{
-  double * pi_wraps, * work, phase1 = (phase > 50 ? 100 - phase : phase) / 50;
-  int i, work_len, begin, end, imp_peak = 0, peak = 0;
-  double imp_sum = 0, peak_imp_sum = 0;
-  double prev_angle2 = 0, cum_2pi = 0, prev_angle1 = 0, cum_1pi = 0;
-
-  for (i = *len, work_len = 2 * 2 * 8; i > 1; work_len <<= 1, i >>= 1);
-
-  work = calloc((size_t)work_len + 2, sizeof(*work)); /* +2: (UN)PACK */
-  pi_wraps = malloc((((size_t)work_len + 2) / 2) * sizeof(*pi_wraps));
-
-  memcpy(work, *h, (size_t)*len * sizeof(*work));
-  lsx_safe_rdft(work_len, 1, work); /* Cepstral: */
-  LSX_UNPACK(work, work_len);
-
-  for (i = 0; i <= work_len; i += 2) {
-    double angle = atan2(work[i + 1], work[i]);
-    double detect = 2 * M_PI;
-    double delta = angle - prev_angle2;
-    double adjust = detect * ((delta < -detect * .7) - (delta > detect * .7));
-    prev_angle2 = angle;
-    cum_2pi += adjust;
-    angle += cum_2pi;
-    detect = M_PI;
-    delta = angle - prev_angle1;
-    adjust = detect * ((delta < -detect * .7) - (delta > detect * .7));
-    prev_angle1 = angle;
-    cum_1pi += fabs(adjust); /* fabs for when 2pi and 1pi have combined */
-    pi_wraps[i >> 1] = cum_1pi;
-
-    work[i] = safe_log(sqrt(sqr(work[i]) + sqr(work[i + 1])));
-    work[i + 1] = 0;
-  }
-  LSX_PACK(work, work_len);
-  lsx_safe_rdft(work_len, -1, work);
-  for (i = 0; i < work_len; ++i) work[i] *= 2. / work_len;
-
-  for (i = 1; i < work_len / 2; ++i) { /* Window to reject acausal components */
-    work[i] *= 2;
-    work[i + work_len / 2] = 0;
-  }
-  lsx_safe_rdft(work_len, 1, work);
-
-  for (i = 2; i < work_len; i += 2) /* Interpolate between linear & min phase */
-    work[i + 1] = phase1 * i / work_len * pi_wraps[work_len >> 1] +
-        (1 - phase1) * (work[i + 1] + pi_wraps[i >> 1]) - pi_wraps[i >> 1];
-
-  work[0] = exp(work[0]), work[1] = exp(work[1]);
-  for (i = 2; i < work_len; i += 2) {
-    double x = exp(work[i]);
-    work[i    ] = x * cos(work[i + 1]);
-    work[i + 1] = x * sin(work[i + 1]);
-  }
-
-  lsx_safe_rdft(work_len, -1, work);
-  for (i = 0; i < work_len; ++i) work[i] *= 2. / work_len;
-
-  /* Find peak pos. */
-  for (i = 0; i <= (int)(pi_wraps[work_len >> 1] / M_PI + .5); ++i) {
-    imp_sum += work[i];
-    if (fabs(imp_sum) > fabs(peak_imp_sum)) {
-      peak_imp_sum = imp_sum;
-      peak = i;
-    }
-    if (work[i] > work[imp_peak]) /* For debug check only */
-      imp_peak = i;
-  }
-  while (peak && fabs(work[peak-1]) > fabs(work[peak]) && work[peak-1] * work[peak] > 0)
-    --peak;
-
-  if (phase1==0)
-    begin = 0;
-  else if (phase1 == 1)
-    begin = peak - *len / 2;
-  else {
-    begin = (int)((.997 - (2 - phase1) * .22) * *len + .5);
-    end   = (int)((.997 + (0 - phase1) * .22) * *len + .5);
-    begin = peak - (begin & ~3);
-    end   = peak + 1 + ((end + 3) & ~3);
-    *len = end - begin;
-    *h = realloc(*h, (size_t)*len * sizeof(**h));
-  }
-  for (i = 0; i < *len; ++i) (*h)[i] =
-    work[(begin + (phase > 50 ? *len - 1 - i : i) + work_len) & (work_len - 1)];
-  *post_len = phase > 50 ? peak - begin : begin + *len - (peak + 1);
-
-  lsx_debug("nPI=%g peak-sum@%i=%g (val@%i=%g); len=%i post=%i (%g%%)",
-      pi_wraps[work_len >> 1] / M_PI, peak, peak_imp_sum, imp_peak,
-      work[imp_peak], *len, *post_len, 100 - 100. * *post_len / (*len - 1));
-  free(pi_wraps), free(work);
-}
-
-#define F_x(F,expr) static double F(double x) {return expr;}
-F_x(sinePhi, ((2.0517e-07*x-1.1303e-04)*x+.023154)*x+.55924 )
-F_x(sinePsi, ((9.0667e-08*x-5.6114e-05)*x+.013658)*x+1.0977 )
-F_x(sinePow, log(.5)/log(sin(x*.5)) )
-#define dB_to_linear(x) exp((x) * (M_LN10 * 0.05))
-
-double lsx_f_resp(double t, double a)
-{
-  double x;
-  if (t > (a <= 160? .8 : .82)) {
-    double a1 = a+15;
-    double p = .00035*a+.375;
-    double w = 1/(1-.597)*asin(pow((a1-10.6)/a1,1/p));
-    double c = 1+asin(pow(1-a/a1,1/p))/w;
-    return a1*(pow(sin((c-t)*w),p)-1);
-  }
-  if (t > .5)
-    x = sinePsi(a), x = pow(sin((1-t) * x), sinePow(x));
-  else
-    x = sinePhi(a), x = 1 - pow(sin(t * x), sinePow(x));
-  return linear_to_dB(x);
-}
-
-double lsx_inv_f_resp(double drop, double a)
-{
-  double x = sinePhi(a), s;
-  drop = dB_to_linear(drop);
-  s = drop > .5 ? 1 - drop : drop;
-  x = asin(pow(s, 1/sinePow(x))) / x;
-  return drop > .5? x : 1 -x;
-}
diff --git a/soxr-sys/src/filter.h b/soxr-sys/src/filter.h
deleted file mode 100644
index ccb3ba836..000000000
--- a/soxr-sys/src/filter.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#if !defined soxr_filter_included
-#define soxr_filter_included
-
-#include "aliases.h"
-
-double lsx_bessel_I_0(double x);
-void lsx_init_fft_cache(void);
-void lsx_clear_fft_cache(void);
-void lsx_init_fft_cache_f(void);
-void lsx_clear_fft_cache_f(void);
-#define lsx_is_power_of_2(x) !(x < 2 || (x & (x - 1)))
-void lsx_safe_rdft(int len, int type, double * d);
-void lsx_safe_cdft(int len, int type, double * d);
-void lsx_safe_rdft_f(int len, int type, float * d);
-void lsx_safe_cdft_f(int len, int type, float * d);
-void lsx_ordered_convolve(int n, void * not_used, double * a, const double * b);
-void lsx_ordered_convolve_f(int n, void * not_used, float * a, const float * b);
-void lsx_ordered_partial_convolve(int n, double * a, const double * b);
-void lsx_ordered_partial_convolve_f(int n, float * a, const float * b);
-
-double lsx_kaiser_beta(double att, double tr_bw);
-double * lsx_make_lpf(int num_taps, double Fc, double beta, double rho,
-    double scale);
-void lsx_kaiser_params(double att, double Fc, double tr_bw, double * beta, int * num_taps);
-double * lsx_design_lpf(
-    double Fp,      /* End of pass-band */
-    double Fs,      /* Start of stop-band */
-    double Fn,      /* Nyquist freq; e.g. 0.5, 1, PI; < 0: dummy run */
-    double att,     /* Stop-band attenuation in dB */
-    int * num_taps, /* 0: value will be estimated */
-    int k,          /* >0: number of phases; <0: num_taps = 1 (mod -k) */
-    double beta);   /* <0: value will be estimated */
-
-void lsx_fir_to_phase(double * * h, int * len,
-    int * post_len, double phase0);
-
-double lsx_f_resp(double t, double a);
-double lsx_inv_f_resp(double drop, double a);
-#define lsx_to_3dB(a) (1 - lsx_inv_f_resp(-3., a))
-
-#endif
diff --git a/soxr-sys/src/half-coefs.h b/soxr-sys/src/half-coefs.h
deleted file mode 100644
index a5a0882bc..000000000
--- a/soxr-sys/src/half-coefs.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#if defined __GNUC__
-  #pragma GCC system_header
-#elif defined __SUNPRO_C
-  #pragma disable_warn
-#elif defined _MSC_VER
-  #pragma warning(push, 1)
-#endif
-
-#if CORE_TYPE & CORE_SIMD_HALF
-  #define VALIGN vAlign
-#else
-  #define VALIGN
-#endif
-
-#if !(CORE_TYPE & CORE_SIMD_HALF)
-static VALIGN const sample_t half_fir_coefs_7[] = {
- 3.1062656496657370e-01, -8.4998810699955796e-02,  3.4007044621123500e-02,
--1.2839903789829387e-02,  3.9899380181723145e-03, -8.9355202017945374e-04,
- 1.0918292424806546e-04,
-};
-#endif
-
-static VALIGN const sample_t half_fir_coefs_8[] = {
- 3.1154652365332069e-01, -8.7344917685739543e-02,  3.6814458353637280e-02,
--1.5189204581464479e-02,  5.4540855610738801e-03, -1.5643862626630416e-03,
- 3.1816575906323303e-04, -3.4799449225005688e-05,
-};
-
-static VALIGN const sample_t half_fir_coefs_9[] = {
- 3.1227034755311189e-01, -8.9221517147969526e-02,  3.9139704015071934e-02,
--1.7250558515852023e-02,  6.8589440230476112e-03, -2.3045049636430419e-03,
- 6.0963740543348963e-04, -1.1323803957431231e-04,  1.1197769991000046e-05,
-};
-
-#if CORE_TYPE & CORE_DBL
-static VALIGN const sample_t half_fir_coefs_10[] = {
- 3.1285456012000523e-01, -9.0756740799292787e-02,  4.1096398104193160e-02,
--1.9066319572525220e-02,  8.1840569787684902e-03, -3.0766876176359834e-03,
- 9.6396524429277980e-04, -2.3585679989922018e-04,  4.0252189026627833e-05,
--3.6298196342497932e-06,
-};
-
-static VALIGN const sample_t half_fir_coefs_11[] = {
- 3.1333588822574199e-01, -9.2035898673019811e-02,  4.2765169698406408e-02,
--2.0673580894964429e-02,  9.4225426824512421e-03, -3.8563379950013192e-03,
- 1.3634742159642453e-03, -3.9874150714431009e-04,  9.0586723632664806e-05,
--1.4285617244076783e-05,  1.1834642946400529e-06,
-};
-
-static VALIGN const sample_t half_fir_coefs_12[] = {
- 3.1373928463345568e-01, -9.3118180335301962e-02,  4.4205005881659098e-02,
--2.2103860986973051e-02,  1.0574689371162864e-02, -4.6276428065385065e-03,
- 1.7936153397572132e-03, -5.9617527051353237e-04,  1.6314517495669067e-04,
--3.4555126770115446e-05,  5.0617615610782593e-06, -3.8768958592971409e-07,
-};
-
-static VALIGN const sample_t half_fir_coefs_13[] = {
- 3.1408224847888910e-01, -9.4045836332667387e-02,  4.5459878763259978e-02,
--2.3383369012219993e-02,  1.1644273044890753e-02, -5.3806714579057013e-03,
- 2.2429072878264022e-03, -8.2204347506606424e-04,  2.5724946477840893e-04,
--6.6072709864248668e-05,  1.3099163296288644e-05, -1.7907147069136000e-06,
- 1.2750825595240592e-07,
-};
-#endif
-
-#undef VALIGN
-
-#if defined __SUNPRO_C
-  #pragma enable_warn
-#elif defined _MSC_VER
-  #pragma warning(pop)
-#endif
diff --git a/soxr-sys/src/half-fir.h b/soxr-sys/src/half-fir.h
deleted file mode 100644
index 782be1bc7..000000000
--- a/soxr-sys/src/half-fir.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-/* Decimate by 2 using a FIR with odd length (LEN). */
-/* Input must be preceded and followed by LEN >> 1 samples. */
-
-#define COEFS ((sample_t const *)p->coefs)
-
-#if SIMD_SSE
-  #define BEGINNING v4_t sum, q1, q2, t
-  #define ____ \
-    q1 = _mm_shuffle_ps(t=vLdu(input+2*j),vLdu(input+2*j+4),_MM_SHUFFLE(3,1,3,1)); \
-    q2 = _mm_shuffle_ps(vLdu(input-2*j-4),vLdu(input-2*j-8),_MM_SHUFFLE(1,3,1,3)); \
-    sum = vAdd(j? sum : vMul(vSet1(.5), t), vMul(vAdd(q1, q2), vLd(COEFS+j))); \
-    j += 4;
-  #define __ \
-    q1 = _mm_shuffle_ps(vLdu(input+2*j), vLdu(input-2*j-4), _MM_SHUFFLE(1,3,3,1)); \
-    q2 = _mm_loadl_pi(q2, (__m64*)(COEFS+j)), q2 = _mm_movelh_ps(q2, q2); \
-    sum = vAdd(sum, vMul(q1, q2)); \
-    j += 2;
-  #define _ \
-    q1 = _mm_add_ss(_mm_load_ss(input+2*j+1), _mm_load_ss(input-2*j-1)); \
-    sum = _mm_add_ss(sum, _mm_mul_ss(q1, _mm_load_ss(COEFS+j))); \
-    ++j;
-  #define END vStorSum(output+i, sum)
-/* #elif SIMD_AVX; No good solution found. */
-/* #elif SIMD_NEON; No need: gcc -O3 does a good job by itself. */
-#else
-  #define BEGINNING sample_t sum = input[0] * .5f
-  #define ____ __ __
-  #define __ _ _
-  #define _ sum += (input[-(2*j +1)] + input[(2*j +1)]) * COEFS[j], ++j;
-  #define END output[i] = sum
-#endif
-
-
-
-static void FUNCTION_H(stage_t * p, fifo_t * output_fifo)
-{
-  sample_t const * __restrict input = stage_read_p(p);
-  int num_in = min(stage_occupancy(p), p->input_size);
-  int i, num_out = (num_in + 1) >> 1;
-  sample_t * __restrict output = fifo_reserve(output_fifo, num_out);
-
-  for (i = 0; i < num_out; ++i, input += 2) {
-    int j = 0;
-    BEGINNING; CONVOLVE; END;
-  }
-  fifo_read(&p->fifo, 2 * num_out, NULL);
-}
-
-
-
-#undef _
-#undef __
-#undef ____
-#undef BEGINNING
-#undef END
-#undef COEFS
-#undef CONVOLVE
-#undef FUNCTION_H
diff --git a/soxr-sys/src/internal.h b/soxr-sys/src/internal.h
deleted file mode 100644
index 08924d500..000000000
--- a/soxr-sys/src/internal.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#if !defined soxr_internal_included
-#define soxr_internal_included
-
-#include "std-types.h"
-
-
-
-#undef min
-#undef max
-#define min(a, b) ((a) <= (b) ? (a) : (b))
-#define max(a, b) ((a) >= (b) ? (a) : (b))
-
-
-
-#define range_limit(x, lower, upper) (min(max(x, lower), upper))
-#define linear_to_dB(x) (log10(x) * 20)
-#define array_length(a) (sizeof(a)/sizeof(a[0]))
-#if !defined AL
-#define AL(a) array_length(a)
-#endif
-#define iAL(a) (int)AL(a)
-#define sqr(a) ((a) * (a))
-
-
-
-#if defined __GNUC__
-  #define UNUSED __attribute__ ((unused))
-#else
-  #define UNUSED
-#endif
-
-
-
-#if !WITH_DEV_TRACE
-  #ifdef __GNUC__
-    void lsx_dummy(char const *, ...);
-  #else
-    static __inline void lsx_dummy(char const * x, ...) {}
-  #endif
-  #define lsx_debug if(0) lsx_dummy
-  #define lsx_debug_more lsx_debug
-#else
-  extern int _soxr_trace_level;
-  void _soxr_trace(char const * fmt, ...);
-  #define lsx_debug      if (_soxr_trace_level > 0) _soxr_trace
-  #define lsx_debug_more if (_soxr_trace_level > 1) _soxr_trace
-#endif
-
-
-
-/* soxr_quality_spec_t.flags: */
-
-#define SOXR_ROLLOFF_LSR2Q     3u    /* Reserved for internal use. */
-#define SOXR_ROLLOFF_MASK      3u    /* For masking these bits. */
-#define SOXR_MAINTAIN_3DB_PT   4u    /* Reserved for internal use. */
-#define SOXR_PROMOTE_TO_LQ    64u    /* Reserved for internal use. */
-
-
-
-/* soxr_runtime_spec_t.flags: */
-
-#define SOXR_STRICT_BUFFERING  4u    /* Reserved for future use. */
-#define SOXR_NOSMALLINTOPT     8u    /* For test purposes only. */
-
-
-
-/* soxr_quality_spec recipe: */
-
-#define SOXR_PRECISIONQ         11   /* Quality specified by the precision parameter. */
-
-#define SOXR_PHASE_MASK         0x30 /* For masking these bits. */
-
-
-
-/* soxr_quality_spec flags: */
-
-#define RESET_ON_CLEAR   (1u<<31)
-
-
-
-#endif
diff --git a/soxr-sys/src/math-wrap.h b/soxr-sys/src/math-wrap.h
deleted file mode 100644
index 8a526f13e..000000000
--- a/soxr-sys/src/math-wrap.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#if !defined soxr_math_wrap_included
-#define soxr_math_wrap_included
-
-#include <math.h>
-
-#if defined __STRICT_ANSI__
-  #define sinf(x)  (float)sin ((double)(x))
-  #define cosf(x)  (float)cos ((double)(x))
-  #define atanf(x) (float)atan((double)(x))
-#endif
-
-#if !defined M_PI
-  #define M_PI    3.141592653589793238462643383279502884
-#endif
-
-#if !defined M_LN10
-  #define M_LN10  2.302585092994045684017991454684364208
-#endif
-
-#if !defined M_SQRT2
-  #define M_SQRT2 1.414213562373095048801688724209698079
-#endif
-
-#if !defined M_LN2
-  #define M_LN2   0.693147180559945309417232121458176568
-#endif
-
-#endif
diff --git a/soxr-sys/src/pffft-avx.h b/soxr-sys/src/pffft-avx.h
deleted file mode 100644
index ace19b57d..000000000
--- a/soxr-sys/src/pffft-avx.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-/* AVX support macros */
-
-#if !defined soxr_avx_included
-#define soxr_avx_included
-
-#include <immintrin.h>
-
-typedef __m256d v4sf;
-#define VZERO() _mm256_setzero_pd()
-#define VMUL(a,b) _mm256_mul_pd(a,b)
-#define VADD(a,b) _mm256_add_pd(a,b)
-#define VMADD(a,b,c) VADD(VMUL(a,b),c) /* Note: gcc -mfma will `fuse' these */
-#define VSUB(a,b) _mm256_sub_pd(a,b)
-#define LD_PS1(p) _mm256_set1_pd(p)
-#define INTERLEAVE2(in1, in2, out1, out2) {v4sf \
-  t1 = _mm256_unpacklo_pd(in1, in2), \
-  t2 = _mm256_unpackhi_pd(in1, in2); \
-  out1 = _mm256_permute2f128_pd(t1,t2,0x20); \
-  out2 = _mm256_permute2f128_pd(t1,t2,0x31); }
-#define UNINTERLEAVE2(in1, in2, out1, out2) {v4sf \
-  t1 = _mm256_permute2f128_pd(in1,in2,0x20), \
-  t2 = _mm256_permute2f128_pd(in1,in2,0x31); \
-  out1 = _mm256_unpacklo_pd(t1, t2); \
-  out2 = _mm256_unpackhi_pd(t1, t2);}
-#define VTRANSPOSE4(x0,x1,x2,x3) {v4sf \
-  t0 = _mm256_shuffle_pd(x0,x1, 0x0), \
-  t2 = _mm256_shuffle_pd(x0,x1, 0xf), \
-  t1 = _mm256_shuffle_pd(x2,x3, 0x0), \
-  t3 = _mm256_shuffle_pd(x2,x3, 0xf); \
-  x0 = _mm256_permute2f128_pd(t0,t1, 0x20); \
-  x1 = _mm256_permute2f128_pd(t2,t3, 0x20); \
-  x2 = _mm256_permute2f128_pd(t0,t1, 0x31); \
-  x3 = _mm256_permute2f128_pd(t2,t3, 0x31);}
-#define VSWAPHL(a,b) _mm256_permute2f128_pd(b, a, 0x30)
-#define VALIGNED(ptr) ((((long)(ptr)) & 0x1F) == 0)
-
-#endif
diff --git a/soxr-sys/src/pffft-wrap.c b/soxr-sys/src/pffft-wrap.c
deleted file mode 100644
index c920f06ea..000000000
--- a/soxr-sys/src/pffft-wrap.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#if !defined PFFT_MACROS_ONLY
-
-#include "math-wrap.h"
-
-#if PFFFT_DOUBLE
-  #include "util64s.h"
-#else
-  #include "util32s.h"
-  #define sin(x) sinf(x)
-  #define cos(x) cosf(x)
-#endif
-
-#define pffft_aligned_free    SIMD_ALIGNED_FREE
-#define pffft_aligned_malloc  SIMD_ALIGNED_MALLOC
-#define pffft_aligned_calloc  SIMD_ALIGNED_CALLOC
-
-#undef inline
-#define inline __inline
-
-#endif
-
-
-
-#include "pffft.c"
-
-
-
-#if !defined PFFT_MACROS_ONLY
-
-#if !defined PFFFT_SIMD_DISABLE
-
-static void pffft_zconvolve(PFFFT_Setup *s, const float *a, const float *b, float *ab) {
-  int i, Ncvec = s->Ncvec;
-  const v4sf * /*RESTRICT*/ va = (const v4sf*)a;
-  const v4sf * RESTRICT vb = (const v4sf*)b;
-  v4sf * /*RESTRICT*/ vab = (v4sf*)ab;
-
-  float ar, ai, br, bi;
-
-#ifdef __arm__
-  __builtin_prefetch(va);
-  __builtin_prefetch(vb);
-  __builtin_prefetch(va+2);
-  __builtin_prefetch(vb+2);
-  __builtin_prefetch(va+4);
-  __builtin_prefetch(vb+4);
-  __builtin_prefetch(va+6);
-  __builtin_prefetch(vb+6);
-#endif
-
-  assert(VALIGNED(a) && VALIGNED(b) && VALIGNED(ab));
-  ar = ((v4sf_union*)va)[0].f[0];
-  ai = ((v4sf_union*)va)[1].f[0];
-  br = ((v4sf_union*)vb)[0].f[0];
-  bi = ((v4sf_union*)vb)[1].f[0];
-
-  for (i=0; i < Ncvec; i += 2) {
-    v4sf ar, ai, br, bi;
-    ar = va[2*i+0]; ai = va[2*i+1];
-    br = vb[2*i+0]; bi = vb[2*i+1];
-    VCPLXMUL(ar, ai, br, bi);
-    vab[2*i+0] = ar;
-    vab[2*i+1] = ai;
-    ar = va[2*i+2]; ai = va[2*i+3];
-    br = vb[2*i+2]; bi = vb[2*i+3];
-    VCPLXMUL(ar, ai, br, bi);
-    vab[2*i+2] = ar;
-    vab[2*i+3] = ai;
-  }
-  if (s->transform == PFFFT_REAL) {
-    ((v4sf_union*)vab)[0].f[0] = ar*br;
-    ((v4sf_union*)vab)[1].f[0] = ai*bi;
-  }
-}
-
-#else
-
-static void pffft_zconvolve(PFFFT_Setup *s, const float *a, const float *b, float *ab) {
-  int i, Ncvec = s->Ncvec;
-
-  if (s->transform == PFFFT_REAL) {
-    /* take care of the fftpack ordering */
-    ab[0] = a[0]*b[0];
-    ab[2*Ncvec-1] = a[2*Ncvec-1]*b[2*Ncvec-1];
-    ++ab; ++a; ++b; --Ncvec;
-  }
-  for (i=0; i < Ncvec; ++i) {
-    float ar, ai, br, bi;
-    ar = a[2*i+0]; ai = a[2*i+1];
-    br = b[2*i+0]; bi = b[2*i+1];
-    VCPLXMUL(ar, ai, br, bi);
-    ab[2*i+0] = ar;
-    ab[2*i+1] = ai;
-  }
-}
-
-#endif
-
-#include <string.h>
-
-static void pffft_reorder_back(int length, void * setup, float * data, float * work)
-{
-  memcpy(work, data, (unsigned)length * sizeof(*work));
-  pffft_zreorder(setup, work, data, PFFFT_BACKWARD);
-}
-
-#endif
diff --git a/soxr-sys/src/pffft.c b/soxr-sys/src/pffft.c
deleted file mode 100644
index 46c841e74..000000000
--- a/soxr-sys/src/pffft.c
+++ /dev/null
@@ -1,1946 +0,0 @@
-/* https://bitbucket.org/jpommier/pffft/raw/483453d8f7661058e74aa4e7cf5c27bcd7887e7a/pffft.c
- * with minor changes for libsoxr. */
-
-/* Copyright (c) 2013  Julien Pommier ( pommier@modartt.com )
-
-   Based on original fortran 77 code from FFTPACKv4 from NETLIB
-   (http://www.netlib.org/fftpack), authored by Dr Paul Swarztrauber
-   of NCAR, in 1985.
-
-   As confirmed by the NCAR fftpack software curators, the following
-   FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
-   released under the same terms.
-
-   FFTPACK license:
-
-   http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
-
-   Copyright (c) 2004 the University Corporation for Atmospheric
-   Research ("UCAR"). All rights reserved. Developed by NCAR's
-   Computational and Information Systems Laboratory, UCAR,
-   www.cisl.ucar.edu.
-
-   Redistribution and use of the Software in source and binary forms,
-   with or without modification, is permitted provided that the
-   following conditions are met:
-
-   - Neither the names of NCAR's Computational and Information Systems
-   Laboratory, the University Corporation for Atmospheric Research,
-   nor the names of its sponsors or contributors may be used to
-   endorse or promote products derived from this Software without
-   specific prior written permission.
-
-   - Redistributions of source code must retain the above copyright
-   notices, this list of conditions, and the disclaimer below.
-
-   - Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions, and the disclaimer below in the
-   documentation and/or other materials provided with the
-   distribution.
-
-   THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-   EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-   NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
-   HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
-   EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
-   SOFTWARE.
-
-
-   PFFFT : a Pretty Fast FFT.
-
-   This file is largerly based on the original FFTPACK implementation, modified in
-   order to take advantage of SIMD instructions of modern CPUs.
-*/
-
-/*
-  ChangeLog:
-  - 2011/10/02, version 1: This is the very first release of this file.
-*/
-
-#include "pffft.h"
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include <assert.h>
-
-/* detect compiler flavour */
-#if defined(_MSC_VER)
-#  define COMPILER_MSVC
-#elif defined(__GNUC__)
-#  define COMPILER_GCC
-#endif
-
-#if defined(COMPILER_GCC)
-#  define ALWAYS_INLINE(return_type) inline return_type __attribute__ ((always_inline))
-#  define NEVER_INLINE(return_type) return_type __attribute__ ((noinline))
-#  define RESTRICT __restrict
-#  define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ varname__[size__];
-#elif defined(COMPILER_MSVC)
-#  define ALWAYS_INLINE(return_type) __forceinline return_type
-#  define NEVER_INLINE(return_type) __declspec(noinline) return_type
-#  define RESTRICT __restrict
-#  define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca(size__ * sizeof(type__))
-#endif
-
-
-/*
-   vector support macros: the rest of the code is independant of
-   SSE/Altivec/NEON -- adding support for other platforms with 4-element
-   vectors should be limited to these macros
-*/
-
-
-/* define PFFFT_SIMD_DISABLE if you want to use scalar code instead of simd code */
-/*#define PFFFT_SIMD_DISABLE */
-
-/*
-   Altivec support macros
-*/
-#if !defined(PFFFT_SIMD_DISABLE) && (defined(__ppc__) || defined(__ppc64__))
-typedef vector float v4sf;
-#  define SIMD_SZ 4
-#  define VZERO() ((vector float) vec_splat_u8(0))
-#  define VMUL(a,b) vec_madd(a,b, VZERO())
-#  define VADD(a,b) vec_add(a,b)
-#  define VMADD(a,b,c) vec_madd(a,b,c)
-#  define VSUB(a,b) vec_sub(a,b)
-inline v4sf ld_ps1(const float *p) { v4sf v=vec_lde(0,p); return vec_splat(vec_perm(v, v, vec_lvsl(0, p)), 0); }
-#  define LD_PS1(p) ld_ps1(&p)
-#  define INTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = vec_mergeh(in1, in2); out2 = vec_mergel(in1, in2); out1 = tmp__; }
-#  define UNINTERLEAVE2(in1, in2, out1, out2) {                           \
-    vector unsigned char vperm1 =  (vector unsigned char)(0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); \
-    vector unsigned char vperm2 =  (vector unsigned char)(4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); \
-    v4sf tmp__ = vec_perm(in1, in2, vperm1); out2 = vec_perm(in1, in2, vperm2); out1 = tmp__; \
-  }
-#  define VTRANSPOSE4(x0,x1,x2,x3) {              \
-    v4sf y0 = vec_mergeh(x0, x2);               \
-    v4sf y1 = vec_mergel(x0, x2);               \
-    v4sf y2 = vec_mergeh(x1, x3);               \
-    v4sf y3 = vec_mergel(x1, x3);               \
-    x0 = vec_mergeh(y0, y2);                    \
-    x1 = vec_mergel(y0, y2);                    \
-    x2 = vec_mergeh(y1, y3);                    \
-    x3 = vec_mergel(y1, y3);                    \
-  }
-#  define VSWAPHL(a,b) vec_perm(a,b, (vector unsigned char)(16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15))
-#  define VALIGNED(ptr) ((((long)(ptr)) & 0xF) == 0)
-
-/*
-  SSE1 support macros
-*/
-#elif !defined(PFFFT_SIMD_DISABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(_M_IX86))
-
-#  define SIMD_SZ 4 /* 4 floats by simd vector -- this is pretty much hardcoded in the preprocess/finalize functions anyway so you will have to work if you want to enable AVX with its 256-bit vectors. */
-
-#if !PFFFT_DOUBLE
-#include <xmmintrin.h>
-typedef __m128 v4sf;
-#  define VZERO() _mm_setzero_ps()
-#  define VMUL(a,b) _mm_mul_ps(a,b)
-#  define VADD(a,b) _mm_add_ps(a,b)
-#  define VMADD(a,b,c) _mm_add_ps(_mm_mul_ps(a,b), c)
-#  define VSUB(a,b) _mm_sub_ps(a,b)
-#  define LD_PS1(p) _mm_set1_ps(p)
-#  define INTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = _mm_unpacklo_ps(in1, in2); out2 = _mm_unpackhi_ps(in1, in2); out1 = tmp__; }
-#  define UNINTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(2,0,2,0)); out2 = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(3,1,3,1)); out1 = tmp__; }
-#  define VTRANSPOSE4(x0,x1,x2,x3) _MM_TRANSPOSE4_PS(x0,x1,x2,x3)
-#  define VSWAPHL(a,b) _mm_shuffle_ps(b, a, _MM_SHUFFLE(3,2,1,0))
-#  define VALIGNED(ptr) ((((long)(ptr)) & 0xF) == 0)
-
-#else
-#include "pffft-avx.h"
-#endif
-
-/*
-  ARM NEON support macros
-*/
-#elif !defined(PFFFT_SIMD_DISABLE) && defined(__arm__)
-#  include <arm_neon.h>
-typedef float32x4_t v4sf;
-#  define SIMD_SZ 4
-#  define VZERO() vdupq_n_f32(0)
-#  define VMUL(a,b) vmulq_f32(a,b)
-#  define VADD(a,b) vaddq_f32(a,b)
-#  define VMADD(a,b,c) vmlaq_f32(c,a,b)
-#  define VSUB(a,b) vsubq_f32(a,b)
-#  define LD_PS1(p) vld1q_dup_f32(&(p))
-#  define INTERLEAVE2(in1, in2, out1, out2) { float32x4x2_t tmp__ = vzipq_f32(in1,in2); out1=tmp__.val[0]; out2=tmp__.val[1]; }
-#  define UNINTERLEAVE2(in1, in2, out1, out2) { float32x4x2_t tmp__ = vuzpq_f32(in1,in2); out1=tmp__.val[0]; out2=tmp__.val[1]; }
-#  define VTRANSPOSE4(x0,x1,x2,x3) {                                    \
-    float32x4x2_t t0_ = vzipq_f32(x0, x2);                              \
-    float32x4x2_t t1_ = vzipq_f32(x1, x3);                              \
-    float32x4x2_t u0_ = vzipq_f32(t0_.val[0], t1_.val[0]);              \
-    float32x4x2_t u1_ = vzipq_f32(t0_.val[1], t1_.val[1]);              \
-    x0 = u0_.val[0]; x1 = u0_.val[1]; x2 = u1_.val[0]; x3 = u1_.val[1]; \
-  }
-/* marginally faster version */
-/*#  define VTRANSPOSE4(x0,x1,x2,x3) { asm("vtrn.32 %q0, %q1;\n vtrn.32 %q2,%q3\n vswp %f0,%e2\n vswp %f1,%e3" : "+w"(x0), "+w"(x1), "+w"(x2), "+w"(x3)::); } */
-#  define VSWAPHL(a,b) vcombine_f32(vget_low_f32(b), vget_high_f32(a))
-#  define VALIGNED(ptr) ((((long)(ptr)) & 0x3) == 0)
-#else
-#  if !defined(PFFFT_SIMD_DISABLE)
-#    warning "building with simd disabled !\n";
-#    define PFFFT_SIMD_DISABLE /* fallback to scalar code */
-#  endif
-#endif
-
-#if PFFFT_DOUBLE
-#define float double
-#endif
-
-/* fallback mode for situations where SSE/Altivec are not available, use scalar mode instead */
-#ifdef PFFFT_SIMD_DISABLE
-typedef float v4sf;
-#  define SIMD_SZ 1
-#  define VZERO() 0.f
-#  define VMUL(a,b) ((a)*(b))
-#  define VADD(a,b) ((a)+(b))
-#  define VMADD(a,b,c) ((a)*(b)+(c))
-#  define VSUB(a,b) ((a)-(b))
-#  define LD_PS1(p) (p)
-#  define VALIGNED(ptr) ((((long)(ptr)) & 0x3) == 0)
-#endif
-
-/* shortcuts for complex multiplcations */
-#define VCPLXMUL(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VSUB(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VADD(ai,tmp); }
-#define VCPLXMULCONJ(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VADD(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VSUB(ai,tmp); }
-#ifndef SVMUL
-/* multiply a scalar with a vector */
-#define SVMUL(f,v) VMUL(LD_PS1(f),v)
-#endif
-
-#if !defined PFFT_MACROS_ONLY
-
-#if !defined(PFFFT_SIMD_DISABLE)
-typedef union v4sf_union {
-  v4sf  v;
-  float f[4];
-} v4sf_union;
-
-#if 0
-#include <string.h>
-
-#define assertv4(v,f0,f1,f2,f3) assert(v.f[0] == (f0) && v.f[1] == (f1) && v.f[2] == (f2) && v.f[3] == (f3))
-
-/* detect bugs with the vector support macros */
-void validate_pffft_simd(void);
-void validate_pffft_simd(void) {
-  float f[16] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 };
-  v4sf_union a0, a1, a2, a3, t, u;
-  memcpy(a0.f, f, 4*sizeof(float));
-  memcpy(a1.f, f+4, 4*sizeof(float));
-  memcpy(a2.f, f+8, 4*sizeof(float));
-  memcpy(a3.f, f+12, 4*sizeof(float));
-
-  t = a0; u = a1; t.v = VZERO();
-  printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 0, 0, 0, 0);
-  t.v = VADD(a1.v, a2.v);
-  printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 12, 14, 16, 18);
-  t.v = VMUL(a1.v, a2.v);
-  printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 45, 60, 77);
-  t.v = VMADD(a1.v, a2.v,a0.v);
-  printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 46, 62, 80);
-  INTERLEAVE2(a1.v,a2.v,t.v,u.v);
-  printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
-  assertv4(t, 4, 8, 5, 9); assertv4(u, 6, 10, 7, 11);
-  UNINTERLEAVE2(a1.v,a2.v,t.v,u.v);
-  printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
-  assertv4(t, 4, 6, 8, 10); assertv4(u, 5, 7, 9, 11);
-
-  t.v=LD_PS1(f[15]);
-  printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
-  assertv4(t, 15, 15, 15, 15);
-  t.v = VSWAPHL(a1.v, a2.v);
-  printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
-  assertv4(t, 8, 9, 6, 7);
-  VTRANSPOSE4(a0.v, a1.v, a2.v, a3.v);
-  printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n",
-         a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3],
-         a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]);
-  assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15);
-}
-#endif
-#endif /*!PFFFT_SIMD_DISABLE */
-
-#if 0
-/* SSE and co like 16-bytes aligned pointers */
-#define MALLOC_V4SF_ALIGNMENT 64 /* with a 64-byte alignment, we are even aligned on L2 cache lines... */
-void *pffft_aligned_malloc(size_t nb_bytes) {
-  void *p, *p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT);
-  if (!p0) return (void *) 0;
-  p = (void *) (((size_t) p0 + MALLOC_V4SF_ALIGNMENT) & (~((size_t) (MALLOC_V4SF_ALIGNMENT-1))));
-  *((void **) p - 1) = p0;
-  return p;
-}
-
-void pffft_aligned_free(void *p) {
-  if (p) free(*((void **) p - 1));
-}
-
-int pffft_simd_size() { return SIMD_SZ; }
-#endif
-
-/*
-  passf2 and passb2 has been merged here, fsign = -1 for passf2, +1 for passb2
-*/
-static NEVER_INLINE(void) passf2_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1, float fsign) {
-  int k, i;
-  int l1ido = l1*ido;
-  if (ido <= 2) {
-    for (k=0; k < l1ido; k += ido, ch += ido, cc+= 2*ido) {
-      ch[0]         = VADD(cc[0], cc[ido+0]);
-      ch[l1ido]     = VSUB(cc[0], cc[ido+0]);
-      ch[1]         = VADD(cc[1], cc[ido+1]);
-      ch[l1ido + 1] = VSUB(cc[1], cc[ido+1]);
-    }
-  } else {
-    for (k=0; k < l1ido; k += ido, ch += ido, cc += 2*ido) {
-      for (i=0; i<ido-1; i+=2) {
-        v4sf tr2 = VSUB(cc[i+0], cc[i+ido+0]);
-        v4sf ti2 = VSUB(cc[i+1], cc[i+ido+1]);
-        v4sf wr = LD_PS1(wa1[i]), wi = VMUL(LD_PS1(fsign), LD_PS1(wa1[i+1]));
-        ch[i]   = VADD(cc[i+0], cc[i+ido+0]);
-        ch[i+1] = VADD(cc[i+1], cc[i+ido+1]);
-        VCPLXMUL(tr2, ti2, wr, wi);
-        ch[i+l1ido]   = tr2;
-        ch[i+l1ido+1] = ti2;
-      }
-    }
-  }
-}
-
-/*
-  passf3 and passb3 has been merged here, fsign = -1 for passf3, +1 for passb3
-*/
-#if 0
-static NEVER_INLINE(void) passf3_ps(int ido, int l1, const v4sf *cc, v4sf *ch,
-                                    const float *wa1, const float *wa2, float fsign) {
-  static const float taur = -0.5f;
-  float taui = 0.866025403784439f*fsign;
-  int i, k;
-  v4sf tr2, ti2, cr2, ci2, cr3, ci3, dr2, di2, dr3, di3;
-  int l1ido = l1*ido;
-  float wr1, wi1, wr2, wi2;
-  assert(ido > 2);
-  for (k=0; k< l1ido; k += ido, cc+= 3*ido, ch +=ido) {
-    for (i=0; i<ido-1; i+=2) {
-      tr2 = VADD(cc[i+ido], cc[i+2*ido]);
-      cr2 = VADD(cc[i], SVMUL(taur,tr2));
-      ch[i]    = VADD(cc[i], tr2);
-      ti2 = VADD(cc[i+ido+1], cc[i+2*ido+1]);
-      ci2 = VADD(cc[i    +1], SVMUL(taur,ti2));
-      ch[i+1]  = VADD(cc[i+1], ti2);
-      cr3 = SVMUL(taui, VSUB(cc[i+ido], cc[i+2*ido]));
-      ci3 = SVMUL(taui, VSUB(cc[i+ido+1], cc[i+2*ido+1]));
-      dr2 = VSUB(cr2, ci3);
-      dr3 = VADD(cr2, ci3);
-      di2 = VADD(ci2, cr3);
-      di3 = VSUB(ci2, cr3);
-      wr1=wa1[i], wi1=fsign*wa1[i+1], wr2=wa2[i], wi2=fsign*wa2[i+1];
-      VCPLXMUL(dr2, di2, LD_PS1(wr1), LD_PS1(wi1));
-      ch[i+l1ido] = dr2;
-      ch[i+l1ido + 1] = di2;
-      VCPLXMUL(dr3, di3, LD_PS1(wr2), LD_PS1(wi2));
-      ch[i+2*l1ido] = dr3;
-      ch[i+2*l1ido+1] = di3;
-    }
-  }
-} /* passf3 */
-#endif
-
-static NEVER_INLINE(void) passf4_ps(int ido, int l1, const v4sf *cc, v4sf *ch,
-                                    const float *wa1, const float *wa2, const float *wa3, float fsign) {
-  /* isign == -1 for forward transform and +1 for backward transform */
-
-  int i, k;
-  v4sf ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
-  int l1ido = l1*ido;
-  if (ido == 2) {
-    for (k=0; k < l1ido; k += ido, ch += ido, cc += 4*ido) {
-      tr1 = VSUB(cc[0], cc[2*ido + 0]);
-      tr2 = VADD(cc[0], cc[2*ido + 0]);
-      ti1 = VSUB(cc[1], cc[2*ido + 1]);
-      ti2 = VADD(cc[1], cc[2*ido + 1]);
-      ti4 = VMUL(VSUB(cc[1*ido + 0], cc[3*ido + 0]), LD_PS1(fsign));
-      tr4 = VMUL(VSUB(cc[3*ido + 1], cc[1*ido + 1]), LD_PS1(fsign));
-      tr3 = VADD(cc[ido + 0], cc[3*ido + 0]);
-      ti3 = VADD(cc[ido + 1], cc[3*ido + 1]);
-
-      ch[0*l1ido + 0] = VADD(tr2, tr3);
-      ch[0*l1ido + 1] = VADD(ti2, ti3);
-      ch[1*l1ido + 0] = VADD(tr1, tr4);
-      ch[1*l1ido + 1] = VADD(ti1, ti4);
-      ch[2*l1ido + 0] = VSUB(tr2, tr3);
-      ch[2*l1ido + 1] = VSUB(ti2, ti3);
-      ch[3*l1ido + 0] = VSUB(tr1, tr4);
-      ch[3*l1ido + 1] = VSUB(ti1, ti4);
-    }
-  } else {
-    for (k=0; k < l1ido; k += ido, ch+=ido, cc += 4*ido) {
-      for (i=0; i<ido-1; i+=2) {
-        float wr1, wi1, wr2, wi2, wr3, wi3;
-        tr1 = VSUB(cc[i + 0], cc[i + 2*ido + 0]);
-        tr2 = VADD(cc[i + 0], cc[i + 2*ido + 0]);
-        ti1 = VSUB(cc[i + 1], cc[i + 2*ido + 1]);
-        ti2 = VADD(cc[i + 1], cc[i + 2*ido + 1]);
-        tr4 = VMUL(VSUB(cc[i + 3*ido + 1], cc[i + 1*ido + 1]), LD_PS1(fsign));
-        ti4 = VMUL(VSUB(cc[i + 1*ido + 0], cc[i + 3*ido + 0]), LD_PS1(fsign));
-        tr3 = VADD(cc[i + ido + 0], cc[i + 3*ido + 0]);
-        ti3 = VADD(cc[i + ido + 1], cc[i + 3*ido + 1]);
-
-        ch[i] = VADD(tr2, tr3);
-        cr3    = VSUB(tr2, tr3);
-        ch[i + 1] = VADD(ti2, ti3);
-        ci3 = VSUB(ti2, ti3);
-
-        cr2 = VADD(tr1, tr4);
-        cr4 = VSUB(tr1, tr4);
-        ci2 = VADD(ti1, ti4);
-        ci4 = VSUB(ti1, ti4);
-        wr1=wa1[i], wi1=fsign*wa1[i+1];
-        VCPLXMUL(cr2, ci2, LD_PS1(wr1), LD_PS1(wi1));
-        wr2=wa2[i], wi2=fsign*wa2[i+1];
-        ch[i + l1ido] = cr2;
-        ch[i + l1ido + 1] = ci2;
-
-        VCPLXMUL(cr3, ci3, LD_PS1(wr2), LD_PS1(wi2));
-        wr3=wa3[i], wi3=fsign*wa3[i+1];
-        ch[i + 2*l1ido] = cr3;
-        ch[i + 2*l1ido + 1] = ci3;
-
-        VCPLXMUL(cr4, ci4, LD_PS1(wr3), LD_PS1(wi3));
-        ch[i + 3*l1ido] = cr4;
-        ch[i + 3*l1ido + 1] = ci4;
-      }
-    }
-  }
-} /* passf4 */
-
-#if 0
-/*
-  passf5 and passb5 has been merged here, fsign = -1 for passf5, +1 for passb5
-*/
-static NEVER_INLINE(void) passf5_ps(int ido, int l1, const v4sf *cc, v4sf *ch,
-                                    const float *wa1, const float *wa2,
-                                    const float *wa3, const float *wa4, float fsign) {
-  static const float tr11 = .309016994374947f;
-  const float ti11 = .951056516295154f*fsign;
-  static const float tr12 = -.809016994374947f;
-  const float ti12 = .587785252292473f*fsign;
-
-  /* Local variables */
-  int i, k;
-  v4sf ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, ti2, ti3,
-    ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
-
-  float wr1, wi1, wr2, wi2, wr3, wi3, wr4, wi4;
-
-#define cc_ref(a_1,a_2) cc[(a_2-1)*ido + a_1 + 1]
-#define ch_ref(a_1,a_3) ch[(a_3-1)*l1*ido + a_1 + 1]
-
-  assert(ido > 2);
-  for (k = 0; k < l1; ++k, cc += 5*ido, ch += ido) {
-    for (i = 0; i < ido-1; i += 2) {
-      ti5 = VSUB(cc_ref(i  , 2), cc_ref(i  , 5));
-      ti2 = VADD(cc_ref(i  , 2), cc_ref(i  , 5));
-      ti4 = VSUB(cc_ref(i  , 3), cc_ref(i  , 4));
-      ti3 = VADD(cc_ref(i  , 3), cc_ref(i  , 4));
-      tr5 = VSUB(cc_ref(i-1, 2), cc_ref(i-1, 5));
-      tr2 = VADD(cc_ref(i-1, 2), cc_ref(i-1, 5));
-      tr4 = VSUB(cc_ref(i-1, 3), cc_ref(i-1, 4));
-      tr3 = VADD(cc_ref(i-1, 3), cc_ref(i-1, 4));
-      ch_ref(i-1, 1) = VADD(cc_ref(i-1, 1), VADD(tr2, tr3));
-      ch_ref(i  , 1) = VADD(cc_ref(i  , 1), VADD(ti2, ti3));
-      cr2 = VADD(cc_ref(i-1, 1), VADD(SVMUL(tr11, tr2),SVMUL(tr12, tr3)));
-      ci2 = VADD(cc_ref(i  , 1), VADD(SVMUL(tr11, ti2),SVMUL(tr12, ti3)));
-      cr3 = VADD(cc_ref(i-1, 1), VADD(SVMUL(tr12, tr2),SVMUL(tr11, tr3)));
-      ci3 = VADD(cc_ref(i  , 1), VADD(SVMUL(tr12, ti2),SVMUL(tr11, ti3)));
-      cr5 = VADD(SVMUL(ti11, tr5), SVMUL(ti12, tr4));
-      ci5 = VADD(SVMUL(ti11, ti5), SVMUL(ti12, ti4));
-      cr4 = VSUB(SVMUL(ti12, tr5), SVMUL(ti11, tr4));
-      ci4 = VSUB(SVMUL(ti12, ti5), SVMUL(ti11, ti4));
-      dr3 = VSUB(cr3, ci4);
-      dr4 = VADD(cr3, ci4);
-      di3 = VADD(ci3, cr4);
-      di4 = VSUB(ci3, cr4);
-      dr5 = VADD(cr2, ci5);
-      dr2 = VSUB(cr2, ci5);
-      di5 = VSUB(ci2, cr5);
-      di2 = VADD(ci2, cr5);
-      wr1=wa1[i], wi1=fsign*wa1[i+1], wr2=wa2[i], wi2=fsign*wa2[i+1];
-      wr3=wa3[i], wi3=fsign*wa3[i+1], wr4=wa4[i], wi4=fsign*wa4[i+1];
-      VCPLXMUL(dr2, di2, LD_PS1(wr1), LD_PS1(wi1));
-      ch_ref(i - 1, 2) = dr2;
-      ch_ref(i, 2)     = di2;
-      VCPLXMUL(dr3, di3, LD_PS1(wr2), LD_PS1(wi2));
-      ch_ref(i - 1, 3) = dr3;
-      ch_ref(i, 3)     = di3;
-      VCPLXMUL(dr4, di4, LD_PS1(wr3), LD_PS1(wi3));
-      ch_ref(i - 1, 4) = dr4;
-      ch_ref(i, 4)     = di4;
-      VCPLXMUL(dr5, di5, LD_PS1(wr4), LD_PS1(wi4));
-      ch_ref(i - 1, 5) = dr5;
-      ch_ref(i, 5)     = di5;
-    }
-  }
-#undef ch_ref
-#undef cc_ref
-}
-#endif
-
-static NEVER_INLINE(void) radf2_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch, const float *wa1) {
-  static const float minus_one = -1.f;
-  int i, k, l1ido = l1*ido;
-  for (k=0; k < l1ido; k += ido) {
-    v4sf a = cc[k], b = cc[k + l1ido];
-    ch[2*k] = VADD(a, b);
-    ch[2*(k+ido)-1] = VSUB(a, b);
-  }
-  if (ido < 2) return;
-  if (ido != 2) {
-    for (k=0; k < l1ido; k += ido) {
-      for (i=2; i<ido; i+=2) {
-        v4sf tr2 = cc[i - 1 + k + l1ido], ti2 = cc[i + k + l1ido];
-        v4sf br = cc[i - 1 + k], bi = cc[i + k];
-        VCPLXMULCONJ(tr2, ti2, LD_PS1(wa1[i - 2]), LD_PS1(wa1[i - 1]));
-        ch[i + 2*k] = VADD(bi, ti2);
-        ch[2*(k+ido) - i] = VSUB(ti2, bi);
-        ch[i - 1 + 2*k] = VADD(br, tr2);
-        ch[2*(k+ido) - i -1] = VSUB(br, tr2);
-      }
-    }
-    if (ido % 2 == 1) return;
-  }
-  for (k=0; k < l1ido; k += ido) {
-    ch[2*k + ido] = SVMUL(minus_one, cc[ido-1 + k + l1ido]);
-    ch[2*k + ido-1] = cc[k + ido-1];
-  }
-} /* radf2 */
-
-
-static NEVER_INLINE(void) radb2_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1) {
-  static const float minus_two=-2;
-  int i, k, l1ido = l1*ido;
-  v4sf a,b,c,d, tr2, ti2;
-  for (k=0; k < l1ido; k += ido) {
-    a = cc[2*k]; b = cc[2*(k+ido) - 1];
-    ch[k] = VADD(a, b);
-    ch[k + l1ido] =VSUB(a, b);
-  }
-  if (ido < 2) return;
-  if (ido != 2) {
-    for (k = 0; k < l1ido; k += ido) {
-      for (i = 2; i < ido; i += 2) {
-        a = cc[i-1 + 2*k]; b = cc[2*(k + ido) - i - 1];
-        c = cc[i+0 + 2*k]; d = cc[2*(k + ido) - i + 0];
-        ch[i-1 + k] = VADD(a, b);
-        tr2 = VSUB(a, b);
-        ch[i+0 + k] = VSUB(c, d);
-        ti2 = VADD(c, d);
-        VCPLXMUL(tr2, ti2, LD_PS1(wa1[i - 2]), LD_PS1(wa1[i - 1]));
-        ch[i-1 + k + l1ido] = tr2;
-        ch[i+0 + k + l1ido] = ti2;
-      }
-    }
-    if (ido % 2 == 1) return;
-  }
-  for (k = 0; k < l1ido; k += ido) {
-    a = cc[2*k + ido-1]; b = cc[2*k + ido];
-    ch[k + ido-1] = VADD(a,a);
-    ch[k + ido-1 + l1ido] = SVMUL(minus_two, b);
-  }
-} /* radb2 */
-
-#if 0
-static void radf3_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch,
-                     const float *wa1, const float *wa2) {
-  static const float taur = -0.5f;
-  static const float taui = 0.866025403784439f;
-  int i, k, ic;
-  v4sf ci2, di2, di3, cr2, dr2, dr3, ti2, ti3, tr2, tr3, wr1, wi1, wr2, wi2;
-  for (k=0; k<l1; k++) {
-    cr2 = VADD(cc[(k + l1)*ido], cc[(k + 2*l1)*ido]);
-    ch[3*k*ido] = VADD(cc[k*ido], cr2);
-    ch[(3*k+2)*ido] = SVMUL(taui, VSUB(cc[(k + l1*2)*ido], cc[(k + l1)*ido]));
-    ch[ido-1 + (3*k + 1)*ido] = VADD(cc[k*ido], SVMUL(taur, cr2));
-  }
-  if (ido == 1) return;
-  for (k=0; k<l1; k++) {
-    for (i=2; i<ido; i+=2) {
-      ic = ido - i;
-      wr1 = LD_PS1(wa1[i - 2]); wi1 = LD_PS1(wa1[i - 1]);
-      dr2 = cc[i - 1 + (k + l1)*ido]; di2 = cc[i + (k + l1)*ido];
-      VCPLXMULCONJ(dr2, di2, wr1, wi1);
-
-      wr2 = LD_PS1(wa2[i - 2]); wi2 = LD_PS1(wa2[i - 1]);
-      dr3 = cc[i - 1 + (k + l1*2)*ido]; di3 = cc[i + (k + l1*2)*ido];
-      VCPLXMULCONJ(dr3, di3, wr2, wi2);
-
-      cr2 = VADD(dr2, dr3);
-      ci2 = VADD(di2, di3);
-      ch[i - 1 + 3*k*ido] = VADD(cc[i - 1 + k*ido], cr2);
-      ch[i + 3*k*ido] = VADD(cc[i + k*ido], ci2);
-      tr2 = VADD(cc[i - 1 + k*ido], SVMUL(taur, cr2));
-      ti2 = VADD(cc[i + k*ido], SVMUL(taur, ci2));
-      tr3 = SVMUL(taui, VSUB(di2, di3));
-      ti3 = SVMUL(taui, VSUB(dr3, dr2));
-      ch[i - 1 + (3*k + 2)*ido] = VADD(tr2, tr3);
-      ch[ic - 1 + (3*k + 1)*ido] = VSUB(tr2, tr3);
-      ch[i + (3*k + 2)*ido] = VADD(ti2, ti3);
-      ch[ic + (3*k + 1)*ido] = VSUB(ti3, ti2);
-    }
-  }
-} /* radf3 */
-
-
-static void radb3_ps(int ido, int l1, const v4sf *RESTRICT cc, v4sf *RESTRICT ch,
-                     const float *wa1, const float *wa2)
-{
-  static const float taur = -0.5f;
-  static const float taui = 0.866025403784439f;
-  static const float taui_2 = 0.866025403784439f*2;
-  int i, k, ic;
-  v4sf ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;
-  for (k=0; k<l1; k++) {
-    tr2 = cc[ido-1 + (3*k + 1)*ido]; tr2 = VADD(tr2,tr2);
-    cr2 = VMADD(LD_PS1(taur), tr2, cc[3*k*ido]);
-    ch[k*ido] = VADD(cc[3*k*ido], tr2);
-    ci3 = SVMUL(taui_2, cc[(3*k + 2)*ido]);
-    ch[(k + l1)*ido] = VSUB(cr2, ci3);
-    ch[(k + 2*l1)*ido] = VADD(cr2, ci3);
-  }
-  if (ido == 1) return;
-  for (k=0; k<l1; k++) {
-    for (i=2; i<ido; i+=2) {
-      ic = ido - i;
-      tr2 = VADD(cc[i - 1 + (3*k + 2)*ido], cc[ic - 1 + (3*k + 1)*ido]);
-      cr2 = VMADD(LD_PS1(taur), tr2, cc[i - 1 + 3*k*ido]);
-      ch[i - 1 + k*ido] = VADD(cc[i - 1 + 3*k*ido], tr2);
-      ti2 = VSUB(cc[i + (3*k + 2)*ido], cc[ic + (3*k + 1)*ido]);
-      ci2 = VMADD(LD_PS1(taur), ti2, cc[i + 3*k*ido]);
-      ch[i + k*ido] = VADD(cc[i + 3*k*ido], ti2);
-      cr3 = SVMUL(taui, VSUB(cc[i - 1 + (3*k + 2)*ido], cc[ic - 1 + (3*k + 1)*ido]));
-      ci3 = SVMUL(taui, VADD(cc[i + (3*k + 2)*ido], cc[ic + (3*k + 1)*ido]));
-      dr2 = VSUB(cr2, ci3);
-      dr3 = VADD(cr2, ci3);
-      di2 = VADD(ci2, cr3);
-      di3 = VSUB(ci2, cr3);
-      VCPLXMUL(dr2, di2, LD_PS1(wa1[i-2]), LD_PS1(wa1[i-1]));
-      ch[i - 1 + (k + l1)*ido] = dr2;
-      ch[i + (k + l1)*ido] = di2;
-      VCPLXMUL(dr3, di3, LD_PS1(wa2[i-2]), LD_PS1(wa2[i-1]));
-      ch[i - 1 + (k + 2*l1)*ido] = dr3;
-      ch[i + (k + 2*l1)*ido] = di3;
-    }
-  }
-} /* radb3 */
-#endif
-
-static NEVER_INLINE(void) radf4_ps(int ido, int l1, const v4sf *RESTRICT cc, v4sf * RESTRICT ch,
-                                   const float * RESTRICT wa1, const float * RESTRICT wa2, const float * RESTRICT wa3)
-{
-  static const float minus_hsqt2 = (float)-0.7071067811865475;
-  int i, k, l1ido = l1*ido;
-  {
-    const v4sf *RESTRICT cc_ = cc, * RESTRICT cc_end = cc + l1ido;
-    v4sf * RESTRICT ch_ = ch;
-    while (cc < cc_end) {
-      /* this loop represents between 25% and 40% of total radf4_ps cost ! */
-      v4sf a0 = cc[0], a1 = cc[l1ido];
-      v4sf a2 = cc[2*l1ido], a3 = cc[3*l1ido];
-      v4sf tr1 = VADD(a1, a3);
-      v4sf tr2 = VADD(a0, a2);
-      ch[2*ido-1] = VSUB(a0, a2);
-      ch[2*ido  ] = VSUB(a3, a1);
-      ch[0      ] = VADD(tr1, tr2);
-      ch[4*ido-1] = VSUB(tr2, tr1);
-      cc += ido; ch += 4*ido;
-    }
-    cc = cc_; ch = ch_;
-  }
-  if (ido < 2) return;
-  if (ido != 2) {
-    for (k = 0; k < l1ido; k += ido) {
-      const v4sf * RESTRICT pc = (v4sf*)(cc + 1 + k);
-      for (i=2; i<ido; i += 2, pc += 2) {
-        int ic = ido - i;
-        v4sf wr, wi, cr2, ci2, cr3, ci3, cr4, ci4;
-        v4sf tr1, ti1, tr2, ti2, tr3, ti3, tr4, ti4;
-
-        cr2 = pc[1*l1ido+0];
-        ci2 = pc[1*l1ido+1];
-        wr=LD_PS1(wa1[i - 2]);
-        wi=LD_PS1(wa1[i - 1]);
-        VCPLXMULCONJ(cr2,ci2,wr,wi);
-
-        cr3 = pc[2*l1ido+0];
-        ci3 = pc[2*l1ido+1];
-        wr = LD_PS1(wa2[i-2]);
-        wi = LD_PS1(wa2[i-1]);
-        VCPLXMULCONJ(cr3, ci3, wr, wi);
-
-        cr4 = pc[3*l1ido];
-        ci4 = pc[3*l1ido+1];
-        wr = LD_PS1(wa3[i-2]);
-        wi = LD_PS1(wa3[i-1]);
-        VCPLXMULCONJ(cr4, ci4, wr, wi);
-
-        /* at this point, on SSE, five of "cr2 cr3 cr4 ci2 ci3 ci4" should be loaded in registers */
-
-        tr1 = VADD(cr2,cr4);
-        tr4 = VSUB(cr4,cr2);
-        tr2 = VADD(pc[0],cr3);
-        tr3 = VSUB(pc[0],cr3);
-        ch[i - 1 + 4*k] = VADD(tr1,tr2);
-        ch[ic - 1 + 4*k + 3*ido] = VSUB(tr2,tr1); /* at this point tr1 and tr2 can be disposed */
-        ti1 = VADD(ci2,ci4);
-        ti4 = VSUB(ci2,ci4);
-        ch[i - 1 + 4*k + 2*ido] = VADD(ti4,tr3);
-        ch[ic - 1 + 4*k + 1*ido] = VSUB(tr3,ti4); /* dispose tr3, ti4 */
-        ti2 = VADD(pc[1],ci3);
-        ti3 = VSUB(pc[1],ci3);
-        ch[i + 4*k] = VADD(ti1, ti2);
-        ch[ic + 4*k + 3*ido] = VSUB(ti1, ti2);
-        ch[i + 4*k + 2*ido] = VADD(tr4, ti3);
-        ch[ic + 4*k + 1*ido] = VSUB(tr4, ti3);
-      }
-    }
-    if (ido % 2 == 1) return;
-  }
-  for (k=0; k<l1ido; k += ido) {
-    v4sf a = cc[ido-1 + k + l1ido], b = cc[ido-1 + k + 3*l1ido];
-    v4sf c = cc[ido-1 + k], d = cc[ido-1 + k + 2*l1ido];
-    v4sf ti1 = SVMUL(minus_hsqt2, VADD(a, b));
-    v4sf tr1 = SVMUL(minus_hsqt2, VSUB(b, a));
-    ch[ido-1 + 4*k] = VADD(tr1, c);
-    ch[ido-1 + 4*k + 2*ido] = VSUB(c, tr1);
-    ch[4*k + 1*ido] = VSUB(ti1, d);
-    ch[4*k + 3*ido] = VADD(ti1, d);
-  }
-} /* radf4 */
-
-
-static NEVER_INLINE(void) radb4_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch,
-                                   const float * RESTRICT wa1, const float * RESTRICT wa2, const float *RESTRICT wa3)
-{
-  static const float minus_sqrt2 = (float)-1.414213562373095;
-  static const float two = 2.f;
-  int i, k, l1ido = l1*ido;
-  v4sf ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
-  {
-    const v4sf *RESTRICT cc_ = cc, * RESTRICT ch_end = ch + l1ido;
-    v4sf *ch_ = ch;
-    while (ch < ch_end) {
-      v4sf a = cc[0], b = cc[4*ido-1];
-      v4sf c = cc[2*ido], d = cc[2*ido-1];
-      tr3 = SVMUL(two,d);
-      tr2 = VADD(a,b);
-      tr1 = VSUB(a,b);
-      tr4 = SVMUL(two,c);
-      ch[0*l1ido] = VADD(tr2, tr3);
-      ch[2*l1ido] = VSUB(tr2, tr3);
-      ch[1*l1ido] = VSUB(tr1, tr4);
-      ch[3*l1ido] = VADD(tr1, tr4);
-
-      cc += 4*ido; ch += ido;
-    }
-    cc = cc_; ch = ch_;
-  }
-  if (ido < 2) return;
-  if (ido != 2) {
-    for (k = 0; k < l1ido; k += ido) {
-      const v4sf * RESTRICT pc = (v4sf*)(cc - 1 + 4*k);
-      v4sf * RESTRICT ph = (v4sf*)(ch + k + 1);
-      for (i = 2; i < ido; i += 2) {
-
-        tr1 = VSUB(pc[i], pc[4*ido - i]);
-        tr2 = VADD(pc[i], pc[4*ido - i]);
-        ti4 = VSUB(pc[2*ido + i], pc[2*ido - i]);
-        tr3 = VADD(pc[2*ido + i], pc[2*ido - i]);
-        ph[0] = VADD(tr2, tr3);
-        cr3 = VSUB(tr2, tr3);
-
-        ti3 = VSUB(pc[2*ido + i + 1], pc[2*ido - i + 1]);
-        tr4 = VADD(pc[2*ido + i + 1], pc[2*ido - i + 1]);
-        cr2 = VSUB(tr1, tr4);
-        cr4 = VADD(tr1, tr4);
-
-        ti1 = VADD(pc[i + 1], pc[4*ido - i + 1]);
-        ti2 = VSUB(pc[i + 1], pc[4*ido - i + 1]);
-
-        ph[1] = VADD(ti2, ti3); ph += l1ido;
-        ci3 = VSUB(ti2, ti3);
-        ci2 = VADD(ti1, ti4);
-        ci4 = VSUB(ti1, ti4);
-        VCPLXMUL(cr2, ci2, LD_PS1(wa1[i-2]), LD_PS1(wa1[i-1]));
-        ph[0] = cr2;
-        ph[1] = ci2; ph += l1ido;
-        VCPLXMUL(cr3, ci3, LD_PS1(wa2[i-2]), LD_PS1(wa2[i-1]));
-        ph[0] = cr3;
-        ph[1] = ci3; ph += l1ido;
-        VCPLXMUL(cr4, ci4, LD_PS1(wa3[i-2]), LD_PS1(wa3[i-1]));
-        ph[0] = cr4;
-        ph[1] = ci4; ph = ph - 3*l1ido + 2;
-      }
-    }
-    if (ido % 2 == 1) return;
-  }
-  for (k=0; k < l1ido; k+=ido) {
-    int i0 = 4*k + ido;
-    v4sf c = cc[i0-1], d = cc[i0 + 2*ido-1];
-    v4sf a = cc[i0+0], b = cc[i0 + 2*ido+0];
-    tr1 = VSUB(c,d);
-    tr2 = VADD(c,d);
-    ti1 = VADD(b,a);
-    ti2 = VSUB(b,a);
-    ch[ido-1 + k + 0*l1ido] = VADD(tr2,tr2);
-    ch[ido-1 + k + 1*l1ido] = SVMUL(minus_sqrt2, VSUB(ti1, tr1));
-    ch[ido-1 + k + 2*l1ido] = VADD(ti2, ti2);
-    ch[ido-1 + k + 3*l1ido] = SVMUL(minus_sqrt2, VADD(ti1, tr1));
-  }
-} /* radb4 */
-
-#if 0
-static void radf5_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch,
-                     const float *wa1, const float *wa2, const float *wa3, const float *wa4)
-{
-  static const float tr11 = .309016994374947f;
-  static const float ti11 = .951056516295154f;
-  static const float tr12 = -.809016994374947f;
-  static const float ti12 = .587785252292473f;
-
-  /* System generated locals */
-  int cc_offset, ch_offset;
-
-  /* Local variables */
-  int i, k, ic;
-  v4sf ci2, di2, ci4, ci5, di3, di4, di5, ci3, cr2, cr3, dr2, dr3, dr4, dr5,
-    cr5, cr4, ti2, ti3, ti5, ti4, tr2, tr3, tr4, tr5;
-  int idp2;
-
-
-#define cc_ref(a_1,a_2,a_3) cc[((a_3)*l1 + (a_2))*ido + a_1]
-#define ch_ref(a_1,a_2,a_3) ch[((a_3)*5 + (a_2))*ido + a_1]
-
-  /* Parameter adjustments */
-  ch_offset = 1 + ido * 6;
-  ch -= ch_offset;
-  cc_offset = 1 + ido * (1 + l1);
-  cc -= cc_offset;
-
-  /* Function Body */
-  for (k = 1; k <= l1; ++k) {
-    cr2 = VADD(cc_ref(1, k, 5), cc_ref(1, k, 2));
-    ci5 = VSUB(cc_ref(1, k, 5), cc_ref(1, k, 2));
-    cr3 = VADD(cc_ref(1, k, 4), cc_ref(1, k, 3));
-    ci4 = VSUB(cc_ref(1, k, 4), cc_ref(1, k, 3));
-    ch_ref(1, 1, k) = VADD(cc_ref(1, k, 1), VADD(cr2, cr3));
-    ch_ref(ido, 2, k) = VADD(cc_ref(1, k, 1), VADD(SVMUL(tr11, cr2), SVMUL(tr12, cr3)));
-    ch_ref(1, 3, k) = VADD(SVMUL(ti11, ci5), SVMUL(ti12, ci4));
-    ch_ref(ido, 4, k) = VADD(cc_ref(1, k, 1), VADD(SVMUL(tr12, cr2), SVMUL(tr11, cr3)));
-    ch_ref(1, 5, k) = VSUB(SVMUL(ti12, ci5), SVMUL(ti11, ci4));
-    /*printf("pffft: radf5, k=%d ch_ref=%f, ci4=%f\n", k, ch_ref(1, 5, k), ci4); */
-  }
-  if (ido == 1) {
-    return;
-  }
-  idp2 = ido + 2;
-  for (k = 1; k <= l1; ++k) {
-    for (i = 3; i <= ido; i += 2) {
-      ic = idp2 - i;
-      dr2 = LD_PS1(wa1[i-3]); di2 = LD_PS1(wa1[i-2]);
-      dr3 = LD_PS1(wa2[i-3]); di3 = LD_PS1(wa2[i-2]);
-      dr4 = LD_PS1(wa3[i-3]); di4 = LD_PS1(wa3[i-2]);
-      dr5 = LD_PS1(wa4[i-3]); di5 = LD_PS1(wa4[i-2]);
-      VCPLXMULCONJ(dr2, di2, cc_ref(i-1, k, 2), cc_ref(i, k, 2));
-      VCPLXMULCONJ(dr3, di3, cc_ref(i-1, k, 3), cc_ref(i, k, 3));
-      VCPLXMULCONJ(dr4, di4, cc_ref(i-1, k, 4), cc_ref(i, k, 4));
-      VCPLXMULCONJ(dr5, di5, cc_ref(i-1, k, 5), cc_ref(i, k, 5));
-      cr2 = VADD(dr2, dr5);
-      ci5 = VSUB(dr5, dr2);
-      cr5 = VSUB(di2, di5);
-      ci2 = VADD(di2, di5);
-      cr3 = VADD(dr3, dr4);
-      ci4 = VSUB(dr4, dr3);
-      cr4 = VSUB(di3, di4);
-      ci3 = VADD(di3, di4);
-      ch_ref(i - 1, 1, k) = VADD(cc_ref(i - 1, k, 1), VADD(cr2, cr3));
-      ch_ref(i, 1, k) = VSUB(cc_ref(i, k, 1), VADD(ci2, ci3));/* */
-      tr2 = VADD(cc_ref(i - 1, k, 1), VADD(SVMUL(tr11, cr2), SVMUL(tr12, cr3)));
-      ti2 = VSUB(cc_ref(i, k, 1), VADD(SVMUL(tr11, ci2), SVMUL(tr12, ci3)));/* */
-      tr3 = VADD(cc_ref(i - 1, k, 1), VADD(SVMUL(tr12, cr2), SVMUL(tr11, cr3)));
-      ti3 = VSUB(cc_ref(i, k, 1), VADD(SVMUL(tr12, ci2), SVMUL(tr11, ci3)));/* */
-      tr5 = VADD(SVMUL(ti11, cr5), SVMUL(ti12, cr4));
-      ti5 = VADD(SVMUL(ti11, ci5), SVMUL(ti12, ci4));
-      tr4 = VSUB(SVMUL(ti12, cr5), SVMUL(ti11, cr4));
-      ti4 = VSUB(SVMUL(ti12, ci5), SVMUL(ti11, ci4));
-      ch_ref(i - 1, 3, k) = VSUB(tr2, tr5);
-      ch_ref(ic - 1, 2, k) = VADD(tr2, tr5);
-      ch_ref(i, 3, k) = VADD(ti2, ti5);
-      ch_ref(ic, 2, k) = VSUB(ti5, ti2);
-      ch_ref(i - 1, 5, k) = VSUB(tr3, tr4);
-      ch_ref(ic - 1, 4, k) = VADD(tr3, tr4);
-      ch_ref(i, 5, k) = VADD(ti3, ti4);
-      ch_ref(ic, 4, k) = VSUB(ti4, ti3);
-    }
-  }
-#undef cc_ref
-#undef ch_ref
-} /* radf5 */
-
-static void radb5_ps(int ido, int l1, const v4sf *RESTRICT cc, v4sf *RESTRICT ch,
-                  const float *wa1, const float *wa2, const float *wa3, const float *wa4)
-{
-  static const float tr11 = .309016994374947f;
-  static const float ti11 = .951056516295154f;
-  static const float tr12 = -.809016994374947f;
-  static const float ti12 = .587785252292473f;
-
-  int cc_offset, ch_offset;
-
-  /* Local variables */
-  int i, k, ic;
-  v4sf ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, ti2, ti3,
-    ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
-  int idp2;
-
-#define cc_ref(a_1,a_2,a_3) cc[((a_3)*5 + (a_2))*ido + a_1]
-#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1]
-
-  /* Parameter adjustments */
-  ch_offset = 1 + ido * (1 + l1);
-  ch -= ch_offset;
-  cc_offset = 1 + ido * 6;
-  cc -= cc_offset;
-
-  /* Function Body */
-  for (k = 1; k <= l1; ++k) {
-    ti5 = VADD(cc_ref(1, 3, k), cc_ref(1, 3, k));
-    ti4 = VADD(cc_ref(1, 5, k), cc_ref(1, 5, k));
-    tr2 = VADD(cc_ref(ido, 2, k), cc_ref(ido, 2, k));
-    tr3 = VADD(cc_ref(ido, 4, k), cc_ref(ido, 4, k));
-    ch_ref(1, k, 1) = VADD(cc_ref(1, 1, k), VADD(tr2, tr3));
-    cr2 = VADD(cc_ref(1, 1, k), VADD(SVMUL(tr11, tr2), SVMUL(tr12, tr3)));
-    cr3 = VADD(cc_ref(1, 1, k), VADD(SVMUL(tr12, tr2), SVMUL(tr11, tr3)));
-    ci5 = VADD(SVMUL(ti11, ti5), SVMUL(ti12, ti4));
-    ci4 = VSUB(SVMUL(ti12, ti5), SVMUL(ti11, ti4));
-    ch_ref(1, k, 2) = VSUB(cr2, ci5);
-    ch_ref(1, k, 3) = VSUB(cr3, ci4);
-    ch_ref(1, k, 4) = VADD(cr3, ci4);
-    ch_ref(1, k, 5) = VADD(cr2, ci5);
-  }
-  if (ido == 1) {
-    return;
-  }
-  idp2 = ido + 2;
-  for (k = 1; k <= l1; ++k) {
-    for (i = 3; i <= ido; i += 2) {
-      ic = idp2 - i;
-      ti5 = VADD(cc_ref(i  , 3, k), cc_ref(ic  , 2, k));
-      ti2 = VSUB(cc_ref(i  , 3, k), cc_ref(ic  , 2, k));
-      ti4 = VADD(cc_ref(i  , 5, k), cc_ref(ic  , 4, k));
-      ti3 = VSUB(cc_ref(i  , 5, k), cc_ref(ic  , 4, k));
-      tr5 = VSUB(cc_ref(i-1, 3, k), cc_ref(ic-1, 2, k));
-      tr2 = VADD(cc_ref(i-1, 3, k), cc_ref(ic-1, 2, k));
-      tr4 = VSUB(cc_ref(i-1, 5, k), cc_ref(ic-1, 4, k));
-      tr3 = VADD(cc_ref(i-1, 5, k), cc_ref(ic-1, 4, k));
-      ch_ref(i - 1, k, 1) = VADD(cc_ref(i-1, 1, k), VADD(tr2, tr3));
-      ch_ref(i, k, 1) = VADD(cc_ref(i, 1, k), VADD(ti2, ti3));
-      cr2 = VADD(cc_ref(i-1, 1, k), VADD(SVMUL(tr11, tr2), SVMUL(tr12, tr3)));
-      ci2 = VADD(cc_ref(i  , 1, k), VADD(SVMUL(tr11, ti2), SVMUL(tr12, ti3)));
-      cr3 = VADD(cc_ref(i-1, 1, k), VADD(SVMUL(tr12, tr2), SVMUL(tr11, tr3)));
-      ci3 = VADD(cc_ref(i  , 1, k), VADD(SVMUL(tr12, ti2), SVMUL(tr11, ti3)));
-      cr5 = VADD(SVMUL(ti11, tr5), SVMUL(ti12, tr4));
-      ci5 = VADD(SVMUL(ti11, ti5), SVMUL(ti12, ti4));
-      cr4 = VSUB(SVMUL(ti12, tr5), SVMUL(ti11, tr4));
-      ci4 = VSUB(SVMUL(ti12, ti5), SVMUL(ti11, ti4));
-      dr3 = VSUB(cr3, ci4);
-      dr4 = VADD(cr3, ci4);
-      di3 = VADD(ci3, cr4);
-      di4 = VSUB(ci3, cr4);
-      dr5 = VADD(cr2, ci5);
-      dr2 = VSUB(cr2, ci5);
-      di5 = VSUB(ci2, cr5);
-      di2 = VADD(ci2, cr5);
-      VCPLXMUL(dr2, di2, LD_PS1(wa1[i-3]), LD_PS1(wa1[i-2]));
-      VCPLXMUL(dr3, di3, LD_PS1(wa2[i-3]), LD_PS1(wa2[i-2]));
-      VCPLXMUL(dr4, di4, LD_PS1(wa3[i-3]), LD_PS1(wa3[i-2]));
-      VCPLXMUL(dr5, di5, LD_PS1(wa4[i-3]), LD_PS1(wa4[i-2]));
-
-      ch_ref(i-1, k, 2) = dr2; ch_ref(i, k, 2) = di2;
-      ch_ref(i-1, k, 3) = dr3; ch_ref(i, k, 3) = di3;
-      ch_ref(i-1, k, 4) = dr4; ch_ref(i, k, 4) = di4;
-      ch_ref(i-1, k, 5) = dr5; ch_ref(i, k, 5) = di5;
-    }
-  }
-#undef cc_ref
-#undef ch_ref
-} /* radb5 */
-#endif
-
-static NEVER_INLINE(v4sf *) rfftf1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2,
-                                      const float *wa, const int *ifac) {
-  v4sf *in  = (v4sf*)input_readonly;
-  v4sf *out = (in == work2 ? work1 : work2);
-  int nf = ifac[1], k1;
-  int l2 = n;
-  int iw = n-1;
-  assert(in != out && work1 != work2);
-  for (k1 = 1; k1 <= nf; ++k1) {
-    int kh = nf - k1;
-    int ip = ifac[kh + 2];
-    int l1 = l2 / ip;
-    int ido = n / l2;
-    iw -= (ip - 1)*ido;
-    switch (ip) {
-#if 0
-      case 5: {
-        int ix2 = iw + ido;
-        int ix3 = ix2 + ido;
-        int ix4 = ix3 + ido;
-        radf5_ps(ido, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4]);
-      } break;
-#endif
-      case 4: {
-        int ix2 = iw + ido;
-        int ix3 = ix2 + ido;
-        radf4_ps(ido, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3]);
-      } break;
-#if 0
-      case 3: {
-        int ix2 = iw + ido;
-        radf3_ps(ido, l1, in, out, &wa[iw], &wa[ix2]);
-      } break;
-#endif
-      case 2:
-        radf2_ps(ido, l1, in, out, &wa[iw]);
-        break;
-      default:
-        assert(0);
-        break;
-    }
-    l2 = l1;
-    if (out == work2) {
-      out = work1; in = work2;
-    } else {
-      out = work2; in = work1;
-    }
-  }
-  return in; /* this is in fact the output .. */
-} /* rfftf1 */
-
-static NEVER_INLINE(v4sf *) rfftb1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2,
-                                      const float *wa, const int *ifac) {
-  v4sf *in  = (v4sf*)input_readonly;
-  v4sf *out = (in == work2 ? work1 : work2);
-  int nf = ifac[1], k1;
-  int l1 = 1;
-  int iw = 0;
-  assert(in != out);
-  for (k1=1; k1<=nf; k1++) {
-    int ip = ifac[k1 + 1];
-    int l2 = ip*l1;
-    int ido = n / l2;
-    switch (ip) {
-#if 0
-      case 5: {
-        int ix2 = iw + ido;
-        int ix3 = ix2 + ido;
-        int ix4 = ix3 + ido;
-        radb5_ps(ido, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4]);
-      } break;
-#endif
-      case 4: {
-        int ix2 = iw + ido;
-        int ix3 = ix2 + ido;
-        radb4_ps(ido, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3]);
-      } break;
-#if 0
-      case 3: {
-        int ix2 = iw + ido;
-        radb3_ps(ido, l1, in, out, &wa[iw], &wa[ix2]);
-      } break;
-#endif
-      case 2:
-        radb2_ps(ido, l1, in, out, &wa[iw]);
-        break;
-      default:
-        assert(0);
-        break;
-    }
-    l1 = l2;
-    iw += (ip - 1)*ido;
-
-    if (out == work2) {
-      out = work1; in = work2;
-    } else {
-      out = work2; in = work1;
-    }
-  }
-  return in; /* this is in fact the output .. */
-}
-
-static int decompose(int n, int *ifac, const int *ntryh) {
-  int nl = n, nf = 0, i, j = 0;
-  for (j=0; ntryh[j]; ++j) {
-    int ntry = ntryh[j];
-    while (nl != 1) {
-      int nq = nl / ntry;
-      int nr = nl - ntry * nq;
-      if (nr == 0) {
-        ifac[2+nf++] = ntry;
-        nl = nq;
-        if (ntry == 2 && nf != 1) {
-          for (i = 2; i <= nf; ++i) {
-            int ib = nf - i + 2;
-            ifac[ib + 1] = ifac[ib];
-          }
-          ifac[2] = 2;
-        }
-      } else break;
-    }
-  }
-  ifac[0] = n;
-  ifac[1] = nf;
-  return nf;
-}
-
-
-
-static void rffti1_ps(int n, float *wa, int *ifac)
-{
-  static const int ntryh[] = { 4,2,3,5,0 };
-  int k1, j, ii;
-
-  int nf = decompose(n,ifac,ntryh);
-  float argh = (float)((2*M_PI) / n);
-  int is = 0;
-  int nfm1 = nf - 1;
-  int l1 = 1;
-  for (k1 = 1; k1 <= nfm1; k1++) {
-    int ip = ifac[k1 + 1];
-    int ld = 0;
-    int l2 = l1*ip;
-    int ido = n / l2;
-    int ipm = ip - 1;
-    for (j = 1; j <= ipm; ++j) {
-      float argld;
-      int i = is, fi=0;
-      ld += l1;
-      argld = (float)ld*argh;
-      for (ii = 3; ii <= ido; ii += 2) {
-        i += 2;
-        fi += 1;
-        wa[i - 2] = cos((float)fi*argld);
-        wa[i - 1] = sin((float)fi*argld);
-      }
-      is += ido;
-    }
-    l1 = l2;
-  }
-} /* rffti1 */
-
-static
-void cffti1_ps(int n, float *wa, int *ifac)
-{
-  static const int ntryh[] = { 5,3,4,2,0 };
-  int k1, j, ii;
-
-  int nf = decompose(n,ifac,ntryh);
-  float argh = (float)((2*M_PI)/n);
-  int i = 1;
-  int l1 = 1;
-  for (k1=1; k1<=nf; k1++) {
-    int ip = ifac[k1+1];
-    int ld = 0;
-    int l2 = l1*ip;
-    int ido = n / l2;
-    int idot = ido + ido + 2;
-    int ipm = ip - 1;
-    for (j=1; j<=ipm; j++) {
-      float argld;
-      int i1 = i, fi = 0;
-      wa[i-1] = 1;
-      wa[i] = 0;
-      ld += l1;
-      argld = (float)ld*argh;
-      for (ii = 4; ii <= idot; ii += 2) {
-        i += 2;
-        fi += 1;
-        wa[i-1] = cos((float)fi*argld);
-        wa[i] = sin((float)fi*argld);
-      }
-      if (ip > 5) {
-        wa[i1-1] = wa[i-1];
-        wa[i1] = wa[i];
-      }
-    }
-    l1 = l2;
-  }
-} /* cffti1 */
-
-
-static
-v4sf *cfftf1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2, const float *wa, const int *ifac, int isign) {
-  v4sf *in  = (v4sf*)input_readonly;
-  v4sf *out = (in == work2 ? work1 : work2);
-  int nf = ifac[1], k1;
-  int l1 = 1;
-  int iw = 0;
-  assert(in != out && work1 != work2);
-  for (k1=2; k1<=nf+1; k1++) {
-    int ip = ifac[k1];
-    int l2 = ip*l1;
-    int ido = n / l2;
-    int idot = ido + ido;
-    switch (ip) {
-#if 0
-      case 5: {
-        int ix2 = iw + idot;
-        int ix3 = ix2 + idot;
-        int ix4 = ix3 + idot;
-        passf5_ps(idot, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], (float)isign);
-      } break;
-#endif
-      case 4: {
-        int ix2 = iw + idot;
-        int ix3 = ix2 + idot;
-        passf4_ps(idot, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3], (float)isign);
-      } break;
-      case 2: {
-        passf2_ps(idot, l1, in, out, &wa[iw], (float)isign);
-      } break;
-#if 0
-      case 3: {
-        int ix2 = iw + idot;
-        passf3_ps(idot, l1, in, out, &wa[iw], &wa[ix2], (float)isign);
-      } break;
-#endif
-      default:
-        assert(0);
-    }
-    l1 = l2;
-    iw += (ip - 1)*idot;
-    if (out == work2) {
-      out = work1; in = work2;
-    } else {
-      out = work2; in = work1;
-    }
-  }
-
-  return in; /* this is in fact the output .. */
-}
-
-
-struct PFFFT_Setup {
-  int     N;
-  int     Ncvec; /* nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if PFFFT_REAL) */
-  int ifac[15];
-  pffft_transform_t transform;
-  v4sf *data; /* allocated room for twiddle coefs */
-  float *e;    /* points into 'data' , N/4*3 elements */
-  float *twiddle; /* points into 'data', N/4 elements */
-};
-
-static
-PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform) {
-  PFFFT_Setup *s = (PFFFT_Setup*)malloc(sizeof(PFFFT_Setup));
-  int k, m;
-  if (!s) return s;
-  /* unfortunately, the fft size must be a multiple of 16 for complex FFTs
-     and 32 for real FFTs -- a lot of stuff would need to be rewritten to
-     handle other cases (or maybe just switch to a scalar fft, I don't know..) */
-  if (transform == PFFFT_REAL) { assert((N%(2*SIMD_SZ*SIMD_SZ))==0 && N>0); }
-  if (transform == PFFFT_COMPLEX) { assert((N%(SIMD_SZ*SIMD_SZ))==0 && N>0); }
-  /*assert((N % 32) == 0); */
-  s->N = N;
-  s->transform = transform;
-  /* nb of complex simd vectors */
-  s->Ncvec = (transform == PFFFT_REAL ? N/2 : N)/SIMD_SZ;
-  s->data = (v4sf*)pffft_aligned_malloc(2*(size_t)s->Ncvec * sizeof(v4sf));
-  if (!s->data) {free(s); return 0;}
-  s->e = (float*)s->data;
-  s->twiddle = (float*)(s->data + (2*s->Ncvec*(SIMD_SZ-1))/SIMD_SZ);
-
-  if (transform == PFFFT_REAL) {
-    for (k=0; k < s->Ncvec; ++k) {
-      int i = k/SIMD_SZ;
-      int j = k%SIMD_SZ;
-      for (m=0; m < SIMD_SZ-1; ++m) {
-        float A = (float)(-2*M_PI*(m+1)*k / N);
-        s->e[(2*(i*3 + m) + 0) * SIMD_SZ + j] = cos(A);
-        s->e[(2*(i*3 + m) + 1) * SIMD_SZ + j] = sin(A);
-      }
-    }
-    rffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac);
-  } else {
-    for (k=0; k < s->Ncvec; ++k) {
-      int i = k/SIMD_SZ;
-      int j = k%SIMD_SZ;
-      for (m=0; m < SIMD_SZ-1; ++m) {
-        float A = (float)(-2*M_PI*(m+1)*k / N);
-        s->e[(2*(i*3 + m) + 0)*SIMD_SZ + j] = cos(A);
-        s->e[(2*(i*3 + m) + 1)*SIMD_SZ + j] = sin(A);
-      }
-    }
-    cffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac);
-  }
-
-  /* check that N is decomposable with allowed prime factors */
-  for (k=0, m=1; k < s->ifac[1]; ++k) { m *= s->ifac[2+k]; }
-  if (m != N/SIMD_SZ) {
-    pffft_destroy_setup(s); s = 0;
-  }
-
-  return s;
-}
-
-
-static
-void pffft_destroy_setup(PFFFT_Setup *s) {
-  if (!s) return;
-  pffft_aligned_free(s->data);
-  free(s);
-}
-
-#if !defined(PFFFT_SIMD_DISABLE)
-
-/* [0 0 1 2 3 4 5 6 7 8] -> [0 8 7 6 5 4 3 2 1] */
-static void reversed_copy(int N, const v4sf *in, int in_stride, v4sf *out) {
-  v4sf g0, g1;
-  int k;
-  INTERLEAVE2(in[0], in[1], g0, g1); in += in_stride;
-
-  *--out = VSWAPHL(g0, g1); /* [g0l, g0h], [g1l g1h] -> [g1l, g0h] */
-  for (k=1; k < N; ++k) {
-    v4sf h0, h1;
-    INTERLEAVE2(in[0], in[1], h0, h1); in += in_stride;
-    *--out = VSWAPHL(g1, h0);
-    *--out = VSWAPHL(h0, h1);
-    g1 = h1;
-  }
-  *--out = VSWAPHL(g1, g0);
-}
-
-static void unreversed_copy(int N, const v4sf *in, v4sf *out, int out_stride) {
-  v4sf g0, g1, h0, h1;
-  int k;
-  g0 = g1 = in[0]; ++in;
-  for (k=1; k < N; ++k) {
-    h0 = *in++; h1 = *in++;
-    g1 = VSWAPHL(g1, h0);
-    h0 = VSWAPHL(h0, h1);
-    UNINTERLEAVE2(h0, g1, out[0], out[1]); out += out_stride;
-    g1 = h1;
-  }
-  h0 = *in++; h1 = g0;
-  g1 = VSWAPHL(g1, h0);
-  h0 = VSWAPHL(h0, h1);
-  UNINTERLEAVE2(h0, g1, out[0], out[1]);
-}
-
-static
-void pffft_zreorder(PFFFT_Setup *setup, const float *in, float *out, pffft_direction_t direction) {
-  int k, N = setup->N, Ncvec = setup->Ncvec;
-  const v4sf *vin = (const v4sf*)in;
-  v4sf *vout = (v4sf*)out;
-  assert(in != out);
-  if (setup->transform == PFFFT_REAL) {
-    int k, dk = N/32;
-    if (direction == PFFFT_FORWARD) {
-      for (k=0; k < dk; ++k) {
-        INTERLEAVE2(vin[k*8 + 0], vin[k*8 + 1], vout[2*(0*dk + k) + 0], vout[2*(0*dk + k) + 1]);
-        INTERLEAVE2(vin[k*8 + 4], vin[k*8 + 5], vout[2*(2*dk + k) + 0], vout[2*(2*dk + k) + 1]);
-      }
-      reversed_copy(dk, vin+2, 8, (v4sf*)(out + N/2));
-      reversed_copy(dk, vin+6, 8, (v4sf*)(out + N));
-    } else {
-      for (k=0; k < dk; ++k) {
-        UNINTERLEAVE2(vin[2*(0*dk + k) + 0], vin[2*(0*dk + k) + 1], vout[k*8 + 0], vout[k*8 + 1]);
-        UNINTERLEAVE2(vin[2*(2*dk + k) + 0], vin[2*(2*dk + k) + 1], vout[k*8 + 4], vout[k*8 + 5]);
-      }
-      unreversed_copy(dk, (v4sf*)(in + N/4), (v4sf*)(out + N - 6*SIMD_SZ), -8);
-      unreversed_copy(dk, (v4sf*)(in + 3*N/4), (v4sf*)(out + N - 2*SIMD_SZ), -8);
-    }
-  } else {
-    if (direction == PFFFT_FORWARD) {
-      for (k=0; k < Ncvec; ++k) {
-        int kk = (k/4) + (k%4)*(Ncvec/4);
-        INTERLEAVE2(vin[k*2], vin[k*2+1], vout[kk*2], vout[kk*2+1]);
-      }
-    } else {
-      for (k=0; k < Ncvec; ++k) {
-        int kk = (k/4) + (k%4)*(Ncvec/4);
-        UNINTERLEAVE2(vin[kk*2], vin[kk*2+1], vout[k*2], vout[k*2+1]);
-      }
-    }
-  }
-}
-
-static
-void pffft_cplx_finalize(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) {
-  int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */
-  v4sf r0, i0, r1, i1, r2, i2, r3, i3;
-  v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1;
-  assert(in != out);
-  for (k=0; k < dk; ++k) {
-    r0 = in[8*k+0]; i0 = in[8*k+1];
-    r1 = in[8*k+2]; i1 = in[8*k+3];
-    r2 = in[8*k+4]; i2 = in[8*k+5];
-    r3 = in[8*k+6]; i3 = in[8*k+7];
-    VTRANSPOSE4(r0,r1,r2,r3);
-    VTRANSPOSE4(i0,i1,i2,i3);
-    VCPLXMUL(r1,i1,e[k*6+0],e[k*6+1]);
-    VCPLXMUL(r2,i2,e[k*6+2],e[k*6+3]);
-    VCPLXMUL(r3,i3,e[k*6+4],e[k*6+5]);
-
-    sr0 = VADD(r0,r2); dr0 = VSUB(r0, r2);
-    sr1 = VADD(r1,r3); dr1 = VSUB(r1, r3);
-    si0 = VADD(i0,i2); di0 = VSUB(i0, i2);
-    si1 = VADD(i1,i3); di1 = VSUB(i1, i3);
-
-    /*
-      transformation for each column is:
-
-      [1   1   1   1   0   0   0   0]   [r0]
-      [1   0  -1   0   0  -1   0   1]   [r1]
-      [1  -1   1  -1   0   0   0   0]   [r2]
-      [1   0  -1   0   0   1   0  -1]   [r3]
-      [0   0   0   0   1   1   1   1] * [i0]
-      [0   1   0  -1   1   0  -1   0]   [i1]
-      [0   0   0   0   1  -1   1  -1]   [i2]
-      [0  -1   0   1   1   0  -1   0]   [i3]
-    */
-
-    r0 = VADD(sr0, sr1); i0 = VADD(si0, si1);
-    r1 = VADD(dr0, di1); i1 = VSUB(di0, dr1);
-    r2 = VSUB(sr0, sr1); i2 = VSUB(si0, si1);
-    r3 = VSUB(dr0, di1); i3 = VADD(di0, dr1);
-
-    *out++ = r0; *out++ = i0; *out++ = r1; *out++ = i1;
-    *out++ = r2; *out++ = i2; *out++ = r3; *out++ = i3;
-  }
-}
-
-static
-void pffft_cplx_preprocess(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) {
-  int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */
-  v4sf r0, i0, r1, i1, r2, i2, r3, i3;
-  v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1;
-  assert(in != out);
-  for (k=0; k < dk; ++k) {
-    r0 = in[8*k+0]; i0 = in[8*k+1];
-    r1 = in[8*k+2]; i1 = in[8*k+3];
-    r2 = in[8*k+4]; i2 = in[8*k+5];
-    r3 = in[8*k+6]; i3 = in[8*k+7];
-
-    sr0 = VADD(r0,r2); dr0 = VSUB(r0, r2);
-    sr1 = VADD(r1,r3); dr1 = VSUB(r1, r3);
-    si0 = VADD(i0,i2); di0 = VSUB(i0, i2);
-    si1 = VADD(i1,i3); di1 = VSUB(i1, i3);
-
-    r0 = VADD(sr0, sr1); i0 = VADD(si0, si1);
-    r1 = VSUB(dr0, di1); i1 = VADD(di0, dr1);
-    r2 = VSUB(sr0, sr1); i2 = VSUB(si0, si1);
-    r3 = VADD(dr0, di1); i3 = VSUB(di0, dr1);
-
-    VCPLXMULCONJ(r1,i1,e[k*6+0],e[k*6+1]);
-    VCPLXMULCONJ(r2,i2,e[k*6+2],e[k*6+3]);
-    VCPLXMULCONJ(r3,i3,e[k*6+4],e[k*6+5]);
-
-    VTRANSPOSE4(r0,r1,r2,r3);
-    VTRANSPOSE4(i0,i1,i2,i3);
-
-    *out++ = r0; *out++ = i0; *out++ = r1; *out++ = i1;
-    *out++ = r2; *out++ = i2; *out++ = r3; *out++ = i3;
-  }
-}
-
-
-static ALWAYS_INLINE(void) pffft_real_finalize_4x4(const v4sf *in0, const v4sf *in1, const v4sf *in,
-                            const v4sf *e, v4sf *out) {
-  v4sf r0, i0, r1, i1, r2, i2, r3, i3;
-  v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1;
-  r0 = *in0; i0 = *in1;
-  r1 = *in++; i1 = *in++; r2 = *in++; i2 = *in++; r3 = *in++; i3 = *in++;
-  VTRANSPOSE4(r0,r1,r2,r3);
-  VTRANSPOSE4(i0,i1,i2,i3);
-
-  /*
-    transformation for each column is:
-
-    [1   1   1   1   0   0   0   0]   [r0]
-    [1   0  -1   0   0  -1   0   1]   [r1]
-    [1   0  -1   0   0   1   0  -1]   [r2]
-    [1  -1   1  -1   0   0   0   0]   [r3]
-    [0   0   0   0   1   1   1   1] * [i0]
-    [0  -1   0   1  -1   0   1   0]   [i1]
-    [0  -1   0   1   1   0  -1   0]   [i2]
-    [0   0   0   0  -1   1  -1   1]   [i3]
-  */
-
-  /*cerr << "matrix initial, before e , REAL:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n"; */
-  /*cerr << "matrix initial, before e, IMAG :\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n"; */
-
-  VCPLXMUL(r1,i1,e[0],e[1]);
-  VCPLXMUL(r2,i2,e[2],e[3]);
-  VCPLXMUL(r3,i3,e[4],e[5]);
-
-  /*cerr << "matrix initial, real part:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n"; */
-  /*cerr << "matrix initial, imag part:\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n"; */
-
-  sr0 = VADD(r0,r2); dr0 = VSUB(r0,r2);
-  sr1 = VADD(r1,r3); dr1 = VSUB(r3,r1);
-  si0 = VADD(i0,i2); di0 = VSUB(i0,i2);
-  si1 = VADD(i1,i3); di1 = VSUB(i3,i1);
-
-  r0 = VADD(sr0, sr1);
-  r3 = VSUB(sr0, sr1);
-  i0 = VADD(si0, si1);
-  i3 = VSUB(si1, si0);
-  r1 = VADD(dr0, di1);
-  r2 = VSUB(dr0, di1);
-  i1 = VSUB(dr1, di0);
-  i2 = VADD(dr1, di0);
-
-  *out++ = r0;
-  *out++ = i0;
-  *out++ = r1;
-  *out++ = i1;
-  *out++ = r2;
-  *out++ = i2;
-  *out++ = r3;
-  *out++ = i3;
-
-}
-
-static NEVER_INLINE(void) pffft_real_finalize(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) {
-  int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */
-  /* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */
-
-  v4sf_union cr, ci, *uout = (v4sf_union*)out;
-  v4sf save = in[7], zero=VZERO();
-  float xr0, xi0, xr1, xi1, xr2, xi2, xr3, xi3;
-  static const float s = (float)(M_SQRT2/2);
-
-  cr.v = in[0]; ci.v = in[Ncvec*2-1];
-  assert(in != out);
-  pffft_real_finalize_4x4(&zero, &zero, in+1, e, out);
-
-  /*
-    [cr0 cr1 cr2 cr3 ci0 ci1 ci2 ci3]
-
-    [Xr(1)]  ] [1   1   1   1   0   0   0   0]
-    [Xr(N/4) ] [0   0   0   0   1   s   0  -s]
-    [Xr(N/2) ] [1   0  -1   0   0   0   0   0]
-    [Xr(3N/4)] [0   0   0   0   1  -s   0   s]
-    [Xi(1)   ] [1  -1   1  -1   0   0   0   0]
-    [Xi(N/4) ] [0   0   0   0   0  -s  -1  -s]
-    [Xi(N/2) ] [0  -1   0   1   0   0   0   0]
-    [Xi(3N/4)] [0   0   0   0   0  -s   1  -s]
-  */
-
-  xr0=(cr.f[0]+cr.f[2]) + (cr.f[1]+cr.f[3]); uout[0].f[0] = xr0;
-  xi0=(cr.f[0]+cr.f[2]) - (cr.f[1]+cr.f[3]); uout[1].f[0] = xi0;
-  xr2=(cr.f[0]-cr.f[2]);                     uout[4].f[0] = xr2;
-  xi2=(cr.f[3]-cr.f[1]);                     uout[5].f[0] = xi2;
-  xr1= ci.f[0] + s*(ci.f[1]-ci.f[3]);        uout[2].f[0] = xr1;
-  xi1=-ci.f[2] - s*(ci.f[1]+ci.f[3]);        uout[3].f[0] = xi1;
-  xr3= ci.f[0] - s*(ci.f[1]-ci.f[3]);        uout[6].f[0] = xr3;
-  xi3= ci.f[2] - s*(ci.f[1]+ci.f[3]);        uout[7].f[0] = xi3;
-
-  for (k=1; k < dk; ++k) {
-    v4sf save_next = in[8*k+7];
-    pffft_real_finalize_4x4(&save, &in[8*k+0], in + 8*k+1,
-                           e + k*6, out + k*8);
-    save = save_next;
-  }
-
-}
-
-static ALWAYS_INLINE(void) pffft_real_preprocess_4x4(const v4sf *in,
-                                             const v4sf *e, v4sf *out, int first) {
-  v4sf r0=in[0], i0=in[1], r1=in[2], i1=in[3], r2=in[4], i2=in[5], r3=in[6], i3=in[7];
-  /*
-    transformation for each column is:
-
-    [1   1   1   1   0   0   0   0]   [r0]
-    [1   0   0  -1   0  -1  -1   0]   [r1]
-    [1  -1  -1   1   0   0   0   0]   [r2]
-    [1   0   0  -1   0   1   1   0]   [r3]
-    [0   0   0   0   1  -1   1  -1] * [i0]
-    [0  -1   1   0   1   0   0   1]   [i1]
-    [0   0   0   0   1   1  -1  -1]   [i2]
-    [0   1  -1   0   1   0   0   1]   [i3]
-  */
-
-  v4sf sr0 = VADD(r0,r3), dr0 = VSUB(r0,r3);
-  v4sf sr1 = VADD(r1,r2), dr1 = VSUB(r1,r2);
-  v4sf si0 = VADD(i0,i3), di0 = VSUB(i0,i3);
-  v4sf si1 = VADD(i1,i2), di1 = VSUB(i1,i2);
-
-  r0 = VADD(sr0, sr1);
-  r2 = VSUB(sr0, sr1);
-  r1 = VSUB(dr0, si1);
-  r3 = VADD(dr0, si1);
-  i0 = VSUB(di0, di1);
-  i2 = VADD(di0, di1);
-  i1 = VSUB(si0, dr1);
-  i3 = VADD(si0, dr1);
-
-  VCPLXMULCONJ(r1,i1,e[0],e[1]);
-  VCPLXMULCONJ(r2,i2,e[2],e[3]);
-  VCPLXMULCONJ(r3,i3,e[4],e[5]);
-
-  VTRANSPOSE4(r0,r1,r2,r3);
-  VTRANSPOSE4(i0,i1,i2,i3);
-
-  if (!first) {
-    *out++ = r0;
-    *out++ = i0;
-  }
-  *out++ = r1;
-  *out++ = i1;
-  *out++ = r2;
-  *out++ = i2;
-  *out++ = r3;
-  *out++ = i3;
-}
-
-static NEVER_INLINE(void) pffft_real_preprocess(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) {
-  int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */
-  /* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */
-
-  v4sf_union Xr, Xi, *uout = (v4sf_union*)out;
-  float cr0, ci0, cr1, ci1, cr2, ci2, cr3, ci3;
-  static const float s = (float)M_SQRT2;
-  assert(in != out);
-  for (k=0; k < 4; ++k) {
-    Xr.f[k] = ((float*)in)[8*k];
-    Xi.f[k] = ((float*)in)[8*k+4];
-  }
-
-  pffft_real_preprocess_4x4(in, e, out+1, 1); /* will write only 6 values */
-
-  /*
-    [Xr0 Xr1 Xr2 Xr3 Xi0 Xi1 Xi2 Xi3]
-
-    [cr0] [1   0   2   0   1   0   0   0]
-    [cr1] [1   0   0   0  -1   0  -2   0]
-    [cr2] [1   0  -2   0   1   0   0   0]
-    [cr3] [1   0   0   0  -1   0   2   0]
-    [ci0] [0   2   0   2   0   0   0   0]
-    [ci1] [0   s   0  -s   0  -s   0  -s]
-    [ci2] [0   0   0   0   0  -2   0   2]
-    [ci3] [0  -s   0   s   0  -s   0  -s]
-  */
-  for (k=1; k < dk; ++k) {
-    pffft_real_preprocess_4x4(in+8*k, e + k*6, out-1+k*8, 0);
-  }
-
-  cr0=(Xr.f[0]+Xi.f[0]) + 2*Xr.f[2]; uout[0].f[0] = cr0;
-  cr1=(Xr.f[0]-Xi.f[0]) - 2*Xi.f[2]; uout[0].f[1] = cr1;
-  cr2=(Xr.f[0]+Xi.f[0]) - 2*Xr.f[2]; uout[0].f[2] = cr2;
-  cr3=(Xr.f[0]-Xi.f[0]) + 2*Xi.f[2]; uout[0].f[3] = cr3;
-  ci0= 2*(Xr.f[1]+Xr.f[3]);                       uout[2*Ncvec-1].f[0] = ci0;
-  ci1= s*(Xr.f[1]-Xr.f[3]) - s*(Xi.f[1]+Xi.f[3]); uout[2*Ncvec-1].f[1] = ci1;
-  ci2= 2*(Xi.f[3]-Xi.f[1]);                       uout[2*Ncvec-1].f[2] = ci2;
-  ci3=-s*(Xr.f[1]-Xr.f[3]) - s*(Xi.f[1]+Xi.f[3]); uout[2*Ncvec-1].f[3] = ci3;
-}
-
-
-static
-void pffft_transform_internal(PFFFT_Setup *setup, const float *finput, float *foutput, v4sf *scratch,
-                             pffft_direction_t direction, int ordered) {
-  int k, Ncvec   = setup->Ncvec;
-  int nf_odd = (setup->ifac[1] & 1);
-
-#if 0
-  /* temporary buffer is allocated on the stack if the scratch pointer is NULL */
-  int stack_allocate = (scratch == 0 ? Ncvec*2 : 1);
-  VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate);
-#endif
-
-  const v4sf *vinput = (const v4sf*)finput;
-  v4sf *voutput      = (v4sf*)foutput;
-  v4sf *buff[2];
-  int ib = (nf_odd ^ ordered ? 1 : 0);
-  buff[0] = voutput; buff[1] = scratch;
-
-  assert(VALIGNED(finput) && VALIGNED(foutput));
-
-  /*assert(finput != foutput); */
-  if (direction == PFFFT_FORWARD) {
-    ib = !ib;
-    if (setup->transform == PFFFT_REAL) {
-      ib = (rfftf1_ps(Ncvec*2, vinput, buff[ib], buff[!ib],
-                      setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1);
-      pffft_real_finalize(Ncvec, buff[ib], buff[!ib], (v4sf*)setup->e);
-    } else {
-      v4sf *tmp = buff[ib];
-      for (k=0; k < Ncvec; ++k) {
-        UNINTERLEAVE2(vinput[k*2], vinput[k*2+1], tmp[k*2], tmp[k*2+1]);
-      }
-      ib = (cfftf1_ps(Ncvec, buff[ib], buff[!ib], buff[ib],
-                      setup->twiddle, &setup->ifac[0], -1) == buff[0] ? 0 : 1);
-      pffft_cplx_finalize(Ncvec, buff[ib], buff[!ib], (v4sf*)setup->e);
-    }
-    if (ordered) {
-      pffft_zreorder(setup, (float*)buff[!ib], (float*)buff[ib], PFFFT_FORWARD);
-    } else ib = !ib;
-  } else {
-    if (vinput == buff[ib]) {
-      ib = !ib; /* may happen when finput == foutput */
-    }
-    if (ordered) {
-      pffft_zreorder(setup, (float*)vinput, (float*)buff[ib], PFFFT_BACKWARD);
-      vinput = buff[ib]; ib = !ib;
-    }
-    if (setup->transform == PFFFT_REAL) {
-      pffft_real_preprocess(Ncvec, vinput, buff[ib], (v4sf*)setup->e);
-      ib = (rfftb1_ps(Ncvec*2, buff[ib], buff[0], buff[1],
-                      setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1);
-    } else {
-      pffft_cplx_preprocess(Ncvec, vinput, buff[ib], (v4sf*)setup->e);
-      ib = (cfftf1_ps(Ncvec, buff[ib], buff[0], buff[1],
-                      setup->twiddle, &setup->ifac[0], +1) == buff[0] ? 0 : 1);
-      for (k=0; k < Ncvec; ++k) {
-        INTERLEAVE2(buff[ib][k*2], buff[ib][k*2+1], buff[ib][k*2], buff[ib][k*2+1]);
-      }
-    }
-  }
-
-  if (buff[ib] != voutput) {
-    /* extra copy required -- this situation should only happen when finput == foutput */
-    assert(finput==foutput);
-    for (k=0; k < Ncvec; ++k) {
-      v4sf a = buff[ib][2*k], b = buff[ib][2*k+1];
-      voutput[2*k] = a; voutput[2*k+1] = b;
-    }
-    ib = !ib;
-  }
-  assert(buff[ib] == voutput);
-}
-
-#if 0
-void pffft_zconvolve_accumulate(PFFFT_Setup *s, const float *a, const float *b, float *ab, float scaling) {
-  int Ncvec = s->Ncvec;
-  const v4sf * RESTRICT va = (const v4sf*)a;
-  const v4sf * RESTRICT vb = (const v4sf*)b;
-  v4sf * RESTRICT vab = (v4sf*)ab;
-
-#ifdef __arm__
-  __builtin_prefetch(va);
-  __builtin_prefetch(vb);
-  __builtin_prefetch(vab);
-  __builtin_prefetch(va+2);
-  __builtin_prefetch(vb+2);
-  __builtin_prefetch(vab+2);
-  __builtin_prefetch(va+4);
-  __builtin_prefetch(vb+4);
-  __builtin_prefetch(vab+4);
-  __builtin_prefetch(va+6);
-  __builtin_prefetch(vb+6);
-  __builtin_prefetch(vab+6);
-# ifndef __clang__
-#   define ZCONVOLVE_USING_INLINE_NEON_ASM
-# endif
-#endif
-
-  float ar, ai, br, bi, abr, abi;
-#ifndef ZCONVOLVE_USING_INLINE_ASM
-  v4sf vscal = LD_PS1(scaling);
-  int i;
-#endif
-
-  assert(VALIGNED(a) && VALIGNED(b) && VALIGNED(ab));
-  ar = ((v4sf_union*)va)[0].f[0];
-  ai = ((v4sf_union*)va)[1].f[0];
-  br = ((v4sf_union*)vb)[0].f[0];
-  bi = ((v4sf_union*)vb)[1].f[0];
-  abr = ((v4sf_union*)vab)[0].f[0];
-  abi = ((v4sf_union*)vab)[1].f[0];
-
-#ifdef ZCONVOLVE_USING_INLINE_ASM /* inline asm version, unfortunately miscompiled by clang 3.2, at least on ubuntu.. so this will be restricted to gcc */
-  const float *a_ = a, *b_ = b; float *ab_ = ab;
-  int N = Ncvec;
-  asm volatile("mov         r8, %2                  \n"
-               "vdup.f32    q15, %4                 \n"
-               "1:                                  \n"
-               "pld         [%0,#64]                \n"
-               "pld         [%1,#64]                \n"
-               "pld         [%2,#64]                \n"
-               "pld         [%0,#96]                \n"
-               "pld         [%1,#96]                \n"
-               "pld         [%2,#96]                \n"
-               "vld1.f32    {q0,q1},   [%0,:128]!         \n"
-               "vld1.f32    {q4,q5},   [%1,:128]!         \n"
-               "vld1.f32    {q2,q3},   [%0,:128]!         \n"
-               "vld1.f32    {q6,q7},   [%1,:128]!         \n"
-               "vld1.f32    {q8,q9},   [r8,:128]!          \n"
-
-               "vmul.f32    q10, q0, q4             \n"
-               "vmul.f32    q11, q0, q5             \n"
-               "vmul.f32    q12, q2, q6             \n"
-               "vmul.f32    q13, q2, q7             \n"
-               "vmls.f32    q10, q1, q5             \n"
-               "vmla.f32    q11, q1, q4             \n"
-               "vld1.f32    {q0,q1}, [r8,:128]!     \n"
-               "vmls.f32    q12, q3, q7             \n"
-               "vmla.f32    q13, q3, q6             \n"
-               "vmla.f32    q8, q10, q15            \n"
-               "vmla.f32    q9, q11, q15            \n"
-               "vmla.f32    q0, q12, q15            \n"
-               "vmla.f32    q1, q13, q15            \n"
-               "vst1.f32    {q8,q9},[%2,:128]!    \n"
-               "vst1.f32    {q0,q1},[%2,:128]!    \n"
-               "subs        %3, #2                  \n"
-               "bne         1b                      \n"
-               : "+r"(a_), "+r"(b_), "+r"(ab_), "+r"(N) : "r"(scaling) : "r8", "q0","q1","q2","q3","q4","q5","q6","q7","q8","q9", "q10","q11","q12","q13","q15","memory");
-#else /* default routine, works fine for non-arm cpus with current compilers */
-  for (i=0; i < Ncvec; i += 2) {
-    v4sf ar, ai, br, bi;
-    ar = va[2*i+0]; ai = va[2*i+1];
-    br = vb[2*i+0]; bi = vb[2*i+1];
-    VCPLXMUL(ar, ai, br, bi);
-    vab[2*i+0] = VMADD(ar, vscal, vab[2*i+0]);
-    vab[2*i+1] = VMADD(ai, vscal, vab[2*i+1]);
-    ar = va[2*i+2]; ai = va[2*i+3];
-    br = vb[2*i+2]; bi = vb[2*i+3];
-    VCPLXMUL(ar, ai, br, bi);
-    vab[2*i+2] = VMADD(ar, vscal, vab[2*i+2]);
-    vab[2*i+3] = VMADD(ai, vscal, vab[2*i+3]);
-  }
-#endif
-  if (s->transform == PFFFT_REAL) {
-    ((v4sf_union*)vab)[0].f[0] = abr + ar*br*scaling;
-    ((v4sf_union*)vab)[1].f[0] = abi + ai*bi*scaling;
-  }
-}
-#endif
-
-
-#else /* defined(PFFFT_SIMD_DISABLE) */
-
-/* standard routine using scalar floats, without SIMD stuff. */
-
-#define pffft_zreorder_nosimd pffft_zreorder
-static
-void pffft_zreorder_nosimd(PFFFT_Setup *setup, const float *in, float *out, pffft_direction_t direction) {
-  int k, N = setup->N;
-  if (setup->transform == PFFFT_COMPLEX) {
-    for (k=0; k < 2*N; ++k) out[k] = in[k];
-    return;
-  }
-  else if (direction == PFFFT_FORWARD) {
-    float x_N = in[N-1];
-    for (k=N-1; k > 1; --k) out[k] = in[k-1];
-    out[0] = in[0];
-    out[1] = x_N;
-  } else {
-    float x_N = in[1];
-    for (k=1; k < N-1; ++k) out[k] = in[k+1];
-    out[0] = in[0];
-    out[N-1] = x_N;
-  }
-}
-
-#define pffft_transform_internal_nosimd pffft_transform_internal
-static
-void pffft_transform_internal_nosimd(PFFFT_Setup *setup, const float *input, float *output, float *scratch,
-                                    pffft_direction_t direction, int ordered) {
-  int Ncvec   = setup->Ncvec;
-  int nf_odd = (setup->ifac[1] & 1);
-
-#if 0
-  /* temporary buffer is allocated on the stack if the scratch pointer is NULL */
-  int stack_allocate = (scratch == 0 ? Ncvec*2 : 1);
-  VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate);
-#endif
-  float *buff[2];
-  int ib;
-  /* if (scratch == 0) scratch = scratch_on_stack; */
-  buff[0] = output; buff[1] = scratch;
-
-  if (setup->transform == PFFFT_COMPLEX) ordered = 0; /* it is always ordered. */
-  ib = (nf_odd ^ ordered ? 1 : 0);
-
-  if (direction == PFFFT_FORWARD) {
-    if (setup->transform == PFFFT_REAL) {
-      ib = (rfftf1_ps(Ncvec*2, input, buff[ib], buff[!ib],
-                      setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1);
-    } else {
-      ib = (cfftf1_ps(Ncvec, input, buff[ib], buff[!ib],
-                      setup->twiddle, &setup->ifac[0], -1) == buff[0] ? 0 : 1);
-    }
-    if (ordered) {
-      pffft_zreorder(setup, buff[ib], buff[!ib], PFFFT_FORWARD); ib = !ib;
-    }
-  } else {
-    if (input == buff[ib]) {
-      ib = !ib; /* may happen when finput == foutput */
-    }
-    if (ordered) {
-      pffft_zreorder(setup, input, buff[!ib], PFFFT_BACKWARD);
-      input = buff[!ib];
-    }
-    if (setup->transform == PFFFT_REAL) {
-      ib = (rfftb1_ps(Ncvec*2, input, buff[ib], buff[!ib],
-                      setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1);
-    } else {
-      ib = (cfftf1_ps(Ncvec, input, buff[ib], buff[!ib],
-                      setup->twiddle, &setup->ifac[0], +1) == buff[0] ? 0 : 1);
-    }
-  }
-  if (buff[ib] != output) {
-    int k;
-    /* extra copy required -- this situation should happens only when finput == foutput */
-    assert(input==output);
-    for (k=0; k < Ncvec; ++k) {
-      float a = buff[ib][2*k], b = buff[ib][2*k+1];
-      output[2*k] = a; output[2*k+1] = b;
-    }
-    ib = !ib;
-  }
-  assert(buff[ib] == output);
-}
-
-#if 0
-#define pffft_zconvolve_accumulate_nosimd pffft_zconvolve_accumulate
-void pffft_zconvolve_accumulate_nosimd(PFFFT_Setup *s, const float *a, const float *b,
-                                       float *ab, float scaling) {
-  int i, Ncvec = s->Ncvec;
-
-  if (s->transform == PFFFT_REAL) {
-    /* take care of the fftpack ordering */
-    ab[0] += a[0]*b[0]*scaling;
-    ab[2*Ncvec-1] += a[2*Ncvec-1]*b[2*Ncvec-1]*scaling;
-    ++ab; ++a; ++b; --Ncvec;
-  }
-  for (i=0; i < Ncvec; ++i) {
-    float ar, ai, br, bi;
-    ar = a[2*i+0]; ai = a[2*i+1];
-    br = b[2*i+0]; bi = b[2*i+1];
-    VCPLXMUL(ar, ai, br, bi);
-    ab[2*i+0] += ar*scaling;
-    ab[2*i+1] += ai*scaling;
-  }
-}
-#endif
-
-#endif /* defined(PFFFT_SIMD_DISABLE) */
-
-static
-void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction) {
-  pffft_transform_internal(setup, input, output, (v4sf*)work, direction, 0);
-}
-
-static
-void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction) {
-  pffft_transform_internal(setup, input, output, (v4sf*)work, direction, 1);
-}
-
-#endif
diff --git a/soxr-sys/src/pffft.h b/soxr-sys/src/pffft.h
deleted file mode 100644
index 63522cacb..000000000
--- a/soxr-sys/src/pffft.h
+++ /dev/null
@@ -1,197 +0,0 @@
-/* https://bitbucket.org/jpommier/pffft/raw/483453d8f7661058e74aa4e7cf5c27bcd7887e7a/pffft.h
- * with minor changes for libsoxr. */
-
-#if !defined PFFT_MACROS_ONLY
-
-/* Copyright (c) 2013  Julien Pommier ( pommier@modartt.com )
-
-   Based on original fortran 77 code from FFTPACKv4 from NETLIB,
-   authored by Dr Paul Swarztrauber of NCAR, in 1985.
-
-   As confirmed by the NCAR fftpack software curators, the following
-   FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
-   released under the same terms.
-
-   FFTPACK license:
-
-   http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
-
-   Copyright (c) 2004 the University Corporation for Atmospheric
-   Research ("UCAR"). All rights reserved. Developed by NCAR's
-   Computational and Information Systems Laboratory, UCAR,
-   www.cisl.ucar.edu.
-
-   Redistribution and use of the Software in source and binary forms,
-   with or without modification, is permitted provided that the
-   following conditions are met:
-
-   - Neither the names of NCAR's Computational and Information Systems
-   Laboratory, the University Corporation for Atmospheric Research,
-   nor the names of its sponsors or contributors may be used to
-   endorse or promote products derived from this Software without
-   specific prior written permission.
-
-   - Redistributions of source code must retain the above copyright
-   notices, this list of conditions, and the disclaimer below.
-
-   - Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions, and the disclaimer below in the
-   documentation and/or other materials provided with the
-   distribution.
-
-   THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-   EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-   NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
-   HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
-   EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
-   SOFTWARE.
-*/
-
-/*
-   PFFFT : a Pretty Fast FFT.
-
-   This is basically an adaptation of the single precision fftpack
-   (v4) as found on netlib taking advantage of SIMD instruction found
-   on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON).
-
-   For architectures where no SIMD instruction is available, the code
-   falls back to a scalar version.
-
-   Restrictions:
-
-   - 1D transforms only, with 32-bit single precision.
-
-   - supports only transforms for inputs of length N of the form
-   N=(2^a)*(3^b)*(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128,
-   144, 160, etc are all acceptable lengths). Performance is best for
-   128<=N<=8192.
-
-   - all (float*) pointers in the functions below are expected to
-   have an "simd-compatible" alignment, that is 16 bytes on x86 and
-   powerpc CPUs.
-
-   You can allocate such buffers with the functions
-   pffft_aligned_malloc / pffft_aligned_free (or with stuff like
-   posix_memalign..)
-
-*/
-
-#ifndef PFFFT_H
-#define PFFFT_H
-
-#include <stddef.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if PFFFT_DOUBLE
-#define float double
-#endif
-
-  /* opaque struct holding internal stuff (precomputed twiddle factors)
-     this struct can be shared by many threads as it contains only
-     read-only data.
-  */
-  typedef struct PFFFT_Setup PFFFT_Setup;
-
-  /* direction of the transform */
-  typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t;
-
-  /* type of transform */
-  typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;
-
-  /*
-    prepare for performing transforms of size N -- the returned
-    PFFFT_Setup structure is read-only so it can safely be shared by
-    multiple concurrent threads.
-  */
-  static
-  PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform);
-  static
-  void pffft_destroy_setup(PFFFT_Setup *);
-  /*
-     Perform a Fourier transform , The z-domain data is stored in the
-     most efficient order for transforming it back, or using it for
-     convolution. If you need to have its content sorted in the
-     "usual" way, that is as an array of interleaved complex numbers,
-     either use pffft_transform_ordered , or call pffft_zreorder after
-     the forward fft, and before the backward fft.
-
-     Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x.
-     Typically you will want to scale the backward transform by 1/N.
-
-     The 'work' pointer should point to an area of N (2*N for complex
-     fft) floats, properly aligned. If 'work' is NULL, then stack will
-     be used instead (this is probably the best strategy for small
-     FFTs, say for N < 16384).
-
-     input and output may alias.
-  */
-  static
-  void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
-
-  /*
-     Similar to pffft_transform, but makes sure that the output is
-     ordered as expected (interleaved complex numbers).  This is
-     similar to calling pffft_transform and then pffft_zreorder.
-
-     input and output may alias.
-  */
-  static
-  void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
-
-  /*
-     call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(...,
-     PFFFT_FORWARD) if you want to have the frequency components in
-     the correct "canonical" order, as interleaved complex numbers.
-
-     (for real transforms, both 0-frequency and half frequency
-     components, which are real, are assembled in the first entry as
-     F(0)+i*F(n/2+1). Note that the original fftpack did place
-     F(n/2+1) at the end of the arrays).
-
-     input and output should not alias.
-  */
-  static
-  void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction);
-
-  /*
-     Perform a multiplication of the frequency components of dft_a and
-     dft_b and accumulate them into dft_ab. The arrays should have
-     been obtained with pffft_transform(.., PFFFT_FORWARD) and should
-     *not* have been reordered with pffft_zreorder (otherwise just
-     perform the operation yourself as the dft coefs are stored as
-     interleaved complex numbers).
-
-     the operation performed is: dft_ab += (dft_a * fdt_b)*scaling
-
-     The dft_a, dft_b and dft_ab pointers may alias.
-  */
-  void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
-
-  /*
-    the float buffers must have the correct alignment (16-byte boundary
-    on intel and powerpc). This function may be used to obtain such
-    correctly aligned buffers.
-  */
-#if 0
-  void *pffft_aligned_malloc(size_t nb_bytes);
-  void pffft_aligned_free(void *);
-
-  /* return 4 or 1 wether support SSE/Altivec instructions was enable when building pffft.c */
-  int pffft_simd_size();
-#endif
-
-#undef float
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
-
-#endif
diff --git a/soxr-sys/src/pffft32.c b/soxr-sys/src/pffft32.c
deleted file mode 100644
index f48080949..000000000
--- a/soxr-sys/src/pffft32.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#define SIMD_ALIGNED_FREE free
-#define SIMD_ALIGNED_MALLOC malloc
-#define PFFFT_SIMD_DISABLE
-#define PFFFT_DOUBLE 0
-#include "pffft-wrap.c"
-
-#include "filter.h"
-#include "rdft_t.h"
-
-static void * setup(int len) {return pffft_new_setup(len, PFFFT_REAL);}
-static void delete_setup(void * setup) {pffft_destroy_setup(setup);}
-static void forward  (int length, void * setup, float * h, float * scratch) {pffft_transform        (setup, h, h, scratch, PFFFT_FORWARD); (void)length;}
-static void oforward (int length, void * setup, float * h, float * scratch) {pffft_transform_ordered(setup, h, h, scratch, PFFFT_FORWARD); (void)length;}
-static void backward (int length, void * setup, float * H, float * scratch) {pffft_transform        (setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
-static void obackward(int length, void * setup, float * H, float * scratch) {pffft_transform_ordered(setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
-static void convolve(int length, void * setup, float * H, float const * with) { pffft_zconvolve(setup, H, with, H);  (void)length;}
-static int multiplier(void) {return 1;}
-static int flags(void) {return RDFT_NEEDS_SCRATCH;}
-
-fn_t _soxr_rdft32_cb[] = {
-  (fn_t)setup,
-  (fn_t)setup,
-  (fn_t)delete_setup,
-  (fn_t)forward,
-  (fn_t)oforward,
-  (fn_t)backward,
-  (fn_t)obackward,
-  (fn_t)convolve,
-  (fn_t)_soxr_ordered_partial_convolve_f,
-  (fn_t)multiplier,
-  (fn_t)pffft_reorder_back,
-  (fn_t)malloc,
-  (fn_t)calloc,
-  (fn_t)free,
-  (fn_t)flags,
-};
diff --git a/soxr-sys/src/pffft32s.c b/soxr-sys/src/pffft32s.c
deleted file mode 100644
index 7798a45c0..000000000
--- a/soxr-sys/src/pffft32s.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#define PFFFT_DOUBLE 0
-#include "pffft-wrap.c"
-
-#include "rdft_t.h"
-
-static void * setup(int len) {return pffft_new_setup(len, PFFFT_REAL);}
-static void forward  (int length, void * setup, float * h, float * scratch) {pffft_transform        (setup, h, h, scratch, PFFFT_FORWARD); (void)length;}
-static void oforward (int length, void * setup, float * h, float * scratch) {pffft_transform_ordered(setup, h, h, scratch, PFFFT_FORWARD); (void)length;}
-static void backward (int length, void * setup, float * H, float * scratch) {pffft_transform        (setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
-static void obackward(int length, void * setup, float * H, float * scratch) {pffft_transform_ordered(setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
-static void convolve(int length, void * setup, float * H, float const * with) {pffft_zconvolve(setup, H, with, H); (void)length;}
-static int multiplier(void) {return 1;}
-static int flags(void) {return RDFT_IS_SIMD | RDFT_NEEDS_SCRATCH;}
-
-fn_t _soxr_rdft32s_cb[] = {
-  (fn_t)setup,
-  (fn_t)setup,
-  (fn_t)pffft_destroy_setup,
-  (fn_t)forward,
-  (fn_t)oforward,
-  (fn_t)backward,
-  (fn_t)obackward,
-  (fn_t)convolve,
-  (fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD,
-  (fn_t)multiplier,
-  (fn_t)pffft_reorder_back,
-  (fn_t)SIMD_ALIGNED_MALLOC,
-  (fn_t)SIMD_ALIGNED_CALLOC,
-  (fn_t)SIMD_ALIGNED_FREE,
-  (fn_t)flags,
-};
diff --git a/soxr-sys/src/pffft64s.c b/soxr-sys/src/pffft64s.c
deleted file mode 100644
index 7c37c9d4d..000000000
--- a/soxr-sys/src/pffft64s.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#define PFFFT_DOUBLE 1
-#include "pffft-wrap.c"
-
-#include "rdft_t.h"
-
-static void * setup(int len) {return pffft_new_setup(len, PFFFT_REAL);}
-static void forward  (int length, void * setup, double * h, double * scratch) {pffft_transform        (setup, h, h, scratch, PFFFT_FORWARD); (void)length;}
-static void oforward (int length, void * setup, double * h, double * scratch) {pffft_transform_ordered(setup, h, h, scratch, PFFFT_FORWARD); (void)length;}
-static void backward (int length, void * setup, double * H, double * scratch) {pffft_transform        (setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
-static void obackward(int length, void * setup, double * H, double * scratch) {pffft_transform_ordered(setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
-static void convolve(int length, void * setup, double * H, double const * with) {pffft_zconvolve(setup, H, with, H); (void)length;}
-static int multiplier(void) {return 1;}
-static int flags(void) {return RDFT_IS_SIMD | RDFT_NEEDS_SCRATCH;}
-
-fn_t _soxr_rdft64s_cb[] = {
-  (fn_t)setup,
-  (fn_t)setup,
-  (fn_t)pffft_destroy_setup,
-  (fn_t)forward,
-  (fn_t)oforward,
-  (fn_t)backward,
-  (fn_t)obackward,
-  (fn_t)convolve,
-  (fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD,
-  (fn_t)multiplier,
-  (fn_t)pffft_reorder_back,
-  (fn_t)SIMD_ALIGNED_MALLOC,
-  (fn_t)SIMD_ALIGNED_CALLOC,
-  (fn_t)SIMD_ALIGNED_FREE,
-  (fn_t)flags,
-};
diff --git a/soxr-sys/src/poly-fir.h b/soxr-sys/src/poly-fir.h
deleted file mode 100644
index d138e030f..000000000
--- a/soxr-sys/src/poly-fir.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-/* Resample using an interpolated poly-phase FIR with length LEN. */
-/* Input must be followed by FIR_LENGTH-1 samples. */
-
-#if COEF_INTERP != 1 && COEF_INTERP != 2 && COEF_INTERP != 3
-  #error COEF_INTERP
-#endif
-
-#if SIMD_AVX || SIMD_SSE || SIMD_NEON
-  #define N (FIR_LENGTH>>2)
-
-  #if COEF_INTERP == 1
-    #define _ sum=vMac(vMac(b,X,a),vLdu(in+j*4),sum), ++j;
-  #elif COEF_INTERP == 2
-    #define _ sum=vMac(vMac(vMac(c,X,b),X,a),vLdu(in+j*4),sum), ++j;
-  #else
-    #define _ sum=vMac(vMac(vMac(vMac(d,X,c),X,b),X,a),vLdu(in+j*4),sum), ++j;
-  #endif
-
-  #define a coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-0)]
-  #define b coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-1)]
-  #define c coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-2)]
-  #define d coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-3)]
-
-  #define BEGINNING v4_t X = vLds(x), sum = vZero(); \
-      v4_t const * const __restrict coefs = (v4_t *)COEFS
-  #define END vStorSum(output+i, sum)
-  #define cc(n) case n: core(n); break
-  #define CORE(n) switch (n) {cc(2); cc(3); cc(4); cc(5); cc(6); default: core(n);}
-#else
-  #define N FIR_LENGTH
-
-  #if COEF_INTERP == 1
-    #define _ sum += (b*x + a)*in[j], ++j;
-  #elif COEF_INTERP == 2
-    #define _ sum += ((c*x + b)*x + a)*in[j], ++j;
-  #else
-    #define _ sum += (((d*x + c)*x + b)*x + a)*in[j], ++j;
-  #endif
-
-  #define a (coef(COEFS, COEF_INTERP, N, phase, 0,j))
-  #define b (coef(COEFS, COEF_INTERP, N, phase, 1,j))
-  #define c (coef(COEFS, COEF_INTERP, N, phase, 2,j))
-  #define d (coef(COEFS, COEF_INTERP, N, phase, 3,j))
-
-  #define BEGINNING sample_t sum = 0
-  #define END output[i] = sum
-  #define CORE(n) core(n)
-#endif
-
-
-
-#define floatPrecCore(n) { \
-  float_step_t at = p->at.flt; \
-  for (i = 0; (int)at < num_in; ++i, at += p->step.flt) { \
-    sample_t const * const __restrict in = input + (int)at; \
-    float_step_t frac = at - (int)at; \
-    int phase = (int)(frac * (1 << PHASE_BITS)); \
-    sample_t x = (sample_t)(frac * (1 << PHASE_BITS) - phase); \
-    int j = 0; \
-    BEGINNING; CONVOLVE(n); END; \
-  } \
-  fifo_read(&p->fifo, (int)at, NULL); \
-  p->at.flt = at - (int)at; } /* Could round to 1 in some cirmcumstances. */
-
-
-
-#define highPrecCore(n) { \
-  step_t at; at.fix = p->at.fix; \
-  for (i = 0; at.integer < num_in; ++i, \
-      at.fix.ls.all += p->step.fix.ls.all, \
-      at.whole += p->step.whole + (at.fix.ls.all < p->step.fix.ls.all)) { \
-    sample_t const * const __restrict in = input + at.integer; \
-    uint32_t frac = at.fraction; \
-    int phase = (int)(frac >> (32 - PHASE_BITS)); /* High-order bits */ \
-    /* Low-order bits, scaled to [0,1): */ \
-    sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32)); \
-    int j = 0; \
-    BEGINNING; CONVOLVE(n); END; \
-  } \
-  fifo_read(&p->fifo, at.integer, NULL); \
-  p->at.whole = at.fraction; \
-  p->at.fix.ls = at.fix.ls; }
-
-
-
-#define stdPrecCore(n) { \
-  int64p_t at; at.all = p->at.whole; \
-  for (i = 0; at.parts.ms < num_in; ++i, at.all += p->step.whole) { \
-    sample_t const * const __restrict in = input + at.parts.ms; \
-    uint32_t const frac = at.parts.ls; \
-    int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */ \
-    /* Low-order bits, scaled to [0,1): */ \
-    sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32)); \
-    int j = 0; \
-    BEGINNING; CONVOLVE(n); END; \
-  } \
-  fifo_read(&p->fifo, at.parts.ms, NULL); \
-  p->at.whole = at.parts.ls; }
-
-
-
-#if WITH_FLOAT_STD_PREC_CLOCK
-  #define SPCORE floatPrecCore
-#else
-  #define SPCORE stdPrecCore
-#endif
-
-
-
-#if WITH_HI_PREC_CLOCK
-  #define core(n) if (p->use_hi_prec_clock) highPrecCore(n) else SPCORE(n)
-#else
-  #define core(n) SPCORE(n)
-#endif
-
-
-
-static void FUNCTION(stage_t * p, fifo_t * output_fifo)
-{
-  sample_t const * input = stage_read_p(p);
-  int num_in = min(stage_occupancy(p), p->input_size);
-  int i, max_num_out = 1 + (int)(num_in * p->out_in_ratio);
-  sample_t * const __restrict output = fifo_reserve(output_fifo, max_num_out);
-
-  CORE(N);
-  assert(max_num_out - i >= 0);
-  fifo_trim_by(output_fifo, max_num_out - i);
-}
-
-
-
-#undef _
-#undef a
-#undef b
-#undef c
-#undef d
-#undef CORE
-#undef cc
-#undef core
-#undef COEF_INTERP
-#undef N
-#undef BEGINNING
-#undef END
-#undef CONVOLVE
-#undef FIR_LENGTH
-#undef FUNCTION
-#undef PHASE_BITS
diff --git a/soxr-sys/src/poly-fir0.h b/soxr-sys/src/poly-fir0.h
deleted file mode 100644
index 76fca2d6b..000000000
--- a/soxr-sys/src/poly-fir0.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-/* Resample using a non-interpolated poly-phase FIR with length LEN. */
-/* Input must be followed by FIR_LENGTH-1 samples. */
-
-#if SIMD_AVX || SIMD_SSE || SIMD_NEON
-  #define N (FIR_LENGTH>>2)
-  #define BEGINNING v4_t sum = vZero(); \
-      v4_t const * const __restrict coefs = (v4_t *)COEFS + N * rem;
-  #define _ sum = vMac(vLdu(at+j*4), coefs[j], sum), ++j;
-  #define END vStorSum(output+i, sum)
-  #define cc(n) case n: core(n); break
-  #define CORE(n) switch (n) {cc(2); cc(3); cc(4); cc(5); cc(6); default: core(n);}
-#else
-  #define N FIR_LENGTH
-  #define BEGINNING sample_t sum = 0; \
-      sample_t const * const __restrict coefs = (sample_t *)COEFS + N * rem;
-  #define _ sum += coefs[j]*at[j], ++j;
-  #define END output[i] = sum
-  #define CORE(n) core(n)
-#endif
-
-#define core(n) \
-  for (i = 0; at < num_in * p->L; ++i, at += step) { \
-    int const div = at / p->L, rem = at % p->L; \
-    sample_t const * const __restrict at = input + div; \
-    int j = 0; BEGINNING; CONVOLVE(n); END;}
-
-static void FUNCTION(stage_t * p, fifo_t * output_fifo)
-{
-  int num_in = min(stage_occupancy(p), p->input_size);
-  if (num_in) {
-    sample_t const * input = stage_read_p(p);
-    int at = p->at.integer, step = p->step.integer;
-    int i, num_out = (num_in * p->L - at + step - 1) / step;
-    sample_t * __restrict output = fifo_reserve(output_fifo, num_out);
-
-    CORE(N);
-    assert(i == num_out);
-    fifo_read(&p->fifo, at / p->L, NULL);
-    p->at.integer = at % p->L;
-  }
-}
-
-#undef _
-#undef CORE
-#undef cc
-#undef core
-#undef N
-#undef BEGINNING
-#undef MIDDLE
-#undef END
-#undef CONVOLVE
-#undef FIR_LENGTH
-#undef FUNCTION
diff --git a/soxr-sys/src/rdft.h b/soxr-sys/src/rdft.h
deleted file mode 100644
index 59ba17417..000000000
--- a/soxr-sys/src/rdft.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-void ORDERED_CONVOLVE(int n, void * not_used, DFT_FLOAT * a, const DFT_FLOAT * b)
-{
-  int i;
-  a[0] *= b[0];
-  a[1] *= b[1];
-  for (i = 2; i < n; i += 2) {
-    DFT_FLOAT tmp = a[i];
-    a[i  ] = b[i  ] * tmp - b[i+1] * a[i+1];
-    a[i+1] = b[i+1] * tmp + b[i  ] * a[i+1];
-  }
-  (void)not_used;
-}
-
-void ORDERED_PARTIAL_CONVOLVE(int n, DFT_FLOAT * a, const DFT_FLOAT * b)
-{
-  int i;
-  a[0] *= b[0];
-  for (i = 2; i < n; i += 2) {
-    DFT_FLOAT tmp = a[i];
-    a[i  ] = b[i  ] * tmp - b[i+1] * a[i+1];
-    a[i+1] = b[i+1] * tmp + b[i  ] * a[i+1];
-  }
-  a[1] = b[i] * a[i] - b[i+1] * a[i+1];
-}
-
-#undef ORDERED_CONVOLVE
-#undef ORDERED_PARTIAL_CONVOLVE
-#undef DFT_FLOAT
diff --git a/soxr-sys/src/rdft_t.h b/soxr-sys/src/rdft_t.h
deleted file mode 100644
index 293d9c37b..000000000
--- a/soxr-sys/src/rdft_t.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-typedef void (* fn_t)(void);
-
-#define rdft_forward_setup    (*(void * (*)(int))RDFT_CB[0])
-#define rdft_backward_setup   (*(void * (*)(int))RDFT_CB[1])
-#define rdft_delete_setup     (*(void (*)(void *))RDFT_CB[2])
-#define rdft_forward          (*(void (*)(int, void *, void *, void *))RDFT_CB[3])
-#define rdft_oforward         (*(void (*)(int, void *, void *, void *))RDFT_CB[4])
-#define rdft_backward         (*(void (*)(int, void *, void *, void *))RDFT_CB[5])
-#define rdft_obackward        (*(void (*)(int, void *, void *, void *))RDFT_CB[6])
-#define rdft_convolve         (*(void (*)(int, void *, void *, void const *))RDFT_CB[7])
-#define rdft_convolve_portion (*(void (*)(int, void *, void const *))RDFT_CB[8])
-#define rdft_multiplier       (*(int (*)(void))RDFT_CB[9])
-#define rdft_reorder_back     (*(void (*)(int, void *, void *, void *))RDFT_CB[10])
-#define rdft_malloc           (*(void * (*)(size_t))RDFT_CB[11])
-#define rdft_calloc           (*(void * (*)(size_t, size_t))RDFT_CB[12])
-#define rdft_free             (*(void (*)(void *))RDFT_CB[13])
-#define rdft_flags            (*(int (*)(void))RDFT_CB[14])
-
-/* Flag templates: */
-#define RDFT_IS_SIMD       1
-#define RDFT_NEEDS_SCRATCH 2
diff --git a/soxr-sys/src/rint-clip.h b/soxr-sys/src/rint-clip.h
deleted file mode 100644
index 3294f4eaf..000000000
--- a/soxr-sys/src/rint-clip.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#if defined DITHER
-
-#define DITHERING + (1./32)*(int)(((ran1>>=3)&31)-((ran2>>=3)&31))
-#define DITHER_RAND (seed = 1664525ULL * seed + 1013904223ULL) >> 3
-#define DITHER_VARS unsigned long long ran1 = DITHER_RAND, ran2 = DITHER_RAND
-#define SEED_ARG , unsigned long long * seed0
-#define SAVE_SEED *seed0 = seed
-#define COPY_SEED unsigned long long seed = *seed0;
-#define COPY_SEED1 unsigned long long seed1 = seed
-#define PASS_SEED1 , &seed1
-#define PASS_SEED  , &seed
-#define FLOATD double
-
-#else
-
-#define DITHERING
-#define DITHER_VARS
-#define SEED_ARG
-#define SAVE_SEED
-#define COPY_SEED
-#define COPY_SEED1
-#define PASS_SEED1
-#define PASS_SEED
-#define FLOATD FLOATX
-
-#endif
-
-#define DO_16 _;_;_;_;_;_;_;_;_;_;_;_;_;_;_;_
-
-
-
-#if defined FE_INVALID && defined FPU_RINT
-static void RINT_CLIP(RINT_T * const dest, FLOATX const * const src,
-    unsigned stride, size_t i, size_t const n, size_t * const clips SEED_ARG)
-{
-  COPY_SEED
-  DITHER_VARS;
-  for (; i < n; ++i) {
-    fe_clear_invalid();
-    FLOATD const d = src[i] DITHERING;
-    RINT(dest[stride * i], d);
-    if (fe_test_invalid()) {
-      fe_clear_invalid();
-      dest[stride * i] = d > 0? RINT_MAX : -RINT_MAX - 1;
-      ++*clips;
-    }
-  }
-  SAVE_SEED;
-}
-#endif
-
-
-
-static size_t LSX_RINT_CLIP(void * * const dest0, FLOATX const * const src,
-    size_t const n SEED_ARG)
-{
-  size_t i, clips = 0;
-  RINT_T * dest = *dest0;
-  COPY_SEED
-#if defined FE_INVALID && defined FPU_RINT
-#define _ RINT(dest[i], src[i] DITHERING); ++i
-  for (i = 0; i < (n & ~15u);) {
-    fe_clear_invalid();
-    COPY_SEED1;
-    DITHER_VARS;
-    DO_16;
-    if (fe_test_invalid()) {
-      fe_clear_invalid();
-      RINT_CLIP(dest, src, 1, i - 16, i, &clips PASS_SEED1);
-    }
-  }
-  RINT_CLIP(dest, src, 1, i, n, &clips PASS_SEED);
-#else
-#define _ d = src[i] DITHERING, dest[i++] = (RINT_T)(d > 0? \
-    d+.5 >= N? ++clips, N-1 : d+.5 : d-.5 <= -N-1? ++clips, -N:d-.5)
-  const double N = 1. + RINT_MAX;
-  double d;
-  for (i = 0; i < (n & ~15u);) {
-    DITHER_VARS;
-    DO_16;
-  }
-  {
-    DITHER_VARS;
-    for (; i < n; _);
-  }
-#endif
-  SAVE_SEED;
-  *dest0 = dest + n;
-  return clips;
-}
-#undef _
-
-
-
-static size_t LSX_RINT_CLIP_2(void * * dest0, FLOATX const * const * srcs,
-    unsigned const stride, size_t const n SEED_ARG)
-{
-  unsigned j;
-  size_t i, clips = 0;
-  RINT_T * dest = *dest0;
-  COPY_SEED
-#if defined FE_INVALID && defined FPU_RINT
-#define _ RINT(dest[stride * i], src[i] DITHERING); ++i
-  for (j = 0; j < stride; ++j, ++dest) {
-    FLOATX const * const src = srcs[j];
-    for (i = 0; i < (n & ~15u);) {
-      fe_clear_invalid();
-      COPY_SEED1;
-      DITHER_VARS;
-      DO_16;
-      if (fe_test_invalid()) {
-        fe_clear_invalid();
-        RINT_CLIP(dest, src, stride, i - 16, i, &clips PASS_SEED1);
-      }
-    }
-    RINT_CLIP(dest, src, stride, i, n, &clips PASS_SEED);
-  }
-#else
-#define _ d = src[i] DITHERING, dest[stride * i++] = (RINT_T)(d > 0? \
-    d+.5 >= N? ++clips, N-1 : d+.5 : d-.5 <= -N-1? ++clips, -N:d-.5)
-  const double N = 1. + RINT_MAX;
-  double d;
-  for (j = 0; j < stride; ++j, ++dest) {
-    FLOATX const * const src = srcs[j];
-    for (i = 0; i < (n & ~15u);) {
-      DITHER_VARS;
-      DO_16;
-    }
-    {
-      DITHER_VARS;
-      for (; i < n; _);
-    }
-  }
-#endif
-  SAVE_SEED;
-  *dest0 = dest + stride * (n - 1);
-  return clips;
-}
-#undef _
-
-#undef FLOATD
-#undef PASS_SEED
-#undef PASS_SEED1
-#undef COPY_SEED1
-#undef COPY_SEED
-#undef SAVE_SEED
-#undef SEED_ARG
-#undef DITHER_VARS
-#undef DITHERING
-#undef DITHER
-
-#undef RINT_MAX
-#undef RINT_T
-#undef FPU_RINT
-#undef RINT
-#undef RINT_CLIP
-#undef LSX_RINT_CLIP
-#undef LSX_RINT_CLIP_2
diff --git a/soxr-sys/src/rint.h b/soxr-sys/src/rint.h
deleted file mode 100644
index 2f1dfbed6..000000000
--- a/soxr-sys/src/rint.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#if !defined soxr_rint_included
-#define soxr_rint_included
-
-#include "std-types.h"
-
-/* For x86, compiler-supplied versions of these functions (where available)
- * can have poor performance (e.g. mingw32), so prefer these asm versions: */
-
-#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
-  #define FPU_RINT32
-  #define FPU_RINT16
-  #define rint32D(a,b) __asm__ __volatile__("fistpl %0": "=m"(a): "t"(b): "st")
-  #define rint16D(a,b) __asm__ __volatile__("fistps %0": "=m"(a): "t"(b): "st")
-  #define rint32F rint32D
-  #define rint16F rint16D
-  #define FE_INVALID 1
-  static __inline int fe_test_invalid(void) {
-    int status_word;
-    __asm__ __volatile__("fnstsw %%ax": "=a"(status_word));
-    return status_word & FE_INVALID;
-  }
-  static __inline int fe_clear_invalid(void) {
-    int32_t status[7];
-    __asm__ __volatile__("fnstenv %0": "=m"(status));
-    status[1] &= ~FE_INVALID;
-    __asm__ __volatile__("fldenv %0": : "m"(*status));
-    return 0;
-  }
-#elif defined _MSC_VER && defined _M_IX86
-  #define FPU_RINT32
-  #define FPU_RINT16
-  #define rint_fn(N,Y,X) \
-    static __inline void N(Y *y, X x) {Y t; {__asm fld x __asm fistp t} *y=t;}
-  rint_fn(rint32d, int32_t, double)
-  rint_fn(rint32f, int32_t, float )
-  rint_fn(rint16d, int16_t, double)
-  rint_fn(rint16f, int16_t, float )
-  #define rint32D(y,x) rint32d(&(y),x)
-  #define rint32F(y,x) rint32f(&(y),x)
-  #define rint16D(y,x) rint16d(&(y),x)
-  #define rint16F(y,x) rint16f(&(y),x)
-  #define FE_INVALID 1
-  static __inline int fe_test_invalid(void) {
-    short status_word;
-    __asm fnstsw status_word
-    return status_word & FE_INVALID;
-  }
-  static __inline int fe_clear_invalid(void) {
-    int32_t status[7];
-    __asm fnstenv status
-    status[1] &= ~FE_INVALID;
-    __asm fldenv status
-    return 0;
-  }
-#elif defined _MSC_VER && defined _M_X64
-  #include <emmintrin.h>
-  #include <float.h>
-  #define FPU_RINT32
-  #define FPU_RINT16
-  static __inline void rint32d(int32_t *y, double x) {
-    *y = _mm_cvtsd_si32(_mm_load_sd(&x));}
-  static __inline void rint32f(int32_t *y, float  x) {
-    *y = _mm_cvtss_si32(_mm_load_ss(&x));}
-  static __inline void rint16d(int16_t *y, double x) {
-    x = x*65536+32738; *y = (int16_t)(_mm_cvtsd_si32(_mm_load_sd(&x)) >> 16);}
-  #define rint32D(y,x) rint32d(&(y),x)
-  #define rint32F(y,x) rint32f(&(y),x)
-  #define rint16D(y,x) rint16d(&(y),x)
-  #define rint16F(y,x) rint16d(&(y),(double)(x))
-  #define FE_INVALID 1
-  #define fe_test_invalid() (_statusfp() & _SW_INVALID)
-  #define fe_clear_invalid _clearfp /* Note: clears all. */
-#elif HAVE_LRINT && LONG_MAX == 2147483647L && HAVE_FENV_H
-  #include <math.h>
-  #include <fenv.h>
-  #define FPU_RINT32
-  #define rint32D(y,x) ((y)=lrint(x))
-  #define rint32F(y,x) ((y)=lrintf(x))
-  #define fe_test_invalid() fetestexcept(FE_INVALID)
-  #define fe_clear_invalid() feclearexcept(FE_INVALID)
-#endif
-
-#if !defined FPU_RINT32
-  #define rint32D(y,x) ((y)=(int32_t)((x) < 0? x - .5 : x + .5))
-  #define rint32F(y,x) rint32D(y,(double)(x))
-#endif
-
-#if !defined FPU_RINT16
-  #define rint16D(y,x) ((y)=(int16_t)((x) < 0? x - .5 : x + .5))
-  #define rint16F(y,x) rint16D(y,(double)(x))
-#endif
-
-static __inline int32_t rint32(double input) {
-  int32_t result; rint32D(result, input); return result;}
-
-static __inline int16_t rint16(double input) {
-  int16_t result; rint16D(result, input); return result;}
-
-#endif
diff --git a/soxr-sys/src/samplerate.h b/soxr-sys/src/samplerate.h
deleted file mode 100644
index 911cc5d0c..000000000
--- a/soxr-sys/src/samplerate.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "soxr-lsr.h"
diff --git a/soxr-sys/src/soxr-config.h b/soxr-sys/src/soxr-config.h
deleted file mode 100644
index a559b5f10..000000000
--- a/soxr-sys/src/soxr-config.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-
-#if !defined soxr_config_included
-#define soxr_config_included
-
-#define AVCODEC_FOUND 0
-#define AVUTIL_FOUND 0
-#define WITH_PFFFT 0
-
-#define HAVE_FENV_H 1
-#define HAVE_STDBOOL_H 1
-#define HAVE_STDINT_H 1
-#define HAVE_LRINT 0
-#define HAVE_BIGENDIAN 0
-
-#define WITH_CR32 1
-#define WITH_CR32S 0
-#define WITH_CR64 0
-#define WITH_CR64S 0
-#define WITH_VR32 1
-
-#define WITH_HI_PREC_CLOCK 0
-#define WITH_FLOAT_STD_PREC_CLOCK 0
-#define WITH_DEV_TRACE 0
-
-#endif
diff --git a/soxr-sys/src/soxr-lsr.c b/soxr-sys/src/soxr-lsr.c
deleted file mode 100644
index 58ab50a21..000000000
--- a/soxr-sys/src/soxr-lsr.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-18 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-/* Wrapper mostly compatible with `libsamplerate'. */
-
-#include <assert.h>
-#include <stdlib.h>
-#include "soxr.h"
-#include "soxr-lsr.h"
-#include "rint.h"
-
-
-
-SRC_STATE *src_new(SRC_SRCTYPE id, int channels, SRC_ERROR * error)
-{
-  return src_callback_new(0, id, channels, error, 0);
-}
-
-
-
-SRC_ERROR src_process(SRC_STATE *p, SRC_DATA * io)
-{
-  size_t idone , odone;
-
-  if (!p || !io) return -1;
-
-  soxr_set_error(
-      p, soxr_set_io_ratio(p, 1/io->src_ratio, (size_t)io->output_frames));
-
-  soxr_process(p, io->data_in,                                  /* hack: */
-      (size_t)(io->end_of_input? ~io->input_frames : io->input_frames),
-      &idone, io->data_out, (size_t)io->output_frames, &odone);
-
-  io->input_frames_used = (long)idone, io->output_frames_gen = (long)odone;
-  return -!!soxr_error(p);
-}
-
-
-
-SRC_ERROR src_set_ratio(SRC_STATE * p, double oi_ratio)
-{
-  return -!!soxr_set_io_ratio(p, 1/oi_ratio, 0);
-}
-
-
-
-SRC_ERROR src_reset(SRC_STATE * p)
-{
-  return -!!soxr_clear(p);
-}
-
-
-
-SRC_ERROR src_error(SRC_STATE * p)
-{
-  return -!!soxr_error(p);
-}
-
-
-
-SRC_STATE * src_delete(SRC_STATE * p)
-{
-  soxr_delete(p);
-  return 0;
-}
-
-
-
-SRC_STATE *src_callback_new(src_callback_t fn,
-    SRC_SRCTYPE id, int channels, SRC_ERROR * error0, void * p)
-{
-  soxr_quality_spec_t q_spec = soxr_quality_spec(SOXR_LSR0Q + (unsigned)id, 0);
-  char const * e = getenv("SOXR_LSR_NUM_THREADS");
-  soxr_runtime_spec_t r_spec = soxr_runtime_spec(!(e && atoi(e) != 1));
-  soxr_error_t error;
-  soxr_t soxr = 0;
-
-  assert (channels > 0);
-  soxr = soxr_create(0, 0, (unsigned)channels, &error, 0, &q_spec, &r_spec);
-
-  if (soxr)
-    error = soxr_set_input_fn(soxr, (soxr_input_fn_t)fn, p, 0);
-
-  if (error0)
-    *error0 = -!!error;
-
-  return soxr;
-}
-
-
-
-long src_callback_read(SRC_STATE *p, double oi_ratio, long olen, float * obuf)
-{
-  if (!p || olen < 0) return -1;
-
-  soxr_set_error(p, soxr_set_io_ratio(p, 1/oi_ratio, (size_t)olen));
-  return (long)soxr_output(p, obuf, (size_t)olen);
-}
-
-
-
-SRC_ERROR src_simple(SRC_DATA * io, SRC_SRCTYPE id, int channels)
-{
-  size_t idone, odone;
-  soxr_error_t error;
-  soxr_quality_spec_t q_spec = soxr_quality_spec(SOXR_LSR0Q + (unsigned)id, 0);
-  char const * e = getenv("SOXR_LSR_NUM_THREADS");
-  soxr_runtime_spec_t r_spec = soxr_runtime_spec(!(e && atoi(e) != 1));
-
-  if (!io || channels<=0 || io->input_frames<0 || io->output_frames<0) return-1;
-
-  error = soxr_oneshot(1, io->src_ratio, (unsigned)channels, io->data_in,
-      (size_t)io->input_frames, &idone, io->data_out, (size_t)io->output_frames,
-      &odone, 0, &q_spec, &r_spec);
-
-  io->input_frames_used = (long)idone, io->output_frames_gen = (long)odone;
-
-  return -!!error;
-}
-
-
-
-char const * src_get_name(SRC_SRCTYPE id)
-{
-  static char const * const names[] = {
-    "LSR best sinc", "LSR medium sinc", "LSR fastest sinc",
-    "LSR ZOH", "LSR linear", "SoX VHQ"};
-
-  return (unsigned)id < 5u + !getenv("SOXR_LSR_STRICT")? names[id] : 0;
-}
-
-
-
-char const * src_get_description(SRC_SRCTYPE id)
-{
-  return src_get_name(id);
-}
-
-
-
-char const * src_get_version(void)
-{
-  return soxr_version();
-}
-
-
-
-char const * src_strerror(SRC_ERROR error)
-{
-  return error == 1? "Placeholder." : error ? "soxr error" : soxr_strerror(0);
-}
-
-
-
-int src_is_valid_ratio(double oi_ratio)
-{
-  return getenv("SOXR_LSR_STRICT")?
-    oi_ratio >= 1./256 && oi_ratio <= 256 : oi_ratio > 0;
-}
-
-
-
-void src_short_to_float_array(short const * src, float * dest, int len)
-{
-  assert (src && dest);
-
-  while (len--) dest[len] = (float)(src[len] * (1 / (1. + SHRT_MAX)));
-}
-
-
-
-void src_float_to_short_array(float const * src, short * dest, int len)
-{
-  double d, N = 1. + SHRT_MAX;
-  assert (src && dest);
-
-  while (len--) d = src[len] * N, dest[len] =
-    (short)(d > N - 1? (short)(N - 1) : d < -N? (short)-N : rint16(d));
-}
-
-
-
-void src_int_to_float_array(int const * src, float * dest, int len)
-{
-  assert (src && dest);
-  while (len--) dest[len] = (float)(src[len] * (1 / (32768. * 65536.)));
-}
-
-
-
-void src_float_to_int_array(float const * src, int * dest, int len)
-{
-  double d, N = 32768. * 65536.; /* N.B. int32, not int! (Also above fn.) */
-  assert (src && dest);
-
-  while (len--) d = src[len] * N, dest[len] =
-    d >= N - 1? (int)(N - 1) : d < -N? (int)(-N) : rint32(d);
-}
diff --git a/soxr-sys/src/soxr-lsr.h b/soxr-sys/src/soxr-lsr.h
deleted file mode 100644
index b1cc24706..000000000
--- a/soxr-sys/src/soxr-lsr.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-18 robs@users.sourceforge.net
- *
- * This library is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * This library is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
- * General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this library; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-/* Wrapper compatible with `libsamplerate' (constant-rate).
- * (Libsoxr's native API can be found in soxr.h).  */
-
-#if !defined SAMPLERATE_H
-#define SAMPLERATE_H
-#if defined __cplusplus
-  extern "C" {
-#endif
-
-#if defined SOXR_DLL
-  #if defined soxr_lsr_EXPORTS
-    #define SOXR __declspec(dllexport)
-  #else
-    #define SOXR __declspec(dllimport)
-  #endif
-#elif defined SOXR_VISIBILITY && defined __GNUC__ && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 1)
-  #define SOXR __attribute__ ((visibility("default")))
-#else
-  #define SOXR
-#endif
-
-typedef float   SRC_SAMPLE;
-enum SRC_SRCTYPE_e {SRC_SINC_BEST_QUALITY, SRC_SINC_MEDIUM_QUALITY,
-                    SRC_SINC_FASTEST, SRC_ZERO_ORDER_HOLD, SRC_LINEAR};
-typedef int     SRC_SRCTYPE;
-typedef int     SRC_ERROR;
-typedef long    (* src_callback_t)(void *, SRC_SAMPLE * *);
-typedef struct  soxr SRC_STATE;
-typedef struct  SRC_DATA {
-  SRC_SAMPLE    * data_in, * data_out;
-  long          input_frames, output_frames;
-  long          input_frames_used, output_frames_gen;
-  int           end_of_input;
-  double        src_ratio;
-} SRC_DATA;
-SOXR SRC_STATE *   src_new(SRC_SRCTYPE, int num_channels, SRC_ERROR *);
-SOXR SRC_ERROR     src_process  (SRC_STATE *, SRC_DATA *);
-SOXR SRC_ERROR     src_set_ratio(SRC_STATE *, double);
-SOXR SRC_ERROR     src_reset    (SRC_STATE *);
-SOXR SRC_ERROR     src_error    (SRC_STATE *);
-SOXR SRC_STATE *   src_delete   (SRC_STATE *);
-SOXR SRC_STATE *   src_callback_new(
-                    src_callback_t, SRC_SRCTYPE, int, SRC_ERROR *, void *);
-SOXR long          src_callback_read(
-                    SRC_STATE *, double src_ratio, long, SRC_SAMPLE *);
-SOXR SRC_ERROR     src_simple(SRC_DATA *, SRC_SRCTYPE, int);
-SOXR char const *  src_get_name(SRC_SRCTYPE);
-SOXR char const *  src_get_description(SRC_SRCTYPE);
-SOXR char const *  src_get_version(void);
-SOXR char const *  src_strerror(SRC_ERROR);
-SOXR int           src_is_valid_ratio(double);
-SOXR void          src_short_to_float_array(short const *, float *, int);
-SOXR void          src_float_to_short_array(float const *, short *, int);
-SOXR void          src_int_to_float_array(int const *, float *, int);
-SOXR void          src_float_to_int_array(float const *, int *, int);
-
-#undef SOXR
-#if defined __cplusplus
-  }
-#endif
-#endif
diff --git a/soxr-sys/src/soxr.c b/soxr-sys/src/soxr.c
deleted file mode 100644
index 0ece116bf..000000000
--- a/soxr-sys/src/soxr.c
+++ /dev/null
@@ -1,843 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-18 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-#include "soxr.h"
-#include "data-io.h"
-#include "internal.h"
-
-#if AVUTIL_FOUND
-  #include <libavutil/cpu.h>
-#endif
-
-
-
-#if WITH_DEV_TRACE
-
-#include <stdarg.h>
-#include <stdio.h>
-
-int _soxr_trace_level;
-
-void _soxr_trace(char const * fmt, ...)
-{
-  va_list args;
-  va_start(args, fmt);
-  vfprintf(stderr, fmt, args);
-  fputc('\n', stderr);
-  va_end(args);
-}
-
-#endif
-
-
-
-char const * soxr_version(void)
-{
-  return "libsoxr-" SOXR_THIS_VERSION_STR;
-}
-
-
-
-
-typedef void sample_t; /* float or double */
-typedef void (* fn_t)(void);
-typedef fn_t control_block_t[10];
-
-#define resampler_input        (*(sample_t * (*)(void *, sample_t * samples, size_t   n))p->control_block[0])
-#define resampler_process      (*(void (*)(void *, size_t))p->control_block[1])
-#define resampler_output       (*(sample_t const * (*)(void *, sample_t * samples, size_t * n))p->control_block[2])
-#define resampler_flush        (*(void (*)(void *))p->control_block[3])
-#define resampler_close        (*(void (*)(void *))p->control_block[4])
-#define resampler_delay        (*(double (*)(void *))p->control_block[5])
-#define resampler_sizes        (*(void (*)(size_t * shared, size_t * channel))p->control_block[6])
-#define resampler_create       (*(char const * (*)(void * channel, void * shared, double io_ratio, soxr_quality_spec_t * q_spec, soxr_runtime_spec_t * r_spec, double scale))p->control_block[7])
-#define resampler_set_io_ratio (*(void (*)(void *, double io_ratio, size_t len))p->control_block[8])
-#define resampler_id           (*(char const * (*)(void))p->control_block[9])
-
-typedef void * resampler_t; /* For one channel. */
-typedef void * resampler_shared_t; /* Between channels. */
-typedef void (* deinterleave_t)(sample_t * * dest,
-    soxr_datatype_t data_type, void const * * src0, size_t n, unsigned ch);
-typedef size_t (* interleave_t)(soxr_datatype_t data_type, void * * dest,
-    sample_t const * const * src, size_t, unsigned, unsigned long long *);
-
-struct soxr {
-  unsigned num_channels;
-  double io_ratio;
-  soxr_error_t error;
-  soxr_quality_spec_t q_spec;
-  soxr_io_spec_t io_spec;
-  soxr_runtime_spec_t runtime_spec;
-
-  void * input_fn_state;
-  soxr_input_fn_t input_fn;
-  size_t max_ilen;
-
-  resampler_shared_t shared;
-  resampler_t * resamplers;
-  control_block_t control_block;
-  deinterleave_t deinterleave;
-  interleave_t interleave;
-
-  void * * channel_ptrs;
-  size_t clips;
-  unsigned long long seed;
-  int flushing;
-};
-
-
-
-#if WITH_CR32 || WITH_CR32S || WITH_CR64 || WITH_CR64S
-  #include "filter.h"
-#else
-  #define lsx_to_3dB(x) ((x)/(x))
-#endif
-
-
-
-soxr_quality_spec_t soxr_quality_spec(unsigned long recipe, unsigned long flags)
-{
-  soxr_quality_spec_t spec, * p = &spec;
-  unsigned q = recipe & 0xf;                         /* TODO: move to soxr-lsr.c: */
-  unsigned quality = q > SOXR_LSR2Q+2? SOXR_VHQ : q > SOXR_LSR2Q? SOXR_QQ : q;
-  double rej;
-  memset(p, 0, sizeof(*p));
-  if (quality > SOXR_PRECISIONQ) {
-    p->e = "invalid quality type";
-    return spec;
-  }
-  flags |= quality < SOXR_LSR0Q ? RESET_ON_CLEAR : 0;
-  p->phase_response = "\62\31\144"[(recipe & 0x30)>>4];
-  p->stopband_begin = 1;
-  p->precision =
-    quality == SOXR_QQ      ?  0 :
-    quality <= SOXR_16_BITQ ? 16 :
-    quality <= SOXR_32_BITQ ?  4 + quality * 4 :
-    quality <= SOXR_LSR2Q   ? 55 - quality * 4 : /* TODO: move to soxr-lsr.c */
-    0;
-  rej = p->precision * linear_to_dB(2.);
-  p->flags = flags;
-  if (quality <= SOXR_32_BITQ || quality == SOXR_PRECISIONQ) {
-    #define LOW_Q_BW0     (1385 / 2048.) /* 0.67625 rounded to be a FP exact. */
-    p->passband_end = quality == 1? LOW_Q_BW0 : 1 - .05 / lsx_to_3dB(rej);
-    if (quality <= 2)
-      p->flags &= ~SOXR_ROLLOFF_NONE, p->flags |= SOXR_ROLLOFF_MEDIUM;
-  }
-  else { /* TODO: move to soxr-lsr.c */
-    static float const bw[] = {.931f, .832f, .663f};
-    p->passband_end = bw[quality - SOXR_LSR0Q];
-    if (quality == SOXR_LSR2Q) {
-      p->flags &= ~SOXR_ROLLOFF_NONE;
-      p->flags |= SOXR_ROLLOFF_LSR2Q | SOXR_PROMOTE_TO_LQ;
-    }
-  }
-  if (recipe & SOXR_STEEP_FILTER)
-    p->passband_end = 1 - .01 / lsx_to_3dB(rej);
-  return spec;
-}
-
-
-
-char const * soxr_engine(soxr_t p)
-{
-  return resampler_id();
-}
-
-
-
-size_t * soxr_num_clips(soxr_t p)
-{
-  return &p->clips;
-}
-
-
-
-soxr_error_t soxr_error(soxr_t p)
-{
-  return p->error;
-}
-
-
-
-soxr_runtime_spec_t soxr_runtime_spec(unsigned num_threads)
-{
-  soxr_runtime_spec_t spec, * p = &spec;
-  memset(p, 0, sizeof(*p));
-  p->log2_min_dft_size = 10;
-  p->log2_large_dft_size = 17;
-  p->coef_size_kbytes = 400;
-  p->num_threads = num_threads;
-  return spec;
-}
-
-
-
-soxr_io_spec_t soxr_io_spec(
-  soxr_datatype_t itype,
-  soxr_datatype_t otype)
-{
-  soxr_io_spec_t spec, * p = &spec;
-  memset(p, 0, sizeof(*p));
-  if ((itype | otype) >= SOXR_SPLIT * 2)
-    p->e = "invalid io datatype(s)";
-  else {
-    p->itype = itype;
-    p->otype = otype;
-    p->scale = 1;
-  }
-  return spec;
-}
-
-
-
-#if (WITH_CR32S && WITH_CR32) || (WITH_CR64S && WITH_CR64)
-  #if defined __GNUC__ && defined __x86_64__
-    #define CPUID(type, eax_, ebx_, ecx_, edx_) \
-      __asm__ __volatile__ ( \
-        "cpuid \n\t" \
-        : "=a" (eax_), "=b" (ebx_), "=c" (ecx_), "=d" (edx_) \
-        : "a" (type), "c" (0));
-  #elif defined __GNUC__ && defined __i386__
-    #define CPUID(type, eax_, ebx_, ecx_, edx_) \
-      __asm__ __volatile__ ( \
-        "mov %%ebx, %%edi \n\t" \
-        "cpuid \n\t" \
-        "xchg %%edi, %%ebx \n\t" \
-        : "=a" (eax_), "=D" (ebx_), "=c" (ecx_), "=d" (edx_) \
-        : "a" (type), "c" (0));
-  #elif defined _M_X64 && defined _MSC_VER && _MSC_VER > 1500
-     void __cpuidex(int CPUInfo[4], int info_type, int ecxvalue);
-     #pragma intrinsic(__cpuidex)
-     #define CPUID(type, eax_, ebx_, ecx_, edx_) do { \
-       int regs[4]; \
-       __cpuidex(regs, type, 0); \
-       eax_ = regs[0], ebx_ = regs[1], ecx_ = regs[2], edx_ = regs[3]; \
-     } while(0)
-  #elif defined _M_X64 && defined _MSC_VER
-     void __cpuidex(int CPUInfo[4], int info_type);
-     #pragma intrinsic(__cpuidex)
-     #define CPUID(type, eax_, ebx_, ecx_, edx_) do { \
-       int regs[4]; \
-       __cpuidex(regs, type); \
-       eax_ = regs[0], ebx_ = regs[1], ecx_ = regs[2], edx_ = regs[3]; \
-     } while(0)
-  #elif defined _M_IX86 && defined _MSC_VER
-    #define CPUID(type, eax_, ebx_, ecx_, edx_) \
-      __asm pushad \
-      __asm mov eax, type \
-      __asm xor ecx, ecx \
-      __asm cpuid \
-      __asm mov eax_, eax \
-      __asm mov ebx_, ebx \
-      __asm mov ecx_, ecx \
-      __asm mov edx_, edx \
-      __asm popad
-  #endif
-#endif
-
-
-
-#if WITH_CR32S && WITH_CR32
-  static bool cpu_has_simd32(void)
-  {
-  #if defined __x86_64__ || defined _M_X64
-    return true;
-  #elif defined __i386__ || defined _M_IX86
-    enum {SSE = 1 << 25, SSE2 = 1 << 26};
-    unsigned eax_, ebx_, ecx_, edx_;
-    CPUID(1, eax_, ebx_, ecx_, edx_);
-    return (edx_ & (SSE|SSE2)) != 0;
-  #elif defined AV_CPU_FLAG_NEON
-    return !!(av_get_cpu_flags() & AV_CPU_FLAG_NEON);
-  #else
-    return false;
-  #endif
-  }
-
-  static bool should_use_simd32(void)
-  {
-    char const * e;
-    return ((e = getenv("SOXR_USE_SIMD"  )))? !!atoi(e) :
-           ((e = getenv("SOXR_USE_SIMD32")))? !!atoi(e) : cpu_has_simd32();
-  }
-#else
-  #define should_use_simd32() true
-#endif
-
-
-
-#if WITH_CR64S && WITH_CR64
-  #if defined __GNUC__
-    #define XGETBV(type, eax_, edx_) \
-      __asm__ __volatile__ ( \
-        ".byte 0x0f, 0x01, 0xd0\n" \
-        : "=a"(eax_), "=d"(edx_) : "c" (type));
-  #elif defined _M_X64 && defined _MSC_FULL_VER && _MSC_FULL_VER >= 160040219
-    #include <immintrin.h>
-    #define XGETBV(type, eax_, edx_) do { \
-      union {uint64_t x; uint32_t y[2];} a = {_xgetbv(0)}; \
-      eax_ = a.y[0], edx_ = a.y[1]; \
-     } while(0)
-  #elif defined _M_IX86 && defined _MSC_VER
-    #define XGETBV(type, eax_, edx_) \
-      __asm pushad \
-      __asm mov ecx, type \
-      __asm _emit 0x0f \
-      __asm _emit 0x01 \
-      __asm _emit 0xd0 \
-      __asm mov eax_, eax \
-      __asm mov edx_, edx \
-      __asm popad
-  #else
-    #define XGETBV(type, eax_, edx_) eax_ = edx_ = 0
-  #endif
-
-  static bool cpu_has_simd64(void)
-  {
-    enum {OSXSAVE = 1 << 27, AVX = 1 << 28};
-    unsigned eax_, ebx_, ecx_, edx_;
-    CPUID(1, eax_, ebx_, ecx_, edx_);
-    if ((ecx_ & (OSXSAVE|AVX)) == (OSXSAVE|AVX)) {
-      XGETBV(0, eax_, edx_);
-      return (eax_ & 6) == 6;
-    }
-    return false;
-  }
-
-  static bool should_use_simd64(void)
-  {
-    char const * e;
-    return ((e = getenv("SOXR_USE_SIMD"  )))? !!atoi(e) :
-           ((e = getenv("SOXR_USE_SIMD64")))? !!atoi(e) : cpu_has_simd64();
-  }
-#else
-  #define should_use_simd64() true
-#endif
-
-
-
-extern control_block_t
-  _soxr_rate32_cb,
-  _soxr_rate32s_cb,
-  _soxr_rate64_cb,
-  _soxr_rate64s_cb,
-  _soxr_vr32_cb;
-
-
-
-static void runtime_num(char const * env_name,
-    int min, int max, unsigned * field)
-{
-  char const * e = getenv(env_name);
-  if (e) {
-    int i = atoi(e);
-    if (i >= min && i <= max)
-      *field = (unsigned)i;
-  }
-}
-
-
-
-static void runtime_flag(char const * env_name,
-    unsigned n_bits, unsigned n_shift, unsigned long * flags)
-{
-  char const * e = getenv(env_name);
-  if (e) {
-    int i = atoi(e);
-    unsigned long mask = (1UL << n_bits) - 1;
-    if (i >= 0 && i <= (int)mask)
-      *flags &= ~(mask << n_shift), *flags |= ((unsigned long)i << n_shift);
-  }
-}
-
-
-
-soxr_t soxr_create(
-  double input_rate, double output_rate,
-  unsigned num_channels,
-  soxr_error_t * error0,
-  soxr_io_spec_t const * io_spec,
-  soxr_quality_spec_t const * q_spec,
-  soxr_runtime_spec_t const * runtime_spec)
-{
-  double io_ratio = output_rate!=0? input_rate!=0?
-    input_rate / output_rate : -1 : input_rate!=0? -1 : 0;
-  static const float datatype_full_scale[] = {1, 1, 65536.*32768, 32768};
-  soxr_t p = 0;
-  soxr_error_t error = 0;
-
-#if WITH_DEV_TRACE
-#define _(x) (char)(sizeof(x)>=10? 'a'+(char)(sizeof(x)-10):'0'+(char)sizeof(x))
-  char const * e = getenv("SOXR_TRACE");
-  _soxr_trace_level = e? atoi(e) : 0;
-  {
-    static char const arch[] = {_(char), _(short), _(int), _(long), _(long long)
-      , ' ', _(float), _(double), _(long double)
-      , ' ', _(int *), _(int (*)(int))
-      , ' ', HAVE_BIGENDIAN ? 'B' : 'L'
-#if defined _OPENMP
-      , ' ', 'O', 'M', 'P'
-#endif
-      , 0};
-#undef _
-    lsx_debug("arch: %s", arch);
-  }
-#endif
-
-  if (q_spec && q_spec->e)  error = q_spec->e;
-  else if (io_spec && (io_spec->itype | io_spec->otype) >= SOXR_SPLIT * 2)
-    error = "invalid io datatype(s)";
-
-  if (!error && !(p = calloc(sizeof(*p), 1))) error = "malloc failed";
-
-  if (p) {
-    control_block_t * control_block;
-
-    p->q_spec = q_spec? *q_spec : soxr_quality_spec(SOXR_HQ, 0);
-
-    if (q_spec) { /* Backwards compatibility with original API: */
-      if (p->q_spec.passband_end > 2)
-        p->q_spec.passband_end /= 100;
-      if (p->q_spec.stopband_begin > 2)
-        p->q_spec.stopband_begin = 2 - p->q_spec.stopband_begin / 100;
-    }
-
-    p->io_ratio = io_ratio;
-    p->num_channels = num_channels;
-    if (io_spec)
-      p->io_spec = *io_spec;
-    else
-      p->io_spec.scale = 1;
-
-    p->runtime_spec = runtime_spec? *runtime_spec : soxr_runtime_spec(1);
-
-    runtime_num("SOXR_MIN_DFT_SIZE", 8, 15, &p->runtime_spec.log2_min_dft_size);
-    runtime_num("SOXR_LARGE_DFT_SIZE", 8, 20, &p->runtime_spec.log2_large_dft_size);
-    runtime_num("SOXR_COEFS_SIZE", 100, 800, &p->runtime_spec.coef_size_kbytes);
-    runtime_num("SOXR_NUM_THREADS", 0, 64, &p->runtime_spec.num_threads);
-    runtime_flag("SOXR_COEF_INTERP", 2, 0, &p->runtime_spec.flags);
-
-    runtime_flag("SOXR_STRICT_BUF", 1, 2, &p->runtime_spec.flags);
-    runtime_flag("SOXR_NOSMALLINTOPT", 1, 3, &p->runtime_spec.flags);
-
-    p->io_spec.scale *= datatype_full_scale[p->io_spec.otype & 3] /
-                        datatype_full_scale[p->io_spec.itype & 3];
-
-    //p->seed = (unsigned long)time(0) ^ (unsigned long)(size_t)p;
-    p->seed = 0xc2ec33ef97a5ULL; /* Fixed dithering seed for deterministic int16 output */
-
-#if WITH_CR32 || WITH_CR32S || WITH_VR32
-    if (0
-#if WITH_VR32
-        || ((!WITH_CR32 && !WITH_CR32S) || (p->q_spec.flags & SOXR_VR))
-#endif
-#if WITH_CR32 || WITH_CR32S
-        || !(WITH_CR64 || WITH_CR64S) || (p->q_spec.precision <= 20 && !(p->q_spec.flags & SOXR_DOUBLE_PRECISION))
-#endif
-        ) {
-      p->deinterleave = (deinterleave_t)_soxr_deinterleave_f;
-      p->interleave = (interleave_t)_soxr_interleave_f;
-      control_block =
-#if WITH_VR32
-          ((!WITH_CR32 && !WITH_CR32S) || (p->q_spec.flags & SOXR_VR))? &_soxr_vr32_cb :
-#endif
-#if WITH_CR32S
-          !WITH_CR32 || should_use_simd32()? &_soxr_rate32s_cb :
-#endif
-          &_soxr_rate32_cb;
-    }
-#if WITH_CR64 || WITH_CR64S
-    else
-#endif
-#endif
-#if WITH_CR64 || WITH_CR64S
-    {
-      p->deinterleave = (deinterleave_t)_soxr_deinterleave;
-      p->interleave = (interleave_t)_soxr_interleave;
-      control_block =
-#if WITH_CR64S
-          !WITH_CR64 || should_use_simd64()? &_soxr_rate64s_cb :
-#endif
-          &_soxr_rate64_cb;
-    }
-#endif
-    memcpy(&p->control_block, control_block, sizeof(p->control_block));
-
-    if (p->num_channels && io_ratio!=0)
-      error = soxr_set_io_ratio(p, io_ratio, 0);
-  }
-  if (error)
-    soxr_delete(p), p = 0;
-  if (error0)
-    *error0 = error;
-  return p;
-}
-
-
-
-soxr_error_t soxr_set_input_fn(soxr_t p,
-    soxr_input_fn_t input_fn, void * input_fn_state, size_t max_ilen)
-{
-  p->input_fn_state = input_fn_state;
-  p->input_fn = input_fn;
-  p->max_ilen = max_ilen? max_ilen : (size_t)-1;
-  return 0;
-}
-
-
-
-static void soxr_delete0(soxr_t p)
-{
-  unsigned i;
-
-  if (p->resamplers) for (i = 0; i < p->num_channels; ++i) {
-    if (p->resamplers[i])
-      resampler_close(p->resamplers[i]);
-    free(p->resamplers[i]);
-  }
-  free(p->resamplers);
-  free(p->channel_ptrs);
-  free(p->shared);
-
-  memset(p, 0, sizeof(*p));
-}
-
-
-
-double soxr_delay(soxr_t p)
-{
-  return
-    (p && !p->error && p->resamplers)? resampler_delay(p->resamplers[0]) : 0;
-}
-
-
-
-static soxr_error_t fatal_error(soxr_t p, soxr_error_t error)
-{
-  soxr_delete0(p);
-  return p->error = error;
-}
-
-
-
-static soxr_error_t initialise(soxr_t p)
-{
-  unsigned i;
-  size_t shared_size, channel_size;
-
-  resampler_sizes(&shared_size, &channel_size);
-  p->channel_ptrs = calloc(sizeof(*p->channel_ptrs), p->num_channels);
-  p->shared = calloc(shared_size, 1);
-  p->resamplers = calloc(sizeof(*p->resamplers), p->num_channels);
-  if (!p->shared || !p->channel_ptrs || !p->resamplers)
-    return fatal_error(p, "malloc failed");
-
-  for (i = 0; i < p->num_channels; ++i) {
-    soxr_error_t error;
-    if (!(p->resamplers[i] = calloc(channel_size, 1)))
-      return fatal_error(p, "malloc failed");
-    error = resampler_create(
-        p->resamplers[i],
-        p->shared,
-        p->io_ratio,
-        &p->q_spec,
-        &p->runtime_spec,
-        p->io_spec.scale);
-    if (error)
-      return fatal_error(p, error);
-  }
-  return 0;
-}
-
-
-
-soxr_error_t soxr_set_num_channels(soxr_t p, unsigned num_channels)
-{
-  if (!p)                return "invalid soxr_t pointer";
-  if (num_channels == p->num_channels) return p->error;
-  if (!num_channels)     return "invalid # of channels";
-  if (p->resamplers)     return "# of channels can't be changed";
-  p->num_channels = num_channels;
-  return soxr_set_io_ratio(p, p->io_ratio, 0);
-}
-
-
-
-soxr_error_t soxr_set_io_ratio(soxr_t p, double io_ratio, size_t slew_len)
-{
-  unsigned i;
-  soxr_error_t error;
-  if (!p)                 return "invalid soxr_t pointer";
-  if ((error = p->error)) return error;
-  if (!p->num_channels)   return "must set # channels before O/I ratio";
-  if (io_ratio <= 0)      return "I/O ratio out-of-range";
-  if (!p->channel_ptrs) {
-    p->io_ratio = io_ratio;
-    return initialise(p);
-  }
-  if (p->control_block[8]) {
-    for (i = 0; !error && i < p->num_channels; ++i)
-      resampler_set_io_ratio(p->resamplers[i], io_ratio, slew_len);
-    return error;
-  }
-  return fabs(p->io_ratio - io_ratio) < 1e-15? 0 :
-    "varying O/I ratio is not supported with this quality level";
-}
-
-
-
-void soxr_delete(soxr_t p)
-{
-  if (p)
-    soxr_delete0(p), free(p);
-}
-
-
-
-soxr_error_t soxr_clear(soxr_t p) /* TODO: this, properly. */
-{
-  if (p) {
-    struct soxr tmp = *p;
-    soxr_delete0(p);
-    memset(p, 0, sizeof(*p));
-    p->input_fn = tmp.input_fn;
-    p->runtime_spec = tmp.runtime_spec;
-    p->q_spec = tmp.q_spec;
-    p->io_spec = tmp.io_spec;
-    p->num_channels = tmp.num_channels;
-    p->input_fn_state = tmp.input_fn_state;
-    memcpy(p->control_block, tmp.control_block, sizeof(p->control_block));
-    p->deinterleave = tmp.deinterleave;
-    p->interleave = tmp.interleave;
-    return (p->q_spec.flags & RESET_ON_CLEAR)?
-      soxr_set_io_ratio(p, tmp.io_ratio, 0) : 0;
-  }
-  return "invalid soxr_t pointer";
-}
-
-
-
-static void soxr_input_1ch(soxr_t p, unsigned i, soxr_cbuf_t src, size_t len)
-{
-  sample_t * dest = resampler_input(p->resamplers[i], NULL, len);
-  (*p->deinterleave)(&dest, p->io_spec.itype, &src, len, 1);
-}
-
-
-
-static size_t soxr_input(soxr_t p, void const * in, size_t len)
-{
-  bool separated = !!(p->io_spec.itype & SOXR_SPLIT);
-  unsigned i;
-  if (!p || p->error) return 0;
-  if (!in && len) {p->error = "null input buffer pointer"; return 0;}
-  if (!len) {
-    p->flushing = true;
-    return 0;
-  }
-  if (separated)
-    for (i = 0; i < p->num_channels; ++i)
-      soxr_input_1ch(p, i, ((soxr_cbufs_t)in)[i], len);
-  else {
-    for (i = 0; i < p->num_channels; ++i)
-      p->channel_ptrs[i] = resampler_input(p->resamplers[i], NULL, len);
-    (*p->deinterleave)(
-        (sample_t **)p->channel_ptrs, p->io_spec.itype, &in, len, p->num_channels);
-  }
-  return len;
-}
-
-
-
-static size_t soxr_output_1ch(soxr_t p, unsigned i, soxr_buf_t dest, size_t len, bool separated)
-{
-  sample_t const * src;
-  if (p->flushing)
-    resampler_flush(p->resamplers[i]);
-  resampler_process(p->resamplers[i], len);
-  src = resampler_output(p->resamplers[i], NULL, &len);
-  if (separated)
-    p->clips += (p->interleave)(p->io_spec.otype, &dest, &src,
-      len, 1, (p->io_spec.flags & SOXR_NO_DITHER)? 0 : &p->seed);
-  else p->channel_ptrs[i] = (void /* const */ *)src;
-  return len;
-}
-
-
-
-static size_t soxr_output_no_callback(soxr_t p, soxr_buf_t out, size_t len)
-{
-  unsigned u;
-  size_t done = 0;
-  bool separated = !!(p->io_spec.otype & SOXR_SPLIT);
-#if defined _OPENMP
-  int i;
-  if (!p->runtime_spec.num_threads && p->num_channels > 1)
-#pragma omp parallel for
-  for (i = 0; i < (int)p->num_channels; ++i) {
-    size_t done1;
-    done1 = soxr_output_1ch(p, (unsigned)i, ((soxr_bufs_t)out)[i], len, separated);
-    if (!i)
-      done = done1;
-  } else
-#endif
-  for (u = 0; u < p->num_channels; ++u)
-    done = soxr_output_1ch(p, u, ((soxr_bufs_t)out)[u], len, separated);
-
-  if (!separated)
-    p->clips += (p->interleave)(p->io_spec.otype, &out, (sample_t const * const *)p->channel_ptrs,
-        done, p->num_channels, (p->io_spec.flags & SOXR_NO_DITHER)? 0 : &p->seed);
-  return done;
-}
-
-
-
-size_t soxr_output(soxr_t p, void * out, size_t len0)
-{
-  size_t odone, odone0 = 0, olen = len0, osize, idone;
-  size_t ilen = min(p->max_ilen, (size_t)ceil((double)olen *p->io_ratio));
-  void const * in = out; /* Set to !=0, so that caller may leave unset. */
-  bool was_flushing;
-
-  if (!p || p->error) return 0;
-  if (!out && len0) {p->error = "null output buffer pointer"; return 0;}
-
-  do {
-    odone = soxr_output_no_callback(p, out, olen);
-    odone0 += odone;
-    if (odone0 == len0 || !p->input_fn || p->flushing)
-      break;
-
-    osize = soxr_datatype_size(p->io_spec.otype) * p->num_channels;
-    out = (char *)out + osize * odone;
-    olen -= odone;
-    idone = p->input_fn(p->input_fn_state, &in, ilen);
-    was_flushing = p->flushing;
-    if (!in)
-      p->error = "input function reported failure";
-    else soxr_input(p, in, idone);
-  } while (odone || idone || (!was_flushing && p->flushing));
-  return odone0;
-}
-
-
-
-static size_t soxr_i_for_o(soxr_t p, size_t olen, size_t ilen)
-{
-  size_t result;
-#if 0
-  if (p->runtime_spec.flags & SOXR_STRICT_BUFFERING)
-    result = rate_i_for_o(p->resamplers[0], olen);
-  else
-#endif
-    result = (size_t)ceil((double)olen * p->io_ratio);
-  return min(result, ilen);
-}
-
-
-
-#if 0
-static size_t soxr_o_for_i(soxr_t p, size_t ilen, size_t olen)
-{
-  size_t result = (size_t)ceil((double)ilen / p->io_ratio);
-  return min(result, olen);
-}
-#endif
-
-
-
-soxr_error_t soxr_process(soxr_t p,
-    void const * in , size_t ilen0, size_t * idone0,
-    void       * out, size_t olen , size_t * odone0)
-{
-  size_t ilen, idone, odone = 0;
-  unsigned u;
-  bool flush_requested = false;
-
-  if (!p) return "null pointer";
-
-  if (!in)
-    flush_requested = true, ilen = ilen0 = 0;
-  else {
-    if ((ptrdiff_t)ilen0 < 0)
-      flush_requested = true, ilen0 = ~ilen0;
-    if (idone0 && (1 || flush_requested))
-      ilen = soxr_i_for_o(p, olen, ilen0);
-    else
-      ilen = ilen0/*, olen = soxr_o_for_i(p, ilen, olen)*/;
-  }
-  p->flushing |= ilen == ilen0 && flush_requested;
-
-  if (!out && !in)
-    idone = ilen;
-  else if (p->io_spec.itype & p->io_spec.otype & SOXR_SPLIT) { /* Both i & o */
-#if defined _OPENMP
-    int i;
-    if (!p->runtime_spec.num_threads && p->num_channels > 1)
-#pragma omp parallel for
-    for (i = 0; i < (int)p->num_channels; ++i) {
-      size_t done;
-      if (in)
-        soxr_input_1ch(p, (unsigned)i, ((soxr_cbufs_t)in)[i], ilen);
-      done = soxr_output_1ch(p, (unsigned)i, ((soxr_bufs_t)out)[i], olen, true);
-      if (!i)
-        odone = done;
-    } else
-#endif
-    for (u = 0; u < p->num_channels; ++u) {
-      if (in)
-        soxr_input_1ch(p, u, ((soxr_cbufs_t)in)[u], ilen);
-      odone = soxr_output_1ch(p, u, ((soxr_bufs_t)out)[u], olen, true);
-    }
-    idone = ilen;
-  }
-  else {
-    idone = ilen? soxr_input (p, in , ilen) : 0;
-    odone = soxr_output(p, out, olen);
-  }
-  if (idone0) *idone0 = idone;
-  if (odone0) *odone0 = odone;
-  return p->error;
-}
-
-
-
-soxr_error_t soxr_oneshot(
-    double irate, double orate,
-    unsigned num_channels,
-    void const * in , size_t ilen, size_t * idone,
-    void * out, size_t olen, size_t * odone,
-    soxr_io_spec_t const * io_spec,
-    soxr_quality_spec_t const * q_spec,
-    soxr_runtime_spec_t const * runtime_spec)
-{
-  soxr_t resampler;
-  soxr_error_t error = q_spec? q_spec->e : 0;
-  if (!error) {
-    soxr_quality_spec_t q_spec1;
-    if (!q_spec)
-      q_spec1 = soxr_quality_spec(SOXR_LQ, 0), q_spec = &q_spec1;
-    resampler = soxr_create(irate, orate, num_channels,
-        &error, io_spec, q_spec, runtime_spec);
-  }
-  if (!error) {
-    error = soxr_process(resampler, in, ~ilen, idone, out, olen, odone);
-    soxr_delete(resampler);
-  }
-  return error;
-}
-
-
-
-soxr_error_t soxr_set_error(soxr_t p, soxr_error_t error)
-{
-  if (!p) return "null pointer";
-  if (!p->error && p->error != error) return p->error;
-  p->error = error;
-  return 0;
-}
diff --git a/soxr-sys/src/soxr.h b/soxr-sys/src/soxr.h
deleted file mode 100644
index 09ec7c466..000000000
--- a/soxr-sys/src/soxr.h
+++ /dev/null
@@ -1,344 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-18 robs@users.sourceforge.net
- *
- * This library is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * This library is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
- * General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this library; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-
-
-/* -------------------------------- Gubbins --------------------------------- */
-
-#if !defined soxr_included
-#define soxr_included
-
-
-#if defined __cplusplus
-  #include <cstddef>
-  extern "C" {
-#else
-  #include <stddef.h>
-#endif
-
-#if defined SOXR_DLL
-  #if defined soxr_EXPORTS
-    #define SOXR __declspec(dllexport)
-  #else
-    #define SOXR __declspec(dllimport)
-  #endif
-#elif defined SOXR_VISIBILITY && defined __GNUC__ && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 1)
-  #define SOXR __attribute__ ((visibility("default")))
-#else
-  #define SOXR
-#endif
-
-typedef struct soxr_io_spec soxr_io_spec_t;
-typedef struct soxr_quality_spec soxr_quality_spec_t;
-typedef struct soxr_runtime_spec soxr_runtime_spec_t;
-
-
-
-/* ---------------------------- API conventions --------------------------------
-
-Buffer lengths (and occupancies) are expressed as the number of contained
-samples per channel.
-
-Parameter names for buffer lengths have the suffix `len'.
-
-A single-character `i' or 'o' is often used in names to give context as
-input or output (e.g. ilen, olen).                                            */
-
-
-
-/* --------------------------- Version management --------------------------- */
-
-/* E.g. #if SOXR_THIS_VERSION >= SOXR_VERSION(0,1,1) ...                      */
-
-#define SOXR_VERSION(x,y,z)     (((x)<<16)|((y)<<8)|(z))
-#define SOXR_THIS_VERSION       SOXR_VERSION(0,1,3)
-#define SOXR_THIS_VERSION_STR               "0.1.3"
-
-
-
-/* --------------------------- Type declarations ---------------------------- */
-
-typedef struct soxr * soxr_t;          /* A resampler for 1 or more channels. */
-typedef char const * soxr_error_t;                /* 0:no-error; non-0:error. */
-
-typedef void       * soxr_buf_t;  /* 1 buffer of channel-interleaved samples. */
-typedef void const * soxr_cbuf_t;                        /* Ditto; read-only. */
-
-typedef soxr_buf_t const  * soxr_bufs_t;/* Or, a separate buffer for each ch. */
-typedef soxr_cbuf_t const * soxr_cbufs_t;                /* Ditto; read-only. */
-
-typedef void const * soxr_in_t;      /* Either a soxr_cbuf_t or soxr_cbufs_t,
-                                        depending on itype in soxr_io_spec_t. */
-typedef void       * soxr_out_t;     /* Either a soxr_buf_t or soxr_bufs_t,
-                                        depending on otype in soxr_io_spec_t. */
-
-
-
-/* --------------------------- API main functions --------------------------- */
-
-SOXR char const * soxr_version(void);  /* Query library version: "libsoxr-x.y.z" */
-
-#define soxr_strerror(e)               /* Soxr counterpart to strerror. */     \
-    ((e)?(e):"no error")
-
-
-/* Create a stream resampler: */
-
-SOXR soxr_t soxr_create(
-    double      input_rate,      /* Input sample-rate. */
-    double      output_rate,     /* Output sample-rate. */
-    unsigned    num_channels,    /* Number of channels to be used. */
-        /* All following arguments are optional (may be set to NULL). */
-    soxr_error_t *,              /* To report any error during creation. */
-    soxr_io_spec_t const *,      /* To specify non-default I/O formats. */
-    soxr_quality_spec_t const *, /* To specify non-default resampling quality.*/
-    soxr_runtime_spec_t const *);/* To specify non-default runtime resources.
-
-    Default io_spec      is per soxr_io_spec(SOXR_FLOAT32_I, SOXR_FLOAT32_I)
-    Default quality_spec is per soxr_quality_spec(SOXR_HQ, 0)
-    Default runtime_spec is per soxr_runtime_spec(1)                          */
-
-
-
-/* If not using an app-supplied input function, after creating a stream
- * resampler, repeatedly call: */
-
-SOXR soxr_error_t soxr_process(
-    soxr_t      resampler,      /* As returned by soxr_create. */
-                            /* Input (to be resampled): */
-    soxr_in_t   in,             /* Input buffer(s); may be NULL (see below). */
-    size_t      ilen,           /* Input buf. length (samples per channel). */
-    size_t      * idone,        /* To return actual # samples used (<= ilen). */
-                            /* Output (resampled): */
-    soxr_out_t  out,            /* Output buffer(s).*/
-    size_t      olen,           /* Output buf. length (samples per channel). */
-    size_t      * odone);       /* To return actual # samples out (<= olen).
-
-    Note that no special meaning is associated with ilen or olen equal to
-    zero.  End-of-input (i.e. no data is available nor shall be available)
-    may be indicated by seting `in' to NULL.                                  */
-
-
-
-/* If using an app-supplied input function, it must look and behave like this:*/
-
-typedef size_t /* data_len */
-  (* soxr_input_fn_t)(         /* Supply data to be resampled. */
-    void * input_fn_state,     /* As given to soxr_set_input_fn (below). */
-    soxr_in_t * data,          /* Returned data; see below. N.B. ptr to ptr(s)*/
-    size_t requested_len);     /* Samples per channel, >= returned data_len.
-
-  data_len  *data     Indicates    Meaning
-   ------- -------   ------------  -------------------------
-     !=0     !=0       Success     *data contains data to be
-                                   input to the resampler.
-      0    !=0 (or   End-of-input  No data is available nor
-           not set)                shall be available.
-      0       0        Failure     An error occurred whilst trying to
-                                   source data to be input to the resampler.  */
-
-/* and be registered with a previously created stream resampler using: */
-
-SOXR soxr_error_t soxr_set_input_fn(/* Set (or reset) an input function.*/
-    soxr_t resampler,            /* As returned by soxr_create. */
-    soxr_input_fn_t,             /* Function to supply data to be resampled.*/
-    void * input_fn_state,       /* If needed by the input function. */
-    size_t max_ilen);            /* Maximum value for input fn. requested_len.*/
-
-/* then repeatedly call: */
-
-SOXR size_t /*odone*/ soxr_output(/* Resample and output a block of data.*/
-    soxr_t resampler,            /* As returned by soxr_create. */
-    soxr_out_t data,             /* App-supplied buffer(s) for resampled data.*/
-    size_t olen);                /* Amount of data to output; >= odone. */
-
-
-
-/* Common stream resampler operations: */
-
-SOXR soxr_error_t soxr_error(soxr_t);   /* Query error status. */
-SOXR size_t   * soxr_num_clips(soxr_t); /* Query int. clip counter (for R/W). */
-SOXR double     soxr_delay(soxr_t);  /* Query current delay in output samples.*/
-SOXR char const * soxr_engine(soxr_t);  /* Query resampling engine name. */
-
-SOXR soxr_error_t soxr_clear(soxr_t); /* Ready for fresh signal, same config. */
-SOXR void         soxr_delete(soxr_t);  /* Free resources. */
-
-
-
-/* `Short-cut', single call to resample a (probably short) signal held entirely
- * in memory.  See soxr_create and soxr_process above for parameter details.
- * Note that unlike soxr_create however, the default quality spec. for
- * soxr_oneshot is per soxr_quality_spec(SOXR_LQ, 0). */
-
-SOXR soxr_error_t soxr_oneshot(
-    double         input_rate,
-    double         output_rate,
-    unsigned       num_channels,
-    soxr_in_t    in , size_t ilen, size_t * idone,
-    soxr_out_t   out, size_t olen, size_t * odone,
-    soxr_io_spec_t const *,
-    soxr_quality_spec_t const *,
-    soxr_runtime_spec_t const *);
-
-
-
-/* For variable-rate resampling. See example # 5 for how to create a
- * variable-rate resampler and how to use this function. */
-
-SOXR soxr_error_t soxr_set_io_ratio(soxr_t, double io_ratio, size_t slew_len);
-
-
-
-/* -------------------------- API type definitions -------------------------- */
-
-typedef enum {          /* Datatypes supported for I/O to/from the resampler: */
-  /* Internal; do not use: */
-  SOXR_FLOAT32, SOXR_FLOAT64, SOXR_INT32, SOXR_INT16, SOXR_SPLIT = 4,
-
-  /* Use for interleaved channels: */
-  SOXR_FLOAT32_I = SOXR_FLOAT32, SOXR_FLOAT64_I, SOXR_INT32_I, SOXR_INT16_I,
-
-  /* Use for split channels: */
-  SOXR_FLOAT32_S = SOXR_SPLIT  , SOXR_FLOAT64_S, SOXR_INT32_S, SOXR_INT16_S
-
-} soxr_datatype_t;
-
-#define soxr_datatype_size(x)  /* Returns `sizeof' a soxr_datatype_t sample. */\
-  ((unsigned char *)"\4\10\4\2")[(x)&3]
-
-
-
-struct soxr_io_spec {                                            /* Typically */
-  soxr_datatype_t itype;     /* Input datatype.                SOXR_FLOAT32_I */
-  soxr_datatype_t otype;     /* Output datatype.               SOXR_FLOAT32_I */
-  double scale;              /* Linear gain to apply during resampling.  1    */
-  void * e;                  /* Reserved for internal use                0    */
-  unsigned long flags;       /* Per the following #defines.              0    */
-};
-
-#define SOXR_TPDF              0     /* Applicable only if otype is INT16. */
-#define SOXR_NO_DITHER         8u    /* Disable the above. */
-
-
-
-struct soxr_quality_spec {                                       /* Typically */
-  double precision;         /* Conversion precision (in bits).           20   */
-  double phase_response;    /* 0=minimum, ... 50=linear, ... 100=maximum 50   */
-  double passband_end;      /* 0dB pt. bandwidth to preserve; nyquist=1  0.913*/
-  double stopband_begin;    /* Aliasing/imaging control; > passband_end   1   */
-  void * e;                 /* Reserved for internal use.                 0   */
-  unsigned long flags;      /* Per the following #defines.                0   */
-};
-
-#define SOXR_ROLLOFF_SMALL     0u    /* <= 0.01 dB */
-#define SOXR_ROLLOFF_MEDIUM    1u    /* <= 0.35 dB */
-#define SOXR_ROLLOFF_NONE      2u    /* For Chebyshev bandwidth. */
-
-#define SOXR_HI_PREC_CLOCK     8u  /* Increase `irrational' ratio accuracy. */
-#define SOXR_DOUBLE_PRECISION 16u  /* Use D.P. calcs even if precision <= 20. */
-#define SOXR_VR               32u  /* Variable-rate resampling. */
-
-
-
-struct soxr_runtime_spec {                                       /* Typically */
-  unsigned log2_min_dft_size;   /* For DFT efficiency. [8,15]           10    */
-  unsigned log2_large_dft_size; /* For DFT efficiency. [8,20]           17    */
-  unsigned coef_size_kbytes;    /* For SOXR_COEF_INTERP_AUTO (below).   400   */
-  unsigned num_threads;         /* 0: per OMP_NUM_THREADS; 1: 1 thread.  1    */
-  void * e;                     /* Reserved for internal use.            0    */
-  unsigned long flags;          /* Per the following #defines.           0    */
-};
-                                   /* For `irrational' ratios only: */
-#define SOXR_COEF_INTERP_AUTO  0u    /* Auto select coef. interpolation. */
-#define SOXR_COEF_INTERP_LOW   2u    /* Man. select: less CPU, more memory. */
-#define SOXR_COEF_INTERP_HIGH  3u    /* Man. select: more CPU, less memory. */
-
-
-
-/* -------------------------- API type constructors ------------------------- */
-
-/* These functions allow setting of the most commonly-used structure
- * parameters, with other parameters being given default values.  The default
- * values may then be overridden, directly in the structure, if needed.  */
-
-SOXR soxr_quality_spec_t soxr_quality_spec(
-    unsigned long recipe,       /* Per the #defines immediately below. */
-    unsigned long flags);       /* As soxr_quality_spec_t.flags. */
-
-                                  /* The 5 standard qualities found in SoX: */
-#define SOXR_QQ                 0   /* 'Quick' cubic interpolation. */
-#define SOXR_LQ                 1   /* 'Low' 16-bit with larger rolloff. */
-#define SOXR_MQ                 2   /* 'Medium' 16-bit with medium rolloff. */
-#define SOXR_HQ                 SOXR_20_BITQ /* 'High quality'. */
-#define SOXR_VHQ                SOXR_28_BITQ /* 'Very high quality'. */
-
-#define SOXR_16_BITQ            3
-#define SOXR_20_BITQ            4
-#define SOXR_24_BITQ            5
-#define SOXR_28_BITQ            6
-#define SOXR_32_BITQ            7
-                                /* Reserved for internal use (to be removed): */
-#define SOXR_LSR0Q              8     /* 'Best sinc'. */
-#define SOXR_LSR1Q              9     /* 'Medium sinc'. */
-#define SOXR_LSR2Q              10    /* 'Fast sinc'. */
-
-#define SOXR_LINEAR_PHASE       0x00
-#define SOXR_INTERMEDIATE_PHASE 0x10
-#define SOXR_MINIMUM_PHASE      0x30
-
-#define SOXR_STEEP_FILTER       0x40
-
-
-
-SOXR soxr_runtime_spec_t soxr_runtime_spec(
-    unsigned num_threads);
-
-
-
-SOXR soxr_io_spec_t soxr_io_spec(
-    soxr_datatype_t itype,
-    soxr_datatype_t otype);
-
-
-
-/* --------------------------- Advanced use only ---------------------------- */
-
-/* For new designs, the following functions/usage will probably not be needed.
- * They might be useful when adding soxr into an existing design where values
- * for the resampling-rate and/or number-of-channels parameters to soxr_create
- * are not available when that function will be called.  In such cases, the
- * relevant soxr_create parameter(s) can be given as 0, then one or both of the
- * following (as appropriate) later invoked (but prior to calling soxr_process
- * or soxr_output):
- *
- * soxr_set_error(soxr, soxr_set_io_ratio(soxr, io_ratio, 0));
- * soxr_set_error(soxr, soxr_set_num_channels(soxr, num_channels));
- */
-
-SOXR soxr_error_t soxr_set_error(soxr_t, soxr_error_t);
-SOXR soxr_error_t soxr_set_num_channels(soxr_t, unsigned);
-
-
-
-#undef SOXR
-
-#if defined __cplusplus
-}
-#endif
-
-#endif
diff --git a/soxr-sys/src/soxr.rs b/soxr-sys/src/soxr.rs
index ec2a0fd72..8e8085933 100644
--- a/soxr-sys/src/soxr.rs
+++ b/soxr-sys/src/soxr.rs
@@ -1,4 +1,4 @@
-/* automatically generated by rust-bindgen 0.69.4 */
+/* automatically generated by rust-bindgen 0.71.1 */
 
 pub const SOXR_THIS_VERSION_STR: &[u8; 6] = b"0.1.3\0";
 pub const SOXR_TPDF: u32 = 0;
@@ -45,10 +45,10 @@ pub type soxr_bufs_t = *const soxr_buf_t;
 pub type soxr_cbufs_t = *const soxr_cbuf_t;
 pub type soxr_in_t = *const ::std::os::raw::c_void;
 pub type soxr_out_t = *mut ::std::os::raw::c_void;
-extern "C" {
+unsafe extern "C" {
     pub fn soxr_version() -> *const ::std::os::raw::c_char;
 }
-extern "C" {
+unsafe extern "C" {
     pub fn soxr_create(
         input_rate: f64,
         output_rate: f64,
@@ -59,7 +59,7 @@ extern "C" {
         arg4: *const soxr_runtime_spec_t,
     ) -> soxr_t;
 }
-extern "C" {
+unsafe extern "C" {
     pub fn soxr_process(
         resampler: soxr_t,
         in_: soxr_in_t,
@@ -77,7 +77,7 @@ pub type soxr_input_fn_t = ::std::option::Option<
         requested_len: usize,
     ) -> usize,
 >;
-extern "C" {
+unsafe extern "C" {
     pub fn soxr_set_input_fn(
         resampler: soxr_t,
         arg1: soxr_input_fn_t,
@@ -85,28 +85,28 @@ extern "C" {
         max_ilen: usize,
     ) -> soxr_error_t;
 }
-extern "C" {
+unsafe extern "C" {
     pub fn soxr_output(resampler: soxr_t, data: soxr_out_t, olen: usize) -> usize;
 }
-extern "C" {
+unsafe extern "C" {
     pub fn soxr_error(arg1: soxr_t) -> soxr_error_t;
 }
-extern "C" {
+unsafe extern "C" {
     pub fn soxr_num_clips(arg1: soxr_t) -> *mut usize;
 }
-extern "C" {
+unsafe extern "C" {
     pub fn soxr_delay(arg1: soxr_t) -> f64;
 }
-extern "C" {
+unsafe extern "C" {
     pub fn soxr_engine(arg1: soxr_t) -> *const ::std::os::raw::c_char;
 }
-extern "C" {
+unsafe extern "C" {
     pub fn soxr_clear(arg1: soxr_t) -> soxr_error_t;
 }
-extern "C" {
+unsafe extern "C" {
     pub fn soxr_delete(arg1: soxr_t);
 }
-extern "C" {
+unsafe extern "C" {
     pub fn soxr_oneshot(
         input_rate: f64,
         output_rate: f64,
@@ -122,7 +122,7 @@ extern "C" {
         arg3: *const soxr_runtime_spec_t,
     ) -> soxr_error_t;
 }
-extern "C" {
+unsafe extern "C" {
     pub fn soxr_set_io_ratio(arg1: soxr_t, io_ratio: f64, slew_len: usize) -> soxr_error_t;
 }
 pub const soxr_datatype_t_SOXR_FLOAT32: soxr_datatype_t = 0;
@@ -148,46 +148,16 @@ pub struct soxr_io_spec {
     pub e: *mut ::std::os::raw::c_void,
     pub flags: ::std::os::raw::c_ulong,
 }
-#[test]
-fn bindgen_test_layout_soxr_io_spec() {
-    const UNINIT: ::std::mem::MaybeUninit<soxr_io_spec> = ::std::mem::MaybeUninit::uninit();
-    let ptr = UNINIT.as_ptr();
-    assert_eq!(
-        ::std::mem::size_of::<soxr_io_spec>(),
-        32usize,
-        concat!("Size of: ", stringify!(soxr_io_spec))
-    );
-    assert_eq!(
-        ::std::mem::align_of::<soxr_io_spec>(),
-        8usize,
-        concat!("Alignment of ", stringify!(soxr_io_spec))
-    );
-    assert_eq!(
-        unsafe { ::std::ptr::addr_of!((*ptr).itype) as usize - ptr as usize },
-        0usize,
-        concat!("Offset of field: ", stringify!(soxr_io_spec), "::", stringify!(itype))
-    );
-    assert_eq!(
-        unsafe { ::std::ptr::addr_of!((*ptr).otype) as usize - ptr as usize },
-        4usize,
-        concat!("Offset of field: ", stringify!(soxr_io_spec), "::", stringify!(otype))
-    );
-    assert_eq!(
-        unsafe { ::std::ptr::addr_of!((*ptr).scale) as usize - ptr as usize },
-        8usize,
-        concat!("Offset of field: ", stringify!(soxr_io_spec), "::", stringify!(scale))
-    );
-    assert_eq!(
-        unsafe { ::std::ptr::addr_of!((*ptr).e) as usize - ptr as usize },
-        16usize,
-        concat!("Offset of field: ", stringify!(soxr_io_spec), "::", stringify!(e))
-    );
-    assert_eq!(
-        unsafe { ::std::ptr::addr_of!((*ptr).flags) as usize - ptr as usize },
-        24usize,
-        concat!("Offset of field: ", stringify!(soxr_io_spec), "::", stringify!(flags))
-    );
-}
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of soxr_io_spec"][::std::mem::size_of::<soxr_io_spec>() - 32usize];
+    ["Alignment of soxr_io_spec"][::std::mem::align_of::<soxr_io_spec>() - 8usize];
+    ["Offset of field: soxr_io_spec::itype"][::std::mem::offset_of!(soxr_io_spec, itype) - 0usize];
+    ["Offset of field: soxr_io_spec::otype"][::std::mem::offset_of!(soxr_io_spec, otype) - 4usize];
+    ["Offset of field: soxr_io_spec::scale"][::std::mem::offset_of!(soxr_io_spec, scale) - 8usize];
+    ["Offset of field: soxr_io_spec::e"][::std::mem::offset_of!(soxr_io_spec, e) - 16usize];
+    ["Offset of field: soxr_io_spec::flags"][::std::mem::offset_of!(soxr_io_spec, flags) - 24usize];
+};
 #[repr(C)]
 #[derive(Debug, Copy, Clone)]
 pub struct soxr_quality_spec {
@@ -198,61 +168,23 @@ pub struct soxr_quality_spec {
     pub e: *mut ::std::os::raw::c_void,
     pub flags: ::std::os::raw::c_ulong,
 }
-#[test]
-fn bindgen_test_layout_soxr_quality_spec() {
-    const UNINIT: ::std::mem::MaybeUninit<soxr_quality_spec> = ::std::mem::MaybeUninit::uninit();
-    let ptr = UNINIT.as_ptr();
-    assert_eq!(
-        ::std::mem::size_of::<soxr_quality_spec>(),
-        48usize,
-        concat!("Size of: ", stringify!(soxr_quality_spec))
-    );
-    assert_eq!(
-        ::std::mem::align_of::<soxr_quality_spec>(),
-        8usize,
-        concat!("Alignment of ", stringify!(soxr_quality_spec))
-    );
-    assert_eq!(
-        unsafe { ::std::ptr::addr_of!((*ptr).precision) as usize - ptr as usize },
-        0usize,
-        concat!("Offset of field: ", stringify!(soxr_quality_spec), "::", stringify!(precision))
-    );
-    assert_eq!(
-        unsafe { ::std::ptr::addr_of!((*ptr).phase_response) as usize - ptr as usize },
-        8usize,
-        concat!(
-            "Offset of field: ",
-            stringify!(soxr_quality_spec),
-            "::",
-            stringify!(phase_response)
-        )
-    );
-    assert_eq!(
-        unsafe { ::std::ptr::addr_of!((*ptr).passband_end) as usize - ptr as usize },
-        16usize,
-        concat!("Offset of field: ", stringify!(soxr_quality_spec), "::", stringify!(passband_end))
-    );
-    assert_eq!(
-        unsafe { ::std::ptr::addr_of!((*ptr).stopband_begin) as usize - ptr as usize },
-        24usize,
-        concat!(
-            "Offset of field: ",
-            stringify!(soxr_quality_spec),
-            "::",
-            stringify!(stopband_begin)
-        )
-    );
-    assert_eq!(
-        unsafe { ::std::ptr::addr_of!((*ptr).e) as usize - ptr as usize },
-        32usize,
-        concat!("Offset of field: ", stringify!(soxr_quality_spec), "::", stringify!(e))
-    );
-    assert_eq!(
-        unsafe { ::std::ptr::addr_of!((*ptr).flags) as usize - ptr as usize },
-        40usize,
-        concat!("Offset of field: ", stringify!(soxr_quality_spec), "::", stringify!(flags))
-    );
-}
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of soxr_quality_spec"][::std::mem::size_of::<soxr_quality_spec>() - 48usize];
+    ["Alignment of soxr_quality_spec"][::std::mem::align_of::<soxr_quality_spec>() - 8usize];
+    ["Offset of field: soxr_quality_spec::precision"]
+        [::std::mem::offset_of!(soxr_quality_spec, precision) - 0usize];
+    ["Offset of field: soxr_quality_spec::phase_response"]
+        [::std::mem::offset_of!(soxr_quality_spec, phase_response) - 8usize];
+    ["Offset of field: soxr_quality_spec::passband_end"]
+        [::std::mem::offset_of!(soxr_quality_spec, passband_end) - 16usize];
+    ["Offset of field: soxr_quality_spec::stopband_begin"]
+        [::std::mem::offset_of!(soxr_quality_spec, stopband_begin) - 24usize];
+    ["Offset of field: soxr_quality_spec::e"]
+        [::std::mem::offset_of!(soxr_quality_spec, e) - 32usize];
+    ["Offset of field: soxr_quality_spec::flags"]
+        [::std::mem::offset_of!(soxr_quality_spec, flags) - 40usize];
+};
 #[repr(C)]
 #[derive(Debug, Copy, Clone)]
 pub struct soxr_runtime_spec {
@@ -263,81 +195,38 @@ pub struct soxr_runtime_spec {
     pub e: *mut ::std::os::raw::c_void,
     pub flags: ::std::os::raw::c_ulong,
 }
-#[test]
-fn bindgen_test_layout_soxr_runtime_spec() {
-    const UNINIT: ::std::mem::MaybeUninit<soxr_runtime_spec> = ::std::mem::MaybeUninit::uninit();
-    let ptr = UNINIT.as_ptr();
-    assert_eq!(
-        ::std::mem::size_of::<soxr_runtime_spec>(),
-        32usize,
-        concat!("Size of: ", stringify!(soxr_runtime_spec))
-    );
-    assert_eq!(
-        ::std::mem::align_of::<soxr_runtime_spec>(),
-        8usize,
-        concat!("Alignment of ", stringify!(soxr_runtime_spec))
-    );
-    assert_eq!(
-        unsafe { ::std::ptr::addr_of!((*ptr).log2_min_dft_size) as usize - ptr as usize },
-        0usize,
-        concat!(
-            "Offset of field: ",
-            stringify!(soxr_runtime_spec),
-            "::",
-            stringify!(log2_min_dft_size)
-        )
-    );
-    assert_eq!(
-        unsafe { ::std::ptr::addr_of!((*ptr).log2_large_dft_size) as usize - ptr as usize },
-        4usize,
-        concat!(
-            "Offset of field: ",
-            stringify!(soxr_runtime_spec),
-            "::",
-            stringify!(log2_large_dft_size)
-        )
-    );
-    assert_eq!(
-        unsafe { ::std::ptr::addr_of!((*ptr).coef_size_kbytes) as usize - ptr as usize },
-        8usize,
-        concat!(
-            "Offset of field: ",
-            stringify!(soxr_runtime_spec),
-            "::",
-            stringify!(coef_size_kbytes)
-        )
-    );
-    assert_eq!(
-        unsafe { ::std::ptr::addr_of!((*ptr).num_threads) as usize - ptr as usize },
-        12usize,
-        concat!("Offset of field: ", stringify!(soxr_runtime_spec), "::", stringify!(num_threads))
-    );
-    assert_eq!(
-        unsafe { ::std::ptr::addr_of!((*ptr).e) as usize - ptr as usize },
-        16usize,
-        concat!("Offset of field: ", stringify!(soxr_runtime_spec), "::", stringify!(e))
-    );
-    assert_eq!(
-        unsafe { ::std::ptr::addr_of!((*ptr).flags) as usize - ptr as usize },
-        24usize,
-        concat!("Offset of field: ", stringify!(soxr_runtime_spec), "::", stringify!(flags))
-    );
-}
-extern "C" {
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of soxr_runtime_spec"][::std::mem::size_of::<soxr_runtime_spec>() - 32usize];
+    ["Alignment of soxr_runtime_spec"][::std::mem::align_of::<soxr_runtime_spec>() - 8usize];
+    ["Offset of field: soxr_runtime_spec::log2_min_dft_size"]
+        [::std::mem::offset_of!(soxr_runtime_spec, log2_min_dft_size) - 0usize];
+    ["Offset of field: soxr_runtime_spec::log2_large_dft_size"]
+        [::std::mem::offset_of!(soxr_runtime_spec, log2_large_dft_size) - 4usize];
+    ["Offset of field: soxr_runtime_spec::coef_size_kbytes"]
+        [::std::mem::offset_of!(soxr_runtime_spec, coef_size_kbytes) - 8usize];
+    ["Offset of field: soxr_runtime_spec::num_threads"]
+        [::std::mem::offset_of!(soxr_runtime_spec, num_threads) - 12usize];
+    ["Offset of field: soxr_runtime_spec::e"]
+        [::std::mem::offset_of!(soxr_runtime_spec, e) - 16usize];
+    ["Offset of field: soxr_runtime_spec::flags"]
+        [::std::mem::offset_of!(soxr_runtime_spec, flags) - 24usize];
+};
+unsafe extern "C" {
     pub fn soxr_quality_spec(
         recipe: ::std::os::raw::c_ulong,
         flags: ::std::os::raw::c_ulong,
     ) -> soxr_quality_spec_t;
 }
-extern "C" {
+unsafe extern "C" {
     pub fn soxr_runtime_spec(num_threads: ::std::os::raw::c_uint) -> soxr_runtime_spec_t;
 }
-extern "C" {
+unsafe extern "C" {
     pub fn soxr_io_spec(itype: soxr_datatype_t, otype: soxr_datatype_t) -> soxr_io_spec_t;
 }
-extern "C" {
+unsafe extern "C" {
     pub fn soxr_set_error(arg1: soxr_t, arg2: soxr_error_t) -> soxr_error_t;
 }
-extern "C" {
+unsafe extern "C" {
     pub fn soxr_set_num_channels(arg1: soxr_t, arg2: ::std::os::raw::c_uint) -> soxr_error_t;
 }
diff --git a/soxr-sys/src/std-types.h b/soxr-sys/src/std-types.h
deleted file mode 100644
index c5e8636ac..000000000
--- a/soxr-sys/src/std-types.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#if !defined soxr_std_types_included
-#define soxr_std_types_included
-
-#include "soxr-config.h"
-
-#include <limits.h>
-
-#if HAVE_STDBOOL_H
-  #include <stdbool.h>
-#else
-  #undef bool
-  #undef false
-  #undef true
-  #define bool int
-  #define false 0
-  #define true 1
-#endif
-
-#if HAVE_STDINT_H
-  #include <stdint.h>
-#else
-  #undef int16_t
-  #undef int32_t
-  #undef int64_t
-  #undef uint32_t
-  #undef uint64_t
-  #define int16_t short
-  #if LONG_MAX > 2147483647L
-    #define int32_t int
-    #define int64_t long
-  #elif LONG_MAX < 2147483647L
-  #error this library requires that 'long int' has at least 32-bits
-  #else
-    #define int32_t long
-    #if defined _MSC_VER
-      #define int64_t __int64
-    #else
-      #define int64_t long long
-    #endif
-  #endif
-  #define uint32_t unsigned int32_t
-  #define uint64_t unsigned int64_t
-#endif
-
-#endif
diff --git a/soxr-sys/src/util-simd.c b/soxr-sys/src/util-simd.c
deleted file mode 100644
index ec548fdee..000000000
--- a/soxr-sys/src/util-simd.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#include <assert.h>
-#include <string.h>
-#include <stdlib.h>
-
-#include "soxr-config.h"
-
-#define SIMD_ALIGNMENT (sizeof(float) * (1 + (PFFFT_DOUBLE|AVCODEC_FOUND)) * 4)
-
-void * SIMD_ALIGNED_MALLOC(size_t size)
-{
-  char * p1 = 0, * p = malloc(size + SIMD_ALIGNMENT);
-  if (p) {
-    p1 = (char *)((size_t)(p + SIMD_ALIGNMENT) & ~(SIMD_ALIGNMENT - 1));
-    *((void * *)p1 - 1) = p;
-  }
-  return p1;
-}
-
-
-
-void * SIMD_ALIGNED_CALLOC(size_t nmemb, size_t size)
-{
-  void * p = SIMD_ALIGNED_MALLOC(nmemb * size);
-  if (p)
-    memset(p, 0, nmemb * size);
-  return p;
-}
-
-
-
-void SIMD_ALIGNED_FREE(void * p1)
-{
-  if (p1)
-    free(*((void * *)p1 - 1));
-}
-
-
-
-#define PFFT_MACROS_ONLY
-#include "pffft.c"
-
-
-
-void ORDERED_CONVOLVE_SIMD(int n, void * not_used, float * a, float const * b)
-{
-  int i;
-  float ab0, ab1;
-  v4sf       *   RESTRICT   va = (v4sf       *)a;
-  v4sf const *   RESTRICT   vb = (v4sf const *)b;
-  assert(VALIGNED(a) && VALIGNED(b));
-  ab0 = a[0] * b[0], ab1 = a[1] * b[1];
-  for (i = 0; i < n / 4; i += 2) {
-    v4sf a1r = va[i+0], a1i = va[i+1];
-    v4sf b1r = vb[i+0], b1i = vb[i+1];
-    UNINTERLEAVE2(a1r, a1i, a1r, a1i);
-    UNINTERLEAVE2(b1r, b1i, b1r, b1i);
-    VCPLXMUL(a1r, a1i, b1r, b1i);
-    INTERLEAVE2(a1r, a1i, a1r, a1i);
-    va[i+0] = a1r, va[i+1] = a1i;
-  }
-  a[0] = ab0, a[1] = ab1;
-  (void)not_used;
-}
-
-
-
-void ORDERED_PARTIAL_CONVOLVE_SIMD(int n, float * a, float const * b)
-{
-  int i;
-  float ab0;
-  v4sf       *   RESTRICT   va = (v4sf       *)a;
-  v4sf const *   RESTRICT   vb = (v4sf const *)b;
-  assert(VALIGNED(a) && VALIGNED(b));
-  ab0 = a[0] * b[0];
-  for (i = 0; i < n / 4; i += 2) {
-    v4sf a1r = va[i+0], a1i = va[i+1];
-    v4sf b1r = vb[i+0], b1i = vb[i+1];
-    UNINTERLEAVE2(a1r, a1i, a1r, a1i);
-    UNINTERLEAVE2(b1r, b1i, b1r, b1i);
-    VCPLXMUL(a1r, a1i, b1r, b1i);
-    INTERLEAVE2(a1r, a1i, a1r, a1i);
-    va[i+0] = a1r, va[i+1] = a1i;
-  }
-  a[0] = ab0;
-  a[1] = b[n] * a[n] - b[n+1] * a[n+1];
-}
diff --git a/soxr-sys/src/util32s.c b/soxr-sys/src/util32s.c
deleted file mode 100644
index b9c9e08bd..000000000
--- a/soxr-sys/src/util32s.c
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#define PFFFT_DOUBLE 0
-
-#include "util32s.h"
-
-#include "util-simd.c"
diff --git a/soxr-sys/src/util32s.h b/soxr-sys/src/util32s.h
deleted file mode 100644
index 12226e501..000000000
--- a/soxr-sys/src/util32s.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#if !defined soxr_util32s_included
-#define soxr_util32s_included
-
-#include <stddef.h>
-
-void * _soxr_simd32_aligned_malloc(size_t);
-void * _soxr_simd32_aligned_calloc(size_t, size_t);
-void _soxr_simd32_aligned_free(void *);
-
-#define SIMD_ALIGNED_MALLOC _soxr_simd32_aligned_malloc
-#define SIMD_ALIGNED_CALLOC _soxr_simd32_aligned_calloc
-#define SIMD_ALIGNED_FREE _soxr_simd32_aligned_free
-
-void _soxr_ordered_convolve_simd32(int n, void * not_used, float * a, float const * b);
-void _soxr_ordered_partial_convolve_simd32(int n, float * a, float const * b);
-
-#define ORDERED_CONVOLVE_SIMD _soxr_ordered_convolve_simd32
-#define ORDERED_PARTIAL_CONVOLVE_SIMD _soxr_ordered_partial_convolve_simd32
-
-#endif
diff --git a/soxr-sys/src/util64s.c b/soxr-sys/src/util64s.c
deleted file mode 100644
index 0faa9e9ef..000000000
--- a/soxr-sys/src/util64s.c
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#define PFFFT_DOUBLE 1
-
-#include "util64s.h"
-
-#include "util-simd.c"
diff --git a/soxr-sys/src/util64s.h b/soxr-sys/src/util64s.h
deleted file mode 100644
index 7beeb8991..000000000
--- a/soxr-sys/src/util64s.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-#if !defined soxr_util64s_included
-#define soxr_util64s_included
-
-#include <stddef.h>
-
-void * _soxr_simd64_aligned_malloc(size_t);
-void * _soxr_simd64_aligned_calloc(size_t, size_t);
-void _soxr_simd64_aligned_free(void *);
-
-#define SIMD_ALIGNED_MALLOC _soxr_simd64_aligned_malloc
-#define SIMD_ALIGNED_CALLOC _soxr_simd64_aligned_calloc
-#define SIMD_ALIGNED_FREE _soxr_simd64_aligned_free
-
-void _soxr_ordered_convolve_simd64(int n, void * not_used, double * a, double const * b);
-void _soxr_ordered_partial_convolve_simd64(int n, double * a, double const * b);
-
-#define ORDERED_CONVOLVE_SIMD _soxr_ordered_convolve_simd64
-#define ORDERED_PARTIAL_CONVOLVE_SIMD _soxr_ordered_partial_convolve_simd64
-
-#endif
diff --git a/soxr-sys/src/vr-coefs.c b/soxr-sys/src/vr-coefs.c
deleted file mode 100644
index a57bec8c2..000000000
--- a/soxr-sys/src/vr-coefs.c
+++ /dev/null
@@ -1,115 +0,0 @@
-/* SoX Resampler Library         Copyright (c) 2013 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-/* Generate the filter coefficients for variable-rate resampling. */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#define PI 3.14159265358979323846            /* Since M_PI can't be relied on */
-
-static void print(double * h, int m, double l, char const * name)
-{                                                      /* Print out a filter: */
-  int i, N = l? (int)(l*m)-(l>1) : m, R=(N+1)/2;
-  int a = !l||l>1? 0:N-R, b = l>1? R:N;
-  printf("static float const %s[] = {\n", name);
-  if (l>1) printf(" 0.f,"); else if (!l) l=1;
-  for (i=a; h && i<b; ++i, printf("% .9gf,%c",l*h[i-1],"\n "[(i-a)&3 && i<b]));
-  puts("};\n");
-  free(h);
-}
-                                                  /* Parks McClellan FIR LPF: */
-#define even_adj(f) ((N&1)? 1 : cos(PI*.5*(f)))
-#define W(f) (((f) < Fp+1e-9? weight : 1) * even_adj(f))      /* Weighting fn */
-#define D(f) (((f) < Fp+1e-9) / even_adj(f))           /* Desired response fn */
-#define F(i) ((i) <= end[0]? (i)*inc[0] : 1-(end[1]-(i))*inc[1])
-#define EE(x,z) (_1 != x 1 && x E[i] > 0 && x E[i] >= x E[i z 1])
-#define PEAK do {if (k<NP+1) peak[k]=i; ++k,_1=(E[i]>0)-(E[i]<0);} while (0)
-
-typedef struct {double x, beta, gamma;} coef_t;
-
-static double amp_response(coef_t * coef, int R, double f, int i)
-{
-  double n = 0, d = 0, x = cos(PI*f), t;
-  for (; i < R; d += t = coef[i].beta / t, n += coef[i].gamma * t, ++i)
-    if (fabs(t = x - coef[i].x) < 1e-9) return coef[i].gamma;
-  return n/d;
-}
-
-static void fir(int m, double l, double Fp0, double Fs0,
-    double weight0, int density, char const * name)
-{
-  double Fp=Fp0/l, Fs=Fs0/l, weight=1/weight0, inc[2], Ws=1-Fs;
-  int N = (int)(l*m)-(l>1), R=(N+1)/2, NP=R+1, grid_size=1+density*R+1, pass=0;
-  int n1 = Ws>=(2*R-1)*Fp? 1:(int)(R*Fp/(Fp+Ws)+.5), n2=NP-n1, _1, i, j, k;
-  int    * peak = calloc(sizeof(*peak), (size_t)(NP+1)), * P=peak, end[2];
-  coef_t * coef = calloc(sizeof(*coef), (size_t)(NP));
-  float  * E    = calloc(sizeof(*E   ), (size_t)(grid_size));
-  double d, n, e, f, mult, delta, sum, hi, lo, * A = (double*)E, *h=0;
-
-  if (!P || !coef || !E) goto END;
-  end[0] = n1 * density, end[1] = grid_size-1;     /* Create prototype peaks: */
-  inc[0] = Fp/end[0],    inc[1] = n2==1? 0 : Ws / ((n2-1)*density);
-  for (i=0; i<n1; P[n1-1-i] = end[0] - i*density,++i);
-  for (i=0; i<n2; P[n1+i] = 1+end[0] + i*density,++i);
-
-  do {                                               /* Coefs for amp. resp.: */
-    for (i = 0; i<NP; coef[i].x = cos(PI*F(P[i])), ++i);
-    for (_1=-1, n=d=i=0; i < NP; ++i) {
-      for (mult = 1, j = 0; j < R; ++j) if (j != i) mult *= coef[i].x-coef[j].x;
-      if (mult) coef[i].beta = 1/mult; else goto END;
-      if (i != R) mult *= coef[i].x - coef[R].x;
-      f = F(P[i]), n += D(f)/mult, d += (_1=-_1)/(W(f)*mult);
-    }
-    for (delta = n/d, _1 = -1, i = 0; i < R; ++i)
-      f = F(P[i]), coef[i].gamma = D(f)-(_1=-_1)*delta/W(f);
-    for (i = 0; i <= end[1]; ++i)            /* Amplitude response and error: */
-      f = F(i), E[i] = (float)(W(f)*(D(f) - amp_response(coef, R, f, 0)));
-
-    i = k = _1 = 0;                                        /* Find new peaks: */
-    if (end[0]) if (EE(+,+) || EE(-,+)) PEAK;                       /* At F=0 */
-    for (++i, j = 0; j < 2; ++j) {                              /* In band j: */
-      for (; i < end[j]; ++i)
-        if ((EE(+,-) && E[i]>E[i+1]) || (EE(-,-) && E[i]<E[i+1])) PEAK;
-      if (!j) {PEAK; ++i; PEAK; ++i;}                           /* At Fp & Fs */
-    }
-    if (i==end[1]) if (EE(+,-) || EE(-,-)) PEAK;                    /* At F=1 */
-    if ((unsigned)(k = k-NP) > 1) goto END;                  /* Too many/few? */
-    P = peak + k * (fabs(E[peak[0]]) < fabs(E[peak[NP]]));         /* rm 1st? */
-
-    for (lo = hi = fabs(E[P[0]]), i=1; i<NP; ++i)              /* Converged?: */
-      e = fabs(E[P[i]]), lo = e<lo? e:lo, hi = e>hi? e:hi;
-  } while ((hi-lo)/hi > .001 && ++pass < 20);
-                      /* Create impulse response from final amp. resp. coefs: */
-  if (!(h = malloc(sizeof(*h)*(size_t)N))) goto END;
-  for (i = 0; i < R; f = 2.*i/N, A[i++] = amp_response(coef,R,f,0)*even_adj(f));
-  for (i = 0; i < R; h[N-1-i] = h[i] = sum/N, ++i)
-    for (sum=*A, j=1; j<R; sum += 2*cos(2*PI*(i-(N-1)/2.)/N*j)*A[j], ++j);
-  END: free(coef), free(E), free(peak);
-  print(h, m, l, name);
-}
-                                  /* Half-band IIR LPF (Mitra DSP 3/e, 13_9): */
-static void iir(int N, double Fp, char const * name)
-{
-  double d=tan(PI*.5*Fp), r=d*d, t=sqrt(1-r*r), n=(1-sqrt(t))/(1+sqrt(t))*.5;
-  double x=(n*n)*(n*n), Q=(((150*x+15)*x+2)*x+1)*n, q=pow(Q,.25), *h;
-  int i=0, j, _1;
-  if (!(h = malloc(sizeof(*h)*(size_t)N))) goto END;
-  for (; i<N; t=n*q/d, t=t*t, t=sqrt((1-t*r)*(1-t/r))/(1+t), h[i++]=(1-t)/(1+t))
-    for (_1=1, d=-.5, n=j=0, x=(i+1)*PI/(N+.5); j<7; ++j, _1=-_1)
-      n += _1*pow(Q,j*(j+1))*sin(x*(j+.5)), d += _1*pow(Q,j*j)*cos(x*j);
-  END: print(h, N, 0, name);
-}
-
-int main(int argc, char **argv)
-{
-  puts("/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net");
-  puts(" * Licence for this file: LGPL v2.1                  See LICENCE for details. */\n");
-
-  fir(241,  1, .45,  .5, 160, 32, "half_fir_coefs");
-  fir( 24, .5, .25,  .5,   1, 31, "fast_half_fir_coefs");
-  fir( 20, 12, .9 , 1.5, 160, 58, "coefs0_d");
-  fir( 12,  6, .45, 1.5,  80, 29, "coefs0_u");
-  iir( 15, .492, "iir_coefs");
-  return 0*argc*!argv;
-}
diff --git a/soxr-sys/src/vr-coefs.h b/soxr-sys/src/vr-coefs.h
deleted file mode 100644
index e44138ed3..000000000
--- a/soxr-sys/src/vr-coefs.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-static float const half_fir_coefs[] = {
- 0.471112154f,  0.316907549f,  0.0286963396f, -0.101927032f,
--0.0281272982f,  0.0568029535f,  0.027196876f, -0.0360795942f,
--0.0259313561f,  0.023641162f,  0.0243660538f, -0.0151238564f,
--0.0225440668f,  0.00886927471f,  0.0205146088f, -0.00411434209f,
--0.0183312132f,  0.000458525335f,  0.0160497772f,  0.00233248286f,
--0.0137265989f, -0.0044106884f,  0.011416442f,  0.005885487f,
--0.00917074467f, -0.00684373006f,  0.00703601669f,  0.00736018933f,
--0.00505250698f, -0.00750298261f,  0.00325317131f,  0.00733618346f,
--0.00166298445f, -0.00692082025f,  0.000298598848f,  0.00631493711f,
- 0.000831644129f, -0.0055731438f, -0.00172737872f,  0.00474591812f,
- 0.0023955814f, -0.0038788491f, -0.00284969263f,  0.00301194082f,
- 0.00310854264f, -0.00217906496f, -0.00319514679f,  0.00140761062f,
- 0.00313542959f, -0.000718361916f, -0.00295694328f,  0.000125607323f,
- 0.00268763625f,  0.000362527878f, -0.00235472525f, -0.000743552559f,
- 0.00198371228f,  0.00101991741f, -0.0015975797f, -0.00119820218f,
- 0.00121618271f,  0.0012882279f, -0.000855849209f, -0.00130214036f,
- 0.000529184474f,  0.00125350876f, -0.000245067778f, -0.00115647977f,
- 8.82118676e-06f,  0.00102502052f,  0.000177478031f, -0.000872275256f,
--0.000314572995f,  0.000710055602f,  0.000405526007f, -0.000548470439f,
--0.000455174442f,  0.000395698685f,  0.000469579667f, -0.000257895884f,
--0.000455495078f,  0.000139222702f,  0.000419883982f, -4.19753541e-05f,
--0.00036950051f, -3.32020844e-05f,  0.000310554015f,  8.7050045e-05f,
--0.000248456595f, -0.000121389974f,  0.000187662656f,  0.000138813233f,
--0.000131587954f, -0.000142374865f,  8.26090549e-05f,  0.000135318039f,
--4.21208043e-05f, -0.000120830917f,  1.06505085e-05f,  0.00010185819f,
- 1.20015129e-05f, -8.09558888e-05f, -2.65925299e-05f,  6.02101571e-05f,
- 3.42775752e-05f, -4.11911155e-05f, -3.64462477e-05f,  2.49654252e-05f,
- 3.46090513e-05f, -1.21078107e-05f, -3.03027209e-05f,  2.73562006e-06f,
- 2.51329043e-05f,  3.66157998e-06f, -2.0990973e-05f, -9.38752332e-06f,
- 2.07133365e-05f,  3.2060847e-05f,  1.98462364e-05f,  4.90328648e-06f,
--5.28550107e-07f,
-};
-
-static float const fast_half_fir_coefs[] = {
- 0.309418476f, -0.0819805418f,  0.0305513441f, -0.0101582224f,
- 0.00251293175f, -0.000346895324f,
-};
-
-static float const coefs0_d[] = {
- 0.f, 1.40520362e-05f,  2.32939994e-05f,  4.00699869e-05f,  6.18938797e-05f,
- 8.79406317e-05f,  0.000116304226f,  0.000143862785f,  0.000166286173f,
- 0.000178229431f,  0.00017374107f,  0.00014689118f,  9.25928444e-05f,
- 7.55567388e-06f, -0.000108723934f, -0.000253061416f, -0.000417917952f,
--0.000591117466f, -0.000756082504f, -0.000892686881f, -0.000978762367f,
--0.000992225841f, -0.00091370246f, -0.000729430325f, -0.000434153678f,
--3.36489703e-05f,  0.000453499646f,  0.000995243588f,  0.00154683724f,
- 0.00205322353f,  0.00245307376f,  0.0026843294f,  0.0026908874f,
- 0.00242986868f,  0.00187874742f,  0.00104150259f, -4.70759945e-05f,
--0.00131972748f, -0.00267834298f, -0.00399923407f, -0.00514205849f,
--0.00596200535f, -0.00632441105f, -0.00612058374f, -0.00528328869f,
--0.00380015804f, -0.0017232609f,  0.000826765169f,  0.0036632503f,
- 0.00654337507f,  0.00918536843f,  0.0112922007f,  0.0125801323f,
- 0.0128097433f,  0.0118164904f,  0.00953750551f,  0.00603133188f,
- 0.00148762708f, -0.00377544588f, -0.009327395f, -0.014655127f,
--0.0192047839f, -0.0224328082f, -0.0238620596f, -0.0231377935f,
--0.0200777417f, -0.0147104883f, -0.00729690011f,  0.0016694689f,
- 0.0114853672f,  0.02128446f,  0.0301054204f,  0.03697694f,
- 0.0410129138f,  0.0415093321f,  0.0380333749f,  0.0304950299f,
- 0.0191923285f,  0.00482304203f, -0.0115416941f, -0.0285230397f,
--0.0445368533f, -0.0579264573f, -0.0671158215f, -0.070770308f,
--0.0679502076f, -0.0582416438f, -0.0418501969f, -0.0196448429f,
- 0.00685658762f,  0.0355644891f,  0.0639556622f,  0.0892653703f,
- 0.108720484f,  0.11979613f,  0.120474745f,  0.109484562f,
- 0.0864946948f,  0.0522461633f,  0.00860233712f, -0.041491734f,
--0.0941444939f, -0.144742955f, -0.188255118f, -0.219589829f,
--0.233988169f, -0.227416437f, -0.196929062f, -0.140970726f,
--0.0595905561f,  0.0454527813f,  0.170708227f,  0.311175511f,
- 0.460568159f,  0.61168037f,  0.756833088f,  0.888367707f,
- 0.999151395f,  1.08305644f,  1.13537741f,  1.15315438f,
-};
-
-static float const coefs0_u[] = {
- 0.f, 2.4378013e-05f,  9.70782157e-05f,  0.000256572953f,  0.000527352928f,
- 0.000890796838f,  0.00124949518f,  0.00140604793f,  0.00107945998f,
--2.15586031e-05f, -0.00206589462f, -0.00493342625f, -0.00807135101f,
--0.0104515787f, -0.0107039866f, -0.00746258988f,  0.000109078838f,
- 0.0117345872f,  0.0255795186f,  0.0381690155f,  0.0448461522f,
- 0.0408218138f,  0.0226797758f, -0.00999595371f, -0.0533441602f,
--0.0987927774f, -0.133827418f, -0.144042973f, -0.116198269f,
--0.0416493482f,  0.0806808506f,  0.242643854f,  0.427127981f,
- 0.610413245f,  0.766259257f,  0.8708884f,  0.907742029f,
-};
-
-static float const iir_coefs[] = {
- 0.0262852045f,  0.0998310478f,  0.206865061f,  0.330224134f,
- 0.454420362f,  0.568578357f,  0.666944466f,  0.747869771f,
- 0.812324404f,  0.8626001f,  0.901427744f,  0.931486057f,
- 0.955191529f,  0.974661783f,  0.991776305f,
-};
-
diff --git a/soxr-sys/src/vr32.c b/soxr-sys/src/vr32.c
deleted file mode 100644
index 8b1a25937..000000000
--- a/soxr-sys/src/vr32.c
+++ /dev/null
@@ -1,651 +0,0 @@
-/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1                  See LICENCE for details. */
-
-/* Variable-rate resampling. */
-
-#include <assert.h>
-#include "math-wrap.h"
-#include <string.h>
-#include <stdlib.h>
-#include "internal.h"
-#define FIFO_SIZE_T int
-#define FIFO_MIN 0x8000
-#include "fifo.h"
-#include "vr-coefs.h"
-
-#define FADE_LEN_BITS     9
-#define PHASE_BITS_D      10
-#define PHASE_BITS_U      9
-
-#define PHASES0_D         12
-#define POLY_FIR_LEN_D    20
-#define PHASES0_U         6
-#define POLY_FIR_LEN_U    12
-
-#define MULT32            (65536. * 65536.)
-#define PHASES_D          (1 << PHASE_BITS_D)
-#define PHASES_U          (1 << PHASE_BITS_U)
-
-#define CONVOLVE \
-    _ _ _ _ _ _ _ _ _ _  _ _ _ _ _ _ _ _ _ _ \
-    _ _ _ _ _ _ _ _ _ _  _ _ _ _ _ _ _ _ _ _ \
-    _ _ _ _ _ _ _ _ _ _  _ _ _ _ _ _ _ _ _ _
-
-#define HALF_FIR_LEN_2 (iAL(half_fir_coefs) - 1)
-#define HALF_FIR_LEN_4 (HALF_FIR_LEN_2 / 2)
-
-#define _ sum += (input[-i] + input[i]) * half_fir_coefs[i], ++i;
-static float half_fir(float const * input)
-{
-  long i = 1;
-  float sum = input[0] * half_fir_coefs[0];
-  CONVOLVE CONVOLVE
-  assert(i == HALF_FIR_LEN_2 + 1);
-  return (float)sum;
-}
-#undef _
-
-#define _ sum += (input[-i] + input[i]) * half_fir_coefs[2*i], ++i;
-static float double_fir0(float const * input)
-{
-  int i = 1;
-  float sum = input[0] * half_fir_coefs[0];
-  CONVOLVE
-  assert(i == HALF_FIR_LEN_4 + 1);
-  return (float)(sum * 2);
-}
-#undef _
-
-#define _ sum += (input[-i] + input[1+i]) * half_fir_coefs[2*i+1], ++i;
-static float double_fir1(float const * input)
-{
-  int i = 0;
-  float sum = 0;
-  CONVOLVE
-  assert(i == HALF_FIR_LEN_4 + 0);
-  return (float)(sum * 2);
-}
-#undef _
-
-static float fast_half_fir(float const * input)
-{
-  int i = 0;
-  float sum = input[0] * .5f;
-#define _ sum += (input[-(2*i+1)] + input[2*i+1]) * fast_half_fir_coefs[i], ++i;
-  _ _ _ _ _ _
-#undef _
-  return (float)sum;
-}
-
-#define IIR_FILTER _ _ _ _ _ _ _
-#define _ in1=(in1-p->y[i])*iir_coefs[i]+tmp1;tmp1=p->y[i],p->y[i]=in1;++i;\
-          in0=(in0-p->y[i])*iir_coefs[i]+tmp0;tmp0=p->y[i],p->y[i]=in0;++i;
-
-typedef struct {float x[2], y[AL(iir_coefs)];} half_iir_t;
-
-static float half_iir1(half_iir_t * p, float in0, float in1)
-{
-  int i = 0;
-  float tmp0, tmp1;
-  tmp0 = p->x[0], p->x[0] = in0;
-  tmp1 = p->x[1], p->x[1] = in1;
-  IIR_FILTER
-  p->y[i] = in1 = (in1 - p->y[i]) * iir_coefs[i] + tmp1;
-  return in1 + in0;
-}
-#undef _
-
-static void half_iir(half_iir_t * p, float * obuf, float const * ibuf, int olen)
-{
-  int i;
-  for (i=0; i < olen; obuf[i] = (float)half_iir1(p, ibuf[i*2], ibuf[i*2+1]),++i);
-}
-
-static void half_phase(half_iir_t * p, float * buf, int len)
-{
-  float const small_normal = 1/MULT32/MULT32; /* To quash denormals on path 0.*/
-  int i;
-  for (i = 0; i < len; buf[i] = (float)half_iir1(p, buf[i], 0), ++i);
-#define _ p->y[i] += small_normal, i += 2;
-  i = 0, _ IIR_FILTER
-#undef _
-#define _ p->y[i] -= small_normal, i += 2;
-  i = 0, _ IIR_FILTER
-#undef _
-}
-
-#define coef(coef_p, interp_order, fir_len, phase_num, coef_interp_num, \
-    fir_coef_num) coef_p[(fir_len) * ((interp_order) + 1) * (phase_num) + \
-    ((interp_order) + 1) * (fir_coef_num) + (interp_order - coef_interp_num)]
-
-#define COEF(h,l,i) ((i)<0||(i)>=(l)?0:(h)[(i)>(l)/2?(l)-(i):(i)])
-static void prepare_coefs(float * coefs, int n, int phases0, int phases,
-    float const * coefs0, double multiplier)
-{
-  double k[6];
-  int length0 = n * phases0, length = n * phases, K0 = iAL(k)/2 - 1, i, j, pos;
-  float * coefs1 = malloc(((size_t)length / 2  + 1) * sizeof(*coefs1));
-  float * p = coefs1, f0, f1 = 0;
-
-  for (j = 0; j < iAL(k); k[j] = COEF(coefs0, length0, j - K0), ++j);
-  for (pos = i = 0; i < length0 / 2; ++i) {
-    double b=(1/24.)*(k[0]+k[4]+6*k[2]-4*(k[1]+k[3])),d=.5*(k[1]+k[3])-k[2]-b;
-    double a=(1/120.)*(k[5]-k[2]-9*(9*b+d)+2.5*(k[3]-k[1])-2*(k[4]-k[0]));
-    double c=(1/12.)*(k[4]-k[0]-2*(k[3]-k[1])-60*a),e=.5*(k[3]-k[1])-a-c;
-    for (; pos / phases == i; pos += phases0) {
-      double x = (double)(pos % phases) / phases;
-      *p++ = (float)(k[K0] + ((((a*x + b)*x + c)*x + d)*x + e)*x);
-    }
-    for (j = 0; j < iAL(k) - 1; k[j] = k[j + 1], ++j);
-    k[j] = COEF(coefs0, length0, i + iAL(k) / 2 + 1);
-  }
-  if (!(length & 1))
-    *p++ = (float)k[K0];
-  assert(p - coefs1 == length / 2  + 1);
-
-  for (i = 0; i < n; ++i) for (j = phases - 1; j >= 0; --j, f1 = f0) {
-    pos = (n - 1 - i) * phases + j;
-    f0 = COEF(coefs1, length, pos) * (float)multiplier;
-    coef(coefs, 1, n, j, 0, i) = (float)f0;
-    coef(coefs, 1, n, j, 1, i) = (float)(f1 - f0);
-  }
-  free(coefs1);
-}
-
-#define _ sum += (b *x + a)*input[i], ++i;
-#define a (coef(poly_fir_coefs_d, 1, POLY_FIR_LEN_D, phase, 0,i))
-#define b (coef(poly_fir_coefs_d, 1, POLY_FIR_LEN_D, phase, 1,i))
-static float poly_fir_coefs_d[POLY_FIR_LEN_D * PHASES_D * 2];
-
-static float poly_fir1_d(float const * input, uint32_t frac)
-{
-  int i = 0, phase = (int)(frac >> (32 - PHASE_BITS_D));
-  float sum = 0, x = (float)(frac << PHASE_BITS_D) * (float)(1 / MULT32);
-  _ _ _ _ _  _ _ _ _ _  _ _ _ _ _  _ _ _ _ _
-  assert(i == POLY_FIR_LEN_D);
-  return (float)sum;
-}
-#undef a
-#undef b
-#define a (coef(poly_fir_coefs_u, 1, POLY_FIR_LEN_U, phase, 0,i))
-#define b (coef(poly_fir_coefs_u, 1, POLY_FIR_LEN_U, phase, 1,i))
-static float poly_fir_coefs_u[POLY_FIR_LEN_U * PHASES_U * 2];
-
-static float poly_fir1_u(float const * input, uint32_t frac)
-{
-  int i = 0, phase = (int)(frac >> (32 - PHASE_BITS_U));
-  float sum = 0, x = (float)(frac << PHASE_BITS_U) * (float)(1 / MULT32);
-  _ _ _ _ _  _ _ _ _ _  _ _
-  assert(i == POLY_FIR_LEN_U);
-  return (float)sum;
-}
-#undef a
-#undef b
-#undef _
-
-#define ADD_TO(x,y)           x.all += y.all
-#define SUBTRACT_FROM(x,y)    x.all -= y.all
-#define FRAC(x)               x.part.frac
-#define INT(x)                x.part.integer
-
-typedef struct {
-  union {
-    int64_t all;
-#if HAVE_BIGENDIAN
-    struct {int32_t integer; uint32_t frac;} part;
-#else
-    struct {uint32_t frac; int32_t integer;} part;
-#endif
-  } at, step, step_step;
-  float const * input;
-  int len, stage_num;
-  bool is_d; /* true: downsampling at x2 rate; false: upsampling at 1x rate. */
-  double step_mult;
-} stream_t;
-
-static int poly_fir_d(stream_t * s, float * output, int olen)
-{
-  int i;
-  float const * input = s->input - POLY_FIR_LEN_D / 2 + 1;
-  for (i = 0; i < olen && INT(s->at) < s->len; ++i) {
-    output[i] = poly_fir1_d(input + INT(s->at), FRAC(s->at));
-    ADD_TO(s->at, s->step);
-    if (!(INT(s->at) < s->len)) {
-      SUBTRACT_FROM(s->at, s->step);
-      break;
-    }
-    output[++i] = poly_fir1_d(input + INT(s->at), FRAC(s->at));
-    ADD_TO(s->at, s->step);
-    ADD_TO(s->step, s->step_step);
-  }
-  return i;
-}
-
-static int poly_fir_fade_d(
-    stream_t * s, float const * vol, int step, float * output, int olen)
-{
-  int i;
-  float const * input = s->input - POLY_FIR_LEN_D / 2 + 1;
-  for (i = 0; i < olen && INT(s->at) < s->len; ++i, vol += step) {
-    output[i] += *vol * poly_fir1_d(input + INT(s->at), FRAC(s->at));
-    ADD_TO(s->at, s->step);
-    if (!(INT(s->at) < s->len)) {
-      SUBTRACT_FROM(s->at, s->step);
-      break;
-    }
-    output[++i] += *(vol += step) * poly_fir1_d(input + INT(s->at),FRAC(s->at));
-    ADD_TO(s->at, s->step);
-    ADD_TO(s->step, s->step_step);
-  }
-  return i;
-}
-
-static int poly_fir_u(stream_t * s, float * output, int olen)
-{
-  int i;
-  float const * input = s->input - POLY_FIR_LEN_U / 2 + 1;
-  for (i = 0; i < olen && INT(s->at) < s->len; ++i) {
-    output[i] = poly_fir1_u(input + INT(s->at), FRAC(s->at));
-    ADD_TO(s->at, s->step);
-    ADD_TO(s->step, s->step_step);
-  }
-  return i;
-}
-
-static int poly_fir_fade_u(
-    stream_t * s, float const * vol, int step, float * output, int olen)
-{
-  int i;
-  float const * input = s->input - POLY_FIR_LEN_U / 2 + 1;
-  for (i = 0; i < olen && INT(s->at) < s->len; i += 2, vol += step) {
-    output[i] += *vol * poly_fir1_u(input + INT(s->at), FRAC(s->at));
-    ADD_TO(s->at, s->step);
-    ADD_TO(s->step, s->step_step);
-  }
-  return i;
-}
-
-#define shiftr(x,by) ((by) < 0? (x) << (-(by)) : (x) >> (by))
-#define shiftl(x,by) shiftr(x,-(by))
-#define stage_occupancy(s) (fifo_occupancy(&(s)->fifo) - 4*HALF_FIR_LEN_2)
-#define stage_read_p(s) ((float *)fifo_read_ptr(&(s)->fifo) + 2*HALF_FIR_LEN_2)
-#define stage_preload(s) memset(fifo_reserve(&(s)->fifo, (s)->preload), \
-    0, sizeof(float) * (size_t)(s)->preload);
-
-typedef struct {
-  fifo_t fifo;
-  double step_mult;
-  int is_fast, x_fade_len, preload;
-} stage_t;
-
-typedef struct {
-  int num_stages0, num_stages, flushing;
-  int fade_len, slew_len, xfade, stage_inc, switch_stage_num;
-  double new_io_ratio, default_io_ratio;
-  stage_t * stages;
-  fifo_t output_fifo;
-  half_iir_t halfer;
-  stream_t current, fadeout; /* Current/fade-in, fadeout streams. */
-} rate_t;
-
-static float fade_coefs[(2 << FADE_LEN_BITS) + 1];
-
-static void vr_init(rate_t * p, double default_io_ratio, int num_stages, double mult)
-{
-  int i;
-  assert(num_stages >= 0);
-  memset(p, 0, sizeof(*p));
-
-  p->num_stages0 = num_stages;
-  p->num_stages = num_stages = max(num_stages, 1);
-  p->stages = (stage_t *)calloc((unsigned)num_stages + 1, sizeof(*p->stages)) + 1;
-  for (i = -1; i < p->num_stages; ++i) {
-    stage_t * s = &p->stages[i];
-    fifo_create(&s->fifo, sizeof(float));
-    s->step_mult = 2 * MULT32 / shiftl(2, i);
-    s->preload = i < 0? 0 : i == 0? 2 * HALF_FIR_LEN_2 : 3 * HALF_FIR_LEN_2 / 2;
-    stage_preload(s);
-    s->is_fast = true;
-    lsx_debug("%-3i preload=%i", i, s->preload);
-  }
-  fifo_create(&p->output_fifo, sizeof(float));
-  p->default_io_ratio = default_io_ratio;
-  if (fade_coefs[0]==0) {
-    for (i = 0; i < iAL(fade_coefs); ++i)
-      fade_coefs[i] = (float)(.5 * (1 + cos(M_PI * i / (AL(fade_coefs) - 1))));
-    prepare_coefs(poly_fir_coefs_u, POLY_FIR_LEN_U, PHASES0_U, PHASES_U, coefs0_u, mult);
-    prepare_coefs(poly_fir_coefs_d, POLY_FIR_LEN_D, PHASES0_D, PHASES_D, coefs0_d, mult *.5);
-  }
-  assert(fade_coefs[0]);
-}
-
-static void enter_new_stage(rate_t * p, int occupancy0)
-{
-  p->current.len = shiftr(occupancy0, p->current.stage_num);
-  p->current.input = stage_read_p(&p->stages[p->current.stage_num]);
-
-  p->current.step_mult = p->stages[p->current.stage_num].step_mult;
-  p->current.is_d = p->current.stage_num >= 0;
-  if (p->current.is_d)
-    p->current.step_mult *= .5;
-}
-
-static void set_step(stream_t * p, double io_ratio)
-{
-  p->step.all = (int64_t)(io_ratio * p->step_mult + .5);
-}
-
-static bool set_step_step(stream_t * p, double io_ratio, int slew_len)
-{
-  int64_t dif;
-  int difi;
-  stream_t tmp = *p;
-  set_step(&tmp, io_ratio);
-  dif = tmp.step.all - p->step.all;
-  dif = dif < 0? dif - (slew_len >> 1) : dif + (slew_len >> 1);
-  difi = (int)dif;   /* Try to avoid int64_t div. */
-  p->step_step.all = difi == dif? difi / slew_len : dif / slew_len;
-  return p->step_step.all != 0;
-}
-
-static void vr_set_io_ratio(rate_t * p, double io_ratio, size_t slew_len)
-{
-  assert(io_ratio > 0);
-  if (slew_len) {
-    if (!set_step_step(&p->current, io_ratio, p->slew_len = (int)slew_len))
-      p->slew_len = 0, p->new_io_ratio = 0, p->fadeout.step_step.all = 0;
-    else {
-      p->new_io_ratio = io_ratio;
-      if (p->fade_len)
-        set_step_step(&p->fadeout, io_ratio, p->slew_len);
-    }
-  }
-  else {
-    if (p->default_io_ratio!=0) { /* Then this is the first call to this fn. */
-      int octave = (int)floor(log(io_ratio) / M_LN2);
-      p->current.stage_num = octave < 0? -1 : min(octave, p->num_stages0-1);
-      enter_new_stage(p, 0);
-    }
-    else if (p->fade_len)
-      set_step(&p->fadeout, io_ratio);
-    set_step(&p->current, io_ratio);
-    if (p->default_io_ratio!=0) FRAC(p->current.at) = FRAC(p->current.step) >> 1;
-    p->default_io_ratio = 0;
-  }
-}
-
-static bool do_input_stage(rate_t * p, int stage_num, int sign, int min_stage_num)
-{
-  int i = 0;
-  float * dest;
-  stage_t * s = &p->stages[stage_num];
-  stage_t * s1 = &p->stages[stage_num - sign];
-  float const * src = (float *)fifo_read_ptr(&s1->fifo) + HALF_FIR_LEN_2;
-  int len = shiftr(fifo_occupancy(&s1->fifo) - HALF_FIR_LEN_2 * 2, sign);
-  int already_done = fifo_occupancy(&s->fifo) - s->preload;
-  if ((len -= already_done) <= 0)
-    return false;
-  src += shiftl(already_done, sign);
-
-  dest = fifo_reserve(&s->fifo, len);
-  if (stage_num < 0) for (; i < len; ++src)
-    dest[i++] = double_fir0(src), dest[i++] = double_fir1(src);
-  else {
-    bool should_be_fast = p->stage_inc;
-    if (!s->x_fade_len && stage_num == p->switch_stage_num) {
-      p->switch_stage_num = 0;
-      if (s->is_fast != should_be_fast) {
-        s->x_fade_len = 1 << FADE_LEN_BITS, s->is_fast = should_be_fast, ++p->xfade;
-        lsx_debug("xfade level %i, inc?=%i", stage_num, p->stage_inc);
-      }
-    }
-    if (s->x_fade_len) {
-      float const * vol1 = fade_coefs + (s->x_fade_len << 1);
-      float const * vol2 = fade_coefs + (((1 << FADE_LEN_BITS) - s->x_fade_len) << 1);
-      int n = min(len, s->x_fade_len);
-      /*lsx_debug("xfade level %i, inc?=%i len=%i n=%i", stage_num, p->stage_inc, s->x_fade_len, n);*/
-      if (should_be_fast)
-        for (; i < n; vol2 += 2, vol1 -= 2, src += 2)
-          dest[i++] = *vol1 * fast_half_fir(src) + *vol2 * half_fir(src);
-      else for (; i < n; vol2 += 2, vol1 -= 2, src += 2)
-        dest[i++] = *vol2 * fast_half_fir(src) + *vol1 * half_fir(src);
-      s->x_fade_len -= n;
-      p->xfade -= !s->x_fade_len;
-    }
-    if (stage_num < min_stage_num)
-      for (; i < len; dest[i++] = fast_half_fir(src), src += 2);
-    else for (; i < len; dest[i++] = half_fir(src), src += 2);
-  }
-  if (p->flushing > 0)
-    stage_preload(s);
-  return true;
-}
-
-static int vr_process(rate_t * p, int olen0)
-{
-  assert(p->num_stages > 0);
-  if (p->default_io_ratio!=0)
-    vr_set_io_ratio(p, p->default_io_ratio, 0);
-  {
-    float * output = fifo_reserve(&p->output_fifo, olen0);
-    int j, odone0 = 0, min_stage_num = p->current.stage_num;
-    int occupancy0, max_stage_num = min_stage_num;
-    if (p->fade_len) {
-      min_stage_num = min(min_stage_num, p->fadeout.stage_num);
-      max_stage_num = max(max_stage_num, p->fadeout.stage_num);
-    }
-
-    for (j = min(min_stage_num, 0); j <= max_stage_num; ++j)
-      if (j && !do_input_stage(p, j, j < 0? -1 : 1, min_stage_num))
-        break;
-    if (p->flushing > 0)
-      p->flushing = -1;
-
-    occupancy0 = shiftl(max(0,stage_occupancy(&p->stages[max_stage_num])), max_stage_num);
-    p->current.len = shiftr(occupancy0, p->current.stage_num);
-    p->current.input = stage_read_p(&p->stages[p->current.stage_num]);
-    if (p->fade_len) {
-      p->fadeout.len = shiftr(occupancy0, p->fadeout.stage_num);
-      p->fadeout.input = stage_read_p(&p->stages[p->fadeout.stage_num]);
-    }
-
-    while (odone0 < olen0) {
-      int odone, odone2, olen = olen0 - odone0, stage_dif = 0, shift;
-      float buf[64 << 1];
-
-      olen = min(olen, (int)(AL(buf) >> 1));
-      if (p->slew_len)
-        olen = min(olen, p->slew_len);
-      else if (p->new_io_ratio!=0) {
-        set_step(&p->current, p->new_io_ratio);
-        set_step(&p->fadeout, p->new_io_ratio);
-        p->fadeout.step_step.all = p->current.step_step.all = 0;
-        p->new_io_ratio = 0;
-      }
-      if (!p->flushing && !p->fade_len && !p->xfade) {
-        if (p->current.is_d) {
-          if (INT(p->current.step) && FRAC(p->current.step))
-            stage_dif = 1, ++max_stage_num;
-          else if (!INT(p->current.step) && FRAC(p->current.step) < (1u << 31))
-            stage_dif = -1, --min_stage_num;
-        } else if (INT(p->current.step) > 1 && FRAC(p->current.step))
-          stage_dif = 1, ++max_stage_num;
-      }
-      if (stage_dif) {
-        int n = p->current.stage_num + stage_dif;
-        if (n >= p->num_stages)
-          --max_stage_num;
-        else {
-          p->stage_inc = stage_dif > 0;
-          p->fadeout = p->current;
-          p->current.stage_num += stage_dif;
-          if (!p->stage_inc)
-          p->switch_stage_num = p->current.stage_num;
-          if ((p->current.stage_num < 0 && stage_dif < 0) ||
-              (p->current.stage_num > 0 && stage_dif > 0)) {
-            stage_t * s = &p->stages[p->current.stage_num];
-            fifo_clear(&s->fifo);
-            stage_preload(s);
-            s->is_fast = false;
-            do_input_stage(p, p->current.stage_num, stage_dif, p->current.stage_num);
-          }
-          if (p->current.stage_num > 0 && stage_dif < 0) {
-            int idone = INT(p->current.at);
-            stage_t * s = &p->stages[p->current.stage_num];
-            fifo_trim_to(&s->fifo, 2 * HALF_FIR_LEN_2 + idone + (POLY_FIR_LEN_D >> 1));
-            do_input_stage(p, p->current.stage_num, 1, p->current.stage_num);
-          }
-          enter_new_stage(p, occupancy0);
-          shift = -stage_dif;
-#define lshift(x,by) (x)=(by)>0?(x)<<(by):(x)>>-(by)
-          lshift(p->current.at.all, shift);
-          shift += p->fadeout.is_d - p->current.is_d;
-          lshift(p->current.step.all, shift);
-          lshift(p->current.step_step.all, shift);
-          p->fade_len = AL(fade_coefs) - 1;
-          lsx_debug("switch from stage %i to %i, x2 from %i to %i", p->fadeout.stage_num, p->current.stage_num, p->fadeout.is_d, p->current.is_d);
-        }
-      }
-
-      if (p->fade_len) {
-        float const * vol1 = fade_coefs + p->fade_len;
-        float const * vol2 = fade_coefs + (iAL(fade_coefs) - 1 - p->fade_len);
-        int olen2 = (olen = min(olen, p->fade_len >> 1)) << 1;
-
-        /* x2 is more fine-grained so may fail to produce a pair of samples
-         * where x1 would not (the x1 second sample is a zero so is always
-         * available).  So do x2 first, then feed odone to the second one. */
-        memset(buf, 0, sizeof(*buf) * (size_t)olen2);
-        if (p->current.is_d && p->fadeout.is_d) {
-          odone  = poly_fir_fade_d(&p->current, vol1,-1, buf, olen2);
-          odone2 = poly_fir_fade_d(&p->fadeout, vol2, 1, buf, odone);
-        } else if (p->current.is_d) {
-          odone  = poly_fir_fade_d(&p->current, vol1,-1, buf, olen2);
-          odone2 = poly_fir_fade_u(&p->fadeout, vol2, 2, buf, odone);
-        } else {
-          assert(p->fadeout.is_d);
-          odone  = poly_fir_fade_d(&p->fadeout, vol2, 1, buf, olen2);
-          odone2 = poly_fir_fade_u(&p->current, vol1,-2, buf, odone);
-        }
-        assert(odone == odone2);
-        (void)odone2;
-        p->fade_len -= odone;
-        if (!p->fade_len) {
-          if (p->stage_inc)
-            p->switch_stage_num = min_stage_num++;
-          else
-            --max_stage_num;
-        }
-        half_iir(&p->halfer, &output[odone0], buf, odone >>= 1);
-      }
-      else if (p->current.is_d) {
-        odone = poly_fir_d(&p->current, buf, olen << 1) >> 1;
-        half_iir(&p->halfer, &output[odone0], buf, odone);
-      }
-      else {
-        odone = poly_fir_u(&p->current, &output[odone0], olen);
-        if (p->num_stages0)
-          half_phase(&p->halfer, &output[odone0], odone);
-      }
-      odone0 += odone;
-      if (p->slew_len)
-        p->slew_len -= odone;
-      if (odone != olen)
-        break; /* Need more input. */
-    } {
-      int from = max(0, max_stage_num), to = min(0, min_stage_num);
-      int i, idone = shiftr(INT(p->current.at), from - p->current.stage_num);
-      INT(p->current.at) -= shiftl(idone, from - p->current.stage_num);
-      if (p->fade_len)
-        INT(p->fadeout.at) -= shiftl(idone, from - p->fadeout.stage_num);
-      for (i = from; i >= to; --i, idone <<= 1)
-        fifo_read(&p->stages[i].fifo, idone, NULL);
-    }
-    fifo_trim_by(&p->output_fifo, olen0 - odone0);
-    return odone0;
-  }
-}
-
-static float * vr_input(rate_t * p, float const * input, size_t n)
-{
-  return fifo_write(&p->stages[0].fifo, (int)n, input);
-}
-
-static float const * vr_output(rate_t * p, float * output, size_t * n)
-{
-  fifo_t * fifo = &p->output_fifo;
-  if (1 || !p->num_stages0)
-    return fifo_read(fifo, (int)(*n = min(*n, (size_t)fifo_occupancy(fifo))), output);
-  else { /* Ignore this complication for now. */
-    int const IIR_DELAY = 2;
-    float * ptr = fifo_read_ptr(fifo);
-    int olen = min((int)*n, max(0, fifo_occupancy(fifo) - IIR_DELAY));
-    *n = (size_t)olen;
-    if (output)
-      memcpy(output, ptr + IIR_DELAY, *n * sizeof(*output));
-    fifo_read(fifo, olen, NULL);
-    return ptr + IIR_DELAY;
-  }
-}
-
-static void vr_flush(rate_t * p)
-{
-  if (!p->flushing) {
-    stage_preload(&p->stages[0]);
-    ++p->flushing;
-  }
-}
-
-static void vr_close(rate_t * p)
-{
-  int i;
-
-  fifo_delete(&p->output_fifo);
-  for (i = -1; i < p->num_stages; ++i) {
-    stage_t * s = &p->stages[i];
-    fifo_delete(&s->fifo);
-  }
-  free(p->stages - 1);
-}
-
-static double vr_delay(rate_t * p)
-{
-  return 100; /* TODO */
-  (void)p;
-}
-
-static void vr_sizes(size_t * shared, size_t * channel)
-{
-  *shared = 0;
-  *channel = sizeof(rate_t);
-}
-
-static char const * vr_create(void * channel, void * shared,double max_io_ratio,
-    void * q_spec, void * r_spec, double scale)
-{
-  double x = max_io_ratio;
-  int n;
-  for (n = 0; x > 1; x *= .5, ++n);
-  vr_init(channel, max_io_ratio, n, scale);
-  return 0;
-  (void)shared, (void)q_spec, (void)r_spec;
-}
-
-static char const * vr_id(void)
-{
-  return "vr32";
-}
-
-typedef void (* fn_t)(void);
-fn_t _soxr_vr32_cb[] = {
-  (fn_t)vr_input,
-  (fn_t)vr_process,
-  (fn_t)vr_output,
-  (fn_t)vr_flush,
-  (fn_t)vr_close,
-  (fn_t)vr_delay,
-  (fn_t)vr_sizes,
-  (fn_t)vr_create,
-  (fn_t)vr_set_io_ratio,
-  (fn_t)vr_id,
-};

From 37bebf39844073dac55f2cf231cef61eb8330a36 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?The=CC=81o=20Monnom?= <theo.8bits@gmail.com>
Date: Fri, 21 Mar 2025 22:26:19 +0100
Subject: [PATCH 6/8] test

---
 livekit-ffi/src/server/resampler.rs | 57 ++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 22 deletions(-)

diff --git a/livekit-ffi/src/server/resampler.rs b/livekit-ffi/src/server/resampler.rs
index 6faa68b02..a7a7a7d8e 100644
--- a/livekit-ffi/src/server/resampler.rs
+++ b/livekit-ffi/src/server/resampler.rs
@@ -32,74 +32,84 @@ pub struct SoxResampler {
 unsafe impl Send for SoxResampler {}
 
 impl SoxResampler {
+    /// Creates a new SoxResampler using soxr's default quality and runtime options.
+    /// The provided `QualitySpec` and `RuntimeSpec` are ignored and null pointers are passed
+    /// to `soxr_create` to let soxr choose its defaults.
     pub fn new(
         input_rate: f64,
         output_rate: f64,
         num_channels: u32,
         io_spec: IOSpec,
-        quality_spec: QualitySpec,
-        runtime_spec: RuntimeSpec,
+        _quality_spec: QualitySpec, // ignored – using default soxr options
+        _runtime_spec: RuntimeSpec, // ignored – using default soxr options
     ) -> Result<Self, String> {
-        let error: *mut *const c_char = std::ptr::null_mut();
+        let mut err: *const c_char = std::ptr::null();
 
         let soxr_ptr = unsafe {
+            // Create io_spec from our types.
             let io_spec = soxr_sys::soxr_io_spec(
                 to_soxr_datatype(io_spec.input_type),
                 to_soxr_datatype(io_spec.output_type),
             );
 
-            let quality_spec = soxr_sys::soxr_quality_spec(
-                quality_spec.quality as c_ulong,
-                quality_spec.flags as c_ulong,
-            );
-
-            let runtime_spec = soxr_sys::soxr_runtime_spec(runtime_spec.num_threads);
-
+            // Pass null pointers for quality and runtime specs so that
+            // soxr will use its internal default options.
             soxr_sys::soxr_create(
                 input_rate,
                 output_rate,
                 num_channels,
-                error,
-                &io_spec,
-                &quality_spec,
-                &runtime_spec,
+                &mut err,
+                std::ptr::null(), // default io_spec
+                std::ptr::null(), // default quality
+                std::ptr::null(), // default runtime
             )
         };
 
-        if !error.is_null() {
-            let error_msg = unsafe { std::ffi::CStr::from_ptr(*error) };
+        if !err.is_null() || soxr_ptr.is_null() {
+            let error_msg = unsafe { std::ffi::CStr::from_ptr(err) };
             return Err(error_msg.to_string_lossy().to_string());
         }
 
         Ok(Self { soxr_ptr, out_buf: Vec::new(), input_rate, output_rate, num_channels })
     }
 
+    /// Processes the input buffer and returns the resampled output.
+    /// This version verifies that the input length is a multiple of the number of channels
+    /// and uses valid pointers for tracking the number of frames consumed and produced.
     pub fn push(&mut self, input: &[i16]) -> Result<&[i16], String> {
+        // Ensure the input length is a multiple of the channel count.
+        if input.len() % self.num_channels as usize != 0 {
+            return Err("Input length must be a multiple of num_channels".to_string());
+        }
+
         let input_length = input.len() / self.num_channels as usize;
         let ratio = self.output_rate / self.input_rate;
-        let soxr_delay = unsafe { soxr_sys::soxr_delay(self.soxr_ptr) };
+        let delay = unsafe { soxr_sys::soxr_delay(self.soxr_ptr) };
 
+        // Estimate maximum output frames: processed frames + delay + an extra frame.
         let max_out_len =
-            ((input_length as f64 * ratio).ceil() as usize) + (soxr_delay.ceil() as usize) + 1;
+            (input_length as f64 * ratio).ceil() as usize + (delay.ceil() as usize) + 1;
 
         let required_output_size = max_out_len * self.num_channels as usize;
         if self.out_buf.len() < required_output_size {
             self.out_buf.resize(required_output_size, 0);
         }
 
-        let mut idone: usize = 0;
+        // Using valid pointers for both consumed input (idone) and produced output (odone)
         let mut odone: usize = 0;
+
         let error = unsafe {
             soxr_sys::soxr_process(
                 self.soxr_ptr,
                 input.as_ptr() as *const c_void,
                 input_length,
-                &mut idone,
+                std::ptr::null_mut(),
                 self.out_buf.as_mut_ptr() as *mut c_void,
                 max_out_len,
                 &mut odone,
             )
         };
+
         if !error.is_null() {
             let error_msg = unsafe { std::ffi::CStr::from_ptr(error) };
             return Err(error_msg.to_string_lossy().to_string());
@@ -109,19 +119,22 @@ impl SoxResampler {
         Ok(&self.out_buf[..output_samples])
     }
 
+    /// Flushes the internal state, processing any remaining data.
+    /// Passes null for the input pointer and for the idone parameter (since it is not needed).
     pub fn flush(&mut self) -> Result<&[i16], String> {
         let mut odone: usize = 0;
         let error = unsafe {
             soxr_sys::soxr_process(
                 self.soxr_ptr,
-                std::ptr::null(),
+                std::ptr::null(), // no more input
                 0,
-                std::ptr::null_mut(),
+                std::ptr::null_mut(), // no need to know how many were consumed
                 self.out_buf.as_mut_ptr() as *mut c_void,
                 self.out_buf.len(),
                 &mut odone,
             )
         };
+
         if !error.is_null() {
             let error_msg = unsafe { std::ffi::CStr::from_ptr(error) };
             return Err(error_msg.to_string_lossy().to_string());

From e814b1ab8b99be148937ed29e781d323922ad739 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?The=CC=81o=20Monnom?= <theo.8bits@gmail.com>
Date: Fri, 21 Mar 2025 22:35:49 +0100
Subject: [PATCH 7/8] Update resampler.rs

---
 livekit-ffi/src/server/resampler.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/livekit-ffi/src/server/resampler.rs b/livekit-ffi/src/server/resampler.rs
index a7a7a7d8e..908343a64 100644
--- a/livekit-ffi/src/server/resampler.rs
+++ b/livekit-ffi/src/server/resampler.rs
@@ -59,7 +59,7 @@ impl SoxResampler {
                 output_rate,
                 num_channels,
                 &mut err,
-                std::ptr::null(), // default io_spec
+                &io_spec,
                 std::ptr::null(), // default quality
                 std::ptr::null(), // default runtime
             )

From 46877b974484ac06256d381ccdb4ea0c57e34191 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?The=CC=81o=20Monnom?= <theo.8bits@gmail.com>
Date: Fri, 21 Mar 2025 22:54:19 +0100
Subject: [PATCH 8/8] test

---
 livekit-ffi/src/server/resampler.rs | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/livekit-ffi/src/server/resampler.rs b/livekit-ffi/src/server/resampler.rs
index 908343a64..113054d56 100644
--- a/livekit-ffi/src/server/resampler.rs
+++ b/livekit-ffi/src/server/resampler.rs
@@ -43,7 +43,7 @@ impl SoxResampler {
         _quality_spec: QualitySpec, // ignored – using default soxr options
         _runtime_spec: RuntimeSpec, // ignored – using default soxr options
     ) -> Result<Self, String> {
-        let mut err: *const c_char = std::ptr::null();
+        let mut err: *mut *const c_char = std::ptr::null_mut();
 
         let soxr_ptr = unsafe {
             // Create io_spec from our types.
@@ -58,7 +58,7 @@ impl SoxResampler {
                 input_rate,
                 output_rate,
                 num_channels,
-                &mut err,
+                err,
                 &io_spec,
                 std::ptr::null(), // default quality
                 std::ptr::null(), // default runtime
@@ -66,7 +66,7 @@ impl SoxResampler {
         };
 
         if !err.is_null() || soxr_ptr.is_null() {
-            let error_msg = unsafe { std::ffi::CStr::from_ptr(err) };
+            let error_msg = unsafe { std::ffi::CStr::from_ptr(*err) };
             return Err(error_msg.to_string_lossy().to_string());
         }
 
@@ -77,11 +77,6 @@ impl SoxResampler {
     /// This version verifies that the input length is a multiple of the number of channels
     /// and uses valid pointers for tracking the number of frames consumed and produced.
     pub fn push(&mut self, input: &[i16]) -> Result<&[i16], String> {
-        // Ensure the input length is a multiple of the channel count.
-        if input.len() % self.num_channels as usize != 0 {
-            return Err("Input length must be a multiple of num_channels".to_string());
-        }
-
         let input_length = input.len() / self.num_channels as usize;
         let ratio = self.output_rate / self.input_rate;
         let delay = unsafe { soxr_sys::soxr_delay(self.soxr_ptr) };
@@ -96,6 +91,7 @@ impl SoxResampler {
         }
 
         // Using valid pointers for both consumed input (idone) and produced output (odone)
+        let mut idone: usize = 0;
         let mut odone: usize = 0;
 
         let error = unsafe {
@@ -103,7 +99,7 @@ impl SoxResampler {
                 self.soxr_ptr,
                 input.as_ptr() as *const c_void,
                 input_length,
-                std::ptr::null_mut(),
+                &mut idone,
                 self.out_buf.as_mut_ptr() as *mut c_void,
                 max_out_len,
                 &mut odone,