diff --git a/externals/coda-oss/.github/workflows/build_unittest.yml b/externals/coda-oss/.github/workflows/build_unittest.yml
index 4490ff194..109df279d 100644
--- a/externals/coda-oss/.github/workflows/build_unittest.yml
+++ b/externals/coda-oss/.github/workflows/build_unittest.yml
@@ -11,9 +11,9 @@ jobs:
     name: ${{ matrix.os }}-${{ matrix.python-version }}-CMake
     runs-on: ${{ matrix.os }}
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout
     - name: Set up Python
-      uses: actions/setup-python@v3
+      uses: actions/setup-python@v5 # https://github.com/marketplace/actions/setup-python
       with:
         python-version: ${{ matrix.python-version }}
     - name: Install python dependencies
@@ -63,7 +63,7 @@ jobs:
     runs-on: ${{ matrix.os }}
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout
     - name: configure
       run: |
         ls env:
@@ -76,7 +76,7 @@ jobs:
         cmake --build . --config ${{ matrix.configuration }} -j
         cmake --build . --config ${{ matrix.configuration }} --target install
     - name: Add msbuild to PATH
-      uses: microsoft/setup-msbuild@v1.0.2 # https://github.com/marketplace/actions/setup-msbuild
+      uses: microsoft/setup-msbuild@v1.1 # https://github.com/marketplace/actions/setup-msbuild
       with:
         msbuild-architecture: x64
     - name: msbuild
@@ -98,9 +98,9 @@ jobs:
     name: ${{ matrix.os }}-${{ matrix.python-version }}-CMake
     runs-on: ${{ matrix.os }}
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout
     - name: Set up Python
-      uses: actions/setup-python@v3
+      uses: actions/setup-python@v5 # https://github.com/marketplace/actions/setup-python
       with:
         python-version: ${{ matrix.python-version }}
     - name: Install python dependencies
@@ -136,7 +136,7 @@ jobs:
     name: ${{ matrix.os }}-${{ matrix.configuration }}-${{ matrix.avx }}-CMake
     runs-on: ${{ matrix.os }}
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout
     - name: configure
       run: |
         mkdir out && cd out
@@ -161,9 +161,9 @@ jobs:
     name: ${{ matrix.os }}-${{ matrix.python-version }}-waf${{ matrix.debugging }}
     runs-on: ${{ matrix.os }}
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout
     - name: Set up Python
-      uses: actions/setup-python@v3
+      uses: actions/setup-python@v5 # https://github.com/marketplace/actions/setup-python
       with:
         python-version: ${{ matrix.python-version }}
     - name: configure_with_swig
diff --git a/externals/coda-oss/UnitTest/UnitTest.vcxproj b/externals/coda-oss/UnitTest/UnitTest.vcxproj
index d79dab6f9..19508e59c 100644
--- a/externals/coda-oss/UnitTest/UnitTest.vcxproj
+++ b/externals/coda-oss/UnitTest/UnitTest.vcxproj
@@ -325,6 +325,10 @@
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </ClCompile>
+    <ClCompile Include="..\modules\c++\sys\unittests\test_ximd.cpp">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+    </ClCompile>
     <ClCompile Include="..\modules\c++\types\unittests\test_complex.cpp">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
diff --git a/externals/coda-oss/UnitTest/UnitTest.vcxproj.filters b/externals/coda-oss/UnitTest/UnitTest.vcxproj.filters
index f838a0433..cd789e927 100644
--- a/externals/coda-oss/UnitTest/UnitTest.vcxproj.filters
+++ b/externals/coda-oss/UnitTest/UnitTest.vcxproj.filters
@@ -258,6 +258,9 @@
     <ClCompile Include="..\modules\drivers\highfive\unittests\tests_high_five_base.cpp">
       <Filter>hdf5.lite</Filter>
     </ClCompile>
+    <ClCompile Include="..\modules\c++\sys\unittests\test_ximd.cpp">
+      <Filter>sys</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="pch.h" />
diff --git a/externals/coda-oss/UnitTest/sys.cpp b/externals/coda-oss/UnitTest/sys.cpp
index 7ac63c6a1..7bc804707 100644
--- a/externals/coda-oss/UnitTest/sys.cpp
+++ b/externals/coda-oss/UnitTest/sys.cpp
@@ -1,6 +1,8 @@
 #include "pch.h"
 #include "CppUnitTest.h"
 
+#include <coda_oss/numbers.h>
+
 #include <import/sys.h>
 #include <sys/Runnable.h>
 #include <sys/Thread.h>
@@ -21,6 +23,7 @@
 #include <sys/DateTime.h>
 #include <sys/Conf.h>
 #include <sys/Path.h>
+#include <sys/Ximd.h>
 
 namespace sys
 {
@@ -56,4 +59,8 @@ TEST_CLASS(test_path){ public:
 #include "sys/unittests/test_path.cpp"
 };
 
+TEST_CLASS(test_ximd){ public:
+#include "sys/unittests/test_ximd.cpp"
+};
+
 }
\ No newline at end of file
diff --git a/externals/coda-oss/modules/c++/coda-oss.vcxproj b/externals/coda-oss/modules/c++/coda-oss.vcxproj
index 6aacf1781..8b35965df 100644
--- a/externals/coda-oss/modules/c++/coda-oss.vcxproj
+++ b/externals/coda-oss/modules/c++/coda-oss.vcxproj
@@ -285,6 +285,7 @@
     <ClInclude Include="sys\include\sys\ThreadWin32.h" />
     <ClInclude Include="sys\include\sys\TimeStamp.h" />
     <ClInclude Include="sys\include\sys\UTCDateTime.h" />
+    <ClInclude Include="sys\include\sys\Ximd.h" />
     <ClInclude Include="tiff\include\tiff\Common.h" />
     <ClInclude Include="tiff\include\tiff\FileReader.h" />
     <ClInclude Include="tiff\include\tiff\FileWriter.h" />
diff --git a/externals/coda-oss/modules/c++/coda-oss.vcxproj.filters b/externals/coda-oss/modules/c++/coda-oss.vcxproj.filters
index a94b8aafd..accaf020b 100644
--- a/externals/coda-oss/modules/c++/coda-oss.vcxproj.filters
+++ b/externals/coda-oss/modules/c++/coda-oss.vcxproj.filters
@@ -957,6 +957,9 @@
     <ClInclude Include="coda_oss\include\coda_oss\mdspan_.h">
       <Filter>coda_oss</Filter>
     </ClInclude>
+    <ClInclude Include="sys\include\sys\Ximd.h">
+      <Filter>sys</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="pch.cpp" />
diff --git a/externals/coda-oss/modules/c++/hdf5.lite/include/hdf5/lite/highfive.h b/externals/coda-oss/modules/c++/hdf5.lite/include/hdf5/lite/highfive.h
index d5d91987b..c93d5a9a4 100644
--- a/externals/coda-oss/modules/c++/hdf5.lite/include/hdf5/lite/highfive.h
+++ b/externals/coda-oss/modules/c++/hdf5.lite/include/hdf5/lite/highfive.h
@@ -33,6 +33,8 @@
 #include <stdexcept>
 #include <vector>
 
+#include "types/RowCol.h"
+
 #include "H5_.h"
 #include "SpanRC.h"
 
diff --git a/externals/coda-oss/modules/c++/sys/include/sys/Ximd.h b/externals/coda-oss/modules/c++/sys/include/sys/Ximd.h
new file mode 100644
index 000000000..f9e0dcbf2
--- /dev/null
+++ b/externals/coda-oss/modules/c++/sys/include/sys/Ximd.h
@@ -0,0 +1,276 @@
+/* =========================================================================
+ * This file is part of sys-c++ 
+ * =========================================================================
+ * 
+ * (C) Copyright 2004 - 2014, MDA Information Systems LLC
+ * © Copyright 2024, Maxar Technologies, Inc.
+ *
+ * sys-c++ is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public 
+ * License along with this program; If not, 
+ * see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#pragma once 
+
+/*!
+ *  \brief A simple (poor man's) "implemtation" of std::experimental::simd
+ * for development/testing purposes.
+ *
+ * `std::experimental::simd` needs G++11 and C++20 and the "vectorclass"
+ * library needs C++17.  "Rolling our own" using `std::array` lets developers
+ * try out some things without needing actual SIMD code.
+ *
+ */
+
+#include "Dbg.h"
+
+// This is intended for development/testing purposes, so enable by default only for debug.
+#ifndef CODA_OSS_Ximd_ENABLED
+#define CODA_OSS_Ximd_ENABLED CODA_OSS_DEBUG
+#endif
+
+#if CODA_OSS_Ximd_ENABLED
+
+#include <array>
+#include <type_traits>
+#include <functional>
+#include <cmath>
+
+namespace sys
+{
+namespace ximd
+{
+
+// Need a class for the "broadcast" constructor (not impelemented).
+// Also helps to avoid overloading `std::array`.
+template <typename T, int N = 4>
+struct Ximd final
+{
+    static_assert(std::is_arithmetic<T>::value, "T must be arithmetic");
+    // static_assert(std::is_same<T, std::remove_cv<T>::type>::value, "no `const` for T");
+
+    using value_type = T;
+    using reference = T&;
+
+    Ximd() = default;
+    // This is the same as the "generater" overload below ... avoid enable_if gunk for now.
+     template<typename U>
+     Ximd(U v) noexcept
+    {
+        *this = generate([&](size_t) { return v; });
+    }
+    template <typename U>
+    Ximd(const Ximd<U>& other) noexcept
+    {
+        *this = other;
+    }
+    template <typename U>
+    Ximd(const U* mem)
+    {
+        copy_from(mem);
+    }
+
+    // https://en.cppreference.com/w/cpp/experimental/simd/simd/simd
+    // this is the same as `U&& v` above; avoid enable_if gunk for now.
+    template <typename G>
+    static auto generate(G&& generator) noexcept
+    {
+        Ximd retval;
+        // This is where all the "magic" (would) happen.
+        for (size_t i = 0; i < size(); i++)
+        {
+            retval[i] = generator(i);
+        }
+        return retval;
+    }
+    template <typename G>
+    explicit Ximd(G&& generator, nullptr_t) noexcept
+    {
+        *this = generate(generator);
+    }
+
+    reference operator[](size_t pos) noexcept
+    {
+        return value[pos];
+    }
+    value_type operator[](size_t pos) const noexcept
+    {
+        return value[pos];
+    }
+
+    static constexpr size_t size() noexcept
+    {
+        return N;
+    }
+
+    template <typename U>
+    void copy_from(const U* mem)
+    {
+        *this = Ximd::generate([&](size_t i) { return mem[i]; });
+    }
+    template <typename U>
+    void copy_to(U* mem) const
+    {
+        for (size_t i = 0; i < size(); i++)
+        {
+            mem[i] = (*this)[i];
+        }
+    }
+
+    Ximd& operator++() noexcept
+    {
+        *this = Ximd::generate([&](size_t i) { return ++value[i]; });
+        return *this;
+    }
+    Ximd operator++(int) noexcept
+    {
+        auto tmp = *this;
+        ++(*this);
+        return tmp;
+    }
+
+private:
+    std::array<value_type, N> value{};
+};
+
+// template<typename T, int N>
+// using fixed_size_ximd = Ximd<T, N>;
+//
+// template<typename T, int N>
+// using native_ximd = Ximd<T>;
+
+using ximd_mask = Ximd<bool>;
+
+template <typename T>
+inline auto operator+(const Ximd<T>& lhs, const Ximd<T>& rhs) noexcept
+{
+    return Ximd<T>::generate([&](size_t i) { return lhs[i] + rhs[i]; });
+}
+template <typename T>
+inline auto operator+(const Ximd<T>& lhs, typename Ximd<T>::value_type rhs) noexcept
+{
+    return lhs + Ximd<T>(rhs);
+}
+template <typename T>
+inline auto operator-(const Ximd<T>& lhs, const Ximd<T>& rhs) noexcept
+{
+    return Ximd<T>::generate([&](size_t i) { return lhs[i] - rhs[i]; });
+}
+template <typename T>
+inline auto operator-(const Ximd<T>& lhs, typename Ximd<T>::value_type rhs) noexcept
+{
+    return lhs - Ximd<T>(rhs);
+}
+template <typename T>
+inline auto operator*(const Ximd<T>& lhs, const Ximd<T>& rhs) noexcept
+{
+    return Ximd<T>::generate([&](size_t i) { return lhs[i] * rhs[i]; });
+}
+template <typename T>
+inline auto operator/(const Ximd<T>& lhs, typename Ximd<T>::value_type rhs) noexcept
+{
+    return lhs / Ximd<T>(rhs);
+}
+template <typename T>
+inline auto operator/(const Ximd<T>& lhs, const Ximd<T>& rhs) noexcept
+{
+    return Ximd<T>::generate([&](size_t i) { return lhs[i] / rhs[i]; });
+}
+
+template <typename T>
+inline auto& operator+=(Ximd<T>& lhs, const Ximd<T>& rhs) noexcept
+{
+    lhs = lhs + rhs;
+    return lhs;
+}
+template <typename T>
+inline auto& operator-=(Ximd<T>& lhs, const Ximd<T>& rhs) noexcept
+{
+    lhs = lhs - rhs;
+    return lhs;
+}
+
+template <typename T>
+inline auto operator==(const Ximd<T>& lhs, const Ximd<T>& rhs) noexcept
+{
+    return ximd_mask::generate([&](size_t i) { return lhs[i] == rhs[i]; });
+}
+template <typename T>
+inline auto operator==(const Ximd<T>& lhs, typename Ximd<T>::value_type rhs) noexcept
+{
+    return lhs == Ximd<T>(rhs);
+}
+template <typename T>
+inline auto operator!=(const Ximd<T>& lhs, const Ximd<T>& rhs) noexcept
+{
+    return ximd_mask::generate([&](size_t i) { return lhs[i] != rhs[i]; });
+}
+template <typename T>
+inline auto operator!=(const Ximd<T>& lhs, typename Ximd<T>::value_type rhs) noexcept
+{
+    return lhs != Ximd<T>(rhs);
+}
+template <typename T>
+inline auto operator<(const Ximd<T>& lhs, const Ximd<T>& rhs) noexcept
+{
+    return ximd_mask::generate([&](size_t i) { return lhs[i] < rhs[i]; });
+}
+template <typename T>
+inline auto operator<(const Ximd<T>& lhs, typename Ximd<T>::value_type rhs) noexcept
+{
+    return lhs < Ximd<T>(rhs);
+}
+template <typename T>
+inline auto operator<=(const Ximd<T>& lhs, const Ximd<T>& rhs) noexcept
+{
+    return ximd_mask::generate([&](size_t i) { return lhs[i] <= rhs[i]; });
+}
+template <typename T>
+inline auto operator>(const Ximd<T>& lhs, const Ximd<T>& rhs) noexcept
+{
+    return ximd_mask::generate([&](size_t i) { return lhs[i] > rhs[i]; });
+}
+template <typename T>
+inline auto operator>(const Ximd<T>& lhs, typename Ximd<T>::value_type rhs) noexcept
+{
+    return lhs > Ximd<T>(rhs);
+}
+
+inline bool any_of(const ximd_mask& m)
+{
+    for (size_t i = 0; i < m.size(); i++)
+    {
+        if (m[i])
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
+template <typename T>
+inline auto atan2(const Ximd<T>& real, const Ximd<T>& imag)
+{
+    return Ximd<T>::generate([&](size_t i) { return std::atan2(real[i], imag[i]); });
+}
+template <typename T>
+inline auto round(const Ximd<T>& v)
+{
+    return Ximd<T>::generate([&](size_t i) { return std::round(v[i]); });
+}
+
+} // ximd
+} // sys
+
+#endif
diff --git a/externals/coda-oss/modules/c++/sys/unittests/test_ximd.cpp b/externals/coda-oss/modules/c++/sys/unittests/test_ximd.cpp
new file mode 100644
index 000000000..d419edca7
--- /dev/null
+++ b/externals/coda-oss/modules/c++/sys/unittests/test_ximd.cpp
@@ -0,0 +1,573 @@
+/* =========================================================================
+ * This file is part of sys-c++
+ * =========================================================================
+ *
+ * (C) Copyright 2004 - 2014, MDA Information Systems LLC
+ * © Copyright 2024, Maxar Technologies, Inc.
+ *
+ * sys-c++ is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; If not,
+ * see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <complex>
+#include <vector>
+#include <std/span>
+#include <std/numbers>
+
+// These are unittests, always enable unless explicitly disabled
+#ifndef CODA_OSS_Ximd_ENABLED
+#define CODA_OSS_Ximd_ENABLED 1
+#endif
+#include <sys/Ximd.h>
+
+#include <gsl/gsl.h>
+#include <sys/Span.h>
+
+#include "TestCase.h"
+
+using zfloat = std::complex<float>;
+
+template<typename T>
+using simd = sys::ximd::Ximd<T>;
+using intv = simd<int>;
+using floatv = simd<float>;
+using intv_mask = sys::ximd::ximd_mask;
+using floatv_mask = sys::ximd::ximd_mask;
+
+// Manage a SIMD complex as an array of two SIMDs
+using zfloatv = std::array<floatv, 2>;
+static inline auto& real(zfloatv& z) noexcept
+{
+    return z[0];
+}
+static inline const auto& real(const zfloatv& z) noexcept
+{
+    return z[0];
+}
+static inline auto& imag(zfloatv& z) noexcept
+{
+    return z[1];
+}
+static inline const auto& imag(const zfloatv& z) noexcept
+{
+    return z[1];
+}
+static inline size_t size(const zfloatv& z) noexcept
+{
+    auto retval = real(z).size();
+    assert(retval == imag(z).size());
+    return retval;
+}
+
+static inline auto arg(const zfloatv& z)
+{
+    // https://en.cppreference.com/w/cpp/numeric/complex/arg
+    // > `std::atan2(std::imag(z), std::real(z))`
+    return atan2(imag(z), real(z));  // arg()
+}
+
+template <typename TGeneratorReal, typename TGeneratorImag>
+static inline auto make_zfloatv(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag)
+{
+    zfloatv retval;
+    for (size_t i = 0; i < size(retval); i++)
+    {
+        real(retval)[i] = generate_real(i);
+        imag(retval)[i] = generate_imag(i);
+    }
+    return retval;
+}
+
+template <typename T>
+static inline auto copy_from(std::span<const T> p)
+{
+    simd<T> retval;
+    assert(p.size() == retval.size());
+    retval.copy_from(p.data());
+    return retval;
+}
+static inline auto copy_from(std::span<const zfloat> p)
+{
+    const auto generate_real = [&](size_t i) { return p[i].real(); };
+    const auto generate_imag = [&](size_t i) { return p[i].imag(); };
+    return make_zfloatv(generate_real, generate_imag);
+}
+
+template<typename T>
+static inline auto copy_to(const simd<T>& v)
+{
+    std::vector<typename simd<T>::value_type> retval(v.size());
+    v.copy_to(retval.data());
+    return retval;
+}
+static inline auto copy_to(const zfloatv& v)
+{
+    std::vector<zfloat> retval;
+    for (size_t i = 0; i < size(v); i++)
+    {        
+        retval.emplace_back(real(v)[i], imag(v)[i]);
+    }
+    return retval;
+}
+
+TEST_CASE(testDefaultConstructor)
+{
+    // sanity check implementation and utility routines
+    TEST_ASSERT_EQ(floatv::size(), size(zfloatv{}));
+
+    // intv v = 1;
+    intv v;
+    v = 1;
+    for (size_t i = 0; i < v.size(); i++)
+    {
+        TEST_ASSERT_EQ(1, v[i]);
+    }
+}
+
+// Sample code from SIX; see **ComplexToAMP8IPHS8I.cpp**.
+static auto GetPhase(std::complex<double> v)
+{
+    // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around
+    // handles cases that are close to 2PI.
+    double phase = std::arg(v);
+    if (phase < 0.0) phase += coda_oss::numbers::pi * 2.0; // Wrap from [0, 2PI]
+    return phase;
+}
+static uint8_t getPhase(zfloat v, float phase_delta)
+{
+    // Phase is determined via arithmetic because it's equally spaced.
+    const auto phase = GetPhase(v);
+    return gsl::narrow_cast<uint8_t>(std::round(phase / phase_delta));
+}
+
+static inline auto if_add(const floatv_mask& m, const floatv& lhs, typename floatv::value_type rhs)
+{
+    const auto generate_add = [&](size_t i) {
+        return m[i] ? lhs[i] + rhs : lhs[i];
+    };
+    return floatv::generate(generate_add);
+}
+static inline auto roundi(const floatv& v)  // match vcl::roundi()
+{
+    const auto rounded = round(v);
+    const auto generate_roundi = [&](size_t i)
+    { return static_cast<typename intv::value_type>(rounded[i]); };
+    return intv::generate(generate_roundi);
+}
+static auto getPhase(const zfloatv& v, float phase_delta)
+{
+    // Phase is determined via arithmetic because it's equally spaced.
+    // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around
+    // handles cases that are close to 2PI.
+    auto phase = arg(v);
+    //where(phase < 0.0f, phase) += std::numbers::pi_v<float> *2.0f; // Wrap from [0, 2PI]
+    phase = if_add(phase < 0.0f, phase, std::numbers::pi_v<float> * 2.0f); // Wrap from [0, 2PI]
+    return roundi(phase / phase_delta);
+}
+static inline const auto& cxValues()
+{
+    //static const std::vector<zfloat> retval{/*{0, 0},*/ {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}, {-1, -1}, {0, -1}, {1, -1}};
+    static const std::vector<zfloat> retval{{0.0, 0.0}, {1.0, 1.0}, {10.0, -10.0}, {-100.0, 100.0}, {-1000.0, -1000.0}, // sample data from SIX
+            {-1.0, -1.0}, {1.0, -1.0}, {-1.0, 1.0} // "pad" to multiple of floatv::size()
+    };
+    return retval;
+}
+
+static auto expected_getPhase_values(const std::string& testName, float phase_delta)
+{
+    auto&& values = cxValues();
+    TEST_ASSERT(values.size() % floatv::size() == 0);
+    std::vector<uint8_t> expected;
+    for (auto&& v : values)
+    {
+        expected.push_back(getPhase(v, phase_delta));
+    }
+    return expected;
+}
+
+static auto toComplex_(double A, uint8_t phase)
+{
+    // The phase values should be read in (values 0 to 255) and converted to float by doing:
+    // P = (1 / 256) * input_value
+    const double P = (1.0 / 256.0) * phase;
+
+    // To convert the amplitude and phase values to complex float (i.e. real and imaginary):
+    // S = A * cos(2 * pi * P) + j * A * sin(2 * pi * P)
+    const double angle = 2 * std::numbers::pi * P;
+    const auto sin_angle = sin(angle);
+    const auto cos_angle = cos(angle);
+    zfloat S(gsl::narrow_cast<float>(A * cos_angle), gsl::narrow_cast<float>(A * sin_angle));
+    return S;
+}
+inline static auto toComplex(uint8_t amplitude, uint8_t phase)
+{
+    // A = input_amplitude(i.e. 0 to 255)
+    const double A = amplitude;
+    return toComplex_(A, phase);
+}
+
+static auto phase_delta()
+{
+    static const auto p0 = GetPhase(toComplex(1, 0));
+    static const auto p1 = GetPhase(toComplex(1, 1));
+    assert(p0 == 0.0);
+    assert(p1 > p0);
+    static const auto retval = gsl::narrow_cast<float>(p1 - p0);
+    return retval;
+}
+
+static auto load(const std::vector<zfloat>& values)
+{
+    std::vector<zfloatv> retval;
+    constexpr auto sz = floatv::size();
+    auto const pValues = sys::make_span(values);
+    auto p = sys::make_span(pValues.data(), sz);
+    for (size_t i = 0; i < values.size() / sz; i++)
+    {
+        retval.push_back(copy_from(p));
+        p = sys::make_span(p.data() + sz, p.size());
+    }
+    return retval;
+}
+
+TEST_CASE(testGetPhase)
+{
+    static const auto& expected = expected_getPhase_values(testName, phase_delta());
+
+    const auto valuesv = load(cxValues());
+    std::vector<uint8_t> actual;
+    for (auto&& zvaluev : valuesv)
+    {
+        const auto phase = getPhase(zvaluev, phase_delta());
+        const auto phase_ = copy_to(phase);
+        for (auto&& ph : copy_to(phase))
+        {
+            actual.push_back(gsl::narrow_cast<uint8_t>(ph));
+        }
+    }
+
+    TEST_ASSERT_EQ(actual.size(), expected.size());
+    for (size_t i = 0; i < actual.size(); i++)
+    {
+        TEST_ASSERT_EQ(actual[i], expected[i]);
+    }
+}
+
+// Again, more sample code from SIX
+static constexpr size_t AmplitudeTableSize = 256;
+static auto getPhaseDirections_()
+{
+    //! Unit vector rays that represent each direction that phase can point.
+    std::array<zfloat, AmplitudeTableSize> phase_directions; // interleaved, std::complex<float>
+
+     const auto p0 = GetPhase(toComplex(1, 0));
+    for (size_t i = 0; i < AmplitudeTableSize; i++)
+    {
+        const float angle = static_cast<float>(p0) + i * phase_delta();
+        const auto y = sin(angle);
+        const auto x = cos(angle);
+        phase_directions[i] = {x, y};
+    }
+    return phase_directions;
+}
+static inline const auto& getPhaseDirections()
+{
+    static const auto retval = getPhaseDirections_();
+    return retval;
+}
+
+template <size_t N>
+static inline auto lookup(const intv& zindex, const std::array<zfloat, N>& phase_directions)
+{
+    const auto generate_real = [&](size_t i)
+    {
+        const auto i_ = zindex[i];
+        return phase_directions[i_].real();
+    };
+    const auto generate_imag = [&](size_t i)
+    {
+        const auto i_ = zindex[i];
+        return phase_directions[i_].imag();
+    };
+    return make_zfloatv(generate_real, generate_imag);
+}
+
+TEST_CASE(testLookup)
+{
+    const auto& phase_directions = getPhaseDirections();
+
+    static const auto& expected_getPhase = expected_getPhase_values(testName, phase_delta());
+    std::vector<zfloat> expected;
+    for (auto&& phase : expected_getPhase)
+    {
+        expected.push_back(phase_directions[phase]);
+    }
+    
+    const auto valuesv = load(cxValues());
+    std::vector<zfloat> actual;
+    for (auto&& zvaluev : valuesv)
+    {
+        const auto phase = getPhase(zvaluev, phase_delta());
+        const auto phase_direction = lookup<AmplitudeTableSize>(phase, phase_directions);
+
+        for (auto&& pd : copy_to(phase_direction))
+        {
+            actual.push_back(pd);
+        }
+    }
+
+    TEST_ASSERT_EQ(actual.size(), expected.size());
+    for (size_t i = 0; i < actual.size(); i++)
+    {
+        TEST_ASSERT_EQ(actual[i], expected[i]);
+    }
+}
+
+// And ... more sample code from SIX
+static auto iota_0_256_()
+{
+    static_assert(sizeof(size_t) > sizeof(uint8_t), "size_t can't hold UINT8_MAX!");
+
+    std::vector<uint8_t> retval;
+    retval.reserve(UINT8_MAX + 1);
+    for (size_t i = 0; i <= UINT8_MAX; i++) // Be careful with indexing so that we don't wrap-around in the loop.
+    {
+        retval.push_back(gsl::narrow<uint8_t>(i));
+    }
+    assert(retval.size() == UINT8_MAX + 1);
+    return retval;
+}
+static inline std::vector<uint8_t> iota_0_256()
+{
+    static const auto retval = iota_0_256_();
+    return retval;
+}
+
+static auto make_magnitudes_()
+{
+    std::vector<float> result;
+    result.reserve(AmplitudeTableSize);
+    for (const auto amplitude : iota_0_256())
+    {
+        // AmpPhase -> Complex
+        const auto phase = amplitude;
+        const auto complex = toComplex(amplitude, phase);
+        result.push_back(std::abs(complex));
+    }
+
+    // I don't know if we can guarantee that the amplitude table is non-decreasing.
+    // Check to verify property at runtime.
+    if (!std::is_sorted(result.begin(), result.end()))
+    {
+        throw std::runtime_error("magnitudes must be sorted");
+    }
+
+    std::array<float, AmplitudeTableSize> retval;
+    std::copy(result.begin(), result.end(), retval.begin());
+    return retval;
+}
+static inline std::span<const float> magnitudes()
+{
+    //! The sorted set of possible magnitudes order from small to large.
+    static const auto cached_magnitudes = make_magnitudes_();
+    return sys::make_span(cached_magnitudes);
+}
+
+/*!
+ * Find the nearest element given an iterator range.
+ * @param value query value
+ * @return index of nearest value within the iterator range.
+ */
+static uint8_t nearest(std::span<const float> magnitudes, float value)
+{
+    const auto begin = magnitudes.begin();
+    const auto end = magnitudes.end();
+
+    const auto it = std::lower_bound(begin, end, value);
+    if (it == begin) return 0;
+
+    const auto prev_it = std::prev(it);
+    const auto nearest_it = it == end ? prev_it  :
+        (value - *prev_it <= *it - value ? prev_it : it);
+    const auto distance = std::distance(begin, nearest_it);
+    assert(distance <= std::numeric_limits<uint8_t>::max());
+    return gsl::narrow<uint8_t>(distance);
+}
+static uint8_t find_nearest(zfloat phase_direction, zfloat v)
+{
+    // We have to do a 1D nearest neighbor search for magnitude.
+    // But it's not the magnitude of the input complex value - it's the projection of
+    // the complex value onto the ray of candidate magnitudes at the selected phase.
+    // i.e. dot product.
+    const auto projection = (phase_direction.real() * v.real()) + (phase_direction.imag() * v.imag());
+    //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ???
+    return nearest(magnitudes(), projection);
+}
+
+template<typename TTest, typename TResult>
+static inline auto select(const TTest& test, const TResult& t, const TResult& f)
+{
+    TResult retval;
+    for (size_t i = 0; i < test.size(); i++)
+    {
+        retval[i] = test[i] ? t[i] : f[i];
+    }
+    return retval;
+}
+
+static auto lookup(const intv& zindex, std::span<const float> magnitudes)
+{
+    const auto generate = [&](size_t i) {
+        const auto i_ = zindex[i];
+        
+        // The index may be out of range. This is expected because `i` might be "don't care."
+        if ((i_ >= 0) && (i_ < std::ssize(magnitudes)))
+        {
+            return magnitudes[i_];
+        }
+        return NAN; // propogate "don't care"
+    };
+    return floatv::generate(generate);
+}
+
+static inline auto lower_bound_(std::span<const float> magnitudes, const floatv& v)
+{
+    intv first; first = 0;
+    intv last; last = gsl::narrow<int>(magnitudes.size());
+
+    auto count = last - first;
+    while (any_of(count > 0))
+    {
+        auto it = first;
+        const auto step = count / 2;
+        it += step;
+
+        auto next = it; ++next; // ... ++it;
+        auto advance = count; advance -= step + 1;  // ...  -= step + 1;
+
+        const auto c = lookup(it, magnitudes); // magnituides[it]
+
+        const auto test = c < v; // (c < v).__cvt(); // https://github.com/VcDevel/std-simd/issues/41
+
+        //where(test, it) = next; // ... ++it
+        //where(test, first) = it; // first = ...
+        //// count -= step + 1 <...OR...> count = step
+        //where(test, count) = advance;
+        //where(!test, count) = step;
+        it = select(test, next, it); // ... ++it
+        first = select(test, it, first); // first = ...
+        count = select(test, advance, step); // count -= step + 1 <...OR...> count = step
+    }
+    return first;
+}
+static inline auto lower_bound(std::span<const float> magnitudes, const floatv& value)
+{
+    return lower_bound_(magnitudes, value);
+}
+
+static auto nearest(std::span<const float> magnitudes, const floatv& value)
+{
+    assert(magnitudes.size() == AmplitudeTableSize);
+
+    /*
+        const auto it = std::lower_bound(begin, end, value);
+        if (it == begin) return 0;
+
+        const auto prev_it = std::prev(it);
+        const auto nearest_it = it == end ? prev_it  :
+            (value - *prev_it <= *it - value ? prev_it : it);
+        const auto distance = std::distance(begin, nearest_it);
+        assert(distance <= std::numeric_limits<uint8_t>::max());
+        return gsl::narrow<uint8_t>(distance);
+    */
+    const auto it = lower_bound(magnitudes, value);
+    const auto prev_it  = it - 1;  // const auto prev_it = std::prev(it);
+
+    const auto v0 = value - lookup(prev_it, magnitudes); // value - *prev_it
+    const auto v1 = lookup(it, magnitudes) - value; // *it - value
+    //const auto nearest_it = select(v0 <= v1, prev_it, it); //  (value - *prev_it <= *it - value ? prev_it : it);
+    
+    intv end; end = gsl::narrow<int>(magnitudes.size());
+    //const auto end_test = select(it == end, prev_it, nearest_it); // it == end ? prev_it  : ...
+    intv zero; zero = 0;
+    auto retval = select(it == 0, zero, // if (it == begin) return 0;
+        select(it == end, prev_it,  // it == end ? prev_it  : ...
+            select(v0 <= v1, prev_it, it) //  (value - *prev_it <= *it - value ? prev_it : it);
+        ));
+    return retval;
+}
+
+static auto find_nearest(std::span<const float> magnitudes, const floatv& phase_direction_real, const floatv& phase_direction_imag,
+    const zfloatv& v)
+{
+    // We have to do a 1D nearest neighbor search for magnitude.
+    // But it's not the magnitude of the input complex value - it's the projection of
+    // the complex value onto the ray of candidate magnitudes at the selected phase.
+    // i.e. dot product.
+    const auto projection = (phase_direction_real * real(v)) + (phase_direction_imag * imag(v));
+    //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ???
+    return nearest(magnitudes, projection);
+}
+static inline auto find_nearest(std::span<const float> magnitudes, const zfloatv& phase_direction, const zfloatv& v)
+{
+    return find_nearest(magnitudes, real(phase_direction), imag(phase_direction), v);
+}
+
+TEST_CASE(testFindNearest)
+{
+    const auto& values = cxValues();
+    const auto& phase_directions = getPhaseDirections();
+
+    static const auto& expected_getPhase = expected_getPhase_values(testName, phase_delta());
+    std::vector<zfloat> expected_phase_directions;
+    for (auto&& phase : expected_getPhase)
+    {
+        expected_phase_directions.push_back(phase_directions[phase]);
+    }
+    std::vector<uint8_t> expected;
+    for (size_t i = 0; i < values.size(); i++)
+    {
+        auto a = find_nearest(expected_phase_directions[i], values[i]);
+        expected.push_back(a);
+    }
+    
+    const auto valuesv = load(cxValues());
+    std::vector<uint8_t> actual;
+    for (auto&& v : valuesv)
+    {
+        const auto phase = getPhase(v, phase_delta());
+        const auto phase_direction = lookup<AmplitudeTableSize>(phase, phase_directions);
+        const auto amplitude = find_nearest(magnitudes(), phase_direction, v);
+
+        for (auto&& a : copy_to(amplitude))
+        {
+            actual.push_back(static_cast<uint8_t>(a));
+        }
+    }
+
+    TEST_ASSERT_EQ(actual.size(), expected.size());
+    for (size_t i = 0; i < actual.size(); i++)
+    {
+        TEST_ASSERT_EQ(actual[i], expected[i]);
+    }
+}
+
+TEST_MAIN(
+    TEST_CHECK(testDefaultConstructor);
+
+    TEST_CHECK(testGetPhase);
+    TEST_CHECK(testLookup);
+    TEST_CHECK(testFindNearest);
+)
diff --git a/externals/nitro/.github/workflows/codeql.yml b/externals/nitro/.github/workflows/codeql.yml
index b4da2819c..f564eba15 100644
--- a/externals/nitro/.github/workflows/codeql.yml
+++ b/externals/nitro/.github/workflows/codeql.yml
@@ -15,15 +15,20 @@ on:
   push:
     branches: [ "main" ]
   pull_request:
-    # The branches below must be a subset of the branches above
     branches: [ "main" ]
   schedule:
-    - cron: '19 23 * * 3'
+    - cron: '35 17 * * 6'
 
 jobs:
   analyze:
     name: Analyze
-    runs-on: ubuntu-latest
+    # Runner size impacts CodeQL analysis time. To learn more, please see:
+    #   - https://gh.io/recommended-hardware-resources-for-running-codeql
+    #   - https://gh.io/supported-runners-and-hardware-resources
+    #   - https://gh.io/using-larger-runners
+    # Consider using larger runners for possible analysis time improvements.
+    runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
+    timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
     permissions:
       actions: read
       contents: read
@@ -32,41 +37,44 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        language: [ 'cpp' ]
-        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
+        language: [ 'c-cpp' ]
+        # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ]
+        # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both
+        # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
         # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
 
     steps:
     - name: Checkout repository
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@v2
+      uses: github/codeql-action/init@v3
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
         # By default, queries listed here will override any specified in a config file.
         # Prefix the list here with "+" to use these queries and those in the config file.
-        
-        # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
-        # queries: security-extended,security-and-quality
 
+        # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
+        # queries: security-extended,security-and-quality
         
-    # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
+    # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@v2
+      uses: github/codeql-action/autobuild@v3
 
     # â„¹ï¸ Command-line programs to run using the OS shell.
     # ðŸ“š See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
 
-    #   If the Autobuild fails above, remove it and uncomment the following three lines. 
+    #   If the Autobuild fails above, remove it and uncomment the following three lines.
     #   modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
 
     # - run: |
-    #   echo "Run, Build Application using script"
-    #   ./location_of_script_within_repo/buildscript.sh
+    #     echo "Run, Build Application using script"
+    #     ./location_of_script_within_repo/buildscript.sh
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@v2
+      uses: github/codeql-action/analyze@v3
+      with:
+        category: "/language:${{matrix.language}}"
diff --git a/externals/nitro/.github/workflows/frequent_check.yml b/externals/nitro/.github/workflows/frequent_check.yml
index 9c1777f88..44916acd5 100644
--- a/externals/nitro/.github/workflows/frequent_check.yml
+++ b/externals/nitro/.github/workflows/frequent_check.yml
@@ -14,9 +14,9 @@ jobs:
     runs-on: ${{ matrix.os }}
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout
     - name: Set up Python
-      uses: actions/setup-python@v1
+      uses: actions/setup-python@v5 # https://github.com/marketplace/actions/setup-python
       with:
         python-version: '3.7'
     - name: configure
@@ -34,7 +34,7 @@ jobs:
         cd out
         ctest -C ${{ matrix.configuration }} --output-on-failure
     - name: Add msbuild to PATH
-      uses: microsoft/setup-msbuild@v1 # https://github.com/marketplace/actions/setup-msbuild
+      uses: microsoft/setup-msbuild@v1.1 # https://github.com/marketplace/actions/setup-msbuild
       with:
         msbuild-architecture: x64
     - name: msbuild
@@ -51,7 +51,7 @@ jobs:
     runs-on: ${{ matrix.os }}
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout
     - name: configure CODA-OSS
       run: |
         mkdir externals\coda-oss\out
@@ -63,7 +63,7 @@ jobs:
         cmake --build . --config ${{ matrix.configuration }} -j
         cmake --build . --config ${{ matrix.configuration }} --target install
     - name: Add msbuild to PATH
-      uses: microsoft/setup-msbuild@v1.0.2 # https://github.com/marketplace/actions/setup-msbuild
+      uses: microsoft/setup-msbuild@v1.1 # https://github.com/marketplace/actions/setup-msbuild
       with:
         msbuild-architecture: x64
     - name: msbuild
@@ -87,9 +87,9 @@ jobs:
     runs-on: ${{ matrix.os }}
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout
     - name: Set up Python
-      uses: actions/setup-python@v1
+      uses: actions/setup-python@v5 # https://github.com/marketplace/actions/setup-python
       with:
         python-version: '3.7'
     - name: configure
diff --git a/externals/nitro/.github/workflows/main.yml b/externals/nitro/.github/workflows/main.yml
index ef3dfa65e..041d480fb 100644
--- a/externals/nitro/.github/workflows/main.yml
+++ b/externals/nitro/.github/workflows/main.yml
@@ -17,9 +17,9 @@ jobs:
     runs-on: ${{ matrix.os }}
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout
     - name: Set up Python
-      uses: actions/setup-python@v1
+      uses: actions/setup-python@v5 # https://github.com/marketplace/actions/setup-python
       with:
         python-version: '3.7'
     - name: configure
@@ -37,7 +37,7 @@ jobs:
         cd out
         ctest -C ${{ matrix.configuration }} --output-on-failure
     - name: Add msbuild to PATH
-      uses: microsoft/setup-msbuild@v1.0.2 # https://github.com/marketplace/actions/setup-msbuild
+      uses: microsoft/setup-msbuild@v1.1 # https://github.com/marketplace/actions/setup-msbuild
       with:
         msbuild-architecture: x64
     - name: msbuild
@@ -54,7 +54,7 @@ jobs:
     runs-on: ${{ matrix.os }}
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout
     - name: configure CODA-OSS
       run: |
         mkdir externals\coda-oss\out
@@ -66,7 +66,7 @@ jobs:
         cmake --build . --config ${{ matrix.configuration }} -j
         cmake --build . --config ${{ matrix.configuration }} --target install
     - name: Add msbuild to PATH
-      uses: microsoft/setup-msbuild@v1 # https://github.com/marketplace/actions/setup-msbuild
+      uses: microsoft/setup-msbuild@v1.1 # https://github.com/marketplace/actions/setup-msbuild
       with:
         msbuild-architecture: x64
     - name: msbuild
@@ -90,9 +90,9 @@ jobs:
     runs-on: ${{ matrix.os }}
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout
     - name: Set up Python
-      uses: actions/setup-python@v1
+      uses: actions/setup-python@v5 # https://github.com/marketplace/actions/setup-python
       with:
         python-version: '3.7'
     - name: configure
diff --git a/six/modules/c++/samples/8AMPI_PHSI.cpp b/six/modules/c++/samples/8AMPI_PHSI.cpp
index af79cc4d1..5ceff52f6 100644
--- a/six/modules/c++/samples/8AMPI_PHSI.cpp
+++ b/six/modules/c++/samples/8AMPI_PHSI.cpp
@@ -9,7 +9,7 @@
 #include <assert.h>
 
 #include <iostream>
-#include <span>
+#include <std/span>
 #include <algorithm>
 #include <iterator>
 #include <future>
diff --git a/six/modules/c++/samples/8AMPI_PHSI.dir/8AMPI_PHSI.dir.vcxproj b/six/modules/c++/samples/8AMPI_PHSI.dir/8AMPI_PHSI.dir.vcxproj
index f65f3e8f7..1cd611727 100644
--- a/six/modules/c++/samples/8AMPI_PHSI.dir/8AMPI_PHSI.dir.vcxproj
+++ b/six/modules/c++/samples/8AMPI_PHSI.dir/8AMPI_PHSI.dir.vcxproj
@@ -66,6 +66,8 @@
       <TreatWarningAsError>true</TreatWarningAsError>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
       <RuntimeTypeInfo>true</RuntimeTypeInfo>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <LanguageStandard_C>stdc11</LanguageStandard_C>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -91,6 +93,8 @@
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
       <ControlFlowGuard>Guard</ControlFlowGuard>
       <RuntimeTypeInfo>true</RuntimeTypeInfo>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <LanguageStandard_C>stdc11</LanguageStandard_C>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -133,9 +137,6 @@
     <ProjectReference Include="..\..\six.sicd\six.sicd.vcxproj">
       <Project>{34ac2314-fbd1-42ad-aaf4-0cebc6bf737e}</Project>
     </ProjectReference>
-    <ProjectReference Include="..\..\six.sidd\six.sidd.vcxproj">
-      <Project>{ddc587c2-53ba-44a9-94e7-07be52af3d0b}</Project>
-    </ProjectReference>
     <ProjectReference Include="..\..\six\six.vcxproj">
       <Project>{62aad4dd-35ba-41a0-8263-1f4dfd755689}</Project>
     </ProjectReference>
diff --git a/six/modules/c++/samples/CMakeLists.txt b/six/modules/c++/samples/CMakeLists.txt
index e0527df40..96238f7eb 100644
--- a/six/modules/c++/samples/CMakeLists.txt
+++ b/six/modules/c++/samples/CMakeLists.txt
@@ -18,6 +18,7 @@ add_sample(extract_cphd_xml                     cli-c++ cphd-c++ xml.lite-c++)
 add_sample(image_to_scene                       six.sicd-c++ six.sidd-c++)
 add_sample(project_slant_to_output              cli-c++ io-c++ six-c++ six.sicd-c++ sio.lite-c++)
 add_sample(round_trip_six                       cli-c++ six.convert-c++ six.sicd-c++ six.sidd-c++)
+add_sample(8AMPI_PHSI                    	six.sicd-c++)
 add_sample(sicd_output_plane_pixel_to_lat_lon   cli-c++ six.sicd-c++)
 add_sample(test_compare_sidd                    cli-c++ six.sicd-c++ six.sidd-c++)
 add_sample(test_create_sicd                     cli-c++ six.sicd-c++ sio.lite-c++)
diff --git a/six/modules/c++/samples/wscript b/six/modules/c++/samples/wscript
index 498d294af..6de4750b5 100644
--- a/six/modules/c++/samples/wscript
+++ b/six/modules/c++/samples/wscript
@@ -12,6 +12,7 @@ def build(bld):
                'project_slant_to_output'             : 'cli io six six.sicd sio.lite',
                'image_to_scene'                      : 'six.sicd six.sidd',
                'round_trip_six'                      : 'cli six.convert six.sicd six.sidd',
+               '8AMPI_PHSI'                          : 'six.sicd',
                'test_create_sicd'                    : 'cli six.sicd sio.lite',
                'test_create_sicd_from_mem'           : 'cli six.sicd',
                'test_create_sidd_from_mem'           : 'cli six.sicd six.sidd',
diff --git a/six/modules/c++/six.sicd/CMakeLists.txt b/six/modules/c++/six.sicd/CMakeLists.txt
index 31a6e143c..31373160e 100644
--- a/six/modules/c++/six.sicd/CMakeLists.txt
+++ b/six/modules/c++/six.sicd/CMakeLists.txt
@@ -8,6 +8,7 @@ coda_add_module(
         source/ComplexData.cpp
         source/ComplexDataBuilder.cpp
         source/ComplexToAMP8IPHS8I.cpp
+        source/ComplexToAMP8IPHS8I_unseq.cpp
         source/ComplexXMLControl.cpp
         source/ComplexXMLParser.cpp
         source/ComplexXMLParser040.cpp
diff --git a/six/modules/c++/six.sicd/six.sicd.vcxproj b/six/modules/c++/six.sicd/six.sicd.vcxproj
index 9296d12e1..3b0b210df 100644
--- a/six/modules/c++/six.sicd/six.sicd.vcxproj
+++ b/six/modules/c++/six.sicd/six.sicd.vcxproj
@@ -154,6 +154,9 @@
     <ClCompile Include="source\ComplexData.cpp" />
     <ClCompile Include="source\ComplexDataBuilder.cpp" />
     <ClCompile Include="source\ComplexToAMP8IPHS8I.cpp" />
+    <ClCompile Include="source\ComplexToAMP8IPHS8I_simd.cpp" />
+    <ClCompile Include="source\ComplexToAMP8IPHS8I_vcl.cpp" />
+    <ClCompile Include="source\ComplexToAMP8IPHS8I_ximd.cpp" />
     <ClCompile Include="source\ComplexXMLControl.cpp" />
     <ClCompile Include="source\ComplexXMLParser.cpp" />
     <ClCompile Include="source\ComplexXMLParser040.cpp" />
diff --git a/six/modules/c++/six.sicd/six.sicd.vcxproj.filters b/six/modules/c++/six.sicd/six.sicd.vcxproj.filters
index 4f4d855cf..fb6a88a17 100644
--- a/six/modules/c++/six.sicd/six.sicd.vcxproj.filters
+++ b/six/modules/c++/six.sicd/six.sicd.vcxproj.filters
@@ -248,5 +248,14 @@
     <ClCompile Include="source\DataParser.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="source\ComplexToAMP8IPHS8I_vcl.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="source\ComplexToAMP8IPHS8I_simd.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="source\ComplexToAMP8IPHS8I_ximd.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
 </Project>
\ No newline at end of file
diff --git a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp
index 0c08b0447..adfd97059 100644
--- a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp
+++ b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp
@@ -29,6 +29,7 @@
 #include <std/numbers>
 #include <algorithm>
 #include <functional>
+#include <stdexcept>
 
 #include <coda_oss/CPlusPlus.h>
 #if CODA_OSS_cpp17
@@ -83,7 +84,7 @@ inline auto GetPhase(std::complex<double> v)
     if (phase < 0.0) phase += std::numbers::pi * 2.0; // Wrap from [0, 2PI]
     return phase;
 }
-uint8_t six::sicd::details::ComplexToAMP8IPHS8I::getPhase(six::zfloat v) const
+uint8_t six::sicd::details::ComplexToAMP8IPHS8I::Impl::getPhase(six::zfloat v) const
 {
     // Phase is determined via arithmetic because it's equally spaced.
     const auto phase = GetPhase(v);
@@ -144,23 +145,29 @@ static std::span<const float> get_magnitudes(const six::AmplitudeTable* pAmplitu
     return sys::make_const_span(uncached_magnitudes);
 }
 
+
 six::sicd::details::ComplexToAMP8IPHS8I::ComplexToAMP8IPHS8I(const six::AmplitudeTable *pAmplitudeTable)
-    : magnitudes(get_magnitudes(pAmplitudeTable, uncached_magnitudes))
+    :impl(*this)
 {
+    impl.magnitudes = get_magnitudes(pAmplitudeTable, impl.uncached_magnitudes);
+
     const auto p0 = GetPhase(Utilities::toComplex(1, 0, pAmplitudeTable));
     const auto p1 = GetPhase(Utilities::toComplex(1, 1, pAmplitudeTable));
     assert(p0 == 0.0);
     assert(p1 > p0);
-    phase_delta = gsl::narrow_cast<float>(p1 - p0);
+    impl.phase_delta = gsl::narrow_cast<float>(p1 - p0);
     for(size_t i = 0; i < 256; i++)
     {
-        const units::Radians<float> angle{ static_cast<float>(p0) + i * phase_delta };
+        const units::Radians<float> angle{ static_cast<float>(p0) + i * impl.phase_delta };
         float y, x;
         SinCos(angle, y, x);
-        phase_directions[i] = { x, y };
+        impl.phase_directions[i] = { x, y };
 
-        phase_directions_real[i] = phase_directions[i].real();
-        phase_directions_imag[i] = phase_directions[i].imag();
+        // Only need the parallel array when using the "vectorclass" library.
+        #ifdef SIX_sicd_has_VCL
+        impl.phase_directions_real[i] = impl.phase_directions[i].real();
+        impl.phase_directions_imag[i] = impl.phase_directions[i].imag();
+        #endif
     }
 }
 
@@ -184,7 +191,7 @@ static uint8_t nearest(std::span<const float> magnitudes, float value)
     assert(distance <= std::numeric_limits<uint8_t>::max());
     return gsl::narrow<uint8_t>(distance);
 }
-uint8_t six::sicd::details::ComplexToAMP8IPHS8I::find_nearest(six::zfloat phase_direction, six::zfloat v) const
+uint8_t six::sicd::details::ComplexToAMP8IPHS8I::Impl::find_nearest(six::zfloat phase_direction, six::zfloat v) const
 {
     // We have to do a 1D nearest neighbor search for magnitude.
     // But it's not the magnitude of the input complex value - it's the projection of
@@ -197,16 +204,16 @@ uint8_t six::sicd::details::ComplexToAMP8IPHS8I::find_nearest(six::zfloat phase_
 six::AMP8I_PHS8I_t six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbor_(const six::zfloat &v) const
 {
     six::AMP8I_PHS8I_t retval;
-    retval.phase = getPhase(v);
+    retval.phase = impl.getPhase(v);
 
-    auto&& phase_direction = phase_directions[retval.phase];
-    retval.amplitude = find_nearest(phase_direction, v);
+    auto&& phase_direction = impl.phase_directions[retval.phase];
+    retval.amplitude = impl.find_nearest(phase_direction, v);
     return retval;
 }
-template <typename TInputIt, typename TOutputIt>
-void six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_seq(TInputIt first, TInputIt last, TOutputIt dest) const
+void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_seq(std::span<const six::zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const
 {
-    std::transform(first, last, dest, [&](const auto& v) { return nearest_neighbor_(v); });
+    std::transform(inputs.begin(), inputs.end(), results.begin(),
+        [&](const auto& v) { return converter.nearest_neighbor_(v); });
 }
 std::vector<six::AMP8I_PHS8I_t> six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_seq(
     std::span<const zfloat> inputs, const six::AmplitudeTable* pAmplitudeTable)
@@ -215,7 +222,7 @@ std::vector<six::AMP8I_PHS8I_t> six::sicd::details::ComplexToAMP8IPHS8I::nearest
     const auto& converter = make_(pAmplitudeTable);
 
     std::vector<six::AMP8I_PHS8I_t> retval(inputs.size());
-    converter.nearest_neighbors_seq(inputs.begin(), inputs.end(), retval.begin());
+    converter.impl.nearest_neighbors_seq(inputs, retval);
     return retval;
 }
 
@@ -249,13 +256,16 @@ static inline OutputIt transform_async(const InputIt first1, const InputIt last1
     return handle.get();
 }
 
-template <typename TInputIt, typename TOutputIt>
-void six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par(TInputIt first, TInputIt last, TOutputIt dest) const
+void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_par(std::span<const six::zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const
 {
     const auto nearest_neighbor = [&](const auto& v)
     {
-        return this->nearest_neighbor_(v);
+        return converter.nearest_neighbor_(v);
     };
+
+    const auto first = inputs.begin();
+    const auto last = inputs.end();
+    const auto dest = results.begin();
      
 #if SIX_six_sicd_ComplexToAMP8IPHS8I_has_execution
     std::ignore = std::transform(std::execution::par, first, last, dest, nearest_neighbor);
@@ -266,7 +276,7 @@ void six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par(TInputIt fir
     constexpr auto default_cutoff = (dimension * dimension) * 4;
     const auto cutoff = cutoff_ == 0 ? default_cutoff : cutoff_;
 
-    const auto transform_f = [&](const TInputIt first1, const TInputIt last1, TOutputIt d_first)
+    const auto transform_f = [&](auto first1, auto last1, auto d_first)
     {
         return std::transform(first1, last1, d_first, nearest_neighbor);
     };
@@ -280,7 +290,7 @@ std::vector<six::AMP8I_PHS8I_t> six::sicd::details::ComplexToAMP8IPHS8I::nearest
     const auto& converter = make_(pAmplitudeTable);
 
     std::vector<six::AMP8I_PHS8I_t> retval(inputs.size());
-    converter.nearest_neighbors_par(inputs.begin(), inputs.end(), retval.begin());
+    converter.impl.nearest_neighbors_par(inputs, sys::make_span(retval));
     return retval;
 }
 
@@ -289,42 +299,75 @@ std::vector<six::AMP8I_PHS8I_t> six::sicd::details::ComplexToAMP8IPHS8I::nearest
 {
     // TODO: there could be more complicated logic here to decide between
     // _seq, _par, _unseq, and _par_unseq
-    #if SIX_sicd_have_VCL || SIX_sicd_have_experimental_simd
-    return nearest_neighbors_par_unseq(inputs, pAmplitudeTable);
+    #if SIX_sicd_ComplexToAMP8IPHS8I_unseq
+
+    // None of our SIMD implementations are dramatically slower when
+    // running unittests; that's mostly because there's lots of IO.  So that
+    // the more interesting code-path is tested, use UNSEQ in debugging.
+    #if CODA_OSS_DEBUG
+    return nearest_neighbors_unseq(inputs, pAmplitudeTable); // TODO:
     #else
     return nearest_neighbors_par(inputs, pAmplitudeTable);
     #endif
-}
-
 
-template <typename TInputIt, typename TOutputIt>
-void six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par_unseq(TInputIt first, TInputIt last, TOutputIt dest) const
-{
-    constexpr ptrdiff_t cutoff_ = 0; // too slow w/o multi-threading
-    // The value of "default_cutoff" was determined by testing; there is nothing special about it, feel free to change it.
-    constexpr auto default_cutoff = (256 * 256) * 2;
-    const auto cutoff = cutoff_ == 0 ? default_cutoff : cutoff_;
+    #else
+    return nearest_neighbors_par(inputs, pAmplitudeTable);
 
-    const auto transform_f = [&](const TInputIt first1, const TInputIt last1, TOutputIt d_first)
-    {
-        nearest_neighbors_unseq(first1, last1, d_first);
-        static const TOutputIt retval;
-        return retval;
-    };
-    std::ignore = transform_async(first, last, dest, transform_f, cutoff);
+#endif
 }
+
 #if SIX_sicd_ComplexToAMP8IPHS8I_unseq
-std::vector<six::AMP8I_PHS8I_t> six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par_unseq(
+std::vector<six::AMP8I_PHS8I_t> six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq(
     std::span<const zfloat> inputs, const six::AmplitudeTable* pAmplitudeTable)
 {
-    // make a structure to quickly find the nearest neighbor
-    const auto& converter = make_(pAmplitudeTable);
+    // TODO: there could be more complicated logic here to determine which UNSEQ
+    // implementation to use.
 
-    std::vector<six::AMP8I_PHS8I_t> retval(inputs.size());
-    converter.nearest_neighbors_par_unseq(inputs.begin(), inputs.end(), retval.begin());
-    return retval;
+    #if SIX_sicd_has_VCL
+    return nearest_neighbors_unseq_vcl(inputs, pAmplitudeTable);
+
+    #elif SIX_sicd_has_simd
+    return nearest_neighbors_unseq_simd(inputs, pAmplitudeTable);
+
+    #elif SIX_sicd_has_ximd
+    return nearest_neighbors_unseq_ximd(inputs, pAmplitudeTable);
+
+    #else
+    #error "Don't know how to implement nearest_neighbors_unseq()"
+    throw std::logic_error("Don't know how to implement nearest_neighbors_unseq()");
+
+    #endif
 }
-#endif //  SIX_sicd_have_VCL || SIX_sicd_have_experimental_simd
+#endif // SIX_sicd_ComplexToAMP8IPHS8I_unseq
+
+//template <typename TInputIt, typename TOutputIt>
+//void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_par_unseq(TInputIt first, TInputIt last, TOutputIt dest) const
+//{
+//    constexpr ptrdiff_t cutoff_ = 0; // too slow w/o multi-threading
+//    // The value of "default_cutoff" was determined by testing; there is nothing special about it, feel free to change it.
+//    constexpr auto default_cutoff = (256 * 256) * 2;
+//    const auto cutoff = cutoff_ == 0 ? default_cutoff : cutoff_;
+//
+//    const auto transform_f = [&](const TInputIt first1, const TInputIt last1, TOutputIt d_first)
+//    {
+//        nearest_neighbors_unseq(first1, last1, d_first);
+//        static const TOutputIt retval;
+//        return retval;
+//    };
+//    std::ignore = transform_async(first, last, dest, transform_f, cutoff);
+//}
+//#if SIX_sicd_ComplexToAMP8IPHS8I_unseq
+//std::vector<six::AMP8I_PHS8I_t> six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par_unseq(
+//    std::span<const zfloat> inputs, const six::AmplitudeTable* pAmplitudeTable)
+//{
+//    // make a structure to quickly find the nearest neighbor
+//    const auto& converter = make_(pAmplitudeTable);
+//
+//    std::vector<six::AMP8I_PHS8I_t> retval(inputs.size());
+//    converter.impl.nearest_neighbors_par_unseq(inputs.begin(), inputs.end(), retval.begin());
+//    return retval;
+//}
+//#endif //  SIX_sicd_ComplexToAMP8IPHS8I_unseq
 
 const six::sicd::details::ComplexToAMP8IPHS8I& six::sicd::details::ComplexToAMP8IPHS8I::make_(const six::AmplitudeTable* pAmplitudeTable)
 {
diff --git a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_simd.cpp b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_simd.cpp
new file mode 100644
index 000000000..ece707b2f
--- /dev/null
+++ b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_simd.cpp
@@ -0,0 +1,377 @@
+/* =========================================================================
+* This file is part of six.sicd-c++
+* =========================================================================
+*
+* (C) Copyright 2021, Maxar Technologies, Inc.
+*
+* six.sicd-c++ is free software; you can redistribute it and/or modify
+* it under the terms of the GNU Lesser General Public License as published by
+* the Free Software Foundation; either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with this program; If not,
+* see <http://www.gnu.org/licenses/>.
+*
+*/
+#include "six/AmplitudeTable.h"
+
+#include <math.h>
+#include <assert.h>
+
+#include <cassert>
+#include <memory>
+#include <std/numbers>
+#include <algorithm>
+#include <functional>
+#include <type_traits>
+#include <execution>
+#include <array>
+#include <std/span>
+
+#include <gsl/gsl.h>
+#include <math/Utilities.h>
+#include <units/Angles.h>
+#include <sys/Span.h>
+
+#include "six/sicd/Utilities.h"
+
+#if SIX_sicd_has_simd
+
+#include <experimental/simd>
+
+// https://en.cppreference.com/w/cpp/experimental/simd/simd_cast
+// > The TS specification is missing `simd_cast` and `static_simd_cast` overloads for `simd_mask`. ...
+namespace stdx
+{
+    using namespace std::experimental;
+    using namespace std::experimental::__proposed;
+}
+
+// https://en.cppreference.com/w/cpp/experimental/simd
+using floatv = stdx::native_simd<float>;
+// using doublev = stdx::rebind_simd_t<double, floatv>;
+using intv = stdx::rebind_simd_t<int, floatv>;
+using zfloatv = std::array<floatv, 2>;
+
+auto& real(zfloatv& z)
+{
+    return z[0];
+}
+const auto& real(const zfloatv& z)
+{
+    return z[0];
+}
+auto& imag(zfloatv& z)
+{
+    return z[1];
+}
+const auto& imag(const zfloatv& z)
+{
+    return z[1];
+}
+
+// https://en.cppreference.com/w/cpp/experimental/simd/simd/simd
+template<typename TGeneratorReal, typename TGeneratorImag>
+inline auto make_zfloatv(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag)
+{
+    zfloatv retval;
+    real(retval) = floatv(generate_real);
+    imag(retval) = floatv(generate_imag);
+    return retval;
+}
+
+static inline auto load(std::span<const six::zfloat> p)
+{
+    const auto generate_real = [&](size_t i) {
+        return p[i].real();
+    };
+    const auto generate_imag = [&](size_t i) {
+        return p[i].imag();
+    };
+    return make_zfloatv(generate_real, generate_imag);
+}
+
+
+// https://en.cppreference.com/w/cpp/numeric/complex/arg
+// > `std::atan2(std::imag(z), std::real(z))`
+inline auto arg(const floatv& real, const floatv& imag)
+{
+    return atan2(imag, real); // arg()
+}
+inline auto arg(const zfloatv& z)
+{
+    return arg(real(z), imag(z));
+}
+
+inline auto roundi(const floatv& v) // match vcl::roundi()
+{
+    return static_simd_cast<intv>(round(v));
+}
+
+template<typename TTest, typename TResult>
+inline auto select(const TTest& test_, const TResult& t, const TResult& f)
+{
+    //const auto test = test_.__cvt(); // https://github.com/VcDevel/std-simd/issues/41
+    const auto test = stdx::static_simd_cast<typename TResult::mask_type>(test_); // https://github.com/VcDevel/std-simd/issues/41
+
+    // https://en.cppreference.com/w/cpp/experimental/simd/where_expression
+    // > ... All other elements are left unchanged.
+    TResult retval;
+    where(test, retval) = t;
+    where(!test, retval) = f;
+    return retval;
+}
+
+static inline auto getPhase(const zfloatv& v, float phase_delta)
+{
+    // Phase is determined via arithmetic because it's equally spaced.
+    // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around
+    // handles cases that are close to 2PI.
+    auto phase = arg(v);
+    where(phase < 0.0f, phase) += std::numbers::pi_v<float> *2.0f; // Wrap from [0, 2PI]
+    return roundi(phase / phase_delta);
+}
+
+template<size_t N>
+static inline auto lookup(const intv& zindex, const std::array<six::zfloat, N>& phase_directions)
+{
+    // It seems that the "generator" constuctor is called with SIMD instructions.
+    // https://en.cppreference.com/w/cpp/experimental/simd/simd/simd
+    // > The calls to `generator` are unsequenced with respect to each other.
+
+    const auto generate_real = [&](size_t i) {
+        const auto i_ = zindex[i];
+        return phase_directions[i_].real();
+    };
+    const auto generate_imag = [&](size_t i) {
+        const auto i_ = zindex[i];
+        return phase_directions[i_].imag();
+    };
+    return make_zfloatv(generate_real, generate_imag);
+}
+
+// https://en.cppreference.com/w/cpp/algorithm/lower_bound
+/*
+template<class ForwardIt, class T>
+ForwardIt lower_bound(ForwardIt first, ForwardIt last, const T& value)
+{
+    ForwardIt it;
+    typename std::iterator_traits<ForwardIt>::difference_type count, step;
+    count = std::distance(first, last);
+
+    while (count > 0)
+    {
+        it = first;
+        step = count / 2;
+        std::advance(it, step);
+
+        if (*it < value)
+        {
+            first = ++it;
+            count -= step + 1;
+        }
+        else
+            count = step;
+    }
+
+    return first;
+}
+*/
+template<size_t N>
+static inline auto lookup(const intv& zindex, std::span<const float> magnitudes)
+{
+    // It seems that the "generator" constuctor is called with SIMD instructions.
+    // https://en.cppreference.com/w/cpp/experimental/simd/simd/simd
+    // > The calls to `generator` are unsequenced with respect to each other.
+
+    const auto generate = [&](size_t i) {
+        const auto i_ = zindex[i];
+        return magnitudes[i_];
+    };
+    return floatv(generate);
+}
+inline auto lower_bound_(std::span<const float> magnitudes, const floatv& v)
+{
+    intv first = 0;
+    const intv last = gsl::narrow<int>(magnitudes.size());
+
+    auto count = last - first;
+    while (any_of(count > 0))
+    {
+        auto it = first;
+        const auto step = count / 2;
+        it += step;
+
+        auto next = it; ++next; // ... ++it;
+        auto advance = count; advance -= step + 1;  // ...  -= step + 1;
+
+        const auto c = lookup<six::AmplitudeTableSize>(it, magnitudes); // magnituides[it]
+
+        const auto test = c < v; // (c < v).__cvt(); // https://github.com/VcDevel/std-simd/issues/41
+
+        //where(test, it) = next; // ... ++it
+        //where(test, first) = it; // first = ...
+        //// count -= step + 1 <...OR...> count = step
+        //where(test, count) = advance;
+        //where(!test, count) = step;
+        it = select(test, next, it); // ... ++it
+        first = select(test, it, first); // first = ...
+        count = select(test, advance, step); // count -= step + 1 <...OR...> count = step
+    }
+    return first;
+}
+inline auto lower_bound(std::span<const float> magnitudes, const floatv& value)
+{
+    auto retval = lower_bound_(magnitudes, value);
+
+    #ifndef NDEBUG // i.e., debug
+    for (size_t i = 0; i < value.size(); i++)
+    {
+        const auto it = std::lower_bound(magnitudes.begin(), magnitudes.end(), value[i]);
+        const auto result = gsl::narrow<int>(std::distance(magnitudes.begin(), it));
+        assert(retval[i] == result);
+    }
+    #endif
+
+    return retval;
+}
+
+static auto nearest(std::span<const float> magnitudes, const floatv& value)
+{
+    assert(magnitudes.size() == six::AmplitudeTableSize);
+
+    /*
+        const auto it = std::lower_bound(begin, end, value);
+        if (it == begin) return 0;
+
+        const auto prev_it = std::prev(it);
+        const auto nearest_it = it == end ? prev_it  :
+            (value - *prev_it <= *it - value ? prev_it : it);
+        const auto distance = std::distance(begin, nearest_it);
+        assert(distance <= std::numeric_limits<uint8_t>::max());
+        return gsl::narrow<uint8_t>(distance);
+    */
+    const auto it = ::lower_bound(magnitudes, value);
+    const auto prev_it = it - 1; // const auto prev_it = std::prev(it);
+
+    const auto v0 = value - lookup<six::AmplitudeTableSize>(prev_it, magnitudes); // value - *prev_it
+    const auto v1 = lookup<six::AmplitudeTableSize>(it, magnitudes) - value; // *it - value
+    //const auto nearest_it = select(v0 <= v1, prev_it, it); //  (value - *prev_it <= *it - value ? prev_it : it);
+    
+    const intv end = gsl::narrow<int>(magnitudes.size());
+    //const auto end_test = select(it == end, prev_it, nearest_it); // it == end ? prev_it  : ...
+    const intv zero = 0;
+    auto retval = select(it == 0, zero, // if (it == begin) return 0;
+        select(it == end, prev_it,  // it == end ? prev_it  : ...
+            select(v0 <=v1, prev_it, it) //  (value - *prev_it <= *it - value ? prev_it : it);
+        ));
+    return retval;
+}
+
+static auto find_nearest(std::span<const float> magnitudes, const floatv& phase_direction_real, const floatv& phase_direction_imag,
+    const zfloatv& v)
+{
+    // We have to do a 1D nearest neighbor search for magnitude.
+    // But it's not the magnitude of the input complex value - it's the projection of
+    // the complex value onto the ray of candidate magnitudes at the selected phase.
+    // i.e. dot product.
+    const auto projection = (phase_direction_real * real(v)) + (phase_direction_imag * imag(v));
+    //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ???
+    return nearest(magnitudes, projection);
+}
+static inline auto find_nearest(std::span<const float> magnitudes, const zfloatv& phase_direction, const zfloatv& v)
+{
+    return find_nearest(magnitudes, real(phase_direction), imag(phase_direction), v);
+}
+
+void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_unseq_(std::span<const six::zfloat> p, std::span<AMP8I_PHS8I_t> results) const
+{
+    const auto v = load(p);
+
+    const auto phase = ::getPhase(v, phase_delta);
+
+    const auto phase_direction = lookup<six::AmplitudeTableSize>(phase, phase_directions);
+    #ifndef NDEBUG // i.e., debug
+    for (size_t i = 0; i < phase.size(); i++)
+    {
+        const auto pd = phase_directions[phase[i]];
+        assert(pd.real() == real(phase_direction)[i]);
+        assert(pd.imag() == imag(phase_direction)[i]);
+    }
+    #endif
+
+    const auto amplitude = ::find_nearest(magnitudes, phase_direction, v);
+    #ifndef NDEBUG // i.e., debug
+    for (size_t i = 0; i < amplitude.size(); i++)
+    {
+        const auto i_ = phase[i];
+        const auto a = find_nearest(phase_directions[i_], p[i]);
+        assert(a == amplitude[i]);
+    }
+    #endif
+
+    auto dest = results.begin();
+    for (size_t i = 0; i < v.size(); i++)
+    {
+        dest->phase = gsl::narrow_cast<uint8_t>(phase[i]);
+        dest->amplitude = gsl::narrow_cast<uint8_t>(amplitude[i]);
+
+        ++dest;
+    }
+}
+
+void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_unseq(std::span<const six::zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const
+{
+    auto first = inputs.begin();
+    const auto last = inputs.end();
+    auto dest = results.begin();
+
+    // The above code is simpler (no templates) if we use just a single VCL
+    // complex type: `zfloatv`.  If there is any performance difference,
+    // it will only be for extreme edge cases since the smaller types are only used
+    // at the end of the loop.
+    //
+    // It also makes this loop simpler as we handle all non-multiples-of-8 in
+    // the same way.
+    constexpr auto elements_per_iteration = 8;
+
+    // Can do these checks one-time outside of the loop
+    const auto distance = std::distance(first, last);
+
+    // First, do multiples of 8
+    const auto distance_ = distance - (distance % elements_per_iteration);
+    const auto last_ = first + distance_;
+    for (; first != last_; first += elements_per_iteration, dest += elements_per_iteration)
+    {
+        const auto f = sys::make_span(&(*first), elements_per_iteration);
+        const auto d = sys::make_span(&(*dest), elements_per_iteration);
+        nearest_neighbors_unseq_(f, d);
+    }
+
+    // Then finish off anything left
+    assert(std::distance(first, last) < elements_per_iteration);
+    for (; first != last; ++first, ++dest)
+    {
+        const auto f = sys::make_span(&(*first), 1);
+        const auto d = sys::make_span(&(*dest), 1);
+        nearest_neighbors_seq(f, d);
+    }
+}
+std::vector<six::AMP8I_PHS8I_t> six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq_vcl(
+    std::span<const zfloat> inputs, const six::AmplitudeTable* pAmplitudeTable)
+{
+    // make a structure to quickly find the nearest neighbor
+    const auto& converter = make_(pAmplitudeTable);
+
+    std::vector<six::AMP8I_PHS8I_t> retval(inputs.size());
+    converter.impl.nearest_neighbors_unseq(inputs, sys::make_span(retval));
+    return retval;
+}
+
+#endif // SIX_sicd_has_simd
diff --git a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_unseq.cpp b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_unseq.cpp
new file mode 100644
index 000000000..541fb4b5c
--- /dev/null
+++ b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_unseq.cpp
@@ -0,0 +1,759 @@
+/* =========================================================================
+* This file is part of six.sicd-c++
+* =========================================================================
+*
+* (C) Copyright 2021, Maxar Technologies, Inc.
+*
+* six.sicd-c++ is free software; you can redistribute it and/or modify
+* it under the terms of the GNU Lesser General Public License as published by
+* the Free Software Foundation; either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with this program; If not,
+* see <http://www.gnu.org/licenses/>.
+*
+*/
+#include "six/AmplitudeTable.h"
+
+#include <math.h>
+#include <assert.h>
+
+#include <cassert>
+#include <memory>
+#include <std/numbers>
+#include <algorithm>
+#include <functional>
+#include <type_traits>
+
+#include <coda_oss/CPlusPlus.h>
+#if CODA_OSS_cpp17
+    // <execution> is broken with the older version of GCC we're using
+    #if (__GNUC__ >= 10) || _MSC_VER
+    #include <execution>
+    #define SIX_six_sicd_ComplexToAMP8IPHS8I_has_execution 1
+    #endif
+#endif
+
+#include <gsl/gsl.h>
+#include <math/Utilities.h>
+#include <units/Angles.h>
+#include <sys/Span.h>
+
+#include "six/sicd/Utilities.h"
+
+#undef min
+#undef max
+
+using zfloat = six::zfloat;
+using AMP8I_PHS8I_t = six::AMP8I_PHS8I_t;
+
+#if SIX_sicd_has_VCL
+
+#define VCL_NAMESPACE vcl
+#if _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4100) // '...': unreferenced formal parameter
+#pragma warning(disable: 4127) // conditional expression is constant
+#pragma warning(disable: 4244) // '...': conversion from '...' to '...', possible loss of data
+#pragma warning(disable: 4723) // potential divide by 0
+#endif
+#include "six/sicd/vectorclass/version2/vectorclass.h"
+#include "six/sicd/vectorclass/version2/vectormath_trig.h"
+#include "six/sicd/vectorclass/complex/complexvec1.h"
+#if _MSC_VER
+#pragma warning(pop)
+#endif
+
+namespace six
+{
+namespace sicd
+{
+namespace vcl
+{
+
+constexpr auto elements_per_iteration = 4;
+
+using floatv = ::vcl::Vec4f;
+using intv = ::vcl::Vec4i;
+using zfloatv = ::vcl::Complex4f;
+
+} // vcl
+} // sicd
+} // six
+
+auto real(const six::sicd::vcl::zfloatv& z)
+{
+    return z.real();
+}
+auto imag(const six::sicd::vcl::zfloatv& z)
+{
+    return z.imag();
+}
+static inline size_t size(const six::sicd::vcl::zfloatv& z) noexcept
+{
+    return z.size();
+}
+static inline int ssize(const six::sicd::vcl::zfloatv& z) noexcept
+{
+    return z.size();
+}
+static inline int ssize(const six::sicd::vcl::floatv& z) noexcept
+{
+    return z.size();
+}
+static inline int ssize(const six::sicd::vcl::intv& z) noexcept
+{
+    return z.size();
+}
+
+// https://en.cppreference.com/w/cpp/numeric/complex/arg
+// > `std::atan2(std::imag(z), std::real(z))`
+inline auto arg(const six::sicd::vcl::floatv& real, const six::sicd::vcl::floatv& imag)
+{
+    return atan2(imag, real); // arg()
+}
+inline auto arg(const six::sicd::vcl::zfloatv& z)
+{
+    return arg(real(z), imag(z));
+}
+
+template<typename T>
+inline auto if_add(const T& f, const six::sicd::vcl::floatv& a, float b)
+{
+    return vcl::if_add(f, a, b);
+}
+
+inline bool any_of(const six::sicd::vcl::intv& m)
+{
+    return horizontal_or(m);
+}
+
+static inline void copy_from(std::span<const float> p, six::sicd::vcl::floatv& result)
+{
+    assert(p.size() == result.size());
+    result.load(p.data());
+}
+static inline auto copy_from(std::span<const zfloat> p, six::sicd::vcl::zfloatv& result)
+{
+    // https://en.cppreference.com/w/cpp/numeric/complex
+    // > For any pointer to an element of an array of `std::complex<T>` named `p` and any valid array index `i`, ...
+    // > is the real part of the complex number `p[i]`, ...
+    result.load(reinterpret_cast<const float*>(p.data()));
+}
+
+template<size_t N>
+static inline auto lookup(const six::sicd::vcl::intv& zindex, const std::array<zfloat, N>& phase_directions)
+{
+    return ::vcl::lookup<N>(zindex, phase_directions.data());
+}
+static inline auto lookup(const six::sicd::vcl::intv& zindex, std::span<const float> magnitudes)
+{
+    assert(magnitudes.size() == six::AmplitudeTableSize);
+    return ::vcl::lookup<six::AmplitudeTableSize>(zindex, magnitudes.data());
+}
+
+#endif // SIX_sicd_has_VCL
+
+#if SIX_sicd_has_ximd
+#ifndef CODA_OSS_Ximd_ENABLE
+#define CODA_OSS_Ximd_ENABLE 1
+#endif
+#include <sys/Ximd.h>
+
+namespace six
+{
+namespace sicd
+{
+namespace ximd
+{
+
+template<typename T>
+using simd = sys::ximd::Ximd<T>;
+using intv = simd<int>;
+using floatv = simd<float>;
+using intv_mask = sys::ximd::ximd_mask;
+using floatv_mask = sys::ximd::ximd_mask;
+
+constexpr auto elements_per_iteration = floatv::size();
+
+// Manage a SIMD complex as an array of two SIMDs
+using zfloatv = std::array<floatv, 2>;
+
+template<typename TTest, typename TResult>
+static inline auto select(const TTest& test, const TResult& t, const TResult& f)
+{
+    TResult retval;
+    for (size_t i = 0; i < test.size(); i++)
+    {
+        retval[i] = test[i] ? t[i] : f[i];
+    }
+    return retval;
+}
+
+} // ximd
+} // sicd
+} // six
+
+static inline auto& real(six::sicd::ximd::zfloatv& z) noexcept
+{
+    return z[0];
+}
+static inline const auto& real(const six::sicd::ximd::zfloatv& z) noexcept
+{
+    return z[0];
+}
+static inline auto& imag(six::sicd::ximd::zfloatv& z) noexcept
+{
+    return z[1];
+}
+static inline const auto& imag(const six::sicd::ximd::zfloatv& z) noexcept
+{
+    return z[1];
+}
+
+static inline size_t size(const six::sicd::ximd::zfloatv& z) noexcept
+{
+    auto retval = real(z).size();
+    assert(retval == imag(z).size());
+    return retval;
+}
+static inline auto ssize(const six::sicd::ximd::zfloatv& z) noexcept
+{
+    return gsl::narrow<int>(size(z));
+}
+template<typename T>
+static inline auto ssize(const six::sicd::ximd::simd<T>& v) noexcept
+{
+    return gsl::narrow<int>(v.size());
+}
+
+namespace six
+{
+namespace sicd
+{
+namespace ximd
+{
+
+template <typename TGeneratorReal, typename TGeneratorImag>
+static inline auto make_zfloatv(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag)
+{
+    six::sicd::ximd::zfloatv retval;
+    for (size_t i = 0; i < size(retval); i++)
+    {
+        real(retval)[i] = generate_real(i);
+        imag(retval)[i] = generate_imag(i);
+    }
+    return retval;
+}
+
+} // ximd
+} // sicd
+} // six
+
+
+static inline auto arg(const six::sicd::ximd::zfloatv& z)
+{
+    // https://en.cppreference.com/w/cpp/numeric/complex/arg
+    // > `std::atan2(std::imag(z), std::real(z))`
+    return atan2(imag(z), real(z));  // arg()
+}
+
+static inline auto copy_from(std::span<float> p, six::sicd::ximd::floatv& result)
+{
+    assert(p.size() == result.size());
+    result.copy_from(p.data());
+}
+static inline auto copy_from(std::span<const zfloat> p, six::sicd::ximd::zfloatv& result)
+{
+    const auto generate_real = [&](size_t i) { return p[i].real(); };
+    const auto generate_imag = [&](size_t i) { return p[i].imag(); };
+    result = six::sicd::ximd::make_zfloatv(generate_real, generate_imag);
+}
+
+static inline auto roundi(const six::sicd::ximd::floatv& v)  // match vcl::roundi()
+{
+    const auto rounded = round(v);
+    const auto generate_roundi = [&](size_t i)
+    { return static_cast<typename six::sicd::ximd::intv::value_type>(rounded[i]); };
+    return six::sicd::ximd::intv::generate(generate_roundi);
+}
+
+static inline auto select(const six::sicd::ximd::floatv_mask& test, const  six::sicd::ximd::floatv& t, const  six::sicd::ximd::floatv& f)
+{
+    return six::sicd::ximd::select(test, t, f);
+}
+static inline auto select(const six::sicd::ximd::intv_mask& test, const  six::sicd::ximd::intv& t, const  six::sicd::ximd::intv& f)
+{
+    return six::sicd::ximd::select(test, t, f);
+}
+
+static inline auto if_add(const six::sicd::ximd::floatv_mask& m, const six::sicd::ximd::floatv& v, typename six::sicd::ximd::floatv::value_type c)
+{
+    // phase = if_add(phase < 0.0, phase, std::numbers::pi_v<float> * 2.0f); // Wrap from [0, 2PI]
+    const auto generate_add = [&](size_t i) {
+        return m[i] ? v[i] + c : v[i];
+    };
+    return six::sicd::ximd::floatv::generate(generate_add);
+}
+
+template<size_t N>
+static inline auto lookup(const six::sicd::ximd::intv& zindex, const std::array<zfloat, N>& phase_directions)
+{
+    // It seems that the "generator" constuctor is called with SIMD instructions.
+    // https://en.cppreference.com/w/cpp/experimental/simd/simd/simd
+    // > The calls to `generator` are unsequenced with respect to each other.
+
+    const auto generate_real = [&](size_t i) {
+        const auto i_ = zindex[i];
+        return phase_directions[i_].real();
+    };
+    const auto generate_imag = [&](size_t i) {
+        const auto i_ = zindex[i];
+        return phase_directions[i_].imag();
+    };
+    return six::sicd::ximd::make_zfloatv(generate_real, generate_imag);
+}
+
+static auto lookup(const six::sicd::ximd::intv& zindex, std::span<const float> magnitudes)
+{
+    const auto generate = [&](size_t i) {
+        const auto i_ = zindex[i];
+
+        // The index may be out of range. This is expected because `i` might be "don't care."
+        if ((i_ >= 0) && (i_ < std::ssize(magnitudes)))
+        {
+            return magnitudes[i_];
+        }
+        return NAN; // propogate "don't care"
+    };
+    return six::sicd::ximd::floatv::generate(generate);
+}
+
+#endif // SIX_sicd_has_ximd
+
+template<typename ZFloatV>
+static auto getPhase(const ZFloatV& v, float phase_delta)
+{
+    // Phase is determined via arithmetic because it's equally spaced.
+    // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around
+    // handles cases that are close to 2PI.
+    auto phase = arg(v);
+    phase = if_add(phase < 0.0, phase, std::numbers::pi_v<float> * 2.0f); // Wrap from [0, 2PI]
+    return roundi(phase / phase_delta);
+}
+
+// https://en.cppreference.com/w/cpp/algorithm/lower_bound
+/*
+template<class ForwardIt, class T>
+ForwardIt lower_bound(ForwardIt first, ForwardIt last, const T& value)
+{
+    ForwardIt it;
+    typename std::iterator_traits<ForwardIt>::difference_type count, step;
+    count = std::distance(first, last);
+
+    while (count > 0)
+    {
+        it = first;
+        step = count / 2;
+        std::advance(it, step);
+
+        if (*it < value)
+        {
+            first = ++it;
+            count -= step + 1;
+        }
+        else
+            count = step;
+    }
+
+    return first;
+}
+*/
+template<typename IntV, typename FloatV>
+inline auto lower_bound_(std::span<const float> magnitudes, const FloatV& v)
+{
+    IntV first = 0;
+    const IntV last = gsl::narrow<int>(magnitudes.size());
+
+    auto count = last - first;
+    while (any_of(count > 0))
+    {
+        auto it = first;
+        const auto step = count / 2;
+        it += step;
+
+        auto next = it; ++next; // ... ++it;
+        auto advance = count; advance -= step + 1;  // ...  -= step + 1;
+
+        const auto c = lookup(it, magnitudes); // magnituides[it]
+        const auto test = c < v;
+        it = select(test, next, it); // ... ++it
+        first = select(test, it, first); // first = ...
+        count = select(test, advance, step); // count -= step + 1 <...OR...> count = step
+    }
+    return first;
+}
+template<typename IntV, typename FloatV>
+inline auto lower_bound(std::span<const float> magnitudes, const FloatV& value)
+{
+    auto retval = lower_bound_<IntV>(magnitudes, value);
+
+    #if CODA_OSS_DEBUG
+    for (int i = 0; i < ssize(value); i++)
+    {
+        const auto it = std::lower_bound(magnitudes.begin(), magnitudes.end(), value[i]);
+        const auto result = gsl::narrow<int>(std::distance(magnitudes.begin(), it));
+        assert(retval[i] == result);
+    }
+    #endif
+
+    return retval;
+}
+
+template<typename IntV, typename FloatV>
+static auto nearest(std::span<const float> magnitudes, const FloatV& value)
+{
+    assert(magnitudes.size() == six::AmplitudeTableSize);
+
+    /*
+        const auto it = std::lower_bound(begin, end, value);
+        if (it == begin) return 0;
+
+        const auto prev_it = std::prev(it);
+        const auto nearest_it = it == end ? prev_it  :
+            (value - *prev_it <= *it - value ? prev_it : it);
+        const auto distance = std::distance(begin, nearest_it);
+        assert(distance <= std::numeric_limits<uint8_t>::max());
+        return gsl::narrow<uint8_t>(distance);
+    */
+    const auto it = ::lower_bound<IntV>(magnitudes, value);
+    const auto prev_it = it - 1; // const auto prev_it = std::prev(it);
+
+    const auto v0 = value - lookup(prev_it, magnitudes); // value - *prev_it
+    const auto v1 = lookup(it, magnitudes) - value; // *it - value
+    //const auto nearest_it = select(v0 <= v1, prev_it, it); //  (value - *prev_it <= *it - value ? prev_it : it);
+    
+    const IntV end = gsl::narrow<int>(magnitudes.size());
+    //const auto end_test = select(it == end, prev_it, nearest_it); // it == end ? prev_it  : ...
+    const IntV zero = 0;
+    auto retval = select(it == 0, zero, // if (it == begin) return 0;
+        select(it == end, prev_it,  // it == end ? prev_it  : ...
+            select(v0 <=v1, prev_it, it) //  (value - *prev_it <= *it - value ? prev_it : it);
+        ));
+    return retval;
+}
+
+template<typename IntV, typename FloatV, typename ZFloatV>
+static auto find_nearest(std::span<const float> magnitudes,
+    const FloatV& phase_direction_real, const FloatV& phase_direction_imag,
+    const ZFloatV& v)
+{
+    // We have to do a 1D nearest neighbor search for magnitude.
+    // But it's not the magnitude of the input complex value - it's the projection of
+    // the complex value onto the ray of candidate magnitudes at the selected phase.
+    // i.e. dot product.
+    const auto projection = (phase_direction_real * real(v)) + (phase_direction_imag * imag(v));
+    //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ???
+    return nearest<IntV>(magnitudes, projection);
+}
+
+#if SIX_sicd_has_VCL
+static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter,
+    const six::sicd::vcl::intv& phase, const  six::sicd::vcl::zfloatv& v)
+{
+    const auto& impl = converter.impl;
+
+    const auto phase_direction_real = lookup(phase, impl.phase_directions_real);
+    const auto phase_direction_imag = lookup(phase, impl.phase_directions_imag);
+    return ::find_nearest<six::sicd::vcl::intv>(impl.magnitudes, phase_direction_real, phase_direction_imag, v);
+}
+#endif
+
+#if SIX_sicd_has_ximd
+static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter,
+    const six::sicd::ximd::intv& phase, const  six::sicd::ximd::zfloatv& v)
+{
+    const auto& impl = converter.impl;
+
+    const auto phase_direction = lookup(phase, impl.phase_directions);
+    return ::find_nearest<six::sicd::ximd::intv>(impl.magnitudes, real(phase_direction), imag(phase_direction), v);
+}
+#endif
+
+#if SIX_sicd_ComplexToAMP8IPHS8I_unseq
+template<typename ZFloatV>
+void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_unseq_T(std::span<const zfloat> p, std::span<AMP8I_PHS8I_t> results) const
+{
+    ZFloatV v;
+    assert(p.size() == size(v));
+
+    copy_from(p, v);
+    #if CODA_OSS_DEBUG
+    for (int i = 0; i < ssize(v); i++)
+    {
+        const auto z = p[i];
+        assert(real(v)[i] == z.real());
+        assert(imag(v)[i] == z.imag());
+    }
+    #endif
+
+    const auto phase = ::getPhase(v, phase_delta);
+    #if CODA_OSS_DEBUG
+    for (int i = 0; i < ssize(phase); i++)
+    {
+        const auto phase_ = getPhase(p[i]);
+        assert(static_cast<uint8_t>(phase[i]) == phase_);
+    }
+    #endif
+
+    const auto amplitude = lookup_and_find_nearest(converter, phase, v);
+    #if CODA_OSS_DEBUG
+    for (int i = 0; i < ssize(amplitude); i++)
+    {
+        const auto i_ = phase[i];
+        const auto a = find_nearest(phase_directions[i_], p[i]);
+        assert(a == amplitude[i]);
+    }
+    #endif
+
+    // interleave() and store() is slower than an explicit loop.
+    auto dest = results.begin();
+    for (int i = 0; i < ssize(v); i++)
+    {
+        dest->phase = gsl::narrow_cast<uint8_t>(phase[i]);
+        dest->amplitude = gsl::narrow_cast<uint8_t>(amplitude[i]);
+
+        ++dest;
+    }
+}
+
+template<typename ZFloatV, int elements_per_iteration>
+void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_unseq(std::span<const zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const
+{
+    auto first = inputs.begin();
+    const auto last = inputs.end();
+    auto dest = results.begin();
+
+    // The above code is simpler (no templates) if we use just a single VCL
+    // complex type: `zfloatv`.  If there is any performance difference,
+    // it will only be for extreme edge cases since the smaller types are only used
+    // at the end of the loop.
+    //
+    // It also makes this loop simpler as we handle all non-multiples-of-8 in
+    // the same way.
+
+    // Can do these checks one-time outside of the loop
+    const auto distance = std::distance(first, last);
+
+    // First, do multiples of 8
+    const auto distance_ = distance - (distance % elements_per_iteration);
+    const auto last_ = first + distance_;
+    for (; first != last_; first += elements_per_iteration, dest += elements_per_iteration)
+    {
+        const auto f = sys::make_span(&(*first), elements_per_iteration);
+        const auto d = sys::make_span(&(*dest), elements_per_iteration);
+
+        nearest_neighbors_unseq_T<ZFloatV>(f, d);
+    }
+
+    // Then finish off anything left
+    assert(std::distance(first, last) < elements_per_iteration);
+    for (; first != last; ++first, ++dest)
+    {
+        const auto f = sys::make_span(&(*first), 1);
+        const auto d = sys::make_span(&(*dest), 1);
+        nearest_neighbors_seq(f, d);
+    }
+}
+#endif // SIX_sicd_ComplexToAMP8IPHS8I_unseq
+
+#if SIX_sicd_has_VCL
+std::vector<AMP8I_PHS8I_t> six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq_vcl(
+    std::span<const zfloat> inputs, const six::AmplitudeTable* pAmplitudeTable)
+{
+    // make a structure to quickly find the nearest neighbor
+    const auto& converter = make_(pAmplitudeTable);
+
+    std::vector<AMP8I_PHS8I_t> retval(inputs.size());
+    converter.impl.nearest_neighbors_unseq<vcl::zfloatv, vcl::elements_per_iteration>(inputs, sys::make_span(retval));
+    return retval;
+}
+#endif // SIX_sicd_has_VCL
+
+#if SIX_sicd_has_ximd
+std::vector<AMP8I_PHS8I_t> six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq_ximd(
+    std::span<const zfloat> inputs, const six::AmplitudeTable* pAmplitudeTable)
+{
+    // make a structure to quickly find the nearest neighbor
+    const auto& converter = make_(pAmplitudeTable);
+
+    std::vector<AMP8I_PHS8I_t> retval(inputs.size());
+    converter.impl.nearest_neighbors_unseq<ximd::zfloatv, ximd::elements_per_iteration>(inputs, sys::make_span(retval));
+    return retval;
+}
+#endif // SIX_sicd_has_ximd
+
+/**********************************************************************
+
+// This is here (instead of **ComplexToAMP8IPHS8I.cpp**) because par_unseq() might
+// need to know implementation details of _unseq()
+using input_it = std::span<const zfloat>::iterator;
+using output_it = std::span<AMP8I_PHS8I_t>::iterator;
+
+struct const_iterator final
+{
+    using Type = input_it::value_type;
+
+    using iterator_category = std::random_access_iterator_tag;
+    using value_type = std::remove_cv_t<Type>;
+    using difference_type = std::ptrdiff_t;
+    using pointer = Type*;
+    using reference = Type&;
+    using const_reference = const Type&;
+
+    input_it current_;
+
+    const_iterator() = default;
+    const_iterator(input_it it) : current_(it) {}
+
+    const_reference operator*() const noexcept
+    {
+        return *current_;
+    }
+
+    const_iterator& operator++() noexcept
+    {
+        for (ptrdiff_t i = 0; i < 8; i++)
+        {
+            ++current_;
+        }
+        return *this;
+    }
+
+    const_iterator& operator--() noexcept
+    {
+        --current_;
+        return *this;
+    }
+
+    const_iterator& operator-=(const difference_type n) noexcept
+    {
+        current_ -= n;
+        return *this;
+    }
+    const_iterator operator-(const difference_type n) const noexcept
+    {
+        auto ret = *this;
+        ret -= n;
+        return ret;
+    }
+    difference_type operator-(const const_iterator& rhs) const noexcept
+    {
+        return current_ - rhs.current_;
+    }
+
+    bool operator!=(const const_iterator& rhs) const noexcept
+    {
+        return current_ != rhs.current_;
+    }
+};
+
+struct result_wrapper final
+{
+    output_it current_;
+
+    result_wrapper& operator=(const std::vector<AMP8I_PHS8I_t>& values)
+    {
+        for (auto& v : values)
+        {
+            *current_ = v;
+            ++current_;
+        }
+        return *this;
+    }
+};
+
+struct iterator final
+{
+    using Type = output_it::value_type;
+
+    using iterator_category = std::random_access_iterator_tag;
+    using value_type = std::remove_cv_t<Type>;
+    using difference_type = std::ptrdiff_t;
+    using pointer = Type*;
+    using reference = Type&;
+
+    output_it current_;
+
+    iterator() = default;
+    iterator(output_it it) : current_(it) {}
+
+    result_wrapper operator*() noexcept
+    {
+        return result_wrapper{ current_};
+    }
+
+    iterator& operator++() noexcept
+    {
+        for (ptrdiff_t i = 0; i < 8; i++)
+        {
+            ++current_;
+        }
+        return *this;
+    }
+
+    iterator& operator--() noexcept
+    {
+        --current_;
+        return *this;
+    }
+
+    iterator& operator-=(const difference_type n) noexcept
+    {
+        current_ -= n;
+        return *this;
+    }
+
+    bool operator!=(const iterator& rhs) const noexcept
+    {
+        return current_ != rhs.current_;
+    }
+};
+
+void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_par_unseq(std::span<const zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const
+{
+    const auto first = inputs.begin();
+    const auto last = inputs.end();
+    auto dest = results.begin();
+
+    const auto func = [&](const auto& v) {
+        std::span<const zfloat> values(&v, 8);
+
+        std::vector<AMP8I_PHS8I_t> retval(values.size());
+        nearest_neighbors_unseq(values, sys::make_span(retval));
+        return retval;
+    };
+
+    std::ignore = std::transform(std::execution::seq,
+        const_iterator{ first }, const_iterator{ last }, iterator{ dest }, func);
+}
+#if SIX_sicd_ComplexToAMP8IPHS8I_unseq
+std::vector<AMP8I_PHS8I_t> six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par_unseq(
+    std::span<const zfloat> inputs, const six::AmplitudeTable* pAmplitudeTable)
+{
+    // make a structure to quickly find the nearest neighbor
+    const auto& converter = make_(pAmplitudeTable);
+
+    std::vector<AMP8I_PHS8I_t> retval(inputs.size());
+    converter.impl.nearest_neighbors_par_unseq(inputs, sys::make_span(retval));
+    return retval;
+}
+#endif //  SIX_sicd_ComplexToAMP8IPHS8I_unseq
+
+**********************************************************************/
diff --git a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_vcl.cpp b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_vcl.cpp
deleted file mode 100644
index 74fa23225..000000000
--- a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_vcl.cpp
+++ /dev/null
@@ -1,239 +0,0 @@
-/* =========================================================================
-* This file is part of six.sicd-c++
-* =========================================================================
-*
-* (C) Copyright 2021, Maxar Technologies, Inc.
-*
-* six.sicd-c++ is free software; you can redistribute it and/or modify
-* it under the terms of the GNU Lesser General Public License as published by
-* the Free Software Foundation; either version 3 of the License, or
-* (at your option) any later version.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-* GNU Lesser General Public License for more details.
-*
-* You should have received a copy of the GNU Lesser General Public
-* License along with this program; If not,
-* see <http://www.gnu.org/licenses/>.
-*
-*/
-#include "six/AmplitudeTable.h"
-
-#include <math.h>
-#include <assert.h>
-
-#include <cassert>
-#include <memory>
-#include <std/numbers>
-#include <algorithm>
-#include <functional>
-
-#include <gsl/gsl.h>
-#include <math/Utilities.h>
-#include <units/Angles.h>
-
-#include "six/sicd/Utilities.h"
-
-#undef min
-#undef max
-
-#if SIX_sicd_has_VCL
-
-#define VCL_NAMESPACE vcl
-#if _MSC_VER
-#pragma warning(disable: 4100) // '...': unreferenced formal parameter
-#endif
-#include "six/sicd/vectorclass/version2/vectorclass.h"
-#include "six/sicd/vectorclass/version2/vectormath_trig.h"
-#include "six/sicd/vectorclass/complex/complexvec1.h"
-
-// https://en.cppreference.com/w/cpp/experimental/simd
-using zfloatv = vcl::Complex8f;
-using floatv = vcl::Vec8f;
-using intv = vcl::Vec8i;
-
-// https://en.cppreference.com/w/cpp/numeric/complex/arg
-// > `std::atan2(std::imag(z), std::real(z))`
-inline auto arg(const floatv& real, const floatv& imag)
-{
-    return atan2(imag, real); // arg()
-}
-inline auto arg(const zfloatv& z)
-{
-    return arg(z.real(), z.imag());
-}
-
-static inline auto interleave(const intv& a, const intv& b)
-{
-    // The blend() indicies are based on one large array
-    auto index0 = vcl::blend8<0, 8, 1, 9, 2, 10, 3, 11>(a, b); // i.e., a[0], b[0], a[1], b[1], ...
-    auto index1 = vcl::blend8<4, 12, 5, 13, 6, 14, 7, 15>(a, b);
-    return vcl::Vec16i(std::move(index0), std::move(index1));
-}
-
-// There's no `vcl::lookup()` for `std::complex<T>*`, implement using floats
-template<size_t N>
-static auto lookup(const intv& zindex, const std::array<six::zfloat, N>& table_)
-{
-    const auto table = reinterpret_cast<const float*>(table_.data());
-    constexpr auto size_as_floats = N * 2; // table_t is six::zfloat
-
-    // A `six::zfloat` at *n* is at *2n* when viewed as `float`.
-    const auto real_index = zindex * 2;
-    // The imaginary part is after the real part
-    const auto imag_index = real_index + 1; // [n] is real, [n+1] is imag
-    const auto index = interleave(real_index, imag_index);
-
-    auto lookup = vcl::lookup<size_as_floats>(index, table);
-    return zfloatv(std::move(lookup));
-}
-
-static auto getPhase(const zfloatv& v, float phase_delta)
-{
-    // Phase is determined via arithmetic because it's equally spaced.
-    // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around
-    // handles cases that are close to 2PI.
-    auto phase = arg(v);
-    phase = if_add(phase < 0.0, phase, std::numbers::pi_v<float> * 2.0f); // Wrap from [0, 2PI]
-    return roundi(phase / phase_delta);
-}
-
-inline auto lower_bound(std::span<const float> magnitudes, const floatv& value)
-{
-    const auto begin = magnitudes.begin();
-    const auto end = magnitudes.end();
-
-    intv retval;
-    for (int i = 0; i < value.size(); i++)
-    {
-        const auto it = std::lower_bound(begin, end, value[i]);
-        const auto result = std::distance(begin, it);
-        retval.insert(i, gsl::narrow<int>(result));
-    }
-    return retval;
-}
-static auto nearest(std::span<const float> magnitudes, const floatv& value)
-{
-    assert(magnitudes.size() == six::AmplitudeTableSize);
-
-    /*
-        const auto it = std::lower_bound(begin, end, value);
-        if (it == begin) return 0;
-
-        const auto prev_it = std::prev(it);
-        const auto nearest_it = it == end ? prev_it  :
-            (value - *prev_it <= *it - value ? prev_it : it);
-        const auto distance = std::distance(begin, nearest_it);
-        assert(distance <= std::numeric_limits<uint8_t>::max());
-        return gsl::narrow<uint8_t>(distance);
-    */
-    const auto it = ::lower_bound(magnitudes, value);
-    const auto prev_it = it - 1; // const auto prev_it = std::prev(it);
-
-    const auto v0 = value - vcl::lookup<six::AmplitudeTableSize>(prev_it, magnitudes.data()); // value - *prev_it
-    const auto v1 = vcl::lookup<six::AmplitudeTableSize>(it, magnitudes.data()) - value; // *it - value
-    //const auto nearest_it = select(v0 <= v1, prev_it, it); //  (value - *prev_it <= *it - value ? prev_it : it);
-    
-    const intv end = gsl::narrow<int>(magnitudes.size());
-    //const auto end_test = select(it == end, prev_it, nearest_it); // it == end ? prev_it  : ...
-    const intv zero = 0;
-    auto retval = select(it == 0, zero, // if (it == begin) return 0;
-        select(it == end, prev_it,  // it == end ? prev_it  : ...
-            select(v0 <=v1, prev_it, it) //  (value - *prev_it <= *it - value ? prev_it : it);
-        ));
-    return retval;
-}
-
-static inline auto find_nearest(std::span<const float> magnitudes, const zfloatv& phase_direction, const zfloatv& v)
-{
-    // We have to do a 1D nearest neighbor search for magnitude.
-    // But it's not the magnitude of the input complex value - it's the projection of
-    // the complex value onto the ray of candidate magnitudes at the selected phase.
-    // i.e. dot product.
-    const auto projection = (phase_direction.real() * v.real()) + (phase_direction.imag() * v.imag());
-    //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ???
-    return nearest(magnitudes, projection);
-}
-static inline auto find_nearest(std::span<const float> magnitudes, const floatv& phase_direction_real, const floatv& phase_direction_imag,
-    const zfloatv& v)
-{
-    // We have to do a 1D nearest neighbor search for magnitude.
-    // But it's not the magnitude of the input complex value - it's the projection of
-    // the complex value onto the ray of candidate magnitudes at the selected phase.
-    // i.e. dot product.
-    const auto projection = (phase_direction_real * v.real()) + (phase_direction_imag * v.imag());
-    //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ???
-    return nearest(magnitudes, projection);
-}
-
-template< typename TOutputIter>
-void six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq_(const six::zfloat* p, TOutputIter dest) const
-{
-    // https://en.cppreference.com/w/cpp/numeric/complex
-    // > For any pointer to an element of an array of `std::complex<T>` named `p` and any valid array index `i`, ...
-    // > is the real part of the complex number `p[i]`, ...
-    zfloatv v;
-    v.load(reinterpret_cast<const float*>(p));
-
-    const auto phase = ::getPhase(v, phase_delta);
-
-    //const auto phase_direction = lookup(phase, phase_directions);
-    //const auto amplitude = ::find_nearest(magnitudes, phase_direction, v);
-    const auto phase_direction_real = vcl::lookup<six::AmplitudeTableSize>(phase, phase_directions_real.data());
-    const auto phase_direction_imag = vcl::lookup<six::AmplitudeTableSize>(phase, phase_directions_imag.data());
-    //const auto amplitude = ::find_nearest(magnitudes, phase_direction_real, phase_direction_imag, v);
-
-    // interleave() and store() is slower than an explicit loop.
-    for (int i = 0; i < v.size(); i++)
-    {
-        dest->phase = gsl::narrow_cast<uint8_t>(phase[i]);
-
-        //dest->amplitude = find_nearest(phase_directions[dest->phase], p[i]);
-        //const auto phase_direction_ = phase_direction.extract(i);
-        //dest->amplitude = find_nearest(six::zfloat(phase_direction_.real(), phase_direction_.imag()), p[i]);
-        dest->amplitude = find_nearest(six::zfloat(phase_direction_real[i], phase_direction_imag[i]), p[i]);
-        //dest->amplitude = gsl::narrow_cast<uint8_t>(amplitude[i]);
-
-        ++dest;
-    }
-}
-
-template <typename TInputIt, typename TOutputIt>
-void six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq(TInputIt first, TInputIt last, TOutputIt dest) const
-{
-    // The above code is simpler (no templates) if we use just a single VCL
-    // complex type: `zfloatv`.  If there is any performance differ4ence,
-    // it will only be for extreme edge cases since the smaller types are only used
-    // at the end of the loop.
-    //
-    // It also makes this loop simpler as we handle all non-multiples-of-8 in
-    // the same way.
-    for (; first != last; ++first, ++dest)
-    {
-        const auto distance = std::distance(first, last);
-        assert(distance > 0); // should be out of the loop!
-        if (distance % 8 == 0)
-        {
-            nearest_neighbors_unseq_(&(*first), dest);
-            first += 7; dest += 7;
-        }
-        else
-        {
-            nearest_neighbors_seq(first, last, dest);
-        }
-    }
-}
-std::vector<six::AMP8I_PHS8I_t> six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq(
-    std::span<const zfloat> inputs, const six::AmplitudeTable* pAmplitudeTable)
-{
-    // make a structure to quickly find the nearest neighbor
-    const auto& converter = make_(pAmplitudeTable);
-
-    std::vector<six::AMP8I_PHS8I_t> retval(inputs.size());
-    converter.nearest_neighbors_unseq(inputs.begin(), inputs.end(), retval.begin());
-    return retval;
-}
-
-#endif // SIX_sicd_have_VCL
\ No newline at end of file
diff --git a/six/modules/c++/six/include/six/AmplitudeTable.h b/six/modules/c++/six/include/six/AmplitudeTable.h
index 8fc034402..966a160a9 100644
--- a/six/modules/c++/six/include/six/AmplitudeTable.h
+++ b/six/modules/c++/six/include/six/AmplitudeTable.h
@@ -35,6 +35,7 @@
 
 #include <import/except.h>
 #include <coda_oss/CPlusPlus.h>
+#include <sys/Dbg.h>
 
 #include <nitf/LookupTable.hpp>
 #include <scene/sys_Conf.h>
@@ -171,7 +172,7 @@ struct SIX_SIX_API AMP8I_PHS8I_t final
     uint8_t phase;
 };
 
-// Control a few details of the ComplexToAMP8IPHS8I implementation, espeically "unseq" (i.e., SIMD).
+// Control a few details of the ComplexToAMP8IPHS8I implementation, especially "unseq" (i.e., SIMD).
 #ifndef SIX_sicd_has_VCL
     // Do we have the "vectorclass" library? https://github.com/vectorclass/version2
     #if !CODA_OSS_cpp17 // VCL needs C++17
@@ -180,35 +181,36 @@ struct SIX_SIX_API AMP8I_PHS8I_t final
         // __has_include is part of C++17
         #if __has_include("../../../six.sicd/include/six/sicd/vectorclass/version2/vectorclass.h") || \
             __has_include("six.sicd/include/six/sicd/vectorclass/version2/vectorclass.h")
-       #define SIX_sicd_has_VCL 1
+        #define SIX_sicd_has_VCL 1
         #else
         #define SIX_sicd_has_VCL 0
         #endif // __has_include
     #endif // C++17
 #endif
 
-#ifndef SIX_sicd_has_experimental_simd
+#ifndef SIX_sicd_has_simd
     // Do we have the `std::experimental::simd? https://en.cppreference.com/w/cpp/experimental/simd
-    #if (__GNUC__ >= 11) && CODA_OSS_cpp20
+    #if (__GNUC__ >= 999) && CODA_OSS_cpp20 // TODO: 11 instead of 999
         // https://github.com/VcDevel/std-simd "... shipping with GCC since version 11."
-        #include <experimental/simd>
-        #define SIX_sicd_has_experimental_simd 1
+        #define SIX_sicd_has_simd 1
     #else
-        #define SIX_sicd_has_experimental_simd 0
+        #define SIX_sicd_has_simd 0
     #endif // __GNUC__
 #endif
 
-#if SIX_sicd_has_VCL && SIX_sicd_has_experimental_simd
-    // This is because there is a single ComplexToAMP8IPHS8I::nearest_neighbors_unseq() method.
-    // Other than that, it should be possible to use both VCL and std::experimental::simd
-    #error "Can't enable both VCL and std::experimental::simd at the same time'"
+#ifndef SIX_sicd_has_ximd
+    // This is a "hacked up" version of std::experimental::simd using std::array.
+    // It's primarily for development and testing: VCL needs C++17 and
+    // std::experimental::simd is G++11/C++20.
+    #define SIX_sicd_has_ximd CODA_OSS_DEBUG
 #endif
+
 #ifndef SIX_sicd_ComplexToAMP8IPHS8I_unseq
-    #if SIX_sicd_has_VCL || SIX_sicd_has_experimental_simd
+    #if SIX_sicd_has_VCL || SIX_sicd_has_simd || SIX_sicd_has_ximd
     #define SIX_sicd_ComplexToAMP8IPHS8I_unseq 1
     #else
     #define SIX_sicd_ComplexToAMP8IPHS8I_unseq 0
-    #endif // SIX_sicd_have_VCL || SIX_sicd_have_experimental_simd
+    #endif // SIX_sicd_have_VCL || SIX_sicd_has_simd
 #endif // SIX_sicd_ComplexToAMP8IPHS8I_unseq
 
 
@@ -247,38 +249,65 @@ class ComplexToAMP8IPHS8I final
     AMP8I_PHS8I_t nearest_neighbor_(const six::zfloat& v) const;
     static std::vector<AMP8I_PHS8I_t> nearest_neighbors_par(std::span<const six::zfloat> inputs, const six::AmplitudeTable*);
     static std::vector<AMP8I_PHS8I_t> nearest_neighbors_seq(std::span<const six::zfloat> inputs, const six::AmplitudeTable*);
+
     #if SIX_sicd_ComplexToAMP8IPHS8I_unseq
     static std::vector<AMP8I_PHS8I_t> nearest_neighbors_unseq(std::span<const six::zfloat> inputs, const six::AmplitudeTable*);
     static std::vector<AMP8I_PHS8I_t> nearest_neighbors_par_unseq(std::span<const six::zfloat> inputs, const six::AmplitudeTable*);
+
+    #if SIX_sicd_has_VCL
+    static std::vector<AMP8I_PHS8I_t> nearest_neighbors_unseq_vcl(std::span<const six::zfloat> inputs, const six::AmplitudeTable*);
+    #endif
+    #if SIX_sicd_has_simd
+    static std::vector<AMP8I_PHS8I_t> nearest_neighbors_unseq_simd(std::span<const six::zfloat> inputs, const six::AmplitudeTable*);
     #endif
+    #if SIX_sicd_has_ximd
+    static std::vector<AMP8I_PHS8I_t> nearest_neighbors_unseq_ximd(std::span<const six::zfloat> inputs, const six::AmplitudeTable*);
+    #endif
+    #endif
+    
     static std::vector<AMP8I_PHS8I_t> nearest_neighbors(std::span<const six::zfloat> inputs, const six::AmplitudeTable*); // one of the above
 
 private:
-    template <typename TInputIt, typename TOutputIt>
-    void nearest_neighbors_seq(TInputIt first, TInputIt last, TOutputIt dest) const;
-    template <typename TInputIt, typename TOutputIt>
-    void nearest_neighbors_par(TInputIt first, TInputIt last, TOutputIt dest) const;
-    template <typename TInputIt, typename TOutputIt>
-    void nearest_neighbors_unseq(TInputIt first, TInputIt last, TOutputIt dest) const;
-    template <typename TInputIt, typename TOutputIt>
-    void nearest_neighbors_par_unseq(TInputIt first, TInputIt last, TOutputIt dest) const;
-
-    template< typename TOutputIter>
-    void nearest_neighbors_unseq_(const six::zfloat* p, TOutputIter dest) const;
-
-    //! The sorted set of possible magnitudes order from small to large.
-    std::array<float, AmplitudeTableSize> uncached_magnitudes; // Order is important! This must be ...
-    std::span<const float> magnitudes; // ... before this.
-    uint8_t find_nearest(six::zfloat phase_direction, six::zfloat v) const;
-
-    //! The difference in phase angle between two UINT phase values.
-    float phase_delta;
-    uint8_t getPhase(six::zfloat) const;
-
-    //! Unit vector rays that represent each direction that phase can point.
-    std::array<six::zfloat, AmplitudeTableSize> phase_directions; // interleaved, std::complex<float>
-    std::array<float, AmplitudeTableSize> phase_directions_real;
-    std::array<float, AmplitudeTableSize> phase_directions_imag;
+    struct Impl final
+    {
+        const ComplexToAMP8IPHS8I& converter;
+        Impl(const ComplexToAMP8IPHS8I& c) : converter(c) {}
+        ~Impl() = default;
+        Impl(const Impl&) = delete;
+        Impl& operator=(const Impl&) = delete;
+        Impl(Impl&&) = delete; // implicitly deleted because of =delete for copy
+        Impl& operator=(Impl&&) = delete; // implicitly deleted because of =delete for copy
+
+        void nearest_neighbors_seq(std::span<const zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const;
+        void nearest_neighbors_par(std::span<const zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const;
+
+        #if SIX_sicd_ComplexToAMP8IPHS8I_unseq
+        template<typename ZFloatV, int elements_per_iteration>
+        void nearest_neighbors_unseq(std::span<const zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const;
+        void nearest_neighbors_par_unseq(std::span<const zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const;
+
+        template<typename ZFloatV>
+        void nearest_neighbors_unseq_T(std::span<const zfloat>, std::span<AMP8I_PHS8I_t>) const;
+        #endif 
+
+        //! The sorted set of possible magnitudes order from small to large.
+        std::array<float, AmplitudeTableSize> uncached_magnitudes;
+        std::span<const float> magnitudes;
+        uint8_t find_nearest(zfloat phase_direction, zfloat v) const;
+
+        //! The difference in phase angle between two UINT phase values.
+        float phase_delta;
+        uint8_t getPhase(zfloat) const;
+
+        //! Unit vector rays that represent each direction that phase can point.
+        std::array<zfloat, AmplitudeTableSize> phase_directions; // interleaved, std::complex<float>
+        #ifdef SIX_sicd_has_VCL
+        std::array<float, AmplitudeTableSize> phase_directions_real;
+        std::array<float, AmplitudeTableSize> phase_directions_imag;
+        #endif
+    };
+public:
+    Impl impl;
 };
 }
 }