diff --git a/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h b/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h
index ba17f39f5..8e311c9e3 100644
--- a/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h
+++ b/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h
@@ -20,22 +20,73 @@
  *
  */
 
-#ifndef CODA_OSS_mt_Algorithm_h_INCLUDED_
-#define CODA_OSS_mt_Algorithm_h_INCLUDED_
 #pragma once
 
 #include <algorithm>
 #include <iterator>
 #include <future>
 
+#include "coda_oss/CPlusPlus.h"
+#if CODA_OSS_cpp17
+	// <execution> is broken with the older version of GCC we're using
+	#if (__GNUC__ >= 10) || _MSC_VER
+	#include <execution>
+	#define CODA_OSS_mt_Algorithm_has_execution 1
+	#endif
+#endif
+
 namespace mt
 {
-// There was a transform_async() utility here, but I removed it.
-//
-// First of all, C++11's std::async() is now (in 2023) thought of as maybe a
-// bit "half baked," and perhaps shouldn't be emulated.  Then, C++17 added
-// parallel algorithms which might be a better ... although we're still at C++14.
-}
+// "Roll our own" `std::transform(execution::par)` using std::async()
+// https://en.cppreference.com/w/cpp/algorithm/transform
+
+// Our own `Transform_par_()` is built on `std::async()`; for that we need to control
+// a couple of settings.
+struct Transform_par_settings final
+{
+    Transform_par_settings() = default;
+
+    Transform_par_settings(ptrdiff_t cutoff) : cutoff_(cutoff) { }
+    Transform_par_settings(std::launch policy) : policy_(policy) { }
+    Transform_par_settings(ptrdiff_t cutoff, std::launch policy) : cutoff_(cutoff), policy_(policy) { }
+    Transform_par_settings(std::launch policy, ptrdiff_t cutoff) : Transform_par_settings(cutoff, policy) { }
+
+    // The value of "default_cutoff" was determined by testing; there is nothing
+    // special about it, feel free to change it.
+    static constexpr ptrdiff_t dimension = 128 * 8;
+    static constexpr ptrdiff_t default_cutoff = dimension * dimension;
+    ptrdiff_t cutoff_ =  default_cutoff;
 
-#endif // CODA_OSS_mt_Algorithm_h_INCLUDED_
+    // https://en.cppreference.com/w/cpp/thread/launch
+    std::launch policy_ = std::launch::async; // "the task is executed on a different thread, potentially by creating and launching it first"
+};
+
+template <typename InputIt, typename OutputIt, typename UnaryOperation>
+inline OutputIt Transform_par_(InputIt first1, InputIt last1, OutputIt d_first, UnaryOperation unary_op,
+    const Transform_par_settings& settings)
+{
+    // https://en.cppreference.com/w/cpp/thread/async
+    const auto len = std::distance(first1, last1);
+    if (len < settings.cutoff_)
+    {
+        return std::transform(first1, last1, d_first, unary_op);
+    }
+    
+    const auto mid1 = first1 + len / 2;
+    const auto d_mid = d_first + len / 2;
+    auto handle = std::async(settings.policy_, Transform_par_<InputIt, OutputIt, UnaryOperation>, mid1, last1, d_mid, unary_op, settings);
+    Transform_par_(first1, mid1, d_first, unary_op, settings);
+    return handle.get();
+}
+template <typename InputIt, typename OutputIt, typename UnaryOperation>
+inline OutputIt Transform_par(InputIt first1, InputIt last1, OutputIt d_first, UnaryOperation unary_op,
+    Transform_par_settings settings = Transform_par_settings{})
+{
+#if CODA_OSS_mt_Algorithm_has_execution
+    return std::transform(std::execution::par, first1, last1, d_first, unary_op);
+#else
+    return Transform_par_(first1, last1, d_first, unary_op, settings);
+#endif // CODA_OSS_mt_Algorithm_has_execution
+}
 
+}
\ No newline at end of file
diff --git a/externals/coda-oss/modules/c++/mt/unittests/test_mt_byte_swap.cpp b/externals/coda-oss/modules/c++/mt/unittests/test_mt_byte_swap.cpp
index d2099cf83..ccc2e92cc 100644
--- a/externals/coda-oss/modules/c++/mt/unittests/test_mt_byte_swap.cpp
+++ b/externals/coda-oss/modules/c++/mt/unittests/test_mt_byte_swap.cpp
@@ -28,14 +28,20 @@
 #include <std/cstddef> // std::byte
 #include <std/span>
 
+#include <sys/ByteSwap.h>
+
 #include <mt/ThreadedByteSwap.h>
+#include <mt/Algorithm.h>
+
+#undef min
+#undef max
 
-static std::vector<uint64_t> make_origValues(size_t NUM_PIXELS)
+static std::vector<uint64_t> make_origValues_(size_t count)
 {
     ::srand(334);
 
-    std::vector<uint64_t> retval(NUM_PIXELS);
-    for (size_t ii = 0; ii < NUM_PIXELS; ++ii)
+    std::vector<uint64_t> retval(count);
+    for (size_t ii = 0; ii < count; ++ii)
     {
         const auto value = static_cast<float>(::rand()) / RAND_MAX *
                 std::numeric_limits<uint64_t>::max();
@@ -44,10 +50,16 @@ static std::vector<uint64_t> make_origValues(size_t NUM_PIXELS)
     return retval;
 }
 
+static constexpr size_t NUM_PIXELS = 10000;
+static const std::vector<uint64_t>& make_origValues()
+{
+    static const auto retval = make_origValues_(NUM_PIXELS);
+    return retval;
+}
+
 TEST_CASE(testThreadedByteSwap)
 {
-    constexpr size_t NUM_PIXELS = 10000;
-    const auto origValues = make_origValues(NUM_PIXELS);
+    const auto& origValues = make_origValues();
 
     constexpr size_t numThreads = 4;
 
@@ -59,14 +71,70 @@ TEST_CASE(testThreadedByteSwap)
     std::vector<uint64_t> swappedValues2(origValues.size());
     mt::threadedByteSwap(origValues.data(), sizeof(origValues[0]), NUM_PIXELS, numThreads, swappedValues2.data());
 
-    // Everything should match
-    for (size_t ii = 0; ii < NUM_PIXELS; ++ii)
+    for (size_t ii = 0; ii < NUM_PIXELS; ++ii)  // Everything should match
     {
         TEST_ASSERT_EQ(values1[ii], swappedValues2[ii]);
     }
 }
 
+TEST_CASE(test_transform_ByteSwap)
+{
+    const auto& origValues = make_origValues();
+
+    // Byte swap the old-fashioned way
+    constexpr size_t numThreads = 4;
+    auto expected_(origValues);
+    constexpr auto elemSize = sizeof(expected_[0]);
+    mt::threadedByteSwap(expected_.data(), elemSize, NUM_PIXELS, numThreads);
+    const auto& expected = expected_;
+
+    // Byte swap into output buffer
+    const auto byteSwap = [&](const auto& buffer_) {
+        auto buffer = buffer_;
+        sys::byteSwap(&buffer, elemSize, 1 /*numElements*/);
+        return buffer;
+    };
+
+    std::vector<uint64_t> actual(origValues.size());
+    std::transform(origValues.begin(), origValues.end(), actual.begin(), byteSwap);
+    for (size_t ii = 0; ii < NUM_PIXELS; ++ii)     // Everything should match
+    {
+        TEST_ASSERT_EQ(expected[ii], actual[ii]);
+    }
+}
+
+TEST_CASE(test_Transform_par_ByteSwap)
+{
+    const auto& origValues = make_origValues();
+
+    // Byte swap the old-fashioned way
+    constexpr size_t numThreads = 4;
+    auto expected_(origValues);
+    constexpr auto elemSize = sizeof(expected_[0]);
+    mt::threadedByteSwap(expected_.data(), elemSize, NUM_PIXELS, numThreads);
+    const auto& expected = expected_;
+
+    // Byte swap into output buffer
+    const auto byteSwap = [&](const auto& buffer_) {
+        auto buffer = buffer_;
+        sys::byteSwap(&buffer, elemSize, 1 /*numElements*/);
+        return buffer;
+    };
+
+    // be sure we do something more than just call std::transform()
+    const mt::Transform_par_settings settings{ NUM_PIXELS / 4 /*cutoff*/ };
+
+    std::vector<uint64_t> actual(origValues.size());
+    mt::Transform_par(origValues.begin(), origValues.end(), actual.begin(), byteSwap, settings);
+    for (size_t ii = 0; ii < NUM_PIXELS; ++ii)  // Everything should match
+    {
+        TEST_ASSERT_EQ(expected[ii], actual[ii]);
+    }
+}
+
 TEST_MAIN(
     TEST_CHECK(testThreadedByteSwap);
+    TEST_CHECK(test_transform_ByteSwap);
+    TEST_CHECK(test_Transform_par_ByteSwap);
     )
      
\ No newline at end of file
diff --git a/six/modules/c++/samples/8AMPI_PHSI.cpp b/six/modules/c++/samples/8AMPI_PHSI.cpp
index 5ceff52f6..14dbd7c44 100644
--- a/six/modules/c++/samples/8AMPI_PHSI.cpp
+++ b/six/modules/c++/samples/8AMPI_PHSI.cpp
@@ -14,7 +14,6 @@
 #include <iterator>
 #include <future>
 #include <vector>
-//#include <execution>
 #include <chrono>
 
 #include "six/AmplitudeTable.h"
diff --git a/six/modules/c++/six.sicd/six.sicd.vcxproj b/six/modules/c++/six.sicd/six.sicd.vcxproj
index 3b0b210df..6d58ad83c 100644
--- a/six/modules/c++/six.sicd/six.sicd.vcxproj
+++ b/six/modules/c++/six.sicd/six.sicd.vcxproj
@@ -154,9 +154,7 @@
     <ClCompile Include="source\ComplexData.cpp" />
     <ClCompile Include="source\ComplexDataBuilder.cpp" />
     <ClCompile Include="source\ComplexToAMP8IPHS8I.cpp" />
-    <ClCompile Include="source\ComplexToAMP8IPHS8I_simd.cpp" />
-    <ClCompile Include="source\ComplexToAMP8IPHS8I_vcl.cpp" />
-    <ClCompile Include="source\ComplexToAMP8IPHS8I_ximd.cpp" />
+    <ClCompile Include="source\ComplexToAMP8IPHS8I_unseq.cpp" />
     <ClCompile Include="source\ComplexXMLControl.cpp" />
     <ClCompile Include="source\ComplexXMLParser.cpp" />
     <ClCompile Include="source\ComplexXMLParser040.cpp" />
diff --git a/six/modules/c++/six.sicd/six.sicd.vcxproj.filters b/six/modules/c++/six.sicd/six.sicd.vcxproj.filters
index fb6a88a17..f7ec2e4b0 100644
--- a/six/modules/c++/six.sicd/six.sicd.vcxproj.filters
+++ b/six/modules/c++/six.sicd/six.sicd.vcxproj.filters
@@ -248,13 +248,7 @@
     <ClCompile Include="source\DataParser.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
-    <ClCompile Include="source\ComplexToAMP8IPHS8I_vcl.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="source\ComplexToAMP8IPHS8I_simd.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="source\ComplexToAMP8IPHS8I_ximd.cpp">
+    <ClCompile Include="source\ComplexToAMP8IPHS8I_unseq.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
   </ItemGroup>
diff --git a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp
index adfd97059..1bdef6f87 100644
--- a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp
+++ b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp
@@ -31,15 +31,7 @@
 #include <functional>
 #include <stdexcept>
 
-#include <coda_oss/CPlusPlus.h>
-#if CODA_OSS_cpp17
-    // <execution> is broken with the older version of GCC we're using
-    #if (__GNUC__ >= 10) || _MSC_VER
-    #include <execution>
-    #define SIX_six_sicd_ComplexToAMP8IPHS8I_has_execution 1
-    #endif
-#endif
-
+#include <mt/Algorithm.h>
 #include <gsl/gsl.h>
 #include <math/Utilities.h>
 #include <units/Angles.h>
@@ -231,30 +223,6 @@ six::AMP8I_PHS8I_t six::sicd::nearest_neighbor(const details::ComplexToAMP8IPHS8
     return i.nearest_neighbor_(v);
 }
 
- // Yes, this is duplicated code :-(  1) hopefully it will go away someday "soon,"
- // that is, we'll be at C++17; 2) the cutoff/dimension values may be different.
- //
- // First of all, C++11's std::async() is now (in 2023) thought of as maybe a
- // bit "half baked," and perhaps shouldn't be emulated.  Then, C++17 added
- // parallel algorithms which might be a better way of satisfying our immediate
- // needs (below) ... although we're still at C++14.
-template <typename InputIt, typename OutputIt, typename TTransformFunc>
-static inline OutputIt transform_async(const InputIt first1, const InputIt last1, OutputIt d_first, TTransformFunc transform_f,
-    typename std::iterator_traits<InputIt>::difference_type cutoff)
-{
-    // https://en.cppreference.com/w/cpp/thread/async
-    const auto len = std::distance(first1, last1);
-    if (len < cutoff)
-    {
-        return transform_f(first1, last1, d_first);
-    }
-
-    const auto mid1 = first1 + len / 2;
-    const auto d_mid = d_first + len / 2;
-    auto handle = std::async(transform_async<InputIt, OutputIt, TTransformFunc>, mid1, last1, d_mid, transform_f, cutoff);
-    transform_async(first1, mid1, d_first, transform_f, cutoff);
-    return handle.get();
-}
 
 void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_par(std::span<const six::zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const
 {
@@ -262,26 +230,7 @@ void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_par(std::s
     {
         return converter.nearest_neighbor_(v);
     };
-
-    const auto first = inputs.begin();
-    const auto last = inputs.end();
-    const auto dest = results.begin();
-     
-#if SIX_six_sicd_ComplexToAMP8IPHS8I_has_execution
-    std::ignore = std::transform(std::execution::par, first, last, dest, nearest_neighbor);
-#else
-    constexpr ptrdiff_t cutoff_ = 0; // too slow w/o multi-threading
-    // The value of "default_cutoff" was determined by testing; there is nothing special about it, feel free to change it.
-    constexpr auto dimension = 256;
-    constexpr auto default_cutoff = (dimension * dimension) * 4;
-    const auto cutoff = cutoff_ == 0 ? default_cutoff : cutoff_;
-
-    const auto transform_f = [&](auto first1, auto last1, auto d_first)
-    {
-        return std::transform(first1, last1, d_first, nearest_neighbor);
-    };
-    std::ignore = transform_async(first, last, dest, transform_f, cutoff);
-#endif // SIX_six_sicd_ComplexToAMP8IPHS8I_has_execution
+    std::ignore = mt::Transform_par(inputs.begin(), inputs.end(), results.begin(), nearest_neighbor);
 }
 std::vector<six::AMP8I_PHS8I_t> six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par(
     std::span<const zfloat> inputs, const six::AmplitudeTable* pAmplitudeTable)
@@ -323,12 +272,12 @@ std::vector<six::AMP8I_PHS8I_t> six::sicd::details::ComplexToAMP8IPHS8I::nearest
     // TODO: there could be more complicated logic here to determine which UNSEQ
     // implementation to use.
 
-    #if SIX_sicd_has_VCL
-    return nearest_neighbors_unseq_vcl(inputs, pAmplitudeTable);
-
-    #elif SIX_sicd_has_simd
+    #if SIX_sicd_has_simd
     return nearest_neighbors_unseq_simd(inputs, pAmplitudeTable);
 
+    #elif SIX_sicd_has_VCL
+    return nearest_neighbors_unseq_vcl(inputs, pAmplitudeTable);
+
     #elif SIX_sicd_has_ximd
     return nearest_neighbors_unseq_ximd(inputs, pAmplitudeTable);
 
diff --git a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_simd.cpp b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_simd.cpp
deleted file mode 100644
index ece707b2f..000000000
--- a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_simd.cpp
+++ /dev/null
@@ -1,377 +0,0 @@
-/* =========================================================================
-* This file is part of six.sicd-c++
-* =========================================================================
-*
-* (C) Copyright 2021, Maxar Technologies, Inc.
-*
-* six.sicd-c++ is free software; you can redistribute it and/or modify
-* it under the terms of the GNU Lesser General Public License as published by
-* the Free Software Foundation; either version 3 of the License, or
-* (at your option) any later version.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-* GNU Lesser General Public License for more details.
-*
-* You should have received a copy of the GNU Lesser General Public
-* License along with this program; If not,
-* see <http://www.gnu.org/licenses/>.
-*
-*/
-#include "six/AmplitudeTable.h"
-
-#include <math.h>
-#include <assert.h>
-
-#include <cassert>
-#include <memory>
-#include <std/numbers>
-#include <algorithm>
-#include <functional>
-#include <type_traits>
-#include <execution>
-#include <array>
-#include <std/span>
-
-#include <gsl/gsl.h>
-#include <math/Utilities.h>
-#include <units/Angles.h>
-#include <sys/Span.h>
-
-#include "six/sicd/Utilities.h"
-
-#if SIX_sicd_has_simd
-
-#include <experimental/simd>
-
-// https://en.cppreference.com/w/cpp/experimental/simd/simd_cast
-// > The TS specification is missing `simd_cast` and `static_simd_cast` overloads for `simd_mask`. ...
-namespace stdx
-{
-    using namespace std::experimental;
-    using namespace std::experimental::__proposed;
-}
-
-// https://en.cppreference.com/w/cpp/experimental/simd
-using floatv = stdx::native_simd<float>;
-// using doublev = stdx::rebind_simd_t<double, floatv>;
-using intv = stdx::rebind_simd_t<int, floatv>;
-using zfloatv = std::array<floatv, 2>;
-
-auto& real(zfloatv& z)
-{
-    return z[0];
-}
-const auto& real(const zfloatv& z)
-{
-    return z[0];
-}
-auto& imag(zfloatv& z)
-{
-    return z[1];
-}
-const auto& imag(const zfloatv& z)
-{
-    return z[1];
-}
-
-// https://en.cppreference.com/w/cpp/experimental/simd/simd/simd
-template<typename TGeneratorReal, typename TGeneratorImag>
-inline auto make_zfloatv(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag)
-{
-    zfloatv retval;
-    real(retval) = floatv(generate_real);
-    imag(retval) = floatv(generate_imag);
-    return retval;
-}
-
-static inline auto load(std::span<const six::zfloat> p)
-{
-    const auto generate_real = [&](size_t i) {
-        return p[i].real();
-    };
-    const auto generate_imag = [&](size_t i) {
-        return p[i].imag();
-    };
-    return make_zfloatv(generate_real, generate_imag);
-}
-
-
-// https://en.cppreference.com/w/cpp/numeric/complex/arg
-// > `std::atan2(std::imag(z), std::real(z))`
-inline auto arg(const floatv& real, const floatv& imag)
-{
-    return atan2(imag, real); // arg()
-}
-inline auto arg(const zfloatv& z)
-{
-    return arg(real(z), imag(z));
-}
-
-inline auto roundi(const floatv& v) // match vcl::roundi()
-{
-    return static_simd_cast<intv>(round(v));
-}
-
-template<typename TTest, typename TResult>
-inline auto select(const TTest& test_, const TResult& t, const TResult& f)
-{
-    //const auto test = test_.__cvt(); // https://github.com/VcDevel/std-simd/issues/41
-    const auto test = stdx::static_simd_cast<typename TResult::mask_type>(test_); // https://github.com/VcDevel/std-simd/issues/41
-
-    // https://en.cppreference.com/w/cpp/experimental/simd/where_expression
-    // > ... All other elements are left unchanged.
-    TResult retval;
-    where(test, retval) = t;
-    where(!test, retval) = f;
-    return retval;
-}
-
-static inline auto getPhase(const zfloatv& v, float phase_delta)
-{
-    // Phase is determined via arithmetic because it's equally spaced.
-    // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around
-    // handles cases that are close to 2PI.
-    auto phase = arg(v);
-    where(phase < 0.0f, phase) += std::numbers::pi_v<float> *2.0f; // Wrap from [0, 2PI]
-    return roundi(phase / phase_delta);
-}
-
-template<size_t N>
-static inline auto lookup(const intv& zindex, const std::array<six::zfloat, N>& phase_directions)
-{
-    // It seems that the "generator" constuctor is called with SIMD instructions.
-    // https://en.cppreference.com/w/cpp/experimental/simd/simd/simd
-    // > The calls to `generator` are unsequenced with respect to each other.
-
-    const auto generate_real = [&](size_t i) {
-        const auto i_ = zindex[i];
-        return phase_directions[i_].real();
-    };
-    const auto generate_imag = [&](size_t i) {
-        const auto i_ = zindex[i];
-        return phase_directions[i_].imag();
-    };
-    return make_zfloatv(generate_real, generate_imag);
-}
-
-// https://en.cppreference.com/w/cpp/algorithm/lower_bound
-/*
-template<class ForwardIt, class T>
-ForwardIt lower_bound(ForwardIt first, ForwardIt last, const T& value)
-{
-    ForwardIt it;
-    typename std::iterator_traits<ForwardIt>::difference_type count, step;
-    count = std::distance(first, last);
-
-    while (count > 0)
-    {
-        it = first;
-        step = count / 2;
-        std::advance(it, step);
-
-        if (*it < value)
-        {
-            first = ++it;
-            count -= step + 1;
-        }
-        else
-            count = step;
-    }
-
-    return first;
-}
-*/
-template<size_t N>
-static inline auto lookup(const intv& zindex, std::span<const float> magnitudes)
-{
-    // It seems that the "generator" constuctor is called with SIMD instructions.
-    // https://en.cppreference.com/w/cpp/experimental/simd/simd/simd
-    // > The calls to `generator` are unsequenced with respect to each other.
-
-    const auto generate = [&](size_t i) {
-        const auto i_ = zindex[i];
-        return magnitudes[i_];
-    };
-    return floatv(generate);
-}
-inline auto lower_bound_(std::span<const float> magnitudes, const floatv& v)
-{
-    intv first = 0;
-    const intv last = gsl::narrow<int>(magnitudes.size());
-
-    auto count = last - first;
-    while (any_of(count > 0))
-    {
-        auto it = first;
-        const auto step = count / 2;
-        it += step;
-
-        auto next = it; ++next; // ... ++it;
-        auto advance = count; advance -= step + 1;  // ...  -= step + 1;
-
-        const auto c = lookup<six::AmplitudeTableSize>(it, magnitudes); // magnituides[it]
-
-        const auto test = c < v; // (c < v).__cvt(); // https://github.com/VcDevel/std-simd/issues/41
-
-        //where(test, it) = next; // ... ++it
-        //where(test, first) = it; // first = ...
-        //// count -= step + 1 <...OR...> count = step
-        //where(test, count) = advance;
-        //where(!test, count) = step;
-        it = select(test, next, it); // ... ++it
-        first = select(test, it, first); // first = ...
-        count = select(test, advance, step); // count -= step + 1 <...OR...> count = step
-    }
-    return first;
-}
-inline auto lower_bound(std::span<const float> magnitudes, const floatv& value)
-{
-    auto retval = lower_bound_(magnitudes, value);
-
-    #ifndef NDEBUG // i.e., debug
-    for (size_t i = 0; i < value.size(); i++)
-    {
-        const auto it = std::lower_bound(magnitudes.begin(), magnitudes.end(), value[i]);
-        const auto result = gsl::narrow<int>(std::distance(magnitudes.begin(), it));
-        assert(retval[i] == result);
-    }
-    #endif
-
-    return retval;
-}
-
-static auto nearest(std::span<const float> magnitudes, const floatv& value)
-{
-    assert(magnitudes.size() == six::AmplitudeTableSize);
-
-    /*
-        const auto it = std::lower_bound(begin, end, value);
-        if (it == begin) return 0;
-
-        const auto prev_it = std::prev(it);
-        const auto nearest_it = it == end ? prev_it  :
-            (value - *prev_it <= *it - value ? prev_it : it);
-        const auto distance = std::distance(begin, nearest_it);
-        assert(distance <= std::numeric_limits<uint8_t>::max());
-        return gsl::narrow<uint8_t>(distance);
-    */
-    const auto it = ::lower_bound(magnitudes, value);
-    const auto prev_it = it - 1; // const auto prev_it = std::prev(it);
-
-    const auto v0 = value - lookup<six::AmplitudeTableSize>(prev_it, magnitudes); // value - *prev_it
-    const auto v1 = lookup<six::AmplitudeTableSize>(it, magnitudes) - value; // *it - value
-    //const auto nearest_it = select(v0 <= v1, prev_it, it); //  (value - *prev_it <= *it - value ? prev_it : it);
-    
-    const intv end = gsl::narrow<int>(magnitudes.size());
-    //const auto end_test = select(it == end, prev_it, nearest_it); // it == end ? prev_it  : ...
-    const intv zero = 0;
-    auto retval = select(it == 0, zero, // if (it == begin) return 0;
-        select(it == end, prev_it,  // it == end ? prev_it  : ...
-            select(v0 <=v1, prev_it, it) //  (value - *prev_it <= *it - value ? prev_it : it);
-        ));
-    return retval;
-}
-
-static auto find_nearest(std::span<const float> magnitudes, const floatv& phase_direction_real, const floatv& phase_direction_imag,
-    const zfloatv& v)
-{
-    // We have to do a 1D nearest neighbor search for magnitude.
-    // But it's not the magnitude of the input complex value - it's the projection of
-    // the complex value onto the ray of candidate magnitudes at the selected phase.
-    // i.e. dot product.
-    const auto projection = (phase_direction_real * real(v)) + (phase_direction_imag * imag(v));
-    //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ???
-    return nearest(magnitudes, projection);
-}
-static inline auto find_nearest(std::span<const float> magnitudes, const zfloatv& phase_direction, const zfloatv& v)
-{
-    return find_nearest(magnitudes, real(phase_direction), imag(phase_direction), v);
-}
-
-void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_unseq_(std::span<const six::zfloat> p, std::span<AMP8I_PHS8I_t> results) const
-{
-    const auto v = load(p);
-
-    const auto phase = ::getPhase(v, phase_delta);
-
-    const auto phase_direction = lookup<six::AmplitudeTableSize>(phase, phase_directions);
-    #ifndef NDEBUG // i.e., debug
-    for (size_t i = 0; i < phase.size(); i++)
-    {
-        const auto pd = phase_directions[phase[i]];
-        assert(pd.real() == real(phase_direction)[i]);
-        assert(pd.imag() == imag(phase_direction)[i]);
-    }
-    #endif
-
-    const auto amplitude = ::find_nearest(magnitudes, phase_direction, v);
-    #ifndef NDEBUG // i.e., debug
-    for (size_t i = 0; i < amplitude.size(); i++)
-    {
-        const auto i_ = phase[i];
-        const auto a = find_nearest(phase_directions[i_], p[i]);
-        assert(a == amplitude[i]);
-    }
-    #endif
-
-    auto dest = results.begin();
-    for (size_t i = 0; i < v.size(); i++)
-    {
-        dest->phase = gsl::narrow_cast<uint8_t>(phase[i]);
-        dest->amplitude = gsl::narrow_cast<uint8_t>(amplitude[i]);
-
-        ++dest;
-    }
-}
-
-void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_unseq(std::span<const six::zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const
-{
-    auto first = inputs.begin();
-    const auto last = inputs.end();
-    auto dest = results.begin();
-
-    // The above code is simpler (no templates) if we use just a single VCL
-    // complex type: `zfloatv`.  If there is any performance difference,
-    // it will only be for extreme edge cases since the smaller types are only used
-    // at the end of the loop.
-    //
-    // It also makes this loop simpler as we handle all non-multiples-of-8 in
-    // the same way.
-    constexpr auto elements_per_iteration = 8;
-
-    // Can do these checks one-time outside of the loop
-    const auto distance = std::distance(first, last);
-
-    // First, do multiples of 8
-    const auto distance_ = distance - (distance % elements_per_iteration);
-    const auto last_ = first + distance_;
-    for (; first != last_; first += elements_per_iteration, dest += elements_per_iteration)
-    {
-        const auto f = sys::make_span(&(*first), elements_per_iteration);
-        const auto d = sys::make_span(&(*dest), elements_per_iteration);
-        nearest_neighbors_unseq_(f, d);
-    }
-
-    // Then finish off anything left
-    assert(std::distance(first, last) < elements_per_iteration);
-    for (; first != last; ++first, ++dest)
-    {
-        const auto f = sys::make_span(&(*first), 1);
-        const auto d = sys::make_span(&(*dest), 1);
-        nearest_neighbors_seq(f, d);
-    }
-}
-std::vector<six::AMP8I_PHS8I_t> six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq_vcl(
-    std::span<const zfloat> inputs, const six::AmplitudeTable* pAmplitudeTable)
-{
-    // make a structure to quickly find the nearest neighbor
-    const auto& converter = make_(pAmplitudeTable);
-
-    std::vector<six::AMP8I_PHS8I_t> retval(inputs.size());
-    converter.impl.nearest_neighbors_unseq(inputs, sys::make_span(retval));
-    return retval;
-}
-
-#endif // SIX_sicd_has_simd
diff --git a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_unseq.cpp b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_unseq.cpp
index 541fb4b5c..6b546a7d1 100644
--- a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_unseq.cpp
+++ b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_unseq.cpp
@@ -31,19 +31,11 @@
 #include <functional>
 #include <type_traits>
 
-#include <coda_oss/CPlusPlus.h>
-#if CODA_OSS_cpp17
-    // <execution> is broken with the older version of GCC we're using
-    #if (__GNUC__ >= 10) || _MSC_VER
-    #include <execution>
-    #define SIX_six_sicd_ComplexToAMP8IPHS8I_has_execution 1
-    #endif
-#endif
-
 #include <gsl/gsl.h>
 #include <math/Utilities.h>
 #include <units/Angles.h>
 #include <sys/Span.h>
+#include <mt/Algorithm.h>
 
 #include "six/sicd/Utilities.h"
 
@@ -70,76 +62,61 @@ using AMP8I_PHS8I_t = six::AMP8I_PHS8I_t;
 #pragma warning(pop)
 #endif
 
-namespace six
-{
-namespace sicd
-{
-namespace vcl
-{
-
-constexpr auto elements_per_iteration = 4;
-
-using floatv = ::vcl::Vec4f;
-using intv = ::vcl::Vec4i;
-using zfloatv = ::vcl::Complex4f;
+using vcl_intv = vcl::Vec4i; 
+using vcl_floatv = vcl::Vec4f;
+constexpr auto vcl_elements_per_iteration = vcl_floatv::size();
 
-} // vcl
-} // sicd
-} // six
-
-auto real(const six::sicd::vcl::zfloatv& z)
+inline int ssize(const vcl_intv& z) noexcept
 {
-    return z.real();
+    return z.size();
 }
-auto imag(const six::sicd::vcl::zfloatv& z)
+inline int ssize(const vcl_floatv& z) noexcept
 {
-    return z.imag();
+    return z.size();
 }
-static inline size_t size(const six::sicd::vcl::zfloatv& z) noexcept
+
+using vcl_zfloatv = vcl::Complex4f;
+auto real(const vcl_zfloatv& z)
 {
-    return z.size();
+    return z.real();
 }
-static inline int ssize(const six::sicd::vcl::zfloatv& z) noexcept
+auto imag(const vcl_zfloatv& z)
 {
-    return z.size();
+    return z.imag();
 }
-static inline int ssize(const six::sicd::vcl::floatv& z) noexcept
+inline size_t size(const vcl_zfloatv& z) noexcept
 {
     return z.size();
 }
-static inline int ssize(const six::sicd::vcl::intv& z) noexcept
+inline int ssize(const vcl_zfloatv& z) noexcept
 {
     return z.size();
 }
 
-// https://en.cppreference.com/w/cpp/numeric/complex/arg
-// > `std::atan2(std::imag(z), std::real(z))`
-inline auto arg(const six::sicd::vcl::floatv& real, const six::sicd::vcl::floatv& imag)
-{
-    return atan2(imag, real); // arg()
-}
-inline auto arg(const six::sicd::vcl::zfloatv& z)
+inline auto arg(const vcl_zfloatv& z)
 {
-    return arg(real(z), imag(z));
+    // https://en.cppreference.com/w/cpp/numeric/complex/arg
+    // > `std::atan2(std::imag(z), std::real(z))`
+    return atan2(z.imag(), z.real()); // arg()
 }
 
 template<typename T>
-inline auto if_add(const T& f, const six::sicd::vcl::floatv& a, float b)
+inline auto if_add(const T& f, const vcl_floatv& a, float b)
 {
     return vcl::if_add(f, a, b);
 }
 
-inline bool any_of(const six::sicd::vcl::intv& m)
+inline bool any_of(const vcl_intv& m)
 {
     return horizontal_or(m);
 }
 
-static inline void copy_from(std::span<const float> p, six::sicd::vcl::floatv& result)
+inline void copy_from(std::span<const float> p, vcl_floatv& result)
 {
     assert(p.size() == result.size());
     result.load(p.data());
 }
-static inline auto copy_from(std::span<const zfloat> p, six::sicd::vcl::zfloatv& result)
+inline auto copy_from(std::span<const zfloat> p, vcl_zfloatv& result)
 {
     // https://en.cppreference.com/w/cpp/numeric/complex
     // > For any pointer to an element of an array of `std::complex<T>` named `p` and any valid array index `i`, ...
@@ -148,14 +125,14 @@ static inline auto copy_from(std::span<const zfloat> p, six::sicd::vcl::zfloatv&
 }
 
 template<size_t N>
-static inline auto lookup(const six::sicd::vcl::intv& zindex, const std::array<zfloat, N>& phase_directions)
+inline auto lookup(const vcl_intv& zindex, const std::array<zfloat, N>& phase_directions)
 {
-    return ::vcl::lookup<N>(zindex, phase_directions.data());
+    return vcl::lookup<N>(zindex, phase_directions.data());
 }
-static inline auto lookup(const six::sicd::vcl::intv& zindex, std::span<const float> magnitudes)
+inline auto lookup(const vcl_intv& zindex, std::span<const float> magnitudes)
 {
     assert(magnitudes.size() == six::AmplitudeTableSize);
-    return ::vcl::lookup<six::AmplitudeTableSize>(zindex, magnitudes.data());
+    return vcl::lookup<six::AmplitudeTableSize>(zindex, magnitudes.data());
 }
 
 #endif // SIX_sicd_has_VCL
@@ -166,27 +143,73 @@ static inline auto lookup(const six::sicd::vcl::intv& zindex, std::span<const fl
 #endif
 #include <sys/Ximd.h>
 
-namespace six
+template<typename T>
+using ximd = sys::ximd::Ximd<T>;
+
+using ximd_intv = ximd<int>;
+using ximd_intv_mask = sys::ximd::ximd_mask;
+
+using ximd_floatv = ximd<float>;
+using ximd_floatv_mask = sys::ximd::ximd_mask;
+
+constexpr auto ximd_elements_per_iteration = ximd_floatv::size();
+
+template<typename T>
+static inline auto ssize(const ximd<T>& v) noexcept
 {
-namespace sicd
+    return gsl::narrow<int>(v.size());
+}
+
+// Manage a SIMD complex as an array of two SIMDs
+using ximd_zfloatv = std::array<ximd_floatv, 2>;
+inline auto& real(ximd_zfloatv& z) noexcept
 {
-namespace ximd
+    return z[0];
+}
+inline const auto& real(const ximd_zfloatv& z) noexcept
 {
+    return z[0];
+}
+inline auto& imag(ximd_zfloatv& z) noexcept
+{
+    return z[1];
+}
+inline const auto& imag(const ximd_zfloatv& z) noexcept
+{
+    return z[1];
+}
+inline size_t size(const ximd_zfloatv& z) noexcept
+{
+    auto retval = real(z).size();
+    assert(retval == imag(z).size());
+    return retval;
+}
+inline auto ssize(const ximd_zfloatv& z) noexcept
+{
+    return gsl::narrow<int>(size(z));
+}
 
-template<typename T>
-using simd = sys::ximd::Ximd<T>;
-using intv = simd<int>;
-using floatv = simd<float>;
-using intv_mask = sys::ximd::ximd_mask;
-using floatv_mask = sys::ximd::ximd_mask;
-
-constexpr auto elements_per_iteration = floatv::size();
+inline auto arg(const ximd_zfloatv& z)
+{
+    // https://en.cppreference.com/w/cpp/numeric/complex/arg
+    // > `std::atan2(std::imag(z), std::real(z))`
+    return atan2(imag(z), real(z));  // arg()
+}
 
-// Manage a SIMD complex as an array of two SIMDs
-using zfloatv = std::array<floatv, 2>;
+template <typename TGeneratorReal, typename TGeneratorImag>
+static inline auto make_ximd_zfloatv(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag)
+{
+    ximd_zfloatv retval;
+    for (size_t i = 0; i < size(retval); i++)
+    {
+        real(retval)[i] = generate_real(i);
+        imag(retval)[i] = generate_imag(i);
+    }
+    return retval;
+}
 
 template<typename TTest, typename TResult>
-static inline auto select(const TTest& test, const TResult& t, const TResult& f)
+static inline auto ximd_select_(const TTest& test, const TResult& t, const TResult& f)
 {
     TResult retval;
     for (size_t i = 0; i < test.size(); i++)
@@ -196,54 +219,148 @@ static inline auto select(const TTest& test, const TResult& t, const TResult& f)
     return retval;
 }
 
-} // ximd
-} // sicd
-} // six
+static inline auto copy_from(std::span<const float> p, ximd_floatv& result)
+{
+    assert(p.size() == result.size());
+    result.copy_from(p.data());
+}
+static inline auto copy_from(std::span<const zfloat> p, ximd_zfloatv& result)
+{
+    const auto generate_real = [&](size_t i) { return p[i].real(); };
+    const auto generate_imag = [&](size_t i) { return p[i].imag(); };
+    result = make_ximd_zfloatv(generate_real, generate_imag);
+}
+
+static inline auto roundi(const ximd_floatv& v)  // match vcl::roundi()
+{
+    const auto rounded = round(v);
+    const auto generate_roundi = [&](size_t i)
+    { return static_cast<typename ximd_intv::value_type>(rounded[i]); };
+    return ximd_intv::generate(generate_roundi);
+}
+
+static inline auto select(const ximd_floatv_mask& test, const  ximd_floatv& t, const  ximd_floatv& f)
+{
+    return ximd_select_(test, t, f);
+}
+static inline auto select(const ximd_intv_mask& test, const  ximd_intv& t, const  ximd_intv& f)
+{
+    return ximd_select_(test, t, f);
+}
+
+static inline auto if_add(const ximd_floatv_mask& m, const ximd_floatv& v, typename ximd_floatv::value_type c)
+{
+    // phase = if_add(phase < 0.0, phase, std::numbers::pi_v<float> * 2.0f); // Wrap from [0, 2PI]
+    const auto generate_add = [&](size_t i) {
+        return m[i] ? v[i] + c : v[i];
+    };
+    return ximd_floatv::generate(generate_add);
+}
+
+template<size_t N>
+static inline auto lookup(const ximd_intv& zindex, const std::array<zfloat, N>& phase_directions)
+{
+    // It seems that the "generator" constuctor is called with SIMD instructions.
+    // https://en.cppreference.com/w/cpp/experimental/simd/simd/simd
+    // > The calls to `generator` are unsequenced with respect to each other.
+
+    const auto generate_real = [&](size_t i) {
+        const auto i_ = zindex[i];
+        return phase_directions[i_].real();
+    };
+    const auto generate_imag = [&](size_t i) {
+        const auto i_ = zindex[i];
+        return phase_directions[i_].imag();
+    };
+    return make_ximd_zfloatv(generate_real, generate_imag);
+}
+
+static auto lookup(const ximd_intv& zindex, std::span<const float> magnitudes)
+{
+    const auto generate = [&](size_t i) {
+        const auto i_ = zindex[i];
+
+        // The index may be out of range. This is expected because `i` might be "don't care."
+        if ((i_ >= 0) && (i_ < std::ssize(magnitudes)))
+        {
+            return magnitudes[i_];
+        }
+        return NAN; // propogate "don't care"
+    };
+    return ximd_floatv::generate(generate);
+}
+
+#endif // SIX_sicd_has_ximd
+
+#if SIX_sicd_has_simd
+
+#include <experimental/simd>
+
+// https://en.cppreference.com/w/cpp/experimental/simd/simd_cast
+// > The TS specification is missing `simd_cast` and `static_simd_cast` overloads for `simd_mask`. ...
+namespace stdx
+{
+    using namespace std::experimental;
+    using namespace std::experimental::__proposed;
+}
+
+template<typename T>
+using simd = stdx::simd<T>;
+
+using simd_intv = simd<int>;
+using simd_intv_mask = typename simd_intv::mask_type;
+
+using simd_floatv = simd<float>;
+using simd_floatv_mask = typename simd_floatv::mask_type;
+
+constexpr auto simd_elements_per_iteration = simd_floatv::size();
 
-static inline auto& real(six::sicd::ximd::zfloatv& z) noexcept
+template<typename T>
+static inline auto ssize(const simd<T>& v) noexcept
+{
+    return gsl::narrow<int>(v.size());
+}
+
+// Manage a SIMD complex as an array of two SIMDs
+using simd_zfloatv = std::array<simd_floatv, 2>;
+inline auto& real(simd_zfloatv& z) noexcept
 {
     return z[0];
 }
-static inline const auto& real(const six::sicd::ximd::zfloatv& z) noexcept
+inline const auto& real(const simd_zfloatv& z) noexcept
 {
     return z[0];
 }
-static inline auto& imag(six::sicd::ximd::zfloatv& z) noexcept
+inline auto& imag(simd_zfloatv& z) noexcept
 {
     return z[1];
 }
-static inline const auto& imag(const six::sicd::ximd::zfloatv& z) noexcept
+inline const auto& imag(const simd_zfloatv& z) noexcept
 {
     return z[1];
 }
-
-static inline size_t size(const six::sicd::ximd::zfloatv& z) noexcept
+inline size_t size(const simd_zfloatv& z) noexcept
 {
     auto retval = real(z).size();
     assert(retval == imag(z).size());
     return retval;
 }
-static inline auto ssize(const six::sicd::ximd::zfloatv& z) noexcept
+inline auto ssize(const simd_zfloatv& z) noexcept
 {
     return gsl::narrow<int>(size(z));
 }
-template<typename T>
-static inline auto ssize(const six::sicd::ximd::simd<T>& v) noexcept
-{
-    return gsl::narrow<int>(v.size());
-}
 
-namespace six
-{
-namespace sicd
-{
-namespace ximd
+inline auto arg(const simd_zfloatv& z)
 {
+    // https://en.cppreference.com/w/cpp/numeric/complex/arg
+    // > `std::atan2(std::imag(z), std::real(z))`
+    return atan2(imag(z), real(z));  // arg()
+}
 
 template <typename TGeneratorReal, typename TGeneratorImag>
-static inline auto make_zfloatv(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag)
+static inline auto make_simd_zfloatv(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag)
 {
-    six::sicd::ximd::zfloatv retval;
+    simd_zfloatv retval;
     for (size_t i = 0; i < size(retval); i++)
     {
         real(retval)[i] = generate_real(i);
@@ -252,58 +369,63 @@ static inline auto make_zfloatv(TGeneratorReal&& generate_real, TGeneratorImag&&
     return retval;
 }
 
-} // ximd
-} // sicd
-} // six
-
-
-static inline auto arg(const six::sicd::ximd::zfloatv& z)
+template<typename TTest, typename TResult>
+static inline auto simd_select_(const TTest& test, const TResult& t, const TResult& f)
 {
-    // https://en.cppreference.com/w/cpp/numeric/complex/arg
-    // > `std::atan2(std::imag(z), std::real(z))`
-    return atan2(imag(z), real(z));  // arg()
+    // https://en.cppreference.com/w/cpp/experimental/simd/where_expression
+    // > ... All other elements are left unchanged.
+    TResult retval;
+    where(test, retval) = t;
+    where(!test, retval) = f;
+    return retval;
 }
 
-static inline auto copy_from(std::span<float> p, six::sicd::ximd::floatv& result)
+static inline auto copy_from(std::span<const float> p, simd_floatv& result)
 {
     assert(p.size() == result.size());
-    result.copy_from(p.data());
+    result.copy_from(p.data(), stdx::element_aligned);
 }
-static inline auto copy_from(std::span<const zfloat> p, six::sicd::ximd::zfloatv& result)
+static inline auto copy_from(std::span<const zfloat> p, simd_zfloatv& result)
 {
     const auto generate_real = [&](size_t i) { return p[i].real(); };
     const auto generate_imag = [&](size_t i) { return p[i].imag(); };
-    result = six::sicd::ximd::make_zfloatv(generate_real, generate_imag);
+    result = make_simd_zfloatv(generate_real, generate_imag);
 }
 
-static inline auto roundi(const six::sicd::ximd::floatv& v)  // match vcl::roundi()
+static inline auto roundi(const simd_floatv& v)  // match vcl::roundi()
 {
     const auto rounded = round(v);
     const auto generate_roundi = [&](size_t i)
-    { return static_cast<typename six::sicd::ximd::intv::value_type>(rounded[i]); };
-    return six::sicd::ximd::intv::generate(generate_roundi);
+    { return static_cast<typename simd_intv::value_type>(rounded[i]); };
+    return simd_intv(generate_roundi);
 }
 
-static inline auto select(const six::sicd::ximd::floatv_mask& test, const  six::sicd::ximd::floatv& t, const  six::sicd::ximd::floatv& f)
+template<typename TMask>
+static inline auto select(const TMask& test_, const  simd_floatv& t, const  simd_floatv& f)
 {
-    return six::sicd::ximd::select(test, t, f);
+    //const auto test = test_.__cvt(); // https://github.com/VcDevel/std-simd/issues/41
+    const auto test = stdx::static_simd_cast<simd_floatv_mask>(test_); // https://github.com/VcDevel/std-simd/issues/41
+    return simd_select_(test, t, f);
 }
-static inline auto select(const six::sicd::ximd::intv_mask& test, const  six::sicd::ximd::intv& t, const  six::sicd::ximd::intv& f)
+template<typename TMask>
+static inline auto select(const TMask& test_, const  simd_intv& t, const  simd_intv& f)
 {
-    return six::sicd::ximd::select(test, t, f);
+    //const auto test = test_.__cvt(); // https://github.com/VcDevel/std-simd/issues/41
+    const auto test = stdx::static_simd_cast<simd_intv_mask>(test_); // https://github.com/VcDevel/std-simd/issues/41
+    return simd_select_(test, t, f);
 }
 
-static inline auto if_add(const six::sicd::ximd::floatv_mask& m, const six::sicd::ximd::floatv& v, typename six::sicd::ximd::floatv::value_type c)
+static inline auto if_add(const simd_floatv_mask& m, const simd_floatv& v, typename simd_floatv::value_type c)
 {
     // phase = if_add(phase < 0.0, phase, std::numbers::pi_v<float> * 2.0f); // Wrap from [0, 2PI]
     const auto generate_add = [&](size_t i) {
         return m[i] ? v[i] + c : v[i];
     };
-    return six::sicd::ximd::floatv::generate(generate_add);
+    return simd_floatv(generate_add);
 }
 
 template<size_t N>
-static inline auto lookup(const six::sicd::ximd::intv& zindex, const std::array<zfloat, N>& phase_directions)
+static inline auto lookup(const simd_intv& zindex, const std::array<zfloat, N>& phase_directions)
 {
     // It seems that the "generator" constuctor is called with SIMD instructions.
     // https://en.cppreference.com/w/cpp/experimental/simd/simd/simd
@@ -317,10 +439,10 @@ static inline auto lookup(const six::sicd::ximd::intv& zindex, const std::array<
         const auto i_ = zindex[i];
         return phase_directions[i_].imag();
     };
-    return six::sicd::ximd::make_zfloatv(generate_real, generate_imag);
+    return make_simd_zfloatv(generate_real, generate_imag);
 }
 
-static auto lookup(const six::sicd::ximd::intv& zindex, std::span<const float> magnitudes)
+static auto lookup(const simd_intv& zindex, std::span<const float> magnitudes)
 {
     const auto generate = [&](size_t i) {
         const auto i_ = zindex[i];
@@ -332,10 +454,12 @@ static auto lookup(const six::sicd::ximd::intv& zindex, std::span<const float> m
         }
         return NAN; // propogate "don't care"
     };
-    return six::sicd::ximd::floatv::generate(generate);
+    return simd_floatv(generate);
 }
 
-#endif // SIX_sicd_has_ximd
+#endif // SIX_sicd_has_simd
+
+/******************************************************************************************************/
 
 template<typename ZFloatV>
 static auto getPhase(const ZFloatV& v, float phase_delta)
@@ -344,7 +468,7 @@ static auto getPhase(const ZFloatV& v, float phase_delta)
     // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around
     // handles cases that are close to 2PI.
     auto phase = arg(v);
-    phase = if_add(phase < 0.0, phase, std::numbers::pi_v<float> * 2.0f); // Wrap from [0, 2PI]
+    phase = if_add(phase < 0.0f, phase, std::numbers::pi_v<float> * 2.0f); // Wrap from [0, 2PI]
     return roundi(phase / phase_delta);
 }
 
@@ -444,7 +568,7 @@ static auto nearest(std::span<const float> magnitudes, const FloatV& value)
     const IntV zero = 0;
     auto retval = select(it == 0, zero, // if (it == begin) return 0;
         select(it == end, prev_it,  // it == end ? prev_it  : ...
-            select(v0 <=v1, prev_it, it) //  (value - *prev_it <= *it - value ? prev_it : it);
+            select(v0 <= v1, prev_it, it) //  (value - *prev_it <= *it - value ? prev_it : it);
         ));
     return retval;
 }
@@ -465,24 +589,35 @@ static auto find_nearest(std::span<const float> magnitudes,
 
 #if SIX_sicd_has_VCL
 static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter,
-    const six::sicd::vcl::intv& phase, const  six::sicd::vcl::zfloatv& v)
+    const vcl_intv& phase, const  vcl_zfloatv& v)
 {
     const auto& impl = converter.impl;
 
     const auto phase_direction_real = lookup(phase, impl.phase_directions_real);
     const auto phase_direction_imag = lookup(phase, impl.phase_directions_imag);
-    return ::find_nearest<six::sicd::vcl::intv>(impl.magnitudes, phase_direction_real, phase_direction_imag, v);
+    return ::find_nearest<vcl_intv>(impl.magnitudes, phase_direction_real, phase_direction_imag, v);
 }
 #endif
 
 #if SIX_sicd_has_ximd
 static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter,
-    const six::sicd::ximd::intv& phase, const  six::sicd::ximd::zfloatv& v)
+    const ximd_intv& phase, const  ximd_zfloatv& v)
 {
     const auto& impl = converter.impl;
 
     const auto phase_direction = lookup(phase, impl.phase_directions);
-    return ::find_nearest<six::sicd::ximd::intv>(impl.magnitudes, real(phase_direction), imag(phase_direction), v);
+    return ::find_nearest<ximd_intv>(impl.magnitudes, real(phase_direction), imag(phase_direction), v);
+}
+#endif
+
+#if SIX_sicd_has_simd
+static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter,
+    const simd_intv& phase, const  simd_zfloatv& v)
+{
+    const auto& impl = converter.impl;
+
+    const auto phase_direction = lookup(phase, impl.phase_directions);
+    return ::find_nearest<simd_intv>(impl.magnitudes, real(phase_direction), imag(phase_direction), v);
 }
 #endif
 
@@ -497,9 +632,9 @@ void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_unseq_T(st
     #if CODA_OSS_DEBUG
     for (int i = 0; i < ssize(v); i++)
     {
-        const auto z = p[i];
-        assert(real(v)[i] == z.real());
-        assert(imag(v)[i] == z.imag());
+        //const auto z = p[i];
+        //assert(real(v)[i] == z.real());
+        //assert(imag(v)[i] == z.imag());
     }
     #endif
 
@@ -581,7 +716,7 @@ std::vector<AMP8I_PHS8I_t> six::sicd::details::ComplexToAMP8IPHS8I::nearest_neig
     const auto& converter = make_(pAmplitudeTable);
 
     std::vector<AMP8I_PHS8I_t> retval(inputs.size());
-    converter.impl.nearest_neighbors_unseq<vcl::zfloatv, vcl::elements_per_iteration>(inputs, sys::make_span(retval));
+    converter.impl.nearest_neighbors_unseq<vcl_zfloatv, vcl_elements_per_iteration>(inputs, sys::make_span(retval));
     return retval;
 }
 #endif // SIX_sicd_has_VCL
@@ -594,166 +729,20 @@ std::vector<AMP8I_PHS8I_t> six::sicd::details::ComplexToAMP8IPHS8I::nearest_neig
     const auto& converter = make_(pAmplitudeTable);
 
     std::vector<AMP8I_PHS8I_t> retval(inputs.size());
-    converter.impl.nearest_neighbors_unseq<ximd::zfloatv, ximd::elements_per_iteration>(inputs, sys::make_span(retval));
+    converter.impl.nearest_neighbors_unseq<ximd_zfloatv, ximd_elements_per_iteration>(inputs, sys::make_span(retval));
     return retval;
 }
 #endif // SIX_sicd_has_ximd
 
-/**********************************************************************
-
-// This is here (instead of **ComplexToAMP8IPHS8I.cpp**) because par_unseq() might
-// need to know implementation details of _unseq()
-using input_it = std::span<const zfloat>::iterator;
-using output_it = std::span<AMP8I_PHS8I_t>::iterator;
-
-struct const_iterator final
-{
-    using Type = input_it::value_type;
-
-    using iterator_category = std::random_access_iterator_tag;
-    using value_type = std::remove_cv_t<Type>;
-    using difference_type = std::ptrdiff_t;
-    using pointer = Type*;
-    using reference = Type&;
-    using const_reference = const Type&;
-
-    input_it current_;
-
-    const_iterator() = default;
-    const_iterator(input_it it) : current_(it) {}
-
-    const_reference operator*() const noexcept
-    {
-        return *current_;
-    }
-
-    const_iterator& operator++() noexcept
-    {
-        for (ptrdiff_t i = 0; i < 8; i++)
-        {
-            ++current_;
-        }
-        return *this;
-    }
-
-    const_iterator& operator--() noexcept
-    {
-        --current_;
-        return *this;
-    }
-
-    const_iterator& operator-=(const difference_type n) noexcept
-    {
-        current_ -= n;
-        return *this;
-    }
-    const_iterator operator-(const difference_type n) const noexcept
-    {
-        auto ret = *this;
-        ret -= n;
-        return ret;
-    }
-    difference_type operator-(const const_iterator& rhs) const noexcept
-    {
-        return current_ - rhs.current_;
-    }
-
-    bool operator!=(const const_iterator& rhs) const noexcept
-    {
-        return current_ != rhs.current_;
-    }
-};
-
-struct result_wrapper final
-{
-    output_it current_;
-
-    result_wrapper& operator=(const std::vector<AMP8I_PHS8I_t>& values)
-    {
-        for (auto& v : values)
-        {
-            *current_ = v;
-            ++current_;
-        }
-        return *this;
-    }
-};
-
-struct iterator final
-{
-    using Type = output_it::value_type;
-
-    using iterator_category = std::random_access_iterator_tag;
-    using value_type = std::remove_cv_t<Type>;
-    using difference_type = std::ptrdiff_t;
-    using pointer = Type*;
-    using reference = Type&;
-
-    output_it current_;
-
-    iterator() = default;
-    iterator(output_it it) : current_(it) {}
-
-    result_wrapper operator*() noexcept
-    {
-        return result_wrapper{ current_};
-    }
-
-    iterator& operator++() noexcept
-    {
-        for (ptrdiff_t i = 0; i < 8; i++)
-        {
-            ++current_;
-        }
-        return *this;
-    }
-
-    iterator& operator--() noexcept
-    {
-        --current_;
-        return *this;
-    }
-
-    iterator& operator-=(const difference_type n) noexcept
-    {
-        current_ -= n;
-        return *this;
-    }
-
-    bool operator!=(const iterator& rhs) const noexcept
-    {
-        return current_ != rhs.current_;
-    }
-};
-
-void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_par_unseq(std::span<const zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const
-{
-    const auto first = inputs.begin();
-    const auto last = inputs.end();
-    auto dest = results.begin();
-
-    const auto func = [&](const auto& v) {
-        std::span<const zfloat> values(&v, 8);
-
-        std::vector<AMP8I_PHS8I_t> retval(values.size());
-        nearest_neighbors_unseq(values, sys::make_span(retval));
-        return retval;
-    };
-
-    std::ignore = std::transform(std::execution::seq,
-        const_iterator{ first }, const_iterator{ last }, iterator{ dest }, func);
-}
-#if SIX_sicd_ComplexToAMP8IPHS8I_unseq
-std::vector<AMP8I_PHS8I_t> six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par_unseq(
+#if SIX_sicd_has_simd
+std::vector<AMP8I_PHS8I_t> six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq_simd(
     std::span<const zfloat> inputs, const six::AmplitudeTable* pAmplitudeTable)
 {
     // make a structure to quickly find the nearest neighbor
     const auto& converter = make_(pAmplitudeTable);
 
     std::vector<AMP8I_PHS8I_t> retval(inputs.size());
-    converter.impl.nearest_neighbors_par_unseq(inputs, sys::make_span(retval));
+    converter.impl.nearest_neighbors_unseq<simd_zfloatv, simd_elements_per_iteration>(inputs, sys::make_span(retval));
     return retval;
 }
-#endif //  SIX_sicd_ComplexToAMP8IPHS8I_unseq
-
-**********************************************************************/
+#endif // SIX_sicd_has_simd
diff --git a/six/modules/c++/six.sicd/source/ImageData.cpp b/six/modules/c++/six.sicd/source/ImageData.cpp
index eaf993289..1e761cd41 100644
--- a/six/modules/c++/six.sicd/source/ImageData.cpp
+++ b/six/modules/c++/six.sicd/source/ImageData.cpp
@@ -32,14 +32,6 @@
 
 #include <gsl/gsl.h>
 #include <mt/Algorithm.h>
-#include <coda_oss/CPlusPlus.h>
-#if CODA_OSS_cpp17
-    // <execution> is broken with the older version of GCC we're using
-    #if (__GNUC__ >= 10) || _MSC_VER
-    #include <execution>
-    #define SIX_six_sicd_ImageData_has_execution 1
-    #endif
-#endif
 #include <sys/Span.h>
 
 #include "six/AmplitudeTable.h"
@@ -49,54 +41,6 @@
 using namespace six;
 using namespace six::sicd;
 
- // This was in coda-oss, but I removed it.
- //
- // First of all, C++11's std::async() is now (in 2023) thought of as maybe a
- // bit "half baked," and perhaps shouldn't be emulated.  Then, C++17 added
- // parallel algorithms which might be a better way of satisfying our immediate
- // needs (below) ... although we're still at C++14.
-template <typename InputIt, typename OutputIt, typename TFunc>
-static inline OutputIt transform_async(const InputIt first1, const InputIt last1, OutputIt d_first, TFunc f,
-    typename std::iterator_traits<InputIt>::difference_type cutoff)
-{
-    // https://en.cppreference.com/w/cpp/thread/async
-    const auto len = std::distance(first1, last1);
-    if (len < cutoff)
-    {
-        return std::transform(first1, last1, d_first, f);
-    }
-
-    constexpr auto policy = std::launch::async;
-
-    const auto mid1 = first1 + len / 2;
-    const auto d_mid = d_first + len / 2;
-    auto handle = std::async(policy, transform_async<InputIt, OutputIt, TFunc>, mid1, last1, d_mid, f, cutoff);
-    transform_async(first1, mid1, d_first, f, cutoff);
-    return handle.get();
-}
-template <typename TInputs, typename TResults, typename TFunc>
-static inline void transform(std::span<const TInputs> inputs, std::span<TResults> results, TFunc f)
-{
-#if SIX_six_sicd_ImageData_has_execution
-    std::ignore = std::transform(std::execution::par, inputs.begin(), inputs.end(), results.begin(), f);
-#else
-    constexpr ptrdiff_t cutoff_ = 0; // too slow w/o multi-threading
-    //if (cutoff_ < 0)
-    //{
-    //    std::ignore = std::transform(inputs.begin(), inputs.end(), results.begin(), f);
-    //}
-    //else
-    {
-        // The value of "default_cutoff" was determined by testing; there is nothing special about it, feel free to change it.
-        constexpr auto dimension = 128 * 8;
-        constexpr auto default_cutoff = dimension * dimension;
-        const auto cutoff = cutoff_ == 0 ? default_cutoff : cutoff_;
-        
-        std::ignore = transform_async(inputs.begin(), inputs.end(), results.begin(), f, cutoff);
-    }
-#endif // CODA_OSS_cpp17
-}
-
 bool ImageData::operator==(const ImageData& rhs) const
 {
     return (pixelType == rhs.pixelType &&
@@ -256,7 +200,7 @@ void ImageData::toComplex(six::Amp8iPhs8iLookup_t values, std::span<const AMP8I_
     {
         return values(v.amplitude, v.phase);
     };
-    transform(inputs, results, toComplex_);
+    std::ignore = mt::Transform_par(inputs.begin(), inputs.end(), results.begin(), toComplex_);
 }
 std::vector<six::zfloat> ImageData::toComplex(std::span<const AMP8I_PHS8I_t> inputs) const
 {
diff --git a/six/modules/c++/six/include/six/AmplitudeTable.h b/six/modules/c++/six/include/six/AmplitudeTable.h
index 966a160a9..4f44bc76b 100644
--- a/six/modules/c++/six/include/six/AmplitudeTable.h
+++ b/six/modules/c++/six/include/six/AmplitudeTable.h
@@ -180,8 +180,12 @@ struct SIX_SIX_API AMP8I_PHS8I_t final
     #else
         // __has_include is part of C++17
         #if __has_include("../../../six.sicd/include/six/sicd/vectorclass/version2/vectorclass.h") || \
-            __has_include("six.sicd/include/six/sicd/vectorclass/version2/vectorclass.h")
-        #define SIX_sicd_has_VCL 1
+            __has_include("six/sicd/vectorclass/version2/vectorclass.h")
+            #if _MSC_VER
+                #define SIX_sicd_has_VCL !CODA_OSS_cpp20 // TODO: MSVC works with C++17, but not C++20 ... ?
+            #else
+                #define SIX_sicd_has_VCL 1
+            #endif // _MSC_VER
         #else
         #define SIX_sicd_has_VCL 0
         #endif // __has_include
@@ -189,8 +193,8 @@ struct SIX_SIX_API AMP8I_PHS8I_t final
 #endif
 
 #ifndef SIX_sicd_has_simd
-    // Do we have the `std::experimental::simd? https://en.cppreference.com/w/cpp/experimental/simd
-    #if (__GNUC__ >= 999) && CODA_OSS_cpp20 // TODO: 11 instead of 999
+    // Do we have `std::experimental::simd? https://en.cppreference.com/w/cpp/experimental/simd
+    #if (__GNUC__ >= 11) && CODA_OSS_cpp20
         // https://github.com/VcDevel/std-simd "... shipping with GCC since version 11."
         #define SIX_sicd_has_simd 1
     #else
@@ -199,7 +203,7 @@ struct SIX_SIX_API AMP8I_PHS8I_t final
 #endif
 
 #ifndef SIX_sicd_has_ximd
-    // This is a "hacked up" version of std::experimental::simd using std::array.
+    // This is a "hacked up" version of std::experimental::simd using std::valarray.
     // It's primarily for development and testing: VCL needs C++17 and
     // std::experimental::simd is G++11/C++20.
     #define SIX_sicd_has_ximd CODA_OSS_DEBUG
@@ -278,29 +282,29 @@ class ComplexToAMP8IPHS8I final
         Impl(Impl&&) = delete; // implicitly deleted because of =delete for copy
         Impl& operator=(Impl&&) = delete; // implicitly deleted because of =delete for copy
 
-        void nearest_neighbors_seq(std::span<const zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const;
-        void nearest_neighbors_par(std::span<const zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const;
-
+        void nearest_neighbors_seq(std::span<const six::zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const;
+        void nearest_neighbors_par(std::span<const six::zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const;
         #if SIX_sicd_ComplexToAMP8IPHS8I_unseq
         template<typename ZFloatV, int elements_per_iteration>
-        void nearest_neighbors_unseq(std::span<const zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const;
-        void nearest_neighbors_par_unseq(std::span<const zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const;
+        void nearest_neighbors_unseq(std::span<const six::zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const;
+        void nearest_neighbors_par_unseq(std::span<const six::zfloat> inputs, std::span<AMP8I_PHS8I_t> results) const;
 
         template<typename ZFloatV>
-        void nearest_neighbors_unseq_T(std::span<const zfloat>, std::span<AMP8I_PHS8I_t>) const;
+        void nearest_neighbors_unseq_T(std::span<const six::zfloat>, std::span<AMP8I_PHS8I_t>) const;
+
         #endif 
 
         //! The sorted set of possible magnitudes order from small to large.
         std::array<float, AmplitudeTableSize> uncached_magnitudes;
         std::span<const float> magnitudes;
-        uint8_t find_nearest(zfloat phase_direction, zfloat v) const;
+        uint8_t find_nearest(six::zfloat phase_direction, six::zfloat v) const;
 
         //! The difference in phase angle between two UINT phase values.
         float phase_delta;
-        uint8_t getPhase(zfloat) const;
+        uint8_t getPhase(six::zfloat) const;
 
         //! Unit vector rays that represent each direction that phase can point.
-        std::array<zfloat, AmplitudeTableSize> phase_directions; // interleaved, std::complex<float>
+        std::array<six::zfloat, AmplitudeTableSize> phase_directions; // interleaved, std::complex<float>
         #ifdef SIX_sicd_has_VCL
         std::array<float, AmplitudeTableSize> phase_directions_real;
         std::array<float, AmplitudeTableSize> phase_directions_imag;