diff --git a/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h b/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h index ba17f39f5..8e311c9e3 100644 --- a/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h +++ b/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h @@ -20,22 +20,73 @@ * */ -#ifndef CODA_OSS_mt_Algorithm_h_INCLUDED_ -#define CODA_OSS_mt_Algorithm_h_INCLUDED_ #pragma once #include #include #include +#include "coda_oss/CPlusPlus.h" +#if CODA_OSS_cpp17 + // is broken with the older version of GCC we're using + #if (__GNUC__ >= 10) || _MSC_VER + #include + #define CODA_OSS_mt_Algorithm_has_execution 1 + #endif +#endif + namespace mt { -// There was a transform_async() utility here, but I removed it. -// -// First of all, C++11's std::async() is now (in 2023) thought of as maybe a -// bit "half baked," and perhaps shouldn't be emulated. Then, C++17 added -// parallel algorithms which might be a better ... although we're still at C++14. -} +// "Roll our own" `std::transform(execution::par)` using std::async() +// https://en.cppreference.com/w/cpp/algorithm/transform + +// Our own `Transform_par_()` is built on `std::async()`; for that we need to control +// a couple of settings. +struct Transform_par_settings final +{ + Transform_par_settings() = default; + + Transform_par_settings(ptrdiff_t cutoff) : cutoff_(cutoff) { } + Transform_par_settings(std::launch policy) : policy_(policy) { } + Transform_par_settings(ptrdiff_t cutoff, std::launch policy) : cutoff_(cutoff), policy_(policy) { } + Transform_par_settings(std::launch policy, ptrdiff_t cutoff) : Transform_par_settings(cutoff, policy) { } + + // The value of "default_cutoff" was determined by testing; there is nothing + // special about it, feel free to change it. + static constexpr ptrdiff_t dimension = 128 * 8; + static constexpr ptrdiff_t default_cutoff = dimension * dimension; + ptrdiff_t cutoff_ = default_cutoff; -#endif // CODA_OSS_mt_Algorithm_h_INCLUDED_ + // https://en.cppreference.com/w/cpp/thread/launch + std::launch policy_ = std::launch::async; // "the task is executed on a different thread, potentially by creating and launching it first" +}; + +template +inline OutputIt Transform_par_(InputIt first1, InputIt last1, OutputIt d_first, UnaryOperation unary_op, + const Transform_par_settings& settings) +{ + // https://en.cppreference.com/w/cpp/thread/async + const auto len = std::distance(first1, last1); + if (len < settings.cutoff_) + { + return std::transform(first1, last1, d_first, unary_op); + } + + const auto mid1 = first1 + len / 2; + const auto d_mid = d_first + len / 2; + auto handle = std::async(settings.policy_, Transform_par_, mid1, last1, d_mid, unary_op, settings); + Transform_par_(first1, mid1, d_first, unary_op, settings); + return handle.get(); +} +template +inline OutputIt Transform_par(InputIt first1, InputIt last1, OutputIt d_first, UnaryOperation unary_op, + Transform_par_settings settings = Transform_par_settings{}) +{ +#if CODA_OSS_mt_Algorithm_has_execution + return std::transform(std::execution::par, first1, last1, d_first, unary_op); +#else + return Transform_par_(first1, last1, d_first, unary_op, settings); +#endif // CODA_OSS_mt_Algorithm_has_execution +} +} \ No newline at end of file diff --git a/externals/coda-oss/modules/c++/mt/unittests/test_mt_byte_swap.cpp b/externals/coda-oss/modules/c++/mt/unittests/test_mt_byte_swap.cpp index d2099cf83..ccc2e92cc 100644 --- a/externals/coda-oss/modules/c++/mt/unittests/test_mt_byte_swap.cpp +++ b/externals/coda-oss/modules/c++/mt/unittests/test_mt_byte_swap.cpp @@ -28,14 +28,20 @@ #include // std::byte #include +#include + #include +#include + +#undef min +#undef max -static std::vector make_origValues(size_t NUM_PIXELS) +static std::vector make_origValues_(size_t count) { ::srand(334); - std::vector retval(NUM_PIXELS); - for (size_t ii = 0; ii < NUM_PIXELS; ++ii) + std::vector retval(count); + for (size_t ii = 0; ii < count; ++ii) { const auto value = static_cast(::rand()) / RAND_MAX * std::numeric_limits::max(); @@ -44,10 +50,16 @@ static std::vector make_origValues(size_t NUM_PIXELS) return retval; } +static constexpr size_t NUM_PIXELS = 10000; +static const std::vector& make_origValues() +{ + static const auto retval = make_origValues_(NUM_PIXELS); + return retval; +} + TEST_CASE(testThreadedByteSwap) { - constexpr size_t NUM_PIXELS = 10000; - const auto origValues = make_origValues(NUM_PIXELS); + const auto& origValues = make_origValues(); constexpr size_t numThreads = 4; @@ -59,14 +71,70 @@ TEST_CASE(testThreadedByteSwap) std::vector swappedValues2(origValues.size()); mt::threadedByteSwap(origValues.data(), sizeof(origValues[0]), NUM_PIXELS, numThreads, swappedValues2.data()); - // Everything should match - for (size_t ii = 0; ii < NUM_PIXELS; ++ii) + for (size_t ii = 0; ii < NUM_PIXELS; ++ii) // Everything should match { TEST_ASSERT_EQ(values1[ii], swappedValues2[ii]); } } +TEST_CASE(test_transform_ByteSwap) +{ + const auto& origValues = make_origValues(); + + // Byte swap the old-fashioned way + constexpr size_t numThreads = 4; + auto expected_(origValues); + constexpr auto elemSize = sizeof(expected_[0]); + mt::threadedByteSwap(expected_.data(), elemSize, NUM_PIXELS, numThreads); + const auto& expected = expected_; + + // Byte swap into output buffer + const auto byteSwap = [&](const auto& buffer_) { + auto buffer = buffer_; + sys::byteSwap(&buffer, elemSize, 1 /*numElements*/); + return buffer; + }; + + std::vector actual(origValues.size()); + std::transform(origValues.begin(), origValues.end(), actual.begin(), byteSwap); + for (size_t ii = 0; ii < NUM_PIXELS; ++ii) // Everything should match + { + TEST_ASSERT_EQ(expected[ii], actual[ii]); + } +} + +TEST_CASE(test_Transform_par_ByteSwap) +{ + const auto& origValues = make_origValues(); + + // Byte swap the old-fashioned way + constexpr size_t numThreads = 4; + auto expected_(origValues); + constexpr auto elemSize = sizeof(expected_[0]); + mt::threadedByteSwap(expected_.data(), elemSize, NUM_PIXELS, numThreads); + const auto& expected = expected_; + + // Byte swap into output buffer + const auto byteSwap = [&](const auto& buffer_) { + auto buffer = buffer_; + sys::byteSwap(&buffer, elemSize, 1 /*numElements*/); + return buffer; + }; + + // be sure we do something more than just call std::transform() + const mt::Transform_par_settings settings{ NUM_PIXELS / 4 /*cutoff*/ }; + + std::vector actual(origValues.size()); + mt::Transform_par(origValues.begin(), origValues.end(), actual.begin(), byteSwap, settings); + for (size_t ii = 0; ii < NUM_PIXELS; ++ii) // Everything should match + { + TEST_ASSERT_EQ(expected[ii], actual[ii]); + } +} + TEST_MAIN( TEST_CHECK(testThreadedByteSwap); + TEST_CHECK(test_transform_ByteSwap); + TEST_CHECK(test_Transform_par_ByteSwap); ) \ No newline at end of file diff --git a/six/modules/c++/samples/8AMPI_PHSI.cpp b/six/modules/c++/samples/8AMPI_PHSI.cpp index 5ceff52f6..14dbd7c44 100644 --- a/six/modules/c++/samples/8AMPI_PHSI.cpp +++ b/six/modules/c++/samples/8AMPI_PHSI.cpp @@ -14,7 +14,6 @@ #include #include #include -//#include #include #include "six/AmplitudeTable.h" diff --git a/six/modules/c++/six.sicd/six.sicd.vcxproj b/six/modules/c++/six.sicd/six.sicd.vcxproj index 3b0b210df..6d58ad83c 100644 --- a/six/modules/c++/six.sicd/six.sicd.vcxproj +++ b/six/modules/c++/six.sicd/six.sicd.vcxproj @@ -154,9 +154,7 @@ - - - + diff --git a/six/modules/c++/six.sicd/six.sicd.vcxproj.filters b/six/modules/c++/six.sicd/six.sicd.vcxproj.filters index fb6a88a17..f7ec2e4b0 100644 --- a/six/modules/c++/six.sicd/six.sicd.vcxproj.filters +++ b/six/modules/c++/six.sicd/six.sicd.vcxproj.filters @@ -248,13 +248,7 @@ Source Files - - Source Files - - - Source Files - - + Source Files diff --git a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp index adfd97059..1bdef6f87 100644 --- a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp +++ b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp @@ -31,15 +31,7 @@ #include #include -#include -#if CODA_OSS_cpp17 - // is broken with the older version of GCC we're using - #if (__GNUC__ >= 10) || _MSC_VER - #include - #define SIX_six_sicd_ComplexToAMP8IPHS8I_has_execution 1 - #endif -#endif - +#include #include #include #include @@ -231,30 +223,6 @@ six::AMP8I_PHS8I_t six::sicd::nearest_neighbor(const details::ComplexToAMP8IPHS8 return i.nearest_neighbor_(v); } - // Yes, this is duplicated code :-( 1) hopefully it will go away someday "soon," - // that is, we'll be at C++17; 2) the cutoff/dimension values may be different. - // - // First of all, C++11's std::async() is now (in 2023) thought of as maybe a - // bit "half baked," and perhaps shouldn't be emulated. Then, C++17 added - // parallel algorithms which might be a better way of satisfying our immediate - // needs (below) ... although we're still at C++14. -template -static inline OutputIt transform_async(const InputIt first1, const InputIt last1, OutputIt d_first, TTransformFunc transform_f, - typename std::iterator_traits::difference_type cutoff) -{ - // https://en.cppreference.com/w/cpp/thread/async - const auto len = std::distance(first1, last1); - if (len < cutoff) - { - return transform_f(first1, last1, d_first); - } - - const auto mid1 = first1 + len / 2; - const auto d_mid = d_first + len / 2; - auto handle = std::async(transform_async, mid1, last1, d_mid, transform_f, cutoff); - transform_async(first1, mid1, d_first, transform_f, cutoff); - return handle.get(); -} void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_par(std::span inputs, std::span results) const { @@ -262,26 +230,7 @@ void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_par(std::s { return converter.nearest_neighbor_(v); }; - - const auto first = inputs.begin(); - const auto last = inputs.end(); - const auto dest = results.begin(); - -#if SIX_six_sicd_ComplexToAMP8IPHS8I_has_execution - std::ignore = std::transform(std::execution::par, first, last, dest, nearest_neighbor); -#else - constexpr ptrdiff_t cutoff_ = 0; // too slow w/o multi-threading - // The value of "default_cutoff" was determined by testing; there is nothing special about it, feel free to change it. - constexpr auto dimension = 256; - constexpr auto default_cutoff = (dimension * dimension) * 4; - const auto cutoff = cutoff_ == 0 ? default_cutoff : cutoff_; - - const auto transform_f = [&](auto first1, auto last1, auto d_first) - { - return std::transform(first1, last1, d_first, nearest_neighbor); - }; - std::ignore = transform_async(first, last, dest, transform_f, cutoff); -#endif // SIX_six_sicd_ComplexToAMP8IPHS8I_has_execution + std::ignore = mt::Transform_par(inputs.begin(), inputs.end(), results.begin(), nearest_neighbor); } std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par( std::span inputs, const six::AmplitudeTable* pAmplitudeTable) @@ -323,12 +272,12 @@ std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest // TODO: there could be more complicated logic here to determine which UNSEQ // implementation to use. - #if SIX_sicd_has_VCL - return nearest_neighbors_unseq_vcl(inputs, pAmplitudeTable); - - #elif SIX_sicd_has_simd + #if SIX_sicd_has_simd return nearest_neighbors_unseq_simd(inputs, pAmplitudeTable); + #elif SIX_sicd_has_VCL + return nearest_neighbors_unseq_vcl(inputs, pAmplitudeTable); + #elif SIX_sicd_has_ximd return nearest_neighbors_unseq_ximd(inputs, pAmplitudeTable); diff --git a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_simd.cpp b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_simd.cpp deleted file mode 100644 index ece707b2f..000000000 --- a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_simd.cpp +++ /dev/null @@ -1,377 +0,0 @@ -/* ========================================================================= -* This file is part of six.sicd-c++ -* ========================================================================= -* -* (C) Copyright 2021, Maxar Technologies, Inc. -* -* six.sicd-c++ is free software; you can redistribute it and/or modify -* it under the terms of the GNU Lesser General Public License as published by -* the Free Software Foundation; either version 3 of the License, or -* (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Lesser General Public License for more details. -* -* You should have received a copy of the GNU Lesser General Public -* License along with this program; If not, -* see . -* -*/ -#include "six/AmplitudeTable.h" - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "six/sicd/Utilities.h" - -#if SIX_sicd_has_simd - -#include - -// https://en.cppreference.com/w/cpp/experimental/simd/simd_cast -// > The TS specification is missing `simd_cast` and `static_simd_cast` overloads for `simd_mask`. ... -namespace stdx -{ - using namespace std::experimental; - using namespace std::experimental::__proposed; -} - -// https://en.cppreference.com/w/cpp/experimental/simd -using floatv = stdx::native_simd; -// using doublev = stdx::rebind_simd_t; -using intv = stdx::rebind_simd_t; -using zfloatv = std::array; - -auto& real(zfloatv& z) -{ - return z[0]; -} -const auto& real(const zfloatv& z) -{ - return z[0]; -} -auto& imag(zfloatv& z) -{ - return z[1]; -} -const auto& imag(const zfloatv& z) -{ - return z[1]; -} - -// https://en.cppreference.com/w/cpp/experimental/simd/simd/simd -template -inline auto make_zfloatv(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag) -{ - zfloatv retval; - real(retval) = floatv(generate_real); - imag(retval) = floatv(generate_imag); - return retval; -} - -static inline auto load(std::span p) -{ - const auto generate_real = [&](size_t i) { - return p[i].real(); - }; - const auto generate_imag = [&](size_t i) { - return p[i].imag(); - }; - return make_zfloatv(generate_real, generate_imag); -} - - -// https://en.cppreference.com/w/cpp/numeric/complex/arg -// > `std::atan2(std::imag(z), std::real(z))` -inline auto arg(const floatv& real, const floatv& imag) -{ - return atan2(imag, real); // arg() -} -inline auto arg(const zfloatv& z) -{ - return arg(real(z), imag(z)); -} - -inline auto roundi(const floatv& v) // match vcl::roundi() -{ - return static_simd_cast(round(v)); -} - -template -inline auto select(const TTest& test_, const TResult& t, const TResult& f) -{ - //const auto test = test_.__cvt(); // https://github.com/VcDevel/std-simd/issues/41 - const auto test = stdx::static_simd_cast(test_); // https://github.com/VcDevel/std-simd/issues/41 - - // https://en.cppreference.com/w/cpp/experimental/simd/where_expression - // > ... All other elements are left unchanged. - TResult retval; - where(test, retval) = t; - where(!test, retval) = f; - return retval; -} - -static inline auto getPhase(const zfloatv& v, float phase_delta) -{ - // Phase is determined via arithmetic because it's equally spaced. - // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around - // handles cases that are close to 2PI. - auto phase = arg(v); - where(phase < 0.0f, phase) += std::numbers::pi_v *2.0f; // Wrap from [0, 2PI] - return roundi(phase / phase_delta); -} - -template -static inline auto lookup(const intv& zindex, const std::array& phase_directions) -{ - // It seems that the "generator" constuctor is called with SIMD instructions. - // https://en.cppreference.com/w/cpp/experimental/simd/simd/simd - // > The calls to `generator` are unsequenced with respect to each other. - - const auto generate_real = [&](size_t i) { - const auto i_ = zindex[i]; - return phase_directions[i_].real(); - }; - const auto generate_imag = [&](size_t i) { - const auto i_ = zindex[i]; - return phase_directions[i_].imag(); - }; - return make_zfloatv(generate_real, generate_imag); -} - -// https://en.cppreference.com/w/cpp/algorithm/lower_bound -/* -template -ForwardIt lower_bound(ForwardIt first, ForwardIt last, const T& value) -{ - ForwardIt it; - typename std::iterator_traits::difference_type count, step; - count = std::distance(first, last); - - while (count > 0) - { - it = first; - step = count / 2; - std::advance(it, step); - - if (*it < value) - { - first = ++it; - count -= step + 1; - } - else - count = step; - } - - return first; -} -*/ -template -static inline auto lookup(const intv& zindex, std::span magnitudes) -{ - // It seems that the "generator" constuctor is called with SIMD instructions. - // https://en.cppreference.com/w/cpp/experimental/simd/simd/simd - // > The calls to `generator` are unsequenced with respect to each other. - - const auto generate = [&](size_t i) { - const auto i_ = zindex[i]; - return magnitudes[i_]; - }; - return floatv(generate); -} -inline auto lower_bound_(std::span magnitudes, const floatv& v) -{ - intv first = 0; - const intv last = gsl::narrow(magnitudes.size()); - - auto count = last - first; - while (any_of(count > 0)) - { - auto it = first; - const auto step = count / 2; - it += step; - - auto next = it; ++next; // ... ++it; - auto advance = count; advance -= step + 1; // ... -= step + 1; - - const auto c = lookup(it, magnitudes); // magnituides[it] - - const auto test = c < v; // (c < v).__cvt(); // https://github.com/VcDevel/std-simd/issues/41 - - //where(test, it) = next; // ... ++it - //where(test, first) = it; // first = ... - //// count -= step + 1 <...OR...> count = step - //where(test, count) = advance; - //where(!test, count) = step; - it = select(test, next, it); // ... ++it - first = select(test, it, first); // first = ... - count = select(test, advance, step); // count -= step + 1 <...OR...> count = step - } - return first; -} -inline auto lower_bound(std::span magnitudes, const floatv& value) -{ - auto retval = lower_bound_(magnitudes, value); - - #ifndef NDEBUG // i.e., debug - for (size_t i = 0; i < value.size(); i++) - { - const auto it = std::lower_bound(magnitudes.begin(), magnitudes.end(), value[i]); - const auto result = gsl::narrow(std::distance(magnitudes.begin(), it)); - assert(retval[i] == result); - } - #endif - - return retval; -} - -static auto nearest(std::span magnitudes, const floatv& value) -{ - assert(magnitudes.size() == six::AmplitudeTableSize); - - /* - const auto it = std::lower_bound(begin, end, value); - if (it == begin) return 0; - - const auto prev_it = std::prev(it); - const auto nearest_it = it == end ? prev_it : - (value - *prev_it <= *it - value ? prev_it : it); - const auto distance = std::distance(begin, nearest_it); - assert(distance <= std::numeric_limits::max()); - return gsl::narrow(distance); - */ - const auto it = ::lower_bound(magnitudes, value); - const auto prev_it = it - 1; // const auto prev_it = std::prev(it); - - const auto v0 = value - lookup(prev_it, magnitudes); // value - *prev_it - const auto v1 = lookup(it, magnitudes) - value; // *it - value - //const auto nearest_it = select(v0 <= v1, prev_it, it); // (value - *prev_it <= *it - value ? prev_it : it); - - const intv end = gsl::narrow(magnitudes.size()); - //const auto end_test = select(it == end, prev_it, nearest_it); // it == end ? prev_it : ... - const intv zero = 0; - auto retval = select(it == 0, zero, // if (it == begin) return 0; - select(it == end, prev_it, // it == end ? prev_it : ... - select(v0 <=v1, prev_it, it) // (value - *prev_it <= *it - value ? prev_it : it); - )); - return retval; -} - -static auto find_nearest(std::span magnitudes, const floatv& phase_direction_real, const floatv& phase_direction_imag, - const zfloatv& v) -{ - // We have to do a 1D nearest neighbor search for magnitude. - // But it's not the magnitude of the input complex value - it's the projection of - // the complex value onto the ray of candidate magnitudes at the selected phase. - // i.e. dot product. - const auto projection = (phase_direction_real * real(v)) + (phase_direction_imag * imag(v)); - //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ??? - return nearest(magnitudes, projection); -} -static inline auto find_nearest(std::span magnitudes, const zfloatv& phase_direction, const zfloatv& v) -{ - return find_nearest(magnitudes, real(phase_direction), imag(phase_direction), v); -} - -void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_unseq_(std::span p, std::span results) const -{ - const auto v = load(p); - - const auto phase = ::getPhase(v, phase_delta); - - const auto phase_direction = lookup(phase, phase_directions); - #ifndef NDEBUG // i.e., debug - for (size_t i = 0; i < phase.size(); i++) - { - const auto pd = phase_directions[phase[i]]; - assert(pd.real() == real(phase_direction)[i]); - assert(pd.imag() == imag(phase_direction)[i]); - } - #endif - - const auto amplitude = ::find_nearest(magnitudes, phase_direction, v); - #ifndef NDEBUG // i.e., debug - for (size_t i = 0; i < amplitude.size(); i++) - { - const auto i_ = phase[i]; - const auto a = find_nearest(phase_directions[i_], p[i]); - assert(a == amplitude[i]); - } - #endif - - auto dest = results.begin(); - for (size_t i = 0; i < v.size(); i++) - { - dest->phase = gsl::narrow_cast(phase[i]); - dest->amplitude = gsl::narrow_cast(amplitude[i]); - - ++dest; - } -} - -void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_unseq(std::span inputs, std::span results) const -{ - auto first = inputs.begin(); - const auto last = inputs.end(); - auto dest = results.begin(); - - // The above code is simpler (no templates) if we use just a single VCL - // complex type: `zfloatv`. If there is any performance difference, - // it will only be for extreme edge cases since the smaller types are only used - // at the end of the loop. - // - // It also makes this loop simpler as we handle all non-multiples-of-8 in - // the same way. - constexpr auto elements_per_iteration = 8; - - // Can do these checks one-time outside of the loop - const auto distance = std::distance(first, last); - - // First, do multiples of 8 - const auto distance_ = distance - (distance % elements_per_iteration); - const auto last_ = first + distance_; - for (; first != last_; first += elements_per_iteration, dest += elements_per_iteration) - { - const auto f = sys::make_span(&(*first), elements_per_iteration); - const auto d = sys::make_span(&(*dest), elements_per_iteration); - nearest_neighbors_unseq_(f, d); - } - - // Then finish off anything left - assert(std::distance(first, last) < elements_per_iteration); - for (; first != last; ++first, ++dest) - { - const auto f = sys::make_span(&(*first), 1); - const auto d = sys::make_span(&(*dest), 1); - nearest_neighbors_seq(f, d); - } -} -std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq_vcl( - std::span inputs, const six::AmplitudeTable* pAmplitudeTable) -{ - // make a structure to quickly find the nearest neighbor - const auto& converter = make_(pAmplitudeTable); - - std::vector retval(inputs.size()); - converter.impl.nearest_neighbors_unseq(inputs, sys::make_span(retval)); - return retval; -} - -#endif // SIX_sicd_has_simd diff --git a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_unseq.cpp b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_unseq.cpp index 541fb4b5c..6b546a7d1 100644 --- a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_unseq.cpp +++ b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_unseq.cpp @@ -31,19 +31,11 @@ #include #include -#include -#if CODA_OSS_cpp17 - // is broken with the older version of GCC we're using - #if (__GNUC__ >= 10) || _MSC_VER - #include - #define SIX_six_sicd_ComplexToAMP8IPHS8I_has_execution 1 - #endif -#endif - #include #include #include #include +#include #include "six/sicd/Utilities.h" @@ -70,76 +62,61 @@ using AMP8I_PHS8I_t = six::AMP8I_PHS8I_t; #pragma warning(pop) #endif -namespace six -{ -namespace sicd -{ -namespace vcl -{ - -constexpr auto elements_per_iteration = 4; - -using floatv = ::vcl::Vec4f; -using intv = ::vcl::Vec4i; -using zfloatv = ::vcl::Complex4f; +using vcl_intv = vcl::Vec4i; +using vcl_floatv = vcl::Vec4f; +constexpr auto vcl_elements_per_iteration = vcl_floatv::size(); -} // vcl -} // sicd -} // six - -auto real(const six::sicd::vcl::zfloatv& z) +inline int ssize(const vcl_intv& z) noexcept { - return z.real(); + return z.size(); } -auto imag(const six::sicd::vcl::zfloatv& z) +inline int ssize(const vcl_floatv& z) noexcept { - return z.imag(); + return z.size(); } -static inline size_t size(const six::sicd::vcl::zfloatv& z) noexcept + +using vcl_zfloatv = vcl::Complex4f; +auto real(const vcl_zfloatv& z) { - return z.size(); + return z.real(); } -static inline int ssize(const six::sicd::vcl::zfloatv& z) noexcept +auto imag(const vcl_zfloatv& z) { - return z.size(); + return z.imag(); } -static inline int ssize(const six::sicd::vcl::floatv& z) noexcept +inline size_t size(const vcl_zfloatv& z) noexcept { return z.size(); } -static inline int ssize(const six::sicd::vcl::intv& z) noexcept +inline int ssize(const vcl_zfloatv& z) noexcept { return z.size(); } -// https://en.cppreference.com/w/cpp/numeric/complex/arg -// > `std::atan2(std::imag(z), std::real(z))` -inline auto arg(const six::sicd::vcl::floatv& real, const six::sicd::vcl::floatv& imag) -{ - return atan2(imag, real); // arg() -} -inline auto arg(const six::sicd::vcl::zfloatv& z) +inline auto arg(const vcl_zfloatv& z) { - return arg(real(z), imag(z)); + // https://en.cppreference.com/w/cpp/numeric/complex/arg + // > `std::atan2(std::imag(z), std::real(z))` + return atan2(z.imag(), z.real()); // arg() } template -inline auto if_add(const T& f, const six::sicd::vcl::floatv& a, float b) +inline auto if_add(const T& f, const vcl_floatv& a, float b) { return vcl::if_add(f, a, b); } -inline bool any_of(const six::sicd::vcl::intv& m) +inline bool any_of(const vcl_intv& m) { return horizontal_or(m); } -static inline void copy_from(std::span p, six::sicd::vcl::floatv& result) +inline void copy_from(std::span p, vcl_floatv& result) { assert(p.size() == result.size()); result.load(p.data()); } -static inline auto copy_from(std::span p, six::sicd::vcl::zfloatv& result) +inline auto copy_from(std::span p, vcl_zfloatv& result) { // https://en.cppreference.com/w/cpp/numeric/complex // > For any pointer to an element of an array of `std::complex` named `p` and any valid array index `i`, ... @@ -148,14 +125,14 @@ static inline auto copy_from(std::span p, six::sicd::vcl::zfloatv& } template -static inline auto lookup(const six::sicd::vcl::intv& zindex, const std::array& phase_directions) +inline auto lookup(const vcl_intv& zindex, const std::array& phase_directions) { - return ::vcl::lookup(zindex, phase_directions.data()); + return vcl::lookup(zindex, phase_directions.data()); } -static inline auto lookup(const six::sicd::vcl::intv& zindex, std::span magnitudes) +inline auto lookup(const vcl_intv& zindex, std::span magnitudes) { assert(magnitudes.size() == six::AmplitudeTableSize); - return ::vcl::lookup(zindex, magnitudes.data()); + return vcl::lookup(zindex, magnitudes.data()); } #endif // SIX_sicd_has_VCL @@ -166,27 +143,73 @@ static inline auto lookup(const six::sicd::vcl::intv& zindex, std::span -namespace six +template +using ximd = sys::ximd::Ximd; + +using ximd_intv = ximd; +using ximd_intv_mask = sys::ximd::ximd_mask; + +using ximd_floatv = ximd; +using ximd_floatv_mask = sys::ximd::ximd_mask; + +constexpr auto ximd_elements_per_iteration = ximd_floatv::size(); + +template +static inline auto ssize(const ximd& v) noexcept { -namespace sicd + return gsl::narrow(v.size()); +} + +// Manage a SIMD complex as an array of two SIMDs +using ximd_zfloatv = std::array; +inline auto& real(ximd_zfloatv& z) noexcept { -namespace ximd + return z[0]; +} +inline const auto& real(const ximd_zfloatv& z) noexcept { + return z[0]; +} +inline auto& imag(ximd_zfloatv& z) noexcept +{ + return z[1]; +} +inline const auto& imag(const ximd_zfloatv& z) noexcept +{ + return z[1]; +} +inline size_t size(const ximd_zfloatv& z) noexcept +{ + auto retval = real(z).size(); + assert(retval == imag(z).size()); + return retval; +} +inline auto ssize(const ximd_zfloatv& z) noexcept +{ + return gsl::narrow(size(z)); +} -template -using simd = sys::ximd::Ximd; -using intv = simd; -using floatv = simd; -using intv_mask = sys::ximd::ximd_mask; -using floatv_mask = sys::ximd::ximd_mask; - -constexpr auto elements_per_iteration = floatv::size(); +inline auto arg(const ximd_zfloatv& z) +{ + // https://en.cppreference.com/w/cpp/numeric/complex/arg + // > `std::atan2(std::imag(z), std::real(z))` + return atan2(imag(z), real(z)); // arg() +} -// Manage a SIMD complex as an array of two SIMDs -using zfloatv = std::array; +template +static inline auto make_ximd_zfloatv(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag) +{ + ximd_zfloatv retval; + for (size_t i = 0; i < size(retval); i++) + { + real(retval)[i] = generate_real(i); + imag(retval)[i] = generate_imag(i); + } + return retval; +} template -static inline auto select(const TTest& test, const TResult& t, const TResult& f) +static inline auto ximd_select_(const TTest& test, const TResult& t, const TResult& f) { TResult retval; for (size_t i = 0; i < test.size(); i++) @@ -196,54 +219,148 @@ static inline auto select(const TTest& test, const TResult& t, const TResult& f) return retval; } -} // ximd -} // sicd -} // six +static inline auto copy_from(std::span p, ximd_floatv& result) +{ + assert(p.size() == result.size()); + result.copy_from(p.data()); +} +static inline auto copy_from(std::span p, ximd_zfloatv& result) +{ + const auto generate_real = [&](size_t i) { return p[i].real(); }; + const auto generate_imag = [&](size_t i) { return p[i].imag(); }; + result = make_ximd_zfloatv(generate_real, generate_imag); +} + +static inline auto roundi(const ximd_floatv& v) // match vcl::roundi() +{ + const auto rounded = round(v); + const auto generate_roundi = [&](size_t i) + { return static_cast(rounded[i]); }; + return ximd_intv::generate(generate_roundi); +} + +static inline auto select(const ximd_floatv_mask& test, const ximd_floatv& t, const ximd_floatv& f) +{ + return ximd_select_(test, t, f); +} +static inline auto select(const ximd_intv_mask& test, const ximd_intv& t, const ximd_intv& f) +{ + return ximd_select_(test, t, f); +} + +static inline auto if_add(const ximd_floatv_mask& m, const ximd_floatv& v, typename ximd_floatv::value_type c) +{ + // phase = if_add(phase < 0.0, phase, std::numbers::pi_v * 2.0f); // Wrap from [0, 2PI] + const auto generate_add = [&](size_t i) { + return m[i] ? v[i] + c : v[i]; + }; + return ximd_floatv::generate(generate_add); +} + +template +static inline auto lookup(const ximd_intv& zindex, const std::array& phase_directions) +{ + // It seems that the "generator" constuctor is called with SIMD instructions. + // https://en.cppreference.com/w/cpp/experimental/simd/simd/simd + // > The calls to `generator` are unsequenced with respect to each other. + + const auto generate_real = [&](size_t i) { + const auto i_ = zindex[i]; + return phase_directions[i_].real(); + }; + const auto generate_imag = [&](size_t i) { + const auto i_ = zindex[i]; + return phase_directions[i_].imag(); + }; + return make_ximd_zfloatv(generate_real, generate_imag); +} + +static auto lookup(const ximd_intv& zindex, std::span magnitudes) +{ + const auto generate = [&](size_t i) { + const auto i_ = zindex[i]; + + // The index may be out of range. This is expected because `i` might be "don't care." + if ((i_ >= 0) && (i_ < std::ssize(magnitudes))) + { + return magnitudes[i_]; + } + return NAN; // propogate "don't care" + }; + return ximd_floatv::generate(generate); +} + +#endif // SIX_sicd_has_ximd + +#if SIX_sicd_has_simd + +#include + +// https://en.cppreference.com/w/cpp/experimental/simd/simd_cast +// > The TS specification is missing `simd_cast` and `static_simd_cast` overloads for `simd_mask`. ... +namespace stdx +{ + using namespace std::experimental; + using namespace std::experimental::__proposed; +} + +template +using simd = stdx::simd; + +using simd_intv = simd; +using simd_intv_mask = typename simd_intv::mask_type; + +using simd_floatv = simd; +using simd_floatv_mask = typename simd_floatv::mask_type; + +constexpr auto simd_elements_per_iteration = simd_floatv::size(); -static inline auto& real(six::sicd::ximd::zfloatv& z) noexcept +template +static inline auto ssize(const simd& v) noexcept +{ + return gsl::narrow(v.size()); +} + +// Manage a SIMD complex as an array of two SIMDs +using simd_zfloatv = std::array; +inline auto& real(simd_zfloatv& z) noexcept { return z[0]; } -static inline const auto& real(const six::sicd::ximd::zfloatv& z) noexcept +inline const auto& real(const simd_zfloatv& z) noexcept { return z[0]; } -static inline auto& imag(six::sicd::ximd::zfloatv& z) noexcept +inline auto& imag(simd_zfloatv& z) noexcept { return z[1]; } -static inline const auto& imag(const six::sicd::ximd::zfloatv& z) noexcept +inline const auto& imag(const simd_zfloatv& z) noexcept { return z[1]; } - -static inline size_t size(const six::sicd::ximd::zfloatv& z) noexcept +inline size_t size(const simd_zfloatv& z) noexcept { auto retval = real(z).size(); assert(retval == imag(z).size()); return retval; } -static inline auto ssize(const six::sicd::ximd::zfloatv& z) noexcept +inline auto ssize(const simd_zfloatv& z) noexcept { return gsl::narrow(size(z)); } -template -static inline auto ssize(const six::sicd::ximd::simd& v) noexcept -{ - return gsl::narrow(v.size()); -} -namespace six -{ -namespace sicd -{ -namespace ximd +inline auto arg(const simd_zfloatv& z) { + // https://en.cppreference.com/w/cpp/numeric/complex/arg + // > `std::atan2(std::imag(z), std::real(z))` + return atan2(imag(z), real(z)); // arg() +} template -static inline auto make_zfloatv(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag) +static inline auto make_simd_zfloatv(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag) { - six::sicd::ximd::zfloatv retval; + simd_zfloatv retval; for (size_t i = 0; i < size(retval); i++) { real(retval)[i] = generate_real(i); @@ -252,58 +369,63 @@ static inline auto make_zfloatv(TGeneratorReal&& generate_real, TGeneratorImag&& return retval; } -} // ximd -} // sicd -} // six - - -static inline auto arg(const six::sicd::ximd::zfloatv& z) +template +static inline auto simd_select_(const TTest& test, const TResult& t, const TResult& f) { - // https://en.cppreference.com/w/cpp/numeric/complex/arg - // > `std::atan2(std::imag(z), std::real(z))` - return atan2(imag(z), real(z)); // arg() + // https://en.cppreference.com/w/cpp/experimental/simd/where_expression + // > ... All other elements are left unchanged. + TResult retval; + where(test, retval) = t; + where(!test, retval) = f; + return retval; } -static inline auto copy_from(std::span p, six::sicd::ximd::floatv& result) +static inline auto copy_from(std::span p, simd_floatv& result) { assert(p.size() == result.size()); - result.copy_from(p.data()); + result.copy_from(p.data(), stdx::element_aligned); } -static inline auto copy_from(std::span p, six::sicd::ximd::zfloatv& result) +static inline auto copy_from(std::span p, simd_zfloatv& result) { const auto generate_real = [&](size_t i) { return p[i].real(); }; const auto generate_imag = [&](size_t i) { return p[i].imag(); }; - result = six::sicd::ximd::make_zfloatv(generate_real, generate_imag); + result = make_simd_zfloatv(generate_real, generate_imag); } -static inline auto roundi(const six::sicd::ximd::floatv& v) // match vcl::roundi() +static inline auto roundi(const simd_floatv& v) // match vcl::roundi() { const auto rounded = round(v); const auto generate_roundi = [&](size_t i) - { return static_cast(rounded[i]); }; - return six::sicd::ximd::intv::generate(generate_roundi); + { return static_cast(rounded[i]); }; + return simd_intv(generate_roundi); } -static inline auto select(const six::sicd::ximd::floatv_mask& test, const six::sicd::ximd::floatv& t, const six::sicd::ximd::floatv& f) +template +static inline auto select(const TMask& test_, const simd_floatv& t, const simd_floatv& f) { - return six::sicd::ximd::select(test, t, f); + //const auto test = test_.__cvt(); // https://github.com/VcDevel/std-simd/issues/41 + const auto test = stdx::static_simd_cast(test_); // https://github.com/VcDevel/std-simd/issues/41 + return simd_select_(test, t, f); } -static inline auto select(const six::sicd::ximd::intv_mask& test, const six::sicd::ximd::intv& t, const six::sicd::ximd::intv& f) +template +static inline auto select(const TMask& test_, const simd_intv& t, const simd_intv& f) { - return six::sicd::ximd::select(test, t, f); + //const auto test = test_.__cvt(); // https://github.com/VcDevel/std-simd/issues/41 + const auto test = stdx::static_simd_cast(test_); // https://github.com/VcDevel/std-simd/issues/41 + return simd_select_(test, t, f); } -static inline auto if_add(const six::sicd::ximd::floatv_mask& m, const six::sicd::ximd::floatv& v, typename six::sicd::ximd::floatv::value_type c) +static inline auto if_add(const simd_floatv_mask& m, const simd_floatv& v, typename simd_floatv::value_type c) { // phase = if_add(phase < 0.0, phase, std::numbers::pi_v * 2.0f); // Wrap from [0, 2PI] const auto generate_add = [&](size_t i) { return m[i] ? v[i] + c : v[i]; }; - return six::sicd::ximd::floatv::generate(generate_add); + return simd_floatv(generate_add); } template -static inline auto lookup(const six::sicd::ximd::intv& zindex, const std::array& phase_directions) +static inline auto lookup(const simd_intv& zindex, const std::array& phase_directions) { // It seems that the "generator" constuctor is called with SIMD instructions. // https://en.cppreference.com/w/cpp/experimental/simd/simd/simd @@ -317,10 +439,10 @@ static inline auto lookup(const six::sicd::ximd::intv& zindex, const std::array< const auto i_ = zindex[i]; return phase_directions[i_].imag(); }; - return six::sicd::ximd::make_zfloatv(generate_real, generate_imag); + return make_simd_zfloatv(generate_real, generate_imag); } -static auto lookup(const six::sicd::ximd::intv& zindex, std::span magnitudes) +static auto lookup(const simd_intv& zindex, std::span magnitudes) { const auto generate = [&](size_t i) { const auto i_ = zindex[i]; @@ -332,10 +454,12 @@ static auto lookup(const six::sicd::ximd::intv& zindex, std::span m } return NAN; // propogate "don't care" }; - return six::sicd::ximd::floatv::generate(generate); + return simd_floatv(generate); } -#endif // SIX_sicd_has_ximd +#endif // SIX_sicd_has_simd + +/******************************************************************************************************/ template static auto getPhase(const ZFloatV& v, float phase_delta) @@ -344,7 +468,7 @@ static auto getPhase(const ZFloatV& v, float phase_delta) // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around // handles cases that are close to 2PI. auto phase = arg(v); - phase = if_add(phase < 0.0, phase, std::numbers::pi_v * 2.0f); // Wrap from [0, 2PI] + phase = if_add(phase < 0.0f, phase, std::numbers::pi_v * 2.0f); // Wrap from [0, 2PI] return roundi(phase / phase_delta); } @@ -444,7 +568,7 @@ static auto nearest(std::span magnitudes, const FloatV& value) const IntV zero = 0; auto retval = select(it == 0, zero, // if (it == begin) return 0; select(it == end, prev_it, // it == end ? prev_it : ... - select(v0 <=v1, prev_it, it) // (value - *prev_it <= *it - value ? prev_it : it); + select(v0 <= v1, prev_it, it) // (value - *prev_it <= *it - value ? prev_it : it); )); return retval; } @@ -465,24 +589,35 @@ static auto find_nearest(std::span magnitudes, #if SIX_sicd_has_VCL static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter, - const six::sicd::vcl::intv& phase, const six::sicd::vcl::zfloatv& v) + const vcl_intv& phase, const vcl_zfloatv& v) { const auto& impl = converter.impl; const auto phase_direction_real = lookup(phase, impl.phase_directions_real); const auto phase_direction_imag = lookup(phase, impl.phase_directions_imag); - return ::find_nearest(impl.magnitudes, phase_direction_real, phase_direction_imag, v); + return ::find_nearest(impl.magnitudes, phase_direction_real, phase_direction_imag, v); } #endif #if SIX_sicd_has_ximd static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter, - const six::sicd::ximd::intv& phase, const six::sicd::ximd::zfloatv& v) + const ximd_intv& phase, const ximd_zfloatv& v) { const auto& impl = converter.impl; const auto phase_direction = lookup(phase, impl.phase_directions); - return ::find_nearest(impl.magnitudes, real(phase_direction), imag(phase_direction), v); + return ::find_nearest(impl.magnitudes, real(phase_direction), imag(phase_direction), v); +} +#endif + +#if SIX_sicd_has_simd +static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter, + const simd_intv& phase, const simd_zfloatv& v) +{ + const auto& impl = converter.impl; + + const auto phase_direction = lookup(phase, impl.phase_directions); + return ::find_nearest(impl.magnitudes, real(phase_direction), imag(phase_direction), v); } #endif @@ -497,9 +632,9 @@ void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_unseq_T(st #if CODA_OSS_DEBUG for (int i = 0; i < ssize(v); i++) { - const auto z = p[i]; - assert(real(v)[i] == z.real()); - assert(imag(v)[i] == z.imag()); + //const auto z = p[i]; + //assert(real(v)[i] == z.real()); + //assert(imag(v)[i] == z.imag()); } #endif @@ -581,7 +716,7 @@ std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neig const auto& converter = make_(pAmplitudeTable); std::vector retval(inputs.size()); - converter.impl.nearest_neighbors_unseq(inputs, sys::make_span(retval)); + converter.impl.nearest_neighbors_unseq(inputs, sys::make_span(retval)); return retval; } #endif // SIX_sicd_has_VCL @@ -594,166 +729,20 @@ std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neig const auto& converter = make_(pAmplitudeTable); std::vector retval(inputs.size()); - converter.impl.nearest_neighbors_unseq(inputs, sys::make_span(retval)); + converter.impl.nearest_neighbors_unseq(inputs, sys::make_span(retval)); return retval; } #endif // SIX_sicd_has_ximd -/********************************************************************** - -// This is here (instead of **ComplexToAMP8IPHS8I.cpp**) because par_unseq() might -// need to know implementation details of _unseq() -using input_it = std::span::iterator; -using output_it = std::span::iterator; - -struct const_iterator final -{ - using Type = input_it::value_type; - - using iterator_category = std::random_access_iterator_tag; - using value_type = std::remove_cv_t; - using difference_type = std::ptrdiff_t; - using pointer = Type*; - using reference = Type&; - using const_reference = const Type&; - - input_it current_; - - const_iterator() = default; - const_iterator(input_it it) : current_(it) {} - - const_reference operator*() const noexcept - { - return *current_; - } - - const_iterator& operator++() noexcept - { - for (ptrdiff_t i = 0; i < 8; i++) - { - ++current_; - } - return *this; - } - - const_iterator& operator--() noexcept - { - --current_; - return *this; - } - - const_iterator& operator-=(const difference_type n) noexcept - { - current_ -= n; - return *this; - } - const_iterator operator-(const difference_type n) const noexcept - { - auto ret = *this; - ret -= n; - return ret; - } - difference_type operator-(const const_iterator& rhs) const noexcept - { - return current_ - rhs.current_; - } - - bool operator!=(const const_iterator& rhs) const noexcept - { - return current_ != rhs.current_; - } -}; - -struct result_wrapper final -{ - output_it current_; - - result_wrapper& operator=(const std::vector& values) - { - for (auto& v : values) - { - *current_ = v; - ++current_; - } - return *this; - } -}; - -struct iterator final -{ - using Type = output_it::value_type; - - using iterator_category = std::random_access_iterator_tag; - using value_type = std::remove_cv_t; - using difference_type = std::ptrdiff_t; - using pointer = Type*; - using reference = Type&; - - output_it current_; - - iterator() = default; - iterator(output_it it) : current_(it) {} - - result_wrapper operator*() noexcept - { - return result_wrapper{ current_}; - } - - iterator& operator++() noexcept - { - for (ptrdiff_t i = 0; i < 8; i++) - { - ++current_; - } - return *this; - } - - iterator& operator--() noexcept - { - --current_; - return *this; - } - - iterator& operator-=(const difference_type n) noexcept - { - current_ -= n; - return *this; - } - - bool operator!=(const iterator& rhs) const noexcept - { - return current_ != rhs.current_; - } -}; - -void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_par_unseq(std::span inputs, std::span results) const -{ - const auto first = inputs.begin(); - const auto last = inputs.end(); - auto dest = results.begin(); - - const auto func = [&](const auto& v) { - std::span values(&v, 8); - - std::vector retval(values.size()); - nearest_neighbors_unseq(values, sys::make_span(retval)); - return retval; - }; - - std::ignore = std::transform(std::execution::seq, - const_iterator{ first }, const_iterator{ last }, iterator{ dest }, func); -} -#if SIX_sicd_ComplexToAMP8IPHS8I_unseq -std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par_unseq( +#if SIX_sicd_has_simd +std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq_simd( std::span inputs, const six::AmplitudeTable* pAmplitudeTable) { // make a structure to quickly find the nearest neighbor const auto& converter = make_(pAmplitudeTable); std::vector retval(inputs.size()); - converter.impl.nearest_neighbors_par_unseq(inputs, sys::make_span(retval)); + converter.impl.nearest_neighbors_unseq(inputs, sys::make_span(retval)); return retval; } -#endif // SIX_sicd_ComplexToAMP8IPHS8I_unseq - -**********************************************************************/ +#endif // SIX_sicd_has_simd diff --git a/six/modules/c++/six.sicd/source/ImageData.cpp b/six/modules/c++/six.sicd/source/ImageData.cpp index eaf993289..1e761cd41 100644 --- a/six/modules/c++/six.sicd/source/ImageData.cpp +++ b/six/modules/c++/six.sicd/source/ImageData.cpp @@ -32,14 +32,6 @@ #include #include -#include -#if CODA_OSS_cpp17 - // is broken with the older version of GCC we're using - #if (__GNUC__ >= 10) || _MSC_VER - #include - #define SIX_six_sicd_ImageData_has_execution 1 - #endif -#endif #include #include "six/AmplitudeTable.h" @@ -49,54 +41,6 @@ using namespace six; using namespace six::sicd; - // This was in coda-oss, but I removed it. - // - // First of all, C++11's std::async() is now (in 2023) thought of as maybe a - // bit "half baked," and perhaps shouldn't be emulated. Then, C++17 added - // parallel algorithms which might be a better way of satisfying our immediate - // needs (below) ... although we're still at C++14. -template -static inline OutputIt transform_async(const InputIt first1, const InputIt last1, OutputIt d_first, TFunc f, - typename std::iterator_traits::difference_type cutoff) -{ - // https://en.cppreference.com/w/cpp/thread/async - const auto len = std::distance(first1, last1); - if (len < cutoff) - { - return std::transform(first1, last1, d_first, f); - } - - constexpr auto policy = std::launch::async; - - const auto mid1 = first1 + len / 2; - const auto d_mid = d_first + len / 2; - auto handle = std::async(policy, transform_async, mid1, last1, d_mid, f, cutoff); - transform_async(first1, mid1, d_first, f, cutoff); - return handle.get(); -} -template -static inline void transform(std::span inputs, std::span results, TFunc f) -{ -#if SIX_six_sicd_ImageData_has_execution - std::ignore = std::transform(std::execution::par, inputs.begin(), inputs.end(), results.begin(), f); -#else - constexpr ptrdiff_t cutoff_ = 0; // too slow w/o multi-threading - //if (cutoff_ < 0) - //{ - // std::ignore = std::transform(inputs.begin(), inputs.end(), results.begin(), f); - //} - //else - { - // The value of "default_cutoff" was determined by testing; there is nothing special about it, feel free to change it. - constexpr auto dimension = 128 * 8; - constexpr auto default_cutoff = dimension * dimension; - const auto cutoff = cutoff_ == 0 ? default_cutoff : cutoff_; - - std::ignore = transform_async(inputs.begin(), inputs.end(), results.begin(), f, cutoff); - } -#endif // CODA_OSS_cpp17 -} - bool ImageData::operator==(const ImageData& rhs) const { return (pixelType == rhs.pixelType && @@ -256,7 +200,7 @@ void ImageData::toComplex(six::Amp8iPhs8iLookup_t values, std::span ImageData::toComplex(std::span inputs) const { diff --git a/six/modules/c++/six/include/six/AmplitudeTable.h b/six/modules/c++/six/include/six/AmplitudeTable.h index 966a160a9..4f44bc76b 100644 --- a/six/modules/c++/six/include/six/AmplitudeTable.h +++ b/six/modules/c++/six/include/six/AmplitudeTable.h @@ -180,8 +180,12 @@ struct SIX_SIX_API AMP8I_PHS8I_t final #else // __has_include is part of C++17 #if __has_include("../../../six.sicd/include/six/sicd/vectorclass/version2/vectorclass.h") || \ - __has_include("six.sicd/include/six/sicd/vectorclass/version2/vectorclass.h") - #define SIX_sicd_has_VCL 1 + __has_include("six/sicd/vectorclass/version2/vectorclass.h") + #if _MSC_VER + #define SIX_sicd_has_VCL !CODA_OSS_cpp20 // TODO: MSVC works with C++17, but not C++20 ... ? + #else + #define SIX_sicd_has_VCL 1 + #endif // _MSC_VER #else #define SIX_sicd_has_VCL 0 #endif // __has_include @@ -189,8 +193,8 @@ struct SIX_SIX_API AMP8I_PHS8I_t final #endif #ifndef SIX_sicd_has_simd - // Do we have the `std::experimental::simd? https://en.cppreference.com/w/cpp/experimental/simd - #if (__GNUC__ >= 999) && CODA_OSS_cpp20 // TODO: 11 instead of 999 + // Do we have `std::experimental::simd? https://en.cppreference.com/w/cpp/experimental/simd + #if (__GNUC__ >= 11) && CODA_OSS_cpp20 // https://github.com/VcDevel/std-simd "... shipping with GCC since version 11." #define SIX_sicd_has_simd 1 #else @@ -199,7 +203,7 @@ struct SIX_SIX_API AMP8I_PHS8I_t final #endif #ifndef SIX_sicd_has_ximd - // This is a "hacked up" version of std::experimental::simd using std::array. + // This is a "hacked up" version of std::experimental::simd using std::valarray. // It's primarily for development and testing: VCL needs C++17 and // std::experimental::simd is G++11/C++20. #define SIX_sicd_has_ximd CODA_OSS_DEBUG @@ -278,29 +282,29 @@ class ComplexToAMP8IPHS8I final Impl(Impl&&) = delete; // implicitly deleted because of =delete for copy Impl& operator=(Impl&&) = delete; // implicitly deleted because of =delete for copy - void nearest_neighbors_seq(std::span inputs, std::span results) const; - void nearest_neighbors_par(std::span inputs, std::span results) const; - + void nearest_neighbors_seq(std::span inputs, std::span results) const; + void nearest_neighbors_par(std::span inputs, std::span results) const; #if SIX_sicd_ComplexToAMP8IPHS8I_unseq template - void nearest_neighbors_unseq(std::span inputs, std::span results) const; - void nearest_neighbors_par_unseq(std::span inputs, std::span results) const; + void nearest_neighbors_unseq(std::span inputs, std::span results) const; + void nearest_neighbors_par_unseq(std::span inputs, std::span results) const; template - void nearest_neighbors_unseq_T(std::span, std::span) const; + void nearest_neighbors_unseq_T(std::span, std::span) const; + #endif //! The sorted set of possible magnitudes order from small to large. std::array uncached_magnitudes; std::span magnitudes; - uint8_t find_nearest(zfloat phase_direction, zfloat v) const; + uint8_t find_nearest(six::zfloat phase_direction, six::zfloat v) const; //! The difference in phase angle between two UINT phase values. float phase_delta; - uint8_t getPhase(zfloat) const; + uint8_t getPhase(six::zfloat) const; //! Unit vector rays that represent each direction that phase can point. - std::array phase_directions; // interleaved, std::complex + std::array phase_directions; // interleaved, std::complex #ifdef SIX_sicd_has_VCL std::array phase_directions_real; std::array phase_directions_imag;