diff --git a/externals/coda-oss/modules/c++/coda-oss.vcxproj b/externals/coda-oss/modules/c++/coda-oss.vcxproj index 8b35965df..6aacf1781 100644 --- a/externals/coda-oss/modules/c++/coda-oss.vcxproj +++ b/externals/coda-oss/modules/c++/coda-oss.vcxproj @@ -285,7 +285,6 @@ - diff --git a/externals/coda-oss/modules/c++/coda-oss.vcxproj.filters b/externals/coda-oss/modules/c++/coda-oss.vcxproj.filters index accaf020b..d755f9137 100644 --- a/externals/coda-oss/modules/c++/coda-oss.vcxproj.filters +++ b/externals/coda-oss/modules/c++/coda-oss.vcxproj.filters @@ -361,13 +361,13 @@ math.poly - polygon + str\polygon - polygon + str\polygon - polygon + str\polygon config @@ -957,9 +957,6 @@ coda_oss - - sys - @@ -1120,7 +1117,7 @@ math.linear - polygon + str\polygon cli @@ -1445,9 +1442,6 @@ {15f9b62f-d17e-4d84-bc34-de6fd5fbcb33} - - {f2544ccb-0933-44c7-af39-cd986982af3d} - {9050a469-23a5-4da0-92b1-a07a8e52e9fc} @@ -1502,6 +1496,9 @@ {59f3d9a1-06d3-4779-aef2-cc55223c3017} + + {f2544ccb-0933-44c7-af39-cd986982af3d} + diff --git a/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h b/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h index 7bed73917..83041d8c2 100644 --- a/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h +++ b/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h @@ -84,8 +84,13 @@ inline OutputIt Transform_par(InputIt first1, InputIt last1, OutputIt d_first, U Transform_par_settings settings = Transform_par_settings{}) { #if CODA_OSS_mt_Algorithm_has_execution - CODA_OSS_mark_symbol_unused(settings); - return std::transform(std::execution::par, first1, last1, d_first, unary_op); + #if __GNUC__ + // std::execution::par is dramatically slower w/GCC than using our own ... ??? + return Transform_par_(first1, last1, d_first, unary_op, settings); // TODO: std::execution::par + #else + CODA_OSS_mark_symbol_unused(settings); + return std::transform(std::execution::par, first1, last1, d_first, unary_op); + #endif // __GNUC__ #else return Transform_par_(first1, last1, d_first, unary_op, settings); #endif // CODA_OSS_mt_Algorithm_has_execution diff --git a/externals/coda-oss/modules/c++/sys/unittests/test_ximd.cpp b/externals/coda-oss/modules/c++/sys/unittests/test_ximd.cpp deleted file mode 100644 index d419edca7..000000000 --- a/externals/coda-oss/modules/c++/sys/unittests/test_ximd.cpp +++ /dev/null @@ -1,573 +0,0 @@ -/* ========================================================================= - * This file is part of sys-c++ - * ========================================================================= - * - * (C) Copyright 2004 - 2014, MDA Information Systems LLC - * © Copyright 2024, Maxar Technologies, Inc. - * - * sys-c++ is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; If not, - * see . - * - */ - -#include -#include -#include -#include - -// These are unittests, always enable unless explicitly disabled -#ifndef CODA_OSS_Ximd_ENABLED -#define CODA_OSS_Ximd_ENABLED 1 -#endif -#include - -#include -#include - -#include "TestCase.h" - -using zfloat = std::complex; - -template -using simd = sys::ximd::Ximd; -using intv = simd; -using floatv = simd; -using intv_mask = sys::ximd::ximd_mask; -using floatv_mask = sys::ximd::ximd_mask; - -// Manage a SIMD complex as an array of two SIMDs -using zfloatv = std::array; -static inline auto& real(zfloatv& z) noexcept -{ - return z[0]; -} -static inline const auto& real(const zfloatv& z) noexcept -{ - return z[0]; -} -static inline auto& imag(zfloatv& z) noexcept -{ - return z[1]; -} -static inline const auto& imag(const zfloatv& z) noexcept -{ - return z[1]; -} -static inline size_t size(const zfloatv& z) noexcept -{ - auto retval = real(z).size(); - assert(retval == imag(z).size()); - return retval; -} - -static inline auto arg(const zfloatv& z) -{ - // https://en.cppreference.com/w/cpp/numeric/complex/arg - // > `std::atan2(std::imag(z), std::real(z))` - return atan2(imag(z), real(z)); // arg() -} - -template -static inline auto make_zfloatv(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag) -{ - zfloatv retval; - for (size_t i = 0; i < size(retval); i++) - { - real(retval)[i] = generate_real(i); - imag(retval)[i] = generate_imag(i); - } - return retval; -} - -template -static inline auto copy_from(std::span p) -{ - simd retval; - assert(p.size() == retval.size()); - retval.copy_from(p.data()); - return retval; -} -static inline auto copy_from(std::span p) -{ - const auto generate_real = [&](size_t i) { return p[i].real(); }; - const auto generate_imag = [&](size_t i) { return p[i].imag(); }; - return make_zfloatv(generate_real, generate_imag); -} - -template -static inline auto copy_to(const simd& v) -{ - std::vector::value_type> retval(v.size()); - v.copy_to(retval.data()); - return retval; -} -static inline auto copy_to(const zfloatv& v) -{ - std::vector retval; - for (size_t i = 0; i < size(v); i++) - { - retval.emplace_back(real(v)[i], imag(v)[i]); - } - return retval; -} - -TEST_CASE(testDefaultConstructor) -{ - // sanity check implementation and utility routines - TEST_ASSERT_EQ(floatv::size(), size(zfloatv{})); - - // intv v = 1; - intv v; - v = 1; - for (size_t i = 0; i < v.size(); i++) - { - TEST_ASSERT_EQ(1, v[i]); - } -} - -// Sample code from SIX; see **ComplexToAMP8IPHS8I.cpp**. -static auto GetPhase(std::complex v) -{ - // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around - // handles cases that are close to 2PI. - double phase = std::arg(v); - if (phase < 0.0) phase += coda_oss::numbers::pi * 2.0; // Wrap from [0, 2PI] - return phase; -} -static uint8_t getPhase(zfloat v, float phase_delta) -{ - // Phase is determined via arithmetic because it's equally spaced. - const auto phase = GetPhase(v); - return gsl::narrow_cast(std::round(phase / phase_delta)); -} - -static inline auto if_add(const floatv_mask& m, const floatv& lhs, typename floatv::value_type rhs) -{ - const auto generate_add = [&](size_t i) { - return m[i] ? lhs[i] + rhs : lhs[i]; - }; - return floatv::generate(generate_add); -} -static inline auto roundi(const floatv& v) // match vcl::roundi() -{ - const auto rounded = round(v); - const auto generate_roundi = [&](size_t i) - { return static_cast(rounded[i]); }; - return intv::generate(generate_roundi); -} -static auto getPhase(const zfloatv& v, float phase_delta) -{ - // Phase is determined via arithmetic because it's equally spaced. - // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around - // handles cases that are close to 2PI. - auto phase = arg(v); - //where(phase < 0.0f, phase) += std::numbers::pi_v *2.0f; // Wrap from [0, 2PI] - phase = if_add(phase < 0.0f, phase, std::numbers::pi_v * 2.0f); // Wrap from [0, 2PI] - return roundi(phase / phase_delta); -} -static inline const auto& cxValues() -{ - //static const std::vector retval{/*{0, 0},*/ {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}, {-1, -1}, {0, -1}, {1, -1}}; - static const std::vector retval{{0.0, 0.0}, {1.0, 1.0}, {10.0, -10.0}, {-100.0, 100.0}, {-1000.0, -1000.0}, // sample data from SIX - {-1.0, -1.0}, {1.0, -1.0}, {-1.0, 1.0} // "pad" to multiple of floatv::size() - }; - return retval; -} - -static auto expected_getPhase_values(const std::string& testName, float phase_delta) -{ - auto&& values = cxValues(); - TEST_ASSERT(values.size() % floatv::size() == 0); - std::vector expected; - for (auto&& v : values) - { - expected.push_back(getPhase(v, phase_delta)); - } - return expected; -} - -static auto toComplex_(double A, uint8_t phase) -{ - // The phase values should be read in (values 0 to 255) and converted to float by doing: - // P = (1 / 256) * input_value - const double P = (1.0 / 256.0) * phase; - - // To convert the amplitude and phase values to complex float (i.e. real and imaginary): - // S = A * cos(2 * pi * P) + j * A * sin(2 * pi * P) - const double angle = 2 * std::numbers::pi * P; - const auto sin_angle = sin(angle); - const auto cos_angle = cos(angle); - zfloat S(gsl::narrow_cast(A * cos_angle), gsl::narrow_cast(A * sin_angle)); - return S; -} -inline static auto toComplex(uint8_t amplitude, uint8_t phase) -{ - // A = input_amplitude(i.e. 0 to 255) - const double A = amplitude; - return toComplex_(A, phase); -} - -static auto phase_delta() -{ - static const auto p0 = GetPhase(toComplex(1, 0)); - static const auto p1 = GetPhase(toComplex(1, 1)); - assert(p0 == 0.0); - assert(p1 > p0); - static const auto retval = gsl::narrow_cast(p1 - p0); - return retval; -} - -static auto load(const std::vector& values) -{ - std::vector retval; - constexpr auto sz = floatv::size(); - auto const pValues = sys::make_span(values); - auto p = sys::make_span(pValues.data(), sz); - for (size_t i = 0; i < values.size() / sz; i++) - { - retval.push_back(copy_from(p)); - p = sys::make_span(p.data() + sz, p.size()); - } - return retval; -} - -TEST_CASE(testGetPhase) -{ - static const auto& expected = expected_getPhase_values(testName, phase_delta()); - - const auto valuesv = load(cxValues()); - std::vector actual; - for (auto&& zvaluev : valuesv) - { - const auto phase = getPhase(zvaluev, phase_delta()); - const auto phase_ = copy_to(phase); - for (auto&& ph : copy_to(phase)) - { - actual.push_back(gsl::narrow_cast(ph)); - } - } - - TEST_ASSERT_EQ(actual.size(), expected.size()); - for (size_t i = 0; i < actual.size(); i++) - { - TEST_ASSERT_EQ(actual[i], expected[i]); - } -} - -// Again, more sample code from SIX -static constexpr size_t AmplitudeTableSize = 256; -static auto getPhaseDirections_() -{ - //! Unit vector rays that represent each direction that phase can point. - std::array phase_directions; // interleaved, std::complex - - const auto p0 = GetPhase(toComplex(1, 0)); - for (size_t i = 0; i < AmplitudeTableSize; i++) - { - const float angle = static_cast(p0) + i * phase_delta(); - const auto y = sin(angle); - const auto x = cos(angle); - phase_directions[i] = {x, y}; - } - return phase_directions; -} -static inline const auto& getPhaseDirections() -{ - static const auto retval = getPhaseDirections_(); - return retval; -} - -template -static inline auto lookup(const intv& zindex, const std::array& phase_directions) -{ - const auto generate_real = [&](size_t i) - { - const auto i_ = zindex[i]; - return phase_directions[i_].real(); - }; - const auto generate_imag = [&](size_t i) - { - const auto i_ = zindex[i]; - return phase_directions[i_].imag(); - }; - return make_zfloatv(generate_real, generate_imag); -} - -TEST_CASE(testLookup) -{ - const auto& phase_directions = getPhaseDirections(); - - static const auto& expected_getPhase = expected_getPhase_values(testName, phase_delta()); - std::vector expected; - for (auto&& phase : expected_getPhase) - { - expected.push_back(phase_directions[phase]); - } - - const auto valuesv = load(cxValues()); - std::vector actual; - for (auto&& zvaluev : valuesv) - { - const auto phase = getPhase(zvaluev, phase_delta()); - const auto phase_direction = lookup(phase, phase_directions); - - for (auto&& pd : copy_to(phase_direction)) - { - actual.push_back(pd); - } - } - - TEST_ASSERT_EQ(actual.size(), expected.size()); - for (size_t i = 0; i < actual.size(); i++) - { - TEST_ASSERT_EQ(actual[i], expected[i]); - } -} - -// And ... more sample code from SIX -static auto iota_0_256_() -{ - static_assert(sizeof(size_t) > sizeof(uint8_t), "size_t can't hold UINT8_MAX!"); - - std::vector retval; - retval.reserve(UINT8_MAX + 1); - for (size_t i = 0; i <= UINT8_MAX; i++) // Be careful with indexing so that we don't wrap-around in the loop. - { - retval.push_back(gsl::narrow(i)); - } - assert(retval.size() == UINT8_MAX + 1); - return retval; -} -static inline std::vector iota_0_256() -{ - static const auto retval = iota_0_256_(); - return retval; -} - -static auto make_magnitudes_() -{ - std::vector result; - result.reserve(AmplitudeTableSize); - for (const auto amplitude : iota_0_256()) - { - // AmpPhase -> Complex - const auto phase = amplitude; - const auto complex = toComplex(amplitude, phase); - result.push_back(std::abs(complex)); - } - - // I don't know if we can guarantee that the amplitude table is non-decreasing. - // Check to verify property at runtime. - if (!std::is_sorted(result.begin(), result.end())) - { - throw std::runtime_error("magnitudes must be sorted"); - } - - std::array retval; - std::copy(result.begin(), result.end(), retval.begin()); - return retval; -} -static inline std::span magnitudes() -{ - //! The sorted set of possible magnitudes order from small to large. - static const auto cached_magnitudes = make_magnitudes_(); - return sys::make_span(cached_magnitudes); -} - -/*! - * Find the nearest element given an iterator range. - * @param value query value - * @return index of nearest value within the iterator range. - */ -static uint8_t nearest(std::span magnitudes, float value) -{ - const auto begin = magnitudes.begin(); - const auto end = magnitudes.end(); - - const auto it = std::lower_bound(begin, end, value); - if (it == begin) return 0; - - const auto prev_it = std::prev(it); - const auto nearest_it = it == end ? prev_it : - (value - *prev_it <= *it - value ? prev_it : it); - const auto distance = std::distance(begin, nearest_it); - assert(distance <= std::numeric_limits::max()); - return gsl::narrow(distance); -} -static uint8_t find_nearest(zfloat phase_direction, zfloat v) -{ - // We have to do a 1D nearest neighbor search for magnitude. - // But it's not the magnitude of the input complex value - it's the projection of - // the complex value onto the ray of candidate magnitudes at the selected phase. - // i.e. dot product. - const auto projection = (phase_direction.real() * v.real()) + (phase_direction.imag() * v.imag()); - //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ??? - return nearest(magnitudes(), projection); -} - -template -static inline auto select(const TTest& test, const TResult& t, const TResult& f) -{ - TResult retval; - for (size_t i = 0; i < test.size(); i++) - { - retval[i] = test[i] ? t[i] : f[i]; - } - return retval; -} - -static auto lookup(const intv& zindex, std::span magnitudes) -{ - const auto generate = [&](size_t i) { - const auto i_ = zindex[i]; - - // The index may be out of range. This is expected because `i` might be "don't care." - if ((i_ >= 0) && (i_ < std::ssize(magnitudes))) - { - return magnitudes[i_]; - } - return NAN; // propogate "don't care" - }; - return floatv::generate(generate); -} - -static inline auto lower_bound_(std::span magnitudes, const floatv& v) -{ - intv first; first = 0; - intv last; last = gsl::narrow(magnitudes.size()); - - auto count = last - first; - while (any_of(count > 0)) - { - auto it = first; - const auto step = count / 2; - it += step; - - auto next = it; ++next; // ... ++it; - auto advance = count; advance -= step + 1; // ... -= step + 1; - - const auto c = lookup(it, magnitudes); // magnituides[it] - - const auto test = c < v; // (c < v).__cvt(); // https://github.com/VcDevel/std-simd/issues/41 - - //where(test, it) = next; // ... ++it - //where(test, first) = it; // first = ... - //// count -= step + 1 <...OR...> count = step - //where(test, count) = advance; - //where(!test, count) = step; - it = select(test, next, it); // ... ++it - first = select(test, it, first); // first = ... - count = select(test, advance, step); // count -= step + 1 <...OR...> count = step - } - return first; -} -static inline auto lower_bound(std::span magnitudes, const floatv& value) -{ - return lower_bound_(magnitudes, value); -} - -static auto nearest(std::span magnitudes, const floatv& value) -{ - assert(magnitudes.size() == AmplitudeTableSize); - - /* - const auto it = std::lower_bound(begin, end, value); - if (it == begin) return 0; - - const auto prev_it = std::prev(it); - const auto nearest_it = it == end ? prev_it : - (value - *prev_it <= *it - value ? prev_it : it); - const auto distance = std::distance(begin, nearest_it); - assert(distance <= std::numeric_limits::max()); - return gsl::narrow(distance); - */ - const auto it = lower_bound(magnitudes, value); - const auto prev_it = it - 1; // const auto prev_it = std::prev(it); - - const auto v0 = value - lookup(prev_it, magnitudes); // value - *prev_it - const auto v1 = lookup(it, magnitudes) - value; // *it - value - //const auto nearest_it = select(v0 <= v1, prev_it, it); // (value - *prev_it <= *it - value ? prev_it : it); - - intv end; end = gsl::narrow(magnitudes.size()); - //const auto end_test = select(it == end, prev_it, nearest_it); // it == end ? prev_it : ... - intv zero; zero = 0; - auto retval = select(it == 0, zero, // if (it == begin) return 0; - select(it == end, prev_it, // it == end ? prev_it : ... - select(v0 <= v1, prev_it, it) // (value - *prev_it <= *it - value ? prev_it : it); - )); - return retval; -} - -static auto find_nearest(std::span magnitudes, const floatv& phase_direction_real, const floatv& phase_direction_imag, - const zfloatv& v) -{ - // We have to do a 1D nearest neighbor search for magnitude. - // But it's not the magnitude of the input complex value - it's the projection of - // the complex value onto the ray of candidate magnitudes at the selected phase. - // i.e. dot product. - const auto projection = (phase_direction_real * real(v)) + (phase_direction_imag * imag(v)); - //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ??? - return nearest(magnitudes, projection); -} -static inline auto find_nearest(std::span magnitudes, const zfloatv& phase_direction, const zfloatv& v) -{ - return find_nearest(magnitudes, real(phase_direction), imag(phase_direction), v); -} - -TEST_CASE(testFindNearest) -{ - const auto& values = cxValues(); - const auto& phase_directions = getPhaseDirections(); - - static const auto& expected_getPhase = expected_getPhase_values(testName, phase_delta()); - std::vector expected_phase_directions; - for (auto&& phase : expected_getPhase) - { - expected_phase_directions.push_back(phase_directions[phase]); - } - std::vector expected; - for (size_t i = 0; i < values.size(); i++) - { - auto a = find_nearest(expected_phase_directions[i], values[i]); - expected.push_back(a); - } - - const auto valuesv = load(cxValues()); - std::vector actual; - for (auto&& v : valuesv) - { - const auto phase = getPhase(v, phase_delta()); - const auto phase_direction = lookup(phase, phase_directions); - const auto amplitude = find_nearest(magnitudes(), phase_direction, v); - - for (auto&& a : copy_to(amplitude)) - { - actual.push_back(static_cast(a)); - } - } - - TEST_ASSERT_EQ(actual.size(), expected.size()); - for (size_t i = 0; i < actual.size(); i++) - { - TEST_ASSERT_EQ(actual[i], expected[i]); - } -} - -TEST_MAIN( - TEST_CHECK(testDefaultConstructor); - - TEST_CHECK(testGetPhase); - TEST_CHECK(testLookup); - TEST_CHECK(testFindNearest); -) diff --git a/six/modules/c++/six.sicd/six.sicd.vcxproj b/six/modules/c++/six.sicd/six.sicd.vcxproj index 17b2d7d91..30aab486c 100644 --- a/six/modules/c++/six.sicd/six.sicd.vcxproj +++ b/six/modules/c++/six.sicd/six.sicd.vcxproj @@ -144,6 +144,7 @@ + diff --git a/six/modules/c++/six.sicd/six.sicd.vcxproj.filters b/six/modules/c++/six.sicd/six.sicd.vcxproj.filters index 6cc1c134f..54c151be9 100644 --- a/six/modules/c++/six.sicd/six.sicd.vcxproj.filters +++ b/six/modules/c++/six.sicd/six.sicd.vcxproj.filters @@ -138,6 +138,9 @@ Header Files + + Source Files + diff --git a/six/modules/c++/six.sicd/source/NearestNeighbors.cpp b/six/modules/c++/six.sicd/source/NearestNeighbors.cpp index 71f9fe62b..46fe788be 100644 --- a/six/modules/c++/six.sicd/source/NearestNeighbors.cpp +++ b/six/modules/c++/six.sicd/source/NearestNeighbors.cpp @@ -106,21 +106,11 @@ void six::sicd::NearestNeighbors::nearest_neighbors_par(std::span void six::sicd::NearestNeighbors::nearest_neighbors(std::span inputs, std::span results) const { - // TODO: there could be more complicated logic here to decide between - // _seq, _par, _unseq, and _par_unseq #if SIX_sicd_ComplexToAMP8IPHS8I_unseq - - // None of our SIMD implementations are dramatically slower when - // running unittests; that's mostly because there's lots of IO. So that - // the more interesting code-path is tested, use UNSEQ in debugging. - #if CODA_OSS_DEBUG - return nearest_neighbors_unseq(inputs, results); // TODO: - #else - return nearest_neighbors_par(inputs, results); - #endif + return nearest_neighbors_par_unseq(inputs, results); // there might be additional logic here #else - return nearest_neighbors_par(inputs, results); + return nearest_neighbors_par(inputs, results); // No SIMD support, use parallel #endif } @@ -152,7 +142,7 @@ void six::sicd::NearestNeighbors::nearest_neighbors(execution_policy policy, std #if CODA_OSS_DEBUG throw std::logic_error("Unhandled execution_policy value."); #else - return nearest_neighbors(inputs, results); // no policy specified, "default policy" + return nearest_neighbors_seq(inputs, results); // "... fall back to sequential execution." #endif } diff --git a/six/modules/c++/six.sicd/source/NearestNeighbors_unseq.cpp b/six/modules/c++/six.sicd/source/NearestNeighbors_unseq.cpp index 0de69a6fc..142f953f2 100644 --- a/six/modules/c++/six.sicd/source/NearestNeighbors_unseq.cpp +++ b/six/modules/c++/six.sicd/source/NearestNeighbors_unseq.cpp @@ -54,23 +54,21 @@ using AMP8I_PHS8I = six::AMP8I_PHS8I_t; #pragma warning(disable: 4723) // potential divide by 0 #endif #include "six/sicd/vectorclass/version2/vectorclass.h" +#include "six/sicd/vectorclass/version2/vector.h" #include "six/sicd/vectorclass/version2/vectormath_trig.h" #include "six/sicd/vectorclass/complex/complexvec1.h" #if _MSC_VER #pragma warning(pop) #endif -using vcl_intv = vcl::Vec8i; -using vcl_floatv = vcl::Vec8f; -constexpr auto vcl_elements_per_iteration = vcl_floatv::size(); +constexpr auto vcl_elements_per_iteration = 8; +using vcl_intv = vcl::Vec; +using vcl_floatv = vcl::Vec; -inline int ssize(const vcl_intv& z) noexcept +template +inline ptrdiff_t ssize(const vcl::Vec& v) noexcept { - return z.size(); -} -inline int ssize(const vcl_floatv& z) noexcept -{ - return z.size(); + return v.size(); } using vcl_zfloatv = vcl::Complex8f; @@ -91,13 +89,6 @@ inline int ssize(const vcl_zfloatv& z) noexcept return z.size(); } -inline auto arg(const vcl_zfloatv& z) -{ - // https://en.cppreference.com/w/cpp/numeric/complex/arg - // > `std::atan2(std::imag(z), std::real(z))` - return atan2(z.imag(), z.real()); // arg() -} - template inline auto if_add(const T& f, const vcl_floatv& a, float b) { @@ -135,207 +126,33 @@ inline auto lookup(const vcl_intv& zindex, std::span magnitudes) #endif // SIX_sicd_has_VCL -#if SIX_sicd_has_valarray -#include - -using valarray_intv = std::valarray; -using valarray_intv_mask = std::valarray; - -using valarray_floatv = std::valarray; -using valarray_floatv_mask = std::valarray; - -constexpr auto valarray_elements_per_iteration = 4; // valarray is not fixed size - -template -static inline auto ssize(const std::valarray& v) noexcept -{ - return gsl::narrow(v.size()); -} - -template -static auto valarray_generate(G&& generator) noexcept -{ - TValArray retval(valarray_elements_per_iteration); - for (size_t i = 0; i < retval.size(); i++) - { - retval[i] = generator(i); - } - return retval; -} -template -static inline auto generate(TGenerator&& generator, valarray_intv) -{ - return valarray_generate(generator); -} -template -static inline auto generate(TGenerator&& generator, valarray_floatv) -{ - return valarray_generate(generator); -} - -// Manage a SIMD complex as an array of two SIMDs -using valarray_zfloatv = std::array; -inline auto& real(valarray_zfloatv& z) noexcept -{ - return z[0]; -} -inline const auto& real(const valarray_zfloatv& z) noexcept -{ - return z[0]; -} -inline auto& imag(valarray_zfloatv& z) noexcept -{ - return z[1]; -} -inline const auto& imag(const valarray_zfloatv& z) noexcept -{ - return z[1]; -} -inline size_t size(const valarray_zfloatv& z) noexcept -{ - auto retval = real(z).size(); - assert(retval == imag(z).size()); - return retval; -} -inline auto ssize(const valarray_zfloatv& z) noexcept -{ - return gsl::narrow(size(z)); -} - -inline auto arg(const valarray_zfloatv& z) -{ - // https://en.cppreference.com/w/cpp/numeric/complex/arg - // > `std::atan2(std::imag(z), std::real(z))` - return std::atan2(imag(z), real(z)); // arg() -} - -template -static inline auto generate(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag, valarray_zfloatv) -{ - valarray_zfloatv retval; - real(retval) = generate(generate_real, valarray_floatv{}); - imag(retval) = generate(generate_imag, valarray_floatv{}); - return retval; -} - -static inline auto copy_from(std::span p, valarray_floatv& result) -{ - assert(p.size() == result.size()); - result = valarray_floatv(p.data(), p.size()); -} -static inline auto copy_from(std::span p, valarray_zfloatv& result) -{ - const auto generate_real = [&](size_t i) { return p[i].real(); }; - const auto generate_imag = [&](size_t i) { return p[i].imag(); }; - result = generate(generate_real, generate_imag, valarray_zfloatv{}); -} - -template -static auto valarray_select_(const TTest& test, const TResult& t, const TResult& f) -{ - TResult retval; - for (size_t i = 0; i < test.size(); i++) - { - retval[i] = test[i] ? t[i] : f[i]; - } - return retval; -} -static inline auto select(const valarray_floatv_mask& test, const valarray_floatv& t, const valarray_floatv& f) -{ - return valarray_select_(test, t, f); -} -static inline auto select(const valarray_intv_mask& test, const valarray_intv& t, const valarray_intv& f) -{ - return valarray_select_(test, t, f); -} - -static auto if_add(const valarray_floatv_mask& m, const valarray_floatv& v, typename valarray_floatv::value_type c) -{ - const auto generate_add = [&](size_t i) { - return m[i] ? v[i] + c : v[i]; - }; - return generate(generate_add, valarray_floatv{}); -} - -#endif // SIX_sicd_has_valarray - #if SIX_sicd_has_ximd -#ifndef CODA_OSS_Ximd_ENABLE -#define CODA_OSS_Ximd_ENABLE 1 +#ifndef SIX_Ximd_ENABLED +#define SIX_Ximd_ENABLED 1 #endif -#include +#include "unseq_Ximd.h" template -using ximd = sys::ximd::Ximd; +using ximd = six::ximd::Ximd; using ximd_intv = ximd; -using ximd_intv_mask = sys::ximd::ximd_mask; +using ximd_intv_mask = six::ximd::ximd_mask; using ximd_floatv = ximd; -using ximd_floatv_mask = sys::ximd::ximd_mask; +using ximd_floatv_mask = six::ximd::ximd_mask; constexpr auto ximd_elements_per_iteration = ximd_floatv::size(); template -static inline auto ssize(const ximd& v) noexcept +static inline ptrdiff_t ssize(const ximd& v) noexcept { - return gsl::narrow(v.size()); + return gsl::narrow(v.size()); } -template -static inline auto generate(TGenerator&& generator, ximd_intv) +template +static inline auto generate(TGenerator&& generator, ximd) { - return ximd_intv::generate(generator); -} -template -static inline auto generate(TGenerator&& generator, ximd_floatv) -{ - return ximd_floatv::generate(generator); -} - -// Manage a SIMD complex as an array of two SIMDs -using ximd_zfloatv = std::array; -inline auto& real(ximd_zfloatv& z) noexcept -{ - return z[0]; -} -inline const auto& real(const ximd_zfloatv& z) noexcept -{ - return z[0]; -} -inline auto& imag(ximd_zfloatv& z) noexcept -{ - return z[1]; -} -inline const auto& imag(const ximd_zfloatv& z) noexcept -{ - return z[1]; -} -inline size_t size(const ximd_zfloatv& z) noexcept -{ - auto retval = real(z).size(); - assert(retval == imag(z).size()); - return retval; -} -inline auto ssize(const ximd_zfloatv& z) noexcept -{ - return gsl::narrow(size(z)); -} - -inline auto arg(const ximd_zfloatv& z) -{ - // https://en.cppreference.com/w/cpp/numeric/complex/arg - // > `std::atan2(std::imag(z), std::real(z))` - return atan2(imag(z), real(z)); // arg() -} - -template -static inline auto generate(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag, ximd_zfloatv) -{ - ximd_zfloatv retval; - real(retval) = generate(generate_real, ximd_floatv{}); - imag(retval) = generate(generate_imag, ximd_floatv{}); - return retval; + return ximd(generator, nullptr); } static inline void copy_from(std::span p, ximd_floatv& result) @@ -343,12 +160,6 @@ static inline void copy_from(std::span p, ximd_floatv& result) assert(p.size() == result.size()); result.copy_from(p.data()); } -static inline void copy_from(std::span p, ximd_zfloatv& result) -{ - const auto generate_real = [&](size_t i) { return p[i].real(); }; - const auto generate_imag = [&](size_t i) { return p[i].imag(); }; - result = generate(generate_real, generate_imag, ximd_zfloatv{}); -} template static auto ximd_select_(const TTest& test, const TResult& t, const TResult& f) @@ -400,63 +211,10 @@ static inline auto ssize(const simd& v) noexcept return gsl::narrow(v.size()); } -template -static inline auto generate(TGenerator&& generator, simd_intv) +template +static inline auto generate(TGenerator&& generator, simd) { - return simd_intv(generator); -} -template -static inline auto generate(TGenerator&& generator, simd_floatv) -{ - return simd_floatv(generator); -} - -// Manage a SIMD complex as an array of two SIMDs -using simd_zfloatv = std::array; -inline auto& real(simd_zfloatv& z) noexcept -{ - return z[0]; -} -inline const auto& real(const simd_zfloatv& z) noexcept -{ - return z[0]; -} -inline auto& imag(simd_zfloatv& z) noexcept -{ - return z[1]; -} -inline const auto& imag(const simd_zfloatv& z) noexcept -{ - return z[1]; -} -inline size_t size(const simd_zfloatv& z) noexcept -{ - auto retval = real(z).size(); - assert(retval == imag(z).size()); - return retval; -} -inline auto ssize(const simd_zfloatv& z) noexcept -{ - return gsl::narrow(size(z)); -} - -inline auto arg(const simd_zfloatv& z) -{ - // https://en.cppreference.com/w/cpp/numeric/complex/arg - // > `std::atan2(std::imag(z), std::real(z))` - return atan2(imag(z), real(z)); // arg() -} - -template -static inline auto generate(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag, simd_zfloatv) -{ - simd_zfloatv retval; - for (size_t i = 0; i < size(retval); i++) - { - real(retval)[i] = generate_real(i); - imag(retval)[i] = generate_imag(i); - } - return retval; + return simd(generator); } static inline auto copy_from(std::span p, simd_floatv& result) @@ -464,12 +222,6 @@ static inline auto copy_from(std::span p, simd_floatv& result) assert(p.size() == result.size()); result.copy_from(p.data(), stdx::element_aligned); } -static inline auto copy_from(std::span p, simd_zfloatv& result) -{ - const auto generate_real = [&](size_t i) { return p[i].real(); }; - const auto generate_imag = [&](size_t i) { return p[i].imag(); }; - result = generate(generate_real, generate_imag, simd_zfloatv{}); -} template static auto simd_select_(const TTest& test, const TResult& t, const TResult& f) @@ -499,9 +251,67 @@ static inline auto select(const TMask& test_, const simd_intv& t, const simd_i #endif // SIX_sicd_has_simd #if SIX_sicd_has_ximd || SIX_sicd_has_simd +// Manage a SIMD complex as an array of two SIMDs +template +using zfloatv = std::array; + +template +inline auto& real(zfloatv& z) noexcept +{ + return z[0]; +} +template +inline const auto& real(const zfloatv& z) noexcept +{ + return z[0]; +} +template +inline auto& imag(zfloatv& z) noexcept +{ + return z[1]; +} +template +inline const auto& imag(const zfloatv& z) noexcept +{ + return z[1]; +} +template +inline size_t size(const zfloatv& z) noexcept +{ + auto retval = real(z).size(); + assert(retval == imag(z).size()); + return retval; +} +template +inline ptrdiff_t ssize(const zfloatv& z) noexcept +{ + return gsl::narrow(size(z)); +} +#if SIX_sicd_has_ximd +using ximd_zfloatv = zfloatv; +#endif +#if SIX_sicd_has_simd +using simd_zfloatv = zfloatv; +#endif + +template +static inline void zgenerate(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag, zfloatv& result) +{ + real(result) = generate(generate_real, FloatV{}); + imag(result) = generate(generate_imag, FloatV{}); +} + +template +static inline void copy_from(std::span mem, zfloatv& result) +{ + const auto generate_real = [&](size_t i) { return mem[i].real(); }; + const auto generate_imag = [&](size_t i) { return mem[i].imag(); }; + zgenerate(generate_real, generate_imag, result); +} // There are slicker ways of doing this, but `std::enable_if` is complex. This -// is simple and easy to understand. Simplify with concedpts in C++20? +// is simple and easy to understand. Simplify with concepts in C++20? template static auto roundi_(const FloatV& v) // match vcl::roundi() @@ -560,15 +370,10 @@ static auto lookup_(const IntV& zindex, const std::array& phase_direc const auto i_ = zindex[i]; return phase_directions[i_].imag(); }; - return generate(generate_real, generate_imag, ZFloatV{}); -} -#if SIX_sicd_has_valarray -template -static inline auto lookup(const valarray_intv& zindex, const std::array& phase_directions) -{ - return lookup_(zindex, phase_directions); + ZFloatV retval; + zgenerate(generate_real, generate_imag, retval); + return retval; } -#endif #if SIX_sicd_has_ximd template static inline auto lookup(const ximd_intv& zindex, const std::array& phase_directions) @@ -615,6 +420,14 @@ static inline auto lookup(const simd_intv& zindex, std::span magnit /******************************************************************************************************/ +template +inline auto arg(const ZFloatV& z) +{ + // https://en.cppreference.com/w/cpp/numeric/complex/arg + // > `std::atan2(std::imag(z), std::real(z))` + return atan2(imag(z), real(z)); +} + template static auto getPhase(const ZFloatV& v, float phase_delta) { @@ -627,34 +440,8 @@ static auto getPhase(const ZFloatV& v, float phase_delta) } // https://en.cppreference.com/w/cpp/algorithm/lower_bound -/* -template -ForwardIt lower_bound(ForwardIt first, ForwardIt last, const T& value) -{ - ForwardIt it; - typename std::iterator_traits::difference_type count, step; - count = std::distance(first, last); - - while (count > 0) - { - it = first; - step = count / 2; - std::advance(it, step); - - if (*it < value) - { - first = ++it; - count -= step + 1; - } - else - count = step; - } - - return first; -} -*/ template -inline auto lower_bound_(std::span magnitudes, const FloatV& v) +inline auto lower_bound(std::span magnitudes, const FloatV& v) { IntV first = 0; const IntV last = gsl::narrow(magnitudes.size()); @@ -673,41 +460,29 @@ inline auto lower_bound_(std::span magnitudes, const FloatV& v) const auto test = c < v; it = select(test, next, it); // ... ++it first = select(test, it, first); // first = ... - count = select(test, advance, step); // count -= step + 1 <...OR...> count = step + count = select(test, advance, step); // `count -= step + 1` —— `count = step` } return first; } -template -inline auto lower_bound(std::span magnitudes, const FloatV& value) -{ - return lower_bound_(magnitudes, value); -} +/*! + * Find the nearest element given an iterator range. + * @param value query value + * @return index of nearest value within the iterator range. + */ template static auto nearest(std::span magnitudes, const FloatV& value) { + // see `::nearest()` in **NearestNeighbors.cpp** assert(magnitudes.size() == six::AmplitudeTableSize); - /* - const auto it = std::lower_bound(begin, end, value); - if (it == begin) return 0; - - const auto prev_it = std::prev(it); - const auto nearest_it = it == end ? prev_it : - (value - *prev_it <= *it - value ? prev_it : it); - const auto distance = std::distance(begin, nearest_it); - assert(distance <= std::numeric_limits::max()); - return gsl::narrow(distance); - */ const auto it = ::lower_bound(magnitudes, value); const auto prev_it = it - 1; // const auto prev_it = std::prev(it); const auto v0 = value - lookup(prev_it, magnitudes); // value - *prev_it const auto v1 = lookup(it, magnitudes) - value; // *it - value - //const auto nearest_it = select(v0 <= v1, prev_it, it); // (value - *prev_it <= *it - value ? prev_it : it); - + const IntV end = gsl::narrow(magnitudes.size()); - //const auto end_test = select(it == end, prev_it, nearest_it); // it == end ? prev_it : ... const IntV zero = 0; auto retval = select(it == 0, zero, // if (it == begin) return 0; select(it == end, prev_it, // it == end ? prev_it : ... @@ -739,39 +514,15 @@ static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8 return ::find_nearest(converter.magnitudes(), phase_direction_real, phase_direction_imag, v); } #endif - -#if SIX_sicd_has_valarray -static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter, - const valarray_intv& phase, const valarray_zfloatv& v) -{ - const auto phase_direction = lookup(phase, converter.get_phase_directions().value); - return ::find_nearest(converter.magnitudes(), real(phase_direction), imag(phase_direction), v); -} -#endif - #if SIX_sicd_has_ximd || SIX_sicd_has_simd -template -static auto lookup_and_find_nearest_(const six::sicd::details::ComplexToAMP8IPHS8I& converter, - const IntV& phase, const ZFloatV& v) +template +static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter, + const IntV& phase, const zfloatv& v) { const auto& phase_directions = converter.get_phase_directions().value; const auto phase_direction = lookup(phase, phase_directions); return ::find_nearest(converter.magnitudes(), real(phase_direction), imag(phase_direction), v); } -#if SIX_sicd_has_ximd -static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter, - const ximd_intv& phase, const ximd_zfloatv& v) -{ - return lookup_and_find_nearest_(converter, phase, v); -} -#endif -#if SIX_sicd_has_simd -static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter, - const simd_intv& phase, const simd_zfloatv& v) -{ - return lookup_and_find_nearest_(converter, phase, v); -} -#endif #endif #if SIX_sicd_ComplexToAMP8IPHS8I_unseq @@ -977,9 +728,6 @@ static const std::string unseq_simd = "simd"; #if SIX_sicd_has_VCL static const std::string unseq_vcl = "vcl"; #endif -#if SIX_sicd_has_valarray -static const std::string unseq_valarray = "valarray"; -#endif #if SIX_sicd_has_ximd static const std::string unseq_ximd = "ximd"; #endif @@ -988,8 +736,6 @@ static std::string nearest_neighbors_unseq_ = unseq_simd; #elif SIX_sicd_has_VCL unseq_vcl; -#elif SIX_sicd_has_valarray -unseq_valarray; #elif SIX_sicd_has_ximd unseq_ximd; #else @@ -1024,31 +770,39 @@ void six::sicd::NearestNeighbors::nearest_neighbors_(execution_policy policy, return nearest_neighbors_T(policy, inputs, results); } #endif - #if SIX_sicd_has_valarray - if (unseq == unseq_valarray) - { - return nearest_neighbors_T(policy, inputs, results); - } - #endif #if SIX_sicd_has_ximd + // XIMD is a hack just for development, it shouldn't normally be enabled + // for a release build; only use it for debug. if (unseq == unseq_ximd) { + #if CODA_OSS_DEBUG + // The XIMD implementations are dramatically slower when running unittests; + // that's mostly because there's lots of IO. This is the "more interesting" code path. return nearest_neighbors_T(policy, inputs, results); + + #else + // Use the corresponding "regular" (not UNSEQ) policy, this is either "par" or "seq". + if (policy == execution_policy::par_unseq) + { + return nearest_neighbors_par(inputs, results); + } + if (policy == execution_policy::unseq) + { + return nearest_neighbors_seq(inputs, results); + } + // We shouldn't be here otherwise; throw the exception, below. + #endif // CODA_OSS_DEBUG } - #endif + #endif // SIX_sicd_has_ximd throw std::logic_error("Don't know how to implement nearest_neighbors_() for unseq=" + unseq); } void six::sicd::NearestNeighbors::nearest_neighbors_unseq(std::span inputs, std::span results) const { - // TODO: there could be more complicated logic here to determine which UNSEQ - // implementation to use. nearest_neighbors_(execution_policy::unseq, inputs, results); } void six::sicd::NearestNeighbors::nearest_neighbors_par_unseq(std::span inputs, std::span results) const { - // TODO: there could be more complicated logic here to determine which UNSEQ - // implementation to use. nearest_neighbors_(execution_policy::par_unseq, inputs, results); } #endif // SIX_sicd_ComplexToAMP8IPHS8I_unseq diff --git a/externals/coda-oss/modules/c++/sys/include/sys/Ximd.h b/six/modules/c++/six.sicd/source/unseq_Ximd.h similarity index 97% rename from externals/coda-oss/modules/c++/sys/include/sys/Ximd.h rename to six/modules/c++/six.sicd/source/unseq_Ximd.h index f9e0dcbf2..2e06a65de 100644 --- a/externals/coda-oss/modules/c++/sys/include/sys/Ximd.h +++ b/six/modules/c++/six.sicd/source/unseq_Ximd.h @@ -33,21 +33,22 @@ * */ -#include "Dbg.h" +#include "sys/Dbg.h" // This is intended for development/testing purposes, so enable by default only for debug. -#ifndef CODA_OSS_Ximd_ENABLED -#define CODA_OSS_Ximd_ENABLED CODA_OSS_DEBUG +#ifndef SIX_Ximd_ENABLED +#define SIX_Ximd_ENABLED CODA_OSS_DEBUG #endif -#if CODA_OSS_Ximd_ENABLED +#if SIX_Ximd_ENABLED #include #include #include #include +#include -namespace sys +namespace six { namespace ximd { @@ -95,7 +96,7 @@ struct Ximd final return retval; } template - explicit Ximd(G&& generator, nullptr_t) noexcept + explicit Ximd(G&& generator, std::nullptr_t) noexcept { *this = generate(generator); } diff --git a/six/modules/c++/six/include/six/AmplitudeTable.h b/six/modules/c++/six/include/six/AmplitudeTable.h index 4003958ae..3c67d6817 100644 --- a/six/modules/c++/six/include/six/AmplitudeTable.h +++ b/six/modules/c++/six/include/six/AmplitudeTable.h @@ -188,7 +188,7 @@ struct SIX_SIX_API AMP8I_PHS8I_t final #define SIX_sicd_has_VCL 1 #endif #else - #define SIX_sicd_has_VCL 0 + #define SIX_sicd_has_VCL 0 #endif // __has_include #endif // C++17 #endif @@ -204,21 +204,14 @@ struct SIX_SIX_API AMP8I_PHS8I_t final #endif #ifndef SIX_sicd_has_ximd - // This is a "hacked up" version of std::experimental::simd using std::valarray. + // This is a "hacked up" version of std::experimental::simd using std::array. // It's primarily for development and testing: VCL needs C++17 and // std::experimental::simd is G++11/C++20. #define SIX_sicd_has_ximd CODA_OSS_DEBUG #endif -#ifndef SIX_sicd_has_valarray - // This is the "old way" of doing things. Note that GCC implements - // much of this using expresson templates which means writing - // generic routines must be done carefully. Turn this on by default - // so that "interesting" code is executed. - #define SIX_sicd_has_valarray 0 // TODO: finish implementing -#endif #ifndef SIX_sicd_ComplexToAMP8IPHS8I_unseq - #if SIX_sicd_has_VCL || SIX_sicd_has_simd || SIX_sicd_has_valarray || SIX_sicd_has_ximd + #if SIX_sicd_has_VCL || SIX_sicd_has_simd || SIX_sicd_has_ximd #define SIX_sicd_ComplexToAMP8IPHS8I_unseq 1 #else #define SIX_sicd_ComplexToAMP8IPHS8I_unseq 0