diff --git a/externals/coda-oss/.github/workflows/build_unittest.yml b/externals/coda-oss/.github/workflows/build_unittest.yml index 4490ff194..109df279d 100644 --- a/externals/coda-oss/.github/workflows/build_unittest.yml +++ b/externals/coda-oss/.github/workflows/build_unittest.yml @@ -11,9 +11,9 @@ jobs: name: ${{ matrix.os }}-${{ matrix.python-version }}-CMake runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 # https://github.com/marketplace/actions/setup-python with: python-version: ${{ matrix.python-version }} - name: Install python dependencies @@ -63,7 +63,7 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout - name: configure run: | ls env: @@ -76,7 +76,7 @@ jobs: cmake --build . --config ${{ matrix.configuration }} -j cmake --build . --config ${{ matrix.configuration }} --target install - name: Add msbuild to PATH - uses: microsoft/setup-msbuild@v1.0.2 # https://github.com/marketplace/actions/setup-msbuild + uses: microsoft/setup-msbuild@v1.1 # https://github.com/marketplace/actions/setup-msbuild with: msbuild-architecture: x64 - name: msbuild @@ -98,9 +98,9 @@ jobs: name: ${{ matrix.os }}-${{ matrix.python-version }}-CMake runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 # https://github.com/marketplace/actions/setup-python with: python-version: ${{ matrix.python-version }} - name: Install python dependencies @@ -136,7 +136,7 @@ jobs: name: ${{ matrix.os }}-${{ matrix.configuration }}-${{ matrix.avx }}-CMake runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout - name: configure run: | mkdir out && cd out @@ -161,9 +161,9 @@ jobs: name: ${{ matrix.os }}-${{ matrix.python-version }}-waf${{ matrix.debugging }} runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 # https://github.com/marketplace/actions/setup-python with: python-version: ${{ matrix.python-version }} - name: configure_with_swig diff --git a/externals/coda-oss/UnitTest/UnitTest.vcxproj b/externals/coda-oss/UnitTest/UnitTest.vcxproj index d79dab6f9..19508e59c 100644 --- a/externals/coda-oss/UnitTest/UnitTest.vcxproj +++ b/externals/coda-oss/UnitTest/UnitTest.vcxproj @@ -325,6 +325,10 @@ true true + + true + true + true true diff --git a/externals/coda-oss/UnitTest/UnitTest.vcxproj.filters b/externals/coda-oss/UnitTest/UnitTest.vcxproj.filters index f838a0433..cd789e927 100644 --- a/externals/coda-oss/UnitTest/UnitTest.vcxproj.filters +++ b/externals/coda-oss/UnitTest/UnitTest.vcxproj.filters @@ -258,6 +258,9 @@ hdf5.lite + + sys + diff --git a/externals/coda-oss/UnitTest/sys.cpp b/externals/coda-oss/UnitTest/sys.cpp index 7ac63c6a1..7bc804707 100644 --- a/externals/coda-oss/UnitTest/sys.cpp +++ b/externals/coda-oss/UnitTest/sys.cpp @@ -1,6 +1,8 @@ #include "pch.h" #include "CppUnitTest.h" +#include + #include #include #include @@ -21,6 +23,7 @@ #include #include #include +#include namespace sys { @@ -56,4 +59,8 @@ TEST_CLASS(test_path){ public: #include "sys/unittests/test_path.cpp" }; +TEST_CLASS(test_ximd){ public: +#include "sys/unittests/test_ximd.cpp" +}; + } \ No newline at end of file diff --git a/externals/coda-oss/modules/c++/coda-oss.vcxproj b/externals/coda-oss/modules/c++/coda-oss.vcxproj index 6aacf1781..8b35965df 100644 --- a/externals/coda-oss/modules/c++/coda-oss.vcxproj +++ b/externals/coda-oss/modules/c++/coda-oss.vcxproj @@ -285,6 +285,7 @@ + diff --git a/externals/coda-oss/modules/c++/coda-oss.vcxproj.filters b/externals/coda-oss/modules/c++/coda-oss.vcxproj.filters index a94b8aafd..accaf020b 100644 --- a/externals/coda-oss/modules/c++/coda-oss.vcxproj.filters +++ b/externals/coda-oss/modules/c++/coda-oss.vcxproj.filters @@ -957,6 +957,9 @@ coda_oss + + sys + diff --git a/externals/coda-oss/modules/c++/hdf5.lite/include/hdf5/lite/highfive.h b/externals/coda-oss/modules/c++/hdf5.lite/include/hdf5/lite/highfive.h index d5d91987b..c93d5a9a4 100644 --- a/externals/coda-oss/modules/c++/hdf5.lite/include/hdf5/lite/highfive.h +++ b/externals/coda-oss/modules/c++/hdf5.lite/include/hdf5/lite/highfive.h @@ -33,6 +33,8 @@ #include #include +#include "types/RowCol.h" + #include "H5_.h" #include "SpanRC.h" diff --git a/externals/coda-oss/modules/c++/sys/include/sys/Ximd.h b/externals/coda-oss/modules/c++/sys/include/sys/Ximd.h new file mode 100644 index 000000000..f9e0dcbf2 --- /dev/null +++ b/externals/coda-oss/modules/c++/sys/include/sys/Ximd.h @@ -0,0 +1,276 @@ +/* ========================================================================= + * This file is part of sys-c++ + * ========================================================================= + * + * (C) Copyright 2004 - 2014, MDA Information Systems LLC + * © Copyright 2024, Maxar Technologies, Inc. + * + * sys-c++ is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; If not, + * see . + * + */ + +#pragma once + +/*! + * \brief A simple (poor man's) "implemtation" of std::experimental::simd + * for development/testing purposes. + * + * `std::experimental::simd` needs G++11 and C++20 and the "vectorclass" + * library needs C++17. "Rolling our own" using `std::array` lets developers + * try out some things without needing actual SIMD code. + * + */ + +#include "Dbg.h" + +// This is intended for development/testing purposes, so enable by default only for debug. +#ifndef CODA_OSS_Ximd_ENABLED +#define CODA_OSS_Ximd_ENABLED CODA_OSS_DEBUG +#endif + +#if CODA_OSS_Ximd_ENABLED + +#include +#include +#include +#include + +namespace sys +{ +namespace ximd +{ + +// Need a class for the "broadcast" constructor (not impelemented). +// Also helps to avoid overloading `std::array`. +template +struct Ximd final +{ + static_assert(std::is_arithmetic::value, "T must be arithmetic"); + // static_assert(std::is_same::type>::value, "no `const` for T"); + + using value_type = T; + using reference = T&; + + Ximd() = default; + // This is the same as the "generater" overload below ... avoid enable_if gunk for now. + template + Ximd(U v) noexcept + { + *this = generate([&](size_t) { return v; }); + } + template + Ximd(const Ximd& other) noexcept + { + *this = other; + } + template + Ximd(const U* mem) + { + copy_from(mem); + } + + // https://en.cppreference.com/w/cpp/experimental/simd/simd/simd + // this is the same as `U&& v` above; avoid enable_if gunk for now. + template + static auto generate(G&& generator) noexcept + { + Ximd retval; + // This is where all the "magic" (would) happen. + for (size_t i = 0; i < size(); i++) + { + retval[i] = generator(i); + } + return retval; + } + template + explicit Ximd(G&& generator, nullptr_t) noexcept + { + *this = generate(generator); + } + + reference operator[](size_t pos) noexcept + { + return value[pos]; + } + value_type operator[](size_t pos) const noexcept + { + return value[pos]; + } + + static constexpr size_t size() noexcept + { + return N; + } + + template + void copy_from(const U* mem) + { + *this = Ximd::generate([&](size_t i) { return mem[i]; }); + } + template + void copy_to(U* mem) const + { + for (size_t i = 0; i < size(); i++) + { + mem[i] = (*this)[i]; + } + } + + Ximd& operator++() noexcept + { + *this = Ximd::generate([&](size_t i) { return ++value[i]; }); + return *this; + } + Ximd operator++(int) noexcept + { + auto tmp = *this; + ++(*this); + return tmp; + } + +private: + std::array value{}; +}; + +// template +// using fixed_size_ximd = Ximd; +// +// template +// using native_ximd = Ximd; + +using ximd_mask = Ximd; + +template +inline auto operator+(const Ximd& lhs, const Ximd& rhs) noexcept +{ + return Ximd::generate([&](size_t i) { return lhs[i] + rhs[i]; }); +} +template +inline auto operator+(const Ximd& lhs, typename Ximd::value_type rhs) noexcept +{ + return lhs + Ximd(rhs); +} +template +inline auto operator-(const Ximd& lhs, const Ximd& rhs) noexcept +{ + return Ximd::generate([&](size_t i) { return lhs[i] - rhs[i]; }); +} +template +inline auto operator-(const Ximd& lhs, typename Ximd::value_type rhs) noexcept +{ + return lhs - Ximd(rhs); +} +template +inline auto operator*(const Ximd& lhs, const Ximd& rhs) noexcept +{ + return Ximd::generate([&](size_t i) { return lhs[i] * rhs[i]; }); +} +template +inline auto operator/(const Ximd& lhs, typename Ximd::value_type rhs) noexcept +{ + return lhs / Ximd(rhs); +} +template +inline auto operator/(const Ximd& lhs, const Ximd& rhs) noexcept +{ + return Ximd::generate([&](size_t i) { return lhs[i] / rhs[i]; }); +} + +template +inline auto& operator+=(Ximd& lhs, const Ximd& rhs) noexcept +{ + lhs = lhs + rhs; + return lhs; +} +template +inline auto& operator-=(Ximd& lhs, const Ximd& rhs) noexcept +{ + lhs = lhs - rhs; + return lhs; +} + +template +inline auto operator==(const Ximd& lhs, const Ximd& rhs) noexcept +{ + return ximd_mask::generate([&](size_t i) { return lhs[i] == rhs[i]; }); +} +template +inline auto operator==(const Ximd& lhs, typename Ximd::value_type rhs) noexcept +{ + return lhs == Ximd(rhs); +} +template +inline auto operator!=(const Ximd& lhs, const Ximd& rhs) noexcept +{ + return ximd_mask::generate([&](size_t i) { return lhs[i] != rhs[i]; }); +} +template +inline auto operator!=(const Ximd& lhs, typename Ximd::value_type rhs) noexcept +{ + return lhs != Ximd(rhs); +} +template +inline auto operator<(const Ximd& lhs, const Ximd& rhs) noexcept +{ + return ximd_mask::generate([&](size_t i) { return lhs[i] < rhs[i]; }); +} +template +inline auto operator<(const Ximd& lhs, typename Ximd::value_type rhs) noexcept +{ + return lhs < Ximd(rhs); +} +template +inline auto operator<=(const Ximd& lhs, const Ximd& rhs) noexcept +{ + return ximd_mask::generate([&](size_t i) { return lhs[i] <= rhs[i]; }); +} +template +inline auto operator>(const Ximd& lhs, const Ximd& rhs) noexcept +{ + return ximd_mask::generate([&](size_t i) { return lhs[i] > rhs[i]; }); +} +template +inline auto operator>(const Ximd& lhs, typename Ximd::value_type rhs) noexcept +{ + return lhs > Ximd(rhs); +} + +inline bool any_of(const ximd_mask& m) +{ + for (size_t i = 0; i < m.size(); i++) + { + if (m[i]) + { + return true; + } + } + return false; +} + +template +inline auto atan2(const Ximd& real, const Ximd& imag) +{ + return Ximd::generate([&](size_t i) { return std::atan2(real[i], imag[i]); }); +} +template +inline auto round(const Ximd& v) +{ + return Ximd::generate([&](size_t i) { return std::round(v[i]); }); +} + +} // ximd +} // sys + +#endif diff --git a/externals/coda-oss/modules/c++/sys/unittests/test_ximd.cpp b/externals/coda-oss/modules/c++/sys/unittests/test_ximd.cpp new file mode 100644 index 000000000..d419edca7 --- /dev/null +++ b/externals/coda-oss/modules/c++/sys/unittests/test_ximd.cpp @@ -0,0 +1,573 @@ +/* ========================================================================= + * This file is part of sys-c++ + * ========================================================================= + * + * (C) Copyright 2004 - 2014, MDA Information Systems LLC + * © Copyright 2024, Maxar Technologies, Inc. + * + * sys-c++ is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; If not, + * see . + * + */ + +#include +#include +#include +#include + +// These are unittests, always enable unless explicitly disabled +#ifndef CODA_OSS_Ximd_ENABLED +#define CODA_OSS_Ximd_ENABLED 1 +#endif +#include + +#include +#include + +#include "TestCase.h" + +using zfloat = std::complex; + +template +using simd = sys::ximd::Ximd; +using intv = simd; +using floatv = simd; +using intv_mask = sys::ximd::ximd_mask; +using floatv_mask = sys::ximd::ximd_mask; + +// Manage a SIMD complex as an array of two SIMDs +using zfloatv = std::array; +static inline auto& real(zfloatv& z) noexcept +{ + return z[0]; +} +static inline const auto& real(const zfloatv& z) noexcept +{ + return z[0]; +} +static inline auto& imag(zfloatv& z) noexcept +{ + return z[1]; +} +static inline const auto& imag(const zfloatv& z) noexcept +{ + return z[1]; +} +static inline size_t size(const zfloatv& z) noexcept +{ + auto retval = real(z).size(); + assert(retval == imag(z).size()); + return retval; +} + +static inline auto arg(const zfloatv& z) +{ + // https://en.cppreference.com/w/cpp/numeric/complex/arg + // > `std::atan2(std::imag(z), std::real(z))` + return atan2(imag(z), real(z)); // arg() +} + +template +static inline auto make_zfloatv(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag) +{ + zfloatv retval; + for (size_t i = 0; i < size(retval); i++) + { + real(retval)[i] = generate_real(i); + imag(retval)[i] = generate_imag(i); + } + return retval; +} + +template +static inline auto copy_from(std::span p) +{ + simd retval; + assert(p.size() == retval.size()); + retval.copy_from(p.data()); + return retval; +} +static inline auto copy_from(std::span p) +{ + const auto generate_real = [&](size_t i) { return p[i].real(); }; + const auto generate_imag = [&](size_t i) { return p[i].imag(); }; + return make_zfloatv(generate_real, generate_imag); +} + +template +static inline auto copy_to(const simd& v) +{ + std::vector::value_type> retval(v.size()); + v.copy_to(retval.data()); + return retval; +} +static inline auto copy_to(const zfloatv& v) +{ + std::vector retval; + for (size_t i = 0; i < size(v); i++) + { + retval.emplace_back(real(v)[i], imag(v)[i]); + } + return retval; +} + +TEST_CASE(testDefaultConstructor) +{ + // sanity check implementation and utility routines + TEST_ASSERT_EQ(floatv::size(), size(zfloatv{})); + + // intv v = 1; + intv v; + v = 1; + for (size_t i = 0; i < v.size(); i++) + { + TEST_ASSERT_EQ(1, v[i]); + } +} + +// Sample code from SIX; see **ComplexToAMP8IPHS8I.cpp**. +static auto GetPhase(std::complex v) +{ + // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around + // handles cases that are close to 2PI. + double phase = std::arg(v); + if (phase < 0.0) phase += coda_oss::numbers::pi * 2.0; // Wrap from [0, 2PI] + return phase; +} +static uint8_t getPhase(zfloat v, float phase_delta) +{ + // Phase is determined via arithmetic because it's equally spaced. + const auto phase = GetPhase(v); + return gsl::narrow_cast(std::round(phase / phase_delta)); +} + +static inline auto if_add(const floatv_mask& m, const floatv& lhs, typename floatv::value_type rhs) +{ + const auto generate_add = [&](size_t i) { + return m[i] ? lhs[i] + rhs : lhs[i]; + }; + return floatv::generate(generate_add); +} +static inline auto roundi(const floatv& v) // match vcl::roundi() +{ + const auto rounded = round(v); + const auto generate_roundi = [&](size_t i) + { return static_cast(rounded[i]); }; + return intv::generate(generate_roundi); +} +static auto getPhase(const zfloatv& v, float phase_delta) +{ + // Phase is determined via arithmetic because it's equally spaced. + // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around + // handles cases that are close to 2PI. + auto phase = arg(v); + //where(phase < 0.0f, phase) += std::numbers::pi_v *2.0f; // Wrap from [0, 2PI] + phase = if_add(phase < 0.0f, phase, std::numbers::pi_v * 2.0f); // Wrap from [0, 2PI] + return roundi(phase / phase_delta); +} +static inline const auto& cxValues() +{ + //static const std::vector retval{/*{0, 0},*/ {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}, {-1, -1}, {0, -1}, {1, -1}}; + static const std::vector retval{{0.0, 0.0}, {1.0, 1.0}, {10.0, -10.0}, {-100.0, 100.0}, {-1000.0, -1000.0}, // sample data from SIX + {-1.0, -1.0}, {1.0, -1.0}, {-1.0, 1.0} // "pad" to multiple of floatv::size() + }; + return retval; +} + +static auto expected_getPhase_values(const std::string& testName, float phase_delta) +{ + auto&& values = cxValues(); + TEST_ASSERT(values.size() % floatv::size() == 0); + std::vector expected; + for (auto&& v : values) + { + expected.push_back(getPhase(v, phase_delta)); + } + return expected; +} + +static auto toComplex_(double A, uint8_t phase) +{ + // The phase values should be read in (values 0 to 255) and converted to float by doing: + // P = (1 / 256) * input_value + const double P = (1.0 / 256.0) * phase; + + // To convert the amplitude and phase values to complex float (i.e. real and imaginary): + // S = A * cos(2 * pi * P) + j * A * sin(2 * pi * P) + const double angle = 2 * std::numbers::pi * P; + const auto sin_angle = sin(angle); + const auto cos_angle = cos(angle); + zfloat S(gsl::narrow_cast(A * cos_angle), gsl::narrow_cast(A * sin_angle)); + return S; +} +inline static auto toComplex(uint8_t amplitude, uint8_t phase) +{ + // A = input_amplitude(i.e. 0 to 255) + const double A = amplitude; + return toComplex_(A, phase); +} + +static auto phase_delta() +{ + static const auto p0 = GetPhase(toComplex(1, 0)); + static const auto p1 = GetPhase(toComplex(1, 1)); + assert(p0 == 0.0); + assert(p1 > p0); + static const auto retval = gsl::narrow_cast(p1 - p0); + return retval; +} + +static auto load(const std::vector& values) +{ + std::vector retval; + constexpr auto sz = floatv::size(); + auto const pValues = sys::make_span(values); + auto p = sys::make_span(pValues.data(), sz); + for (size_t i = 0; i < values.size() / sz; i++) + { + retval.push_back(copy_from(p)); + p = sys::make_span(p.data() + sz, p.size()); + } + return retval; +} + +TEST_CASE(testGetPhase) +{ + static const auto& expected = expected_getPhase_values(testName, phase_delta()); + + const auto valuesv = load(cxValues()); + std::vector actual; + for (auto&& zvaluev : valuesv) + { + const auto phase = getPhase(zvaluev, phase_delta()); + const auto phase_ = copy_to(phase); + for (auto&& ph : copy_to(phase)) + { + actual.push_back(gsl::narrow_cast(ph)); + } + } + + TEST_ASSERT_EQ(actual.size(), expected.size()); + for (size_t i = 0; i < actual.size(); i++) + { + TEST_ASSERT_EQ(actual[i], expected[i]); + } +} + +// Again, more sample code from SIX +static constexpr size_t AmplitudeTableSize = 256; +static auto getPhaseDirections_() +{ + //! Unit vector rays that represent each direction that phase can point. + std::array phase_directions; // interleaved, std::complex + + const auto p0 = GetPhase(toComplex(1, 0)); + for (size_t i = 0; i < AmplitudeTableSize; i++) + { + const float angle = static_cast(p0) + i * phase_delta(); + const auto y = sin(angle); + const auto x = cos(angle); + phase_directions[i] = {x, y}; + } + return phase_directions; +} +static inline const auto& getPhaseDirections() +{ + static const auto retval = getPhaseDirections_(); + return retval; +} + +template +static inline auto lookup(const intv& zindex, const std::array& phase_directions) +{ + const auto generate_real = [&](size_t i) + { + const auto i_ = zindex[i]; + return phase_directions[i_].real(); + }; + const auto generate_imag = [&](size_t i) + { + const auto i_ = zindex[i]; + return phase_directions[i_].imag(); + }; + return make_zfloatv(generate_real, generate_imag); +} + +TEST_CASE(testLookup) +{ + const auto& phase_directions = getPhaseDirections(); + + static const auto& expected_getPhase = expected_getPhase_values(testName, phase_delta()); + std::vector expected; + for (auto&& phase : expected_getPhase) + { + expected.push_back(phase_directions[phase]); + } + + const auto valuesv = load(cxValues()); + std::vector actual; + for (auto&& zvaluev : valuesv) + { + const auto phase = getPhase(zvaluev, phase_delta()); + const auto phase_direction = lookup(phase, phase_directions); + + for (auto&& pd : copy_to(phase_direction)) + { + actual.push_back(pd); + } + } + + TEST_ASSERT_EQ(actual.size(), expected.size()); + for (size_t i = 0; i < actual.size(); i++) + { + TEST_ASSERT_EQ(actual[i], expected[i]); + } +} + +// And ... more sample code from SIX +static auto iota_0_256_() +{ + static_assert(sizeof(size_t) > sizeof(uint8_t), "size_t can't hold UINT8_MAX!"); + + std::vector retval; + retval.reserve(UINT8_MAX + 1); + for (size_t i = 0; i <= UINT8_MAX; i++) // Be careful with indexing so that we don't wrap-around in the loop. + { + retval.push_back(gsl::narrow(i)); + } + assert(retval.size() == UINT8_MAX + 1); + return retval; +} +static inline std::vector iota_0_256() +{ + static const auto retval = iota_0_256_(); + return retval; +} + +static auto make_magnitudes_() +{ + std::vector result; + result.reserve(AmplitudeTableSize); + for (const auto amplitude : iota_0_256()) + { + // AmpPhase -> Complex + const auto phase = amplitude; + const auto complex = toComplex(amplitude, phase); + result.push_back(std::abs(complex)); + } + + // I don't know if we can guarantee that the amplitude table is non-decreasing. + // Check to verify property at runtime. + if (!std::is_sorted(result.begin(), result.end())) + { + throw std::runtime_error("magnitudes must be sorted"); + } + + std::array retval; + std::copy(result.begin(), result.end(), retval.begin()); + return retval; +} +static inline std::span magnitudes() +{ + //! The sorted set of possible magnitudes order from small to large. + static const auto cached_magnitudes = make_magnitudes_(); + return sys::make_span(cached_magnitudes); +} + +/*! + * Find the nearest element given an iterator range. + * @param value query value + * @return index of nearest value within the iterator range. + */ +static uint8_t nearest(std::span magnitudes, float value) +{ + const auto begin = magnitudes.begin(); + const auto end = magnitudes.end(); + + const auto it = std::lower_bound(begin, end, value); + if (it == begin) return 0; + + const auto prev_it = std::prev(it); + const auto nearest_it = it == end ? prev_it : + (value - *prev_it <= *it - value ? prev_it : it); + const auto distance = std::distance(begin, nearest_it); + assert(distance <= std::numeric_limits::max()); + return gsl::narrow(distance); +} +static uint8_t find_nearest(zfloat phase_direction, zfloat v) +{ + // We have to do a 1D nearest neighbor search for magnitude. + // But it's not the magnitude of the input complex value - it's the projection of + // the complex value onto the ray of candidate magnitudes at the selected phase. + // i.e. dot product. + const auto projection = (phase_direction.real() * v.real()) + (phase_direction.imag() * v.imag()); + //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ??? + return nearest(magnitudes(), projection); +} + +template +static inline auto select(const TTest& test, const TResult& t, const TResult& f) +{ + TResult retval; + for (size_t i = 0; i < test.size(); i++) + { + retval[i] = test[i] ? t[i] : f[i]; + } + return retval; +} + +static auto lookup(const intv& zindex, std::span magnitudes) +{ + const auto generate = [&](size_t i) { + const auto i_ = zindex[i]; + + // The index may be out of range. This is expected because `i` might be "don't care." + if ((i_ >= 0) && (i_ < std::ssize(magnitudes))) + { + return magnitudes[i_]; + } + return NAN; // propogate "don't care" + }; + return floatv::generate(generate); +} + +static inline auto lower_bound_(std::span magnitudes, const floatv& v) +{ + intv first; first = 0; + intv last; last = gsl::narrow(magnitudes.size()); + + auto count = last - first; + while (any_of(count > 0)) + { + auto it = first; + const auto step = count / 2; + it += step; + + auto next = it; ++next; // ... ++it; + auto advance = count; advance -= step + 1; // ... -= step + 1; + + const auto c = lookup(it, magnitudes); // magnituides[it] + + const auto test = c < v; // (c < v).__cvt(); // https://github.com/VcDevel/std-simd/issues/41 + + //where(test, it) = next; // ... ++it + //where(test, first) = it; // first = ... + //// count -= step + 1 <...OR...> count = step + //where(test, count) = advance; + //where(!test, count) = step; + it = select(test, next, it); // ... ++it + first = select(test, it, first); // first = ... + count = select(test, advance, step); // count -= step + 1 <...OR...> count = step + } + return first; +} +static inline auto lower_bound(std::span magnitudes, const floatv& value) +{ + return lower_bound_(magnitudes, value); +} + +static auto nearest(std::span magnitudes, const floatv& value) +{ + assert(magnitudes.size() == AmplitudeTableSize); + + /* + const auto it = std::lower_bound(begin, end, value); + if (it == begin) return 0; + + const auto prev_it = std::prev(it); + const auto nearest_it = it == end ? prev_it : + (value - *prev_it <= *it - value ? prev_it : it); + const auto distance = std::distance(begin, nearest_it); + assert(distance <= std::numeric_limits::max()); + return gsl::narrow(distance); + */ + const auto it = lower_bound(magnitudes, value); + const auto prev_it = it - 1; // const auto prev_it = std::prev(it); + + const auto v0 = value - lookup(prev_it, magnitudes); // value - *prev_it + const auto v1 = lookup(it, magnitudes) - value; // *it - value + //const auto nearest_it = select(v0 <= v1, prev_it, it); // (value - *prev_it <= *it - value ? prev_it : it); + + intv end; end = gsl::narrow(magnitudes.size()); + //const auto end_test = select(it == end, prev_it, nearest_it); // it == end ? prev_it : ... + intv zero; zero = 0; + auto retval = select(it == 0, zero, // if (it == begin) return 0; + select(it == end, prev_it, // it == end ? prev_it : ... + select(v0 <= v1, prev_it, it) // (value - *prev_it <= *it - value ? prev_it : it); + )); + return retval; +} + +static auto find_nearest(std::span magnitudes, const floatv& phase_direction_real, const floatv& phase_direction_imag, + const zfloatv& v) +{ + // We have to do a 1D nearest neighbor search for magnitude. + // But it's not the magnitude of the input complex value - it's the projection of + // the complex value onto the ray of candidate magnitudes at the selected phase. + // i.e. dot product. + const auto projection = (phase_direction_real * real(v)) + (phase_direction_imag * imag(v)); + //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ??? + return nearest(magnitudes, projection); +} +static inline auto find_nearest(std::span magnitudes, const zfloatv& phase_direction, const zfloatv& v) +{ + return find_nearest(magnitudes, real(phase_direction), imag(phase_direction), v); +} + +TEST_CASE(testFindNearest) +{ + const auto& values = cxValues(); + const auto& phase_directions = getPhaseDirections(); + + static const auto& expected_getPhase = expected_getPhase_values(testName, phase_delta()); + std::vector expected_phase_directions; + for (auto&& phase : expected_getPhase) + { + expected_phase_directions.push_back(phase_directions[phase]); + } + std::vector expected; + for (size_t i = 0; i < values.size(); i++) + { + auto a = find_nearest(expected_phase_directions[i], values[i]); + expected.push_back(a); + } + + const auto valuesv = load(cxValues()); + std::vector actual; + for (auto&& v : valuesv) + { + const auto phase = getPhase(v, phase_delta()); + const auto phase_direction = lookup(phase, phase_directions); + const auto amplitude = find_nearest(magnitudes(), phase_direction, v); + + for (auto&& a : copy_to(amplitude)) + { + actual.push_back(static_cast(a)); + } + } + + TEST_ASSERT_EQ(actual.size(), expected.size()); + for (size_t i = 0; i < actual.size(); i++) + { + TEST_ASSERT_EQ(actual[i], expected[i]); + } +} + +TEST_MAIN( + TEST_CHECK(testDefaultConstructor); + + TEST_CHECK(testGetPhase); + TEST_CHECK(testLookup); + TEST_CHECK(testFindNearest); +) diff --git a/externals/nitro/.github/workflows/codeql.yml b/externals/nitro/.github/workflows/codeql.yml index b4da2819c..f564eba15 100644 --- a/externals/nitro/.github/workflows/codeql.yml +++ b/externals/nitro/.github/workflows/codeql.yml @@ -15,15 +15,20 @@ on: push: branches: [ "main" ] pull_request: - # The branches below must be a subset of the branches above branches: [ "main" ] schedule: - - cron: '19 23 * * 3' + - cron: '35 17 * * 6' jobs: analyze: name: Analyze - runs-on: ubuntu-latest + # Runner size impacts CodeQL analysis time. To learn more, please see: + # - https://gh.io/recommended-hardware-resources-for-running-codeql + # - https://gh.io/supported-runners-and-hardware-resources + # - https://gh.io/using-larger-runners + # Consider using larger runners for possible analysis time improvements. + runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} permissions: actions: read contents: read @@ -32,41 +37,44 @@ jobs: strategy: fail-fast: false matrix: - language: [ 'cpp' ] - # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] + language: [ 'c-cpp' ] + # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] + # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both + # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. # By default, queries listed here will override any specified in a config file. # Prefix the list here with "+" to use these queries and those in the config file. - - # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs - # queries: security-extended,security-and-quality + # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality - # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). + # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v2 + uses: github/codeql-action/autobuild@v3 # â„¹ï¸ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun - # If the Autobuild fails above, remove it and uncomment the following three lines. + # If the Autobuild fails above, remove it and uncomment the following three lines. # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. # - run: | - # echo "Run, Build Application using script" - # ./location_of_script_within_repo/buildscript.sh + # echo "Run, Build Application using script" + # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" diff --git a/externals/nitro/.github/workflows/frequent_check.yml b/externals/nitro/.github/workflows/frequent_check.yml index 9c1777f88..44916acd5 100644 --- a/externals/nitro/.github/workflows/frequent_check.yml +++ b/externals/nitro/.github/workflows/frequent_check.yml @@ -14,9 +14,9 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout - name: Set up Python - uses: actions/setup-python@v1 + uses: actions/setup-python@v5 # https://github.com/marketplace/actions/setup-python with: python-version: '3.7' - name: configure @@ -34,7 +34,7 @@ jobs: cd out ctest -C ${{ matrix.configuration }} --output-on-failure - name: Add msbuild to PATH - uses: microsoft/setup-msbuild@v1 # https://github.com/marketplace/actions/setup-msbuild + uses: microsoft/setup-msbuild@v1.1 # https://github.com/marketplace/actions/setup-msbuild with: msbuild-architecture: x64 - name: msbuild @@ -51,7 +51,7 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout - name: configure CODA-OSS run: | mkdir externals\coda-oss\out @@ -63,7 +63,7 @@ jobs: cmake --build . --config ${{ matrix.configuration }} -j cmake --build . --config ${{ matrix.configuration }} --target install - name: Add msbuild to PATH - uses: microsoft/setup-msbuild@v1.0.2 # https://github.com/marketplace/actions/setup-msbuild + uses: microsoft/setup-msbuild@v1.1 # https://github.com/marketplace/actions/setup-msbuild with: msbuild-architecture: x64 - name: msbuild @@ -87,9 +87,9 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout - name: Set up Python - uses: actions/setup-python@v1 + uses: actions/setup-python@v5 # https://github.com/marketplace/actions/setup-python with: python-version: '3.7' - name: configure diff --git a/externals/nitro/.github/workflows/main.yml b/externals/nitro/.github/workflows/main.yml index ef3dfa65e..041d480fb 100644 --- a/externals/nitro/.github/workflows/main.yml +++ b/externals/nitro/.github/workflows/main.yml @@ -17,9 +17,9 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout - name: Set up Python - uses: actions/setup-python@v1 + uses: actions/setup-python@v5 # https://github.com/marketplace/actions/setup-python with: python-version: '3.7' - name: configure @@ -37,7 +37,7 @@ jobs: cd out ctest -C ${{ matrix.configuration }} --output-on-failure - name: Add msbuild to PATH - uses: microsoft/setup-msbuild@v1.0.2 # https://github.com/marketplace/actions/setup-msbuild + uses: microsoft/setup-msbuild@v1.1 # https://github.com/marketplace/actions/setup-msbuild with: msbuild-architecture: x64 - name: msbuild @@ -54,7 +54,7 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout - name: configure CODA-OSS run: | mkdir externals\coda-oss\out @@ -66,7 +66,7 @@ jobs: cmake --build . --config ${{ matrix.configuration }} -j cmake --build . --config ${{ matrix.configuration }} --target install - name: Add msbuild to PATH - uses: microsoft/setup-msbuild@v1 # https://github.com/marketplace/actions/setup-msbuild + uses: microsoft/setup-msbuild@v1.1 # https://github.com/marketplace/actions/setup-msbuild with: msbuild-architecture: x64 - name: msbuild @@ -90,9 +90,9 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout - name: Set up Python - uses: actions/setup-python@v1 + uses: actions/setup-python@v5 # https://github.com/marketplace/actions/setup-python with: python-version: '3.7' - name: configure diff --git a/six/modules/c++/samples/8AMPI_PHSI.cpp b/six/modules/c++/samples/8AMPI_PHSI.cpp index af79cc4d1..5ceff52f6 100644 --- a/six/modules/c++/samples/8AMPI_PHSI.cpp +++ b/six/modules/c++/samples/8AMPI_PHSI.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/six/modules/c++/samples/8AMPI_PHSI.dir/8AMPI_PHSI.dir.vcxproj b/six/modules/c++/samples/8AMPI_PHSI.dir/8AMPI_PHSI.dir.vcxproj index f65f3e8f7..1cd611727 100644 --- a/six/modules/c++/samples/8AMPI_PHSI.dir/8AMPI_PHSI.dir.vcxproj +++ b/six/modules/c++/samples/8AMPI_PHSI.dir/8AMPI_PHSI.dir.vcxproj @@ -66,6 +66,8 @@ true MultiThreadedDebugDLL true + stdcpp17 + stdc11 Console @@ -91,6 +93,8 @@ Speed Guard true + stdcpp17 + stdc11 Console @@ -133,9 +137,6 @@ {34ac2314-fbd1-42ad-aaf4-0cebc6bf737e} - - {ddc587c2-53ba-44a9-94e7-07be52af3d0b} - {62aad4dd-35ba-41a0-8263-1f4dfd755689} diff --git a/six/modules/c++/samples/CMakeLists.txt b/six/modules/c++/samples/CMakeLists.txt index e0527df40..96238f7eb 100644 --- a/six/modules/c++/samples/CMakeLists.txt +++ b/six/modules/c++/samples/CMakeLists.txt @@ -18,6 +18,7 @@ add_sample(extract_cphd_xml cli-c++ cphd-c++ xml.lite-c++) add_sample(image_to_scene six.sicd-c++ six.sidd-c++) add_sample(project_slant_to_output cli-c++ io-c++ six-c++ six.sicd-c++ sio.lite-c++) add_sample(round_trip_six cli-c++ six.convert-c++ six.sicd-c++ six.sidd-c++) +add_sample(8AMPI_PHSI six.sicd-c++) add_sample(sicd_output_plane_pixel_to_lat_lon cli-c++ six.sicd-c++) add_sample(test_compare_sidd cli-c++ six.sicd-c++ six.sidd-c++) add_sample(test_create_sicd cli-c++ six.sicd-c++ sio.lite-c++) diff --git a/six/modules/c++/samples/wscript b/six/modules/c++/samples/wscript index 498d294af..6de4750b5 100644 --- a/six/modules/c++/samples/wscript +++ b/six/modules/c++/samples/wscript @@ -12,6 +12,7 @@ def build(bld): 'project_slant_to_output' : 'cli io six six.sicd sio.lite', 'image_to_scene' : 'six.sicd six.sidd', 'round_trip_six' : 'cli six.convert six.sicd six.sidd', + '8AMPI_PHSI' : 'six.sicd', 'test_create_sicd' : 'cli six.sicd sio.lite', 'test_create_sicd_from_mem' : 'cli six.sicd', 'test_create_sidd_from_mem' : 'cli six.sicd six.sidd', diff --git a/six/modules/c++/six.sicd/CMakeLists.txt b/six/modules/c++/six.sicd/CMakeLists.txt index 31a6e143c..31373160e 100644 --- a/six/modules/c++/six.sicd/CMakeLists.txt +++ b/six/modules/c++/six.sicd/CMakeLists.txt @@ -8,6 +8,7 @@ coda_add_module( source/ComplexData.cpp source/ComplexDataBuilder.cpp source/ComplexToAMP8IPHS8I.cpp + source/ComplexToAMP8IPHS8I_unseq.cpp source/ComplexXMLControl.cpp source/ComplexXMLParser.cpp source/ComplexXMLParser040.cpp diff --git a/six/modules/c++/six.sicd/six.sicd.vcxproj b/six/modules/c++/six.sicd/six.sicd.vcxproj index 9296d12e1..3b0b210df 100644 --- a/six/modules/c++/six.sicd/six.sicd.vcxproj +++ b/six/modules/c++/six.sicd/six.sicd.vcxproj @@ -154,6 +154,9 @@ + + + diff --git a/six/modules/c++/six.sicd/six.sicd.vcxproj.filters b/six/modules/c++/six.sicd/six.sicd.vcxproj.filters index 4f4d855cf..fb6a88a17 100644 --- a/six/modules/c++/six.sicd/six.sicd.vcxproj.filters +++ b/six/modules/c++/six.sicd/six.sicd.vcxproj.filters @@ -248,5 +248,14 @@ Source Files + + Source Files + + + Source Files + + + Source Files + \ No newline at end of file diff --git a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp index 0c08b0447..adfd97059 100644 --- a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp +++ b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #if CODA_OSS_cpp17 @@ -83,7 +84,7 @@ inline auto GetPhase(std::complex v) if (phase < 0.0) phase += std::numbers::pi * 2.0; // Wrap from [0, 2PI] return phase; } -uint8_t six::sicd::details::ComplexToAMP8IPHS8I::getPhase(six::zfloat v) const +uint8_t six::sicd::details::ComplexToAMP8IPHS8I::Impl::getPhase(six::zfloat v) const { // Phase is determined via arithmetic because it's equally spaced. const auto phase = GetPhase(v); @@ -144,23 +145,29 @@ static std::span get_magnitudes(const six::AmplitudeTable* pAmplitu return sys::make_const_span(uncached_magnitudes); } + six::sicd::details::ComplexToAMP8IPHS8I::ComplexToAMP8IPHS8I(const six::AmplitudeTable *pAmplitudeTable) - : magnitudes(get_magnitudes(pAmplitudeTable, uncached_magnitudes)) + :impl(*this) { + impl.magnitudes = get_magnitudes(pAmplitudeTable, impl.uncached_magnitudes); + const auto p0 = GetPhase(Utilities::toComplex(1, 0, pAmplitudeTable)); const auto p1 = GetPhase(Utilities::toComplex(1, 1, pAmplitudeTable)); assert(p0 == 0.0); assert(p1 > p0); - phase_delta = gsl::narrow_cast(p1 - p0); + impl.phase_delta = gsl::narrow_cast(p1 - p0); for(size_t i = 0; i < 256; i++) { - const units::Radians angle{ static_cast(p0) + i * phase_delta }; + const units::Radians angle{ static_cast(p0) + i * impl.phase_delta }; float y, x; SinCos(angle, y, x); - phase_directions[i] = { x, y }; + impl.phase_directions[i] = { x, y }; - phase_directions_real[i] = phase_directions[i].real(); - phase_directions_imag[i] = phase_directions[i].imag(); + // Only need the parallel array when using the "vectorclass" library. + #ifdef SIX_sicd_has_VCL + impl.phase_directions_real[i] = impl.phase_directions[i].real(); + impl.phase_directions_imag[i] = impl.phase_directions[i].imag(); + #endif } } @@ -184,7 +191,7 @@ static uint8_t nearest(std::span magnitudes, float value) assert(distance <= std::numeric_limits::max()); return gsl::narrow(distance); } -uint8_t six::sicd::details::ComplexToAMP8IPHS8I::find_nearest(six::zfloat phase_direction, six::zfloat v) const +uint8_t six::sicd::details::ComplexToAMP8IPHS8I::Impl::find_nearest(six::zfloat phase_direction, six::zfloat v) const { // We have to do a 1D nearest neighbor search for magnitude. // But it's not the magnitude of the input complex value - it's the projection of @@ -197,16 +204,16 @@ uint8_t six::sicd::details::ComplexToAMP8IPHS8I::find_nearest(six::zfloat phase_ six::AMP8I_PHS8I_t six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbor_(const six::zfloat &v) const { six::AMP8I_PHS8I_t retval; - retval.phase = getPhase(v); + retval.phase = impl.getPhase(v); - auto&& phase_direction = phase_directions[retval.phase]; - retval.amplitude = find_nearest(phase_direction, v); + auto&& phase_direction = impl.phase_directions[retval.phase]; + retval.amplitude = impl.find_nearest(phase_direction, v); return retval; } -template -void six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_seq(TInputIt first, TInputIt last, TOutputIt dest) const +void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_seq(std::span inputs, std::span results) const { - std::transform(first, last, dest, [&](const auto& v) { return nearest_neighbor_(v); }); + std::transform(inputs.begin(), inputs.end(), results.begin(), + [&](const auto& v) { return converter.nearest_neighbor_(v); }); } std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_seq( std::span inputs, const six::AmplitudeTable* pAmplitudeTable) @@ -215,7 +222,7 @@ std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest const auto& converter = make_(pAmplitudeTable); std::vector retval(inputs.size()); - converter.nearest_neighbors_seq(inputs.begin(), inputs.end(), retval.begin()); + converter.impl.nearest_neighbors_seq(inputs, retval); return retval; } @@ -249,13 +256,16 @@ static inline OutputIt transform_async(const InputIt first1, const InputIt last1 return handle.get(); } -template -void six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par(TInputIt first, TInputIt last, TOutputIt dest) const +void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_par(std::span inputs, std::span results) const { const auto nearest_neighbor = [&](const auto& v) { - return this->nearest_neighbor_(v); + return converter.nearest_neighbor_(v); }; + + const auto first = inputs.begin(); + const auto last = inputs.end(); + const auto dest = results.begin(); #if SIX_six_sicd_ComplexToAMP8IPHS8I_has_execution std::ignore = std::transform(std::execution::par, first, last, dest, nearest_neighbor); @@ -266,7 +276,7 @@ void six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par(TInputIt fir constexpr auto default_cutoff = (dimension * dimension) * 4; const auto cutoff = cutoff_ == 0 ? default_cutoff : cutoff_; - const auto transform_f = [&](const TInputIt first1, const TInputIt last1, TOutputIt d_first) + const auto transform_f = [&](auto first1, auto last1, auto d_first) { return std::transform(first1, last1, d_first, nearest_neighbor); }; @@ -280,7 +290,7 @@ std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest const auto& converter = make_(pAmplitudeTable); std::vector retval(inputs.size()); - converter.nearest_neighbors_par(inputs.begin(), inputs.end(), retval.begin()); + converter.impl.nearest_neighbors_par(inputs, sys::make_span(retval)); return retval; } @@ -289,42 +299,75 @@ std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest { // TODO: there could be more complicated logic here to decide between // _seq, _par, _unseq, and _par_unseq - #if SIX_sicd_have_VCL || SIX_sicd_have_experimental_simd - return nearest_neighbors_par_unseq(inputs, pAmplitudeTable); + #if SIX_sicd_ComplexToAMP8IPHS8I_unseq + + // None of our SIMD implementations are dramatically slower when + // running unittests; that's mostly because there's lots of IO. So that + // the more interesting code-path is tested, use UNSEQ in debugging. + #if CODA_OSS_DEBUG + return nearest_neighbors_unseq(inputs, pAmplitudeTable); // TODO: #else return nearest_neighbors_par(inputs, pAmplitudeTable); #endif -} - -template -void six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par_unseq(TInputIt first, TInputIt last, TOutputIt dest) const -{ - constexpr ptrdiff_t cutoff_ = 0; // too slow w/o multi-threading - // The value of "default_cutoff" was determined by testing; there is nothing special about it, feel free to change it. - constexpr auto default_cutoff = (256 * 256) * 2; - const auto cutoff = cutoff_ == 0 ? default_cutoff : cutoff_; + #else + return nearest_neighbors_par(inputs, pAmplitudeTable); - const auto transform_f = [&](const TInputIt first1, const TInputIt last1, TOutputIt d_first) - { - nearest_neighbors_unseq(first1, last1, d_first); - static const TOutputIt retval; - return retval; - }; - std::ignore = transform_async(first, last, dest, transform_f, cutoff); +#endif } + #if SIX_sicd_ComplexToAMP8IPHS8I_unseq -std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par_unseq( +std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq( std::span inputs, const six::AmplitudeTable* pAmplitudeTable) { - // make a structure to quickly find the nearest neighbor - const auto& converter = make_(pAmplitudeTable); + // TODO: there could be more complicated logic here to determine which UNSEQ + // implementation to use. - std::vector retval(inputs.size()); - converter.nearest_neighbors_par_unseq(inputs.begin(), inputs.end(), retval.begin()); - return retval; + #if SIX_sicd_has_VCL + return nearest_neighbors_unseq_vcl(inputs, pAmplitudeTable); + + #elif SIX_sicd_has_simd + return nearest_neighbors_unseq_simd(inputs, pAmplitudeTable); + + #elif SIX_sicd_has_ximd + return nearest_neighbors_unseq_ximd(inputs, pAmplitudeTable); + + #else + #error "Don't know how to implement nearest_neighbors_unseq()" + throw std::logic_error("Don't know how to implement nearest_neighbors_unseq()"); + + #endif } -#endif // SIX_sicd_have_VCL || SIX_sicd_have_experimental_simd +#endif // SIX_sicd_ComplexToAMP8IPHS8I_unseq + +//template +//void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_par_unseq(TInputIt first, TInputIt last, TOutputIt dest) const +//{ +// constexpr ptrdiff_t cutoff_ = 0; // too slow w/o multi-threading +// // The value of "default_cutoff" was determined by testing; there is nothing special about it, feel free to change it. +// constexpr auto default_cutoff = (256 * 256) * 2; +// const auto cutoff = cutoff_ == 0 ? default_cutoff : cutoff_; +// +// const auto transform_f = [&](const TInputIt first1, const TInputIt last1, TOutputIt d_first) +// { +// nearest_neighbors_unseq(first1, last1, d_first); +// static const TOutputIt retval; +// return retval; +// }; +// std::ignore = transform_async(first, last, dest, transform_f, cutoff); +//} +//#if SIX_sicd_ComplexToAMP8IPHS8I_unseq +//std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par_unseq( +// std::span inputs, const six::AmplitudeTable* pAmplitudeTable) +//{ +// // make a structure to quickly find the nearest neighbor +// const auto& converter = make_(pAmplitudeTable); +// +// std::vector retval(inputs.size()); +// converter.impl.nearest_neighbors_par_unseq(inputs.begin(), inputs.end(), retval.begin()); +// return retval; +//} +//#endif // SIX_sicd_ComplexToAMP8IPHS8I_unseq const six::sicd::details::ComplexToAMP8IPHS8I& six::sicd::details::ComplexToAMP8IPHS8I::make_(const six::AmplitudeTable* pAmplitudeTable) { diff --git a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_simd.cpp b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_simd.cpp new file mode 100644 index 000000000..ece707b2f --- /dev/null +++ b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_simd.cpp @@ -0,0 +1,377 @@ +/* ========================================================================= +* This file is part of six.sicd-c++ +* ========================================================================= +* +* (C) Copyright 2021, Maxar Technologies, Inc. +* +* six.sicd-c++ is free software; you can redistribute it and/or modify +* it under the terms of the GNU Lesser General Public License as published by +* the Free Software Foundation; either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Lesser General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public +* License along with this program; If not, +* see . +* +*/ +#include "six/AmplitudeTable.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "six/sicd/Utilities.h" + +#if SIX_sicd_has_simd + +#include + +// https://en.cppreference.com/w/cpp/experimental/simd/simd_cast +// > The TS specification is missing `simd_cast` and `static_simd_cast` overloads for `simd_mask`. ... +namespace stdx +{ + using namespace std::experimental; + using namespace std::experimental::__proposed; +} + +// https://en.cppreference.com/w/cpp/experimental/simd +using floatv = stdx::native_simd; +// using doublev = stdx::rebind_simd_t; +using intv = stdx::rebind_simd_t; +using zfloatv = std::array; + +auto& real(zfloatv& z) +{ + return z[0]; +} +const auto& real(const zfloatv& z) +{ + return z[0]; +} +auto& imag(zfloatv& z) +{ + return z[1]; +} +const auto& imag(const zfloatv& z) +{ + return z[1]; +} + +// https://en.cppreference.com/w/cpp/experimental/simd/simd/simd +template +inline auto make_zfloatv(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag) +{ + zfloatv retval; + real(retval) = floatv(generate_real); + imag(retval) = floatv(generate_imag); + return retval; +} + +static inline auto load(std::span p) +{ + const auto generate_real = [&](size_t i) { + return p[i].real(); + }; + const auto generate_imag = [&](size_t i) { + return p[i].imag(); + }; + return make_zfloatv(generate_real, generate_imag); +} + + +// https://en.cppreference.com/w/cpp/numeric/complex/arg +// > `std::atan2(std::imag(z), std::real(z))` +inline auto arg(const floatv& real, const floatv& imag) +{ + return atan2(imag, real); // arg() +} +inline auto arg(const zfloatv& z) +{ + return arg(real(z), imag(z)); +} + +inline auto roundi(const floatv& v) // match vcl::roundi() +{ + return static_simd_cast(round(v)); +} + +template +inline auto select(const TTest& test_, const TResult& t, const TResult& f) +{ + //const auto test = test_.__cvt(); // https://github.com/VcDevel/std-simd/issues/41 + const auto test = stdx::static_simd_cast(test_); // https://github.com/VcDevel/std-simd/issues/41 + + // https://en.cppreference.com/w/cpp/experimental/simd/where_expression + // > ... All other elements are left unchanged. + TResult retval; + where(test, retval) = t; + where(!test, retval) = f; + return retval; +} + +static inline auto getPhase(const zfloatv& v, float phase_delta) +{ + // Phase is determined via arithmetic because it's equally spaced. + // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around + // handles cases that are close to 2PI. + auto phase = arg(v); + where(phase < 0.0f, phase) += std::numbers::pi_v *2.0f; // Wrap from [0, 2PI] + return roundi(phase / phase_delta); +} + +template +static inline auto lookup(const intv& zindex, const std::array& phase_directions) +{ + // It seems that the "generator" constuctor is called with SIMD instructions. + // https://en.cppreference.com/w/cpp/experimental/simd/simd/simd + // > The calls to `generator` are unsequenced with respect to each other. + + const auto generate_real = [&](size_t i) { + const auto i_ = zindex[i]; + return phase_directions[i_].real(); + }; + const auto generate_imag = [&](size_t i) { + const auto i_ = zindex[i]; + return phase_directions[i_].imag(); + }; + return make_zfloatv(generate_real, generate_imag); +} + +// https://en.cppreference.com/w/cpp/algorithm/lower_bound +/* +template +ForwardIt lower_bound(ForwardIt first, ForwardIt last, const T& value) +{ + ForwardIt it; + typename std::iterator_traits::difference_type count, step; + count = std::distance(first, last); + + while (count > 0) + { + it = first; + step = count / 2; + std::advance(it, step); + + if (*it < value) + { + first = ++it; + count -= step + 1; + } + else + count = step; + } + + return first; +} +*/ +template +static inline auto lookup(const intv& zindex, std::span magnitudes) +{ + // It seems that the "generator" constuctor is called with SIMD instructions. + // https://en.cppreference.com/w/cpp/experimental/simd/simd/simd + // > The calls to `generator` are unsequenced with respect to each other. + + const auto generate = [&](size_t i) { + const auto i_ = zindex[i]; + return magnitudes[i_]; + }; + return floatv(generate); +} +inline auto lower_bound_(std::span magnitudes, const floatv& v) +{ + intv first = 0; + const intv last = gsl::narrow(magnitudes.size()); + + auto count = last - first; + while (any_of(count > 0)) + { + auto it = first; + const auto step = count / 2; + it += step; + + auto next = it; ++next; // ... ++it; + auto advance = count; advance -= step + 1; // ... -= step + 1; + + const auto c = lookup(it, magnitudes); // magnituides[it] + + const auto test = c < v; // (c < v).__cvt(); // https://github.com/VcDevel/std-simd/issues/41 + + //where(test, it) = next; // ... ++it + //where(test, first) = it; // first = ... + //// count -= step + 1 <...OR...> count = step + //where(test, count) = advance; + //where(!test, count) = step; + it = select(test, next, it); // ... ++it + first = select(test, it, first); // first = ... + count = select(test, advance, step); // count -= step + 1 <...OR...> count = step + } + return first; +} +inline auto lower_bound(std::span magnitudes, const floatv& value) +{ + auto retval = lower_bound_(magnitudes, value); + + #ifndef NDEBUG // i.e., debug + for (size_t i = 0; i < value.size(); i++) + { + const auto it = std::lower_bound(magnitudes.begin(), magnitudes.end(), value[i]); + const auto result = gsl::narrow(std::distance(magnitudes.begin(), it)); + assert(retval[i] == result); + } + #endif + + return retval; +} + +static auto nearest(std::span magnitudes, const floatv& value) +{ + assert(magnitudes.size() == six::AmplitudeTableSize); + + /* + const auto it = std::lower_bound(begin, end, value); + if (it == begin) return 0; + + const auto prev_it = std::prev(it); + const auto nearest_it = it == end ? prev_it : + (value - *prev_it <= *it - value ? prev_it : it); + const auto distance = std::distance(begin, nearest_it); + assert(distance <= std::numeric_limits::max()); + return gsl::narrow(distance); + */ + const auto it = ::lower_bound(magnitudes, value); + const auto prev_it = it - 1; // const auto prev_it = std::prev(it); + + const auto v0 = value - lookup(prev_it, magnitudes); // value - *prev_it + const auto v1 = lookup(it, magnitudes) - value; // *it - value + //const auto nearest_it = select(v0 <= v1, prev_it, it); // (value - *prev_it <= *it - value ? prev_it : it); + + const intv end = gsl::narrow(magnitudes.size()); + //const auto end_test = select(it == end, prev_it, nearest_it); // it == end ? prev_it : ... + const intv zero = 0; + auto retval = select(it == 0, zero, // if (it == begin) return 0; + select(it == end, prev_it, // it == end ? prev_it : ... + select(v0 <=v1, prev_it, it) // (value - *prev_it <= *it - value ? prev_it : it); + )); + return retval; +} + +static auto find_nearest(std::span magnitudes, const floatv& phase_direction_real, const floatv& phase_direction_imag, + const zfloatv& v) +{ + // We have to do a 1D nearest neighbor search for magnitude. + // But it's not the magnitude of the input complex value - it's the projection of + // the complex value onto the ray of candidate magnitudes at the selected phase. + // i.e. dot product. + const auto projection = (phase_direction_real * real(v)) + (phase_direction_imag * imag(v)); + //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ??? + return nearest(magnitudes, projection); +} +static inline auto find_nearest(std::span magnitudes, const zfloatv& phase_direction, const zfloatv& v) +{ + return find_nearest(magnitudes, real(phase_direction), imag(phase_direction), v); +} + +void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_unseq_(std::span p, std::span results) const +{ + const auto v = load(p); + + const auto phase = ::getPhase(v, phase_delta); + + const auto phase_direction = lookup(phase, phase_directions); + #ifndef NDEBUG // i.e., debug + for (size_t i = 0; i < phase.size(); i++) + { + const auto pd = phase_directions[phase[i]]; + assert(pd.real() == real(phase_direction)[i]); + assert(pd.imag() == imag(phase_direction)[i]); + } + #endif + + const auto amplitude = ::find_nearest(magnitudes, phase_direction, v); + #ifndef NDEBUG // i.e., debug + for (size_t i = 0; i < amplitude.size(); i++) + { + const auto i_ = phase[i]; + const auto a = find_nearest(phase_directions[i_], p[i]); + assert(a == amplitude[i]); + } + #endif + + auto dest = results.begin(); + for (size_t i = 0; i < v.size(); i++) + { + dest->phase = gsl::narrow_cast(phase[i]); + dest->amplitude = gsl::narrow_cast(amplitude[i]); + + ++dest; + } +} + +void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_unseq(std::span inputs, std::span results) const +{ + auto first = inputs.begin(); + const auto last = inputs.end(); + auto dest = results.begin(); + + // The above code is simpler (no templates) if we use just a single VCL + // complex type: `zfloatv`. If there is any performance difference, + // it will only be for extreme edge cases since the smaller types are only used + // at the end of the loop. + // + // It also makes this loop simpler as we handle all non-multiples-of-8 in + // the same way. + constexpr auto elements_per_iteration = 8; + + // Can do these checks one-time outside of the loop + const auto distance = std::distance(first, last); + + // First, do multiples of 8 + const auto distance_ = distance - (distance % elements_per_iteration); + const auto last_ = first + distance_; + for (; first != last_; first += elements_per_iteration, dest += elements_per_iteration) + { + const auto f = sys::make_span(&(*first), elements_per_iteration); + const auto d = sys::make_span(&(*dest), elements_per_iteration); + nearest_neighbors_unseq_(f, d); + } + + // Then finish off anything left + assert(std::distance(first, last) < elements_per_iteration); + for (; first != last; ++first, ++dest) + { + const auto f = sys::make_span(&(*first), 1); + const auto d = sys::make_span(&(*dest), 1); + nearest_neighbors_seq(f, d); + } +} +std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq_vcl( + std::span inputs, const six::AmplitudeTable* pAmplitudeTable) +{ + // make a structure to quickly find the nearest neighbor + const auto& converter = make_(pAmplitudeTable); + + std::vector retval(inputs.size()); + converter.impl.nearest_neighbors_unseq(inputs, sys::make_span(retval)); + return retval; +} + +#endif // SIX_sicd_has_simd diff --git a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_unseq.cpp b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_unseq.cpp new file mode 100644 index 000000000..541fb4b5c --- /dev/null +++ b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_unseq.cpp @@ -0,0 +1,759 @@ +/* ========================================================================= +* This file is part of six.sicd-c++ +* ========================================================================= +* +* (C) Copyright 2021, Maxar Technologies, Inc. +* +* six.sicd-c++ is free software; you can redistribute it and/or modify +* it under the terms of the GNU Lesser General Public License as published by +* the Free Software Foundation; either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Lesser General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public +* License along with this program; If not, +* see . +* +*/ +#include "six/AmplitudeTable.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#if CODA_OSS_cpp17 + // is broken with the older version of GCC we're using + #if (__GNUC__ >= 10) || _MSC_VER + #include + #define SIX_six_sicd_ComplexToAMP8IPHS8I_has_execution 1 + #endif +#endif + +#include +#include +#include +#include + +#include "six/sicd/Utilities.h" + +#undef min +#undef max + +using zfloat = six::zfloat; +using AMP8I_PHS8I_t = six::AMP8I_PHS8I_t; + +#if SIX_sicd_has_VCL + +#define VCL_NAMESPACE vcl +#if _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4100) // '...': unreferenced formal parameter +#pragma warning(disable: 4127) // conditional expression is constant +#pragma warning(disable: 4244) // '...': conversion from '...' to '...', possible loss of data +#pragma warning(disable: 4723) // potential divide by 0 +#endif +#include "six/sicd/vectorclass/version2/vectorclass.h" +#include "six/sicd/vectorclass/version2/vectormath_trig.h" +#include "six/sicd/vectorclass/complex/complexvec1.h" +#if _MSC_VER +#pragma warning(pop) +#endif + +namespace six +{ +namespace sicd +{ +namespace vcl +{ + +constexpr auto elements_per_iteration = 4; + +using floatv = ::vcl::Vec4f; +using intv = ::vcl::Vec4i; +using zfloatv = ::vcl::Complex4f; + +} // vcl +} // sicd +} // six + +auto real(const six::sicd::vcl::zfloatv& z) +{ + return z.real(); +} +auto imag(const six::sicd::vcl::zfloatv& z) +{ + return z.imag(); +} +static inline size_t size(const six::sicd::vcl::zfloatv& z) noexcept +{ + return z.size(); +} +static inline int ssize(const six::sicd::vcl::zfloatv& z) noexcept +{ + return z.size(); +} +static inline int ssize(const six::sicd::vcl::floatv& z) noexcept +{ + return z.size(); +} +static inline int ssize(const six::sicd::vcl::intv& z) noexcept +{ + return z.size(); +} + +// https://en.cppreference.com/w/cpp/numeric/complex/arg +// > `std::atan2(std::imag(z), std::real(z))` +inline auto arg(const six::sicd::vcl::floatv& real, const six::sicd::vcl::floatv& imag) +{ + return atan2(imag, real); // arg() +} +inline auto arg(const six::sicd::vcl::zfloatv& z) +{ + return arg(real(z), imag(z)); +} + +template +inline auto if_add(const T& f, const six::sicd::vcl::floatv& a, float b) +{ + return vcl::if_add(f, a, b); +} + +inline bool any_of(const six::sicd::vcl::intv& m) +{ + return horizontal_or(m); +} + +static inline void copy_from(std::span p, six::sicd::vcl::floatv& result) +{ + assert(p.size() == result.size()); + result.load(p.data()); +} +static inline auto copy_from(std::span p, six::sicd::vcl::zfloatv& result) +{ + // https://en.cppreference.com/w/cpp/numeric/complex + // > For any pointer to an element of an array of `std::complex` named `p` and any valid array index `i`, ... + // > is the real part of the complex number `p[i]`, ... + result.load(reinterpret_cast(p.data())); +} + +template +static inline auto lookup(const six::sicd::vcl::intv& zindex, const std::array& phase_directions) +{ + return ::vcl::lookup(zindex, phase_directions.data()); +} +static inline auto lookup(const six::sicd::vcl::intv& zindex, std::span magnitudes) +{ + assert(magnitudes.size() == six::AmplitudeTableSize); + return ::vcl::lookup(zindex, magnitudes.data()); +} + +#endif // SIX_sicd_has_VCL + +#if SIX_sicd_has_ximd +#ifndef CODA_OSS_Ximd_ENABLE +#define CODA_OSS_Ximd_ENABLE 1 +#endif +#include + +namespace six +{ +namespace sicd +{ +namespace ximd +{ + +template +using simd = sys::ximd::Ximd; +using intv = simd; +using floatv = simd; +using intv_mask = sys::ximd::ximd_mask; +using floatv_mask = sys::ximd::ximd_mask; + +constexpr auto elements_per_iteration = floatv::size(); + +// Manage a SIMD complex as an array of two SIMDs +using zfloatv = std::array; + +template +static inline auto select(const TTest& test, const TResult& t, const TResult& f) +{ + TResult retval; + for (size_t i = 0; i < test.size(); i++) + { + retval[i] = test[i] ? t[i] : f[i]; + } + return retval; +} + +} // ximd +} // sicd +} // six + +static inline auto& real(six::sicd::ximd::zfloatv& z) noexcept +{ + return z[0]; +} +static inline const auto& real(const six::sicd::ximd::zfloatv& z) noexcept +{ + return z[0]; +} +static inline auto& imag(six::sicd::ximd::zfloatv& z) noexcept +{ + return z[1]; +} +static inline const auto& imag(const six::sicd::ximd::zfloatv& z) noexcept +{ + return z[1]; +} + +static inline size_t size(const six::sicd::ximd::zfloatv& z) noexcept +{ + auto retval = real(z).size(); + assert(retval == imag(z).size()); + return retval; +} +static inline auto ssize(const six::sicd::ximd::zfloatv& z) noexcept +{ + return gsl::narrow(size(z)); +} +template +static inline auto ssize(const six::sicd::ximd::simd& v) noexcept +{ + return gsl::narrow(v.size()); +} + +namespace six +{ +namespace sicd +{ +namespace ximd +{ + +template +static inline auto make_zfloatv(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag) +{ + six::sicd::ximd::zfloatv retval; + for (size_t i = 0; i < size(retval); i++) + { + real(retval)[i] = generate_real(i); + imag(retval)[i] = generate_imag(i); + } + return retval; +} + +} // ximd +} // sicd +} // six + + +static inline auto arg(const six::sicd::ximd::zfloatv& z) +{ + // https://en.cppreference.com/w/cpp/numeric/complex/arg + // > `std::atan2(std::imag(z), std::real(z))` + return atan2(imag(z), real(z)); // arg() +} + +static inline auto copy_from(std::span p, six::sicd::ximd::floatv& result) +{ + assert(p.size() == result.size()); + result.copy_from(p.data()); +} +static inline auto copy_from(std::span p, six::sicd::ximd::zfloatv& result) +{ + const auto generate_real = [&](size_t i) { return p[i].real(); }; + const auto generate_imag = [&](size_t i) { return p[i].imag(); }; + result = six::sicd::ximd::make_zfloatv(generate_real, generate_imag); +} + +static inline auto roundi(const six::sicd::ximd::floatv& v) // match vcl::roundi() +{ + const auto rounded = round(v); + const auto generate_roundi = [&](size_t i) + { return static_cast(rounded[i]); }; + return six::sicd::ximd::intv::generate(generate_roundi); +} + +static inline auto select(const six::sicd::ximd::floatv_mask& test, const six::sicd::ximd::floatv& t, const six::sicd::ximd::floatv& f) +{ + return six::sicd::ximd::select(test, t, f); +} +static inline auto select(const six::sicd::ximd::intv_mask& test, const six::sicd::ximd::intv& t, const six::sicd::ximd::intv& f) +{ + return six::sicd::ximd::select(test, t, f); +} + +static inline auto if_add(const six::sicd::ximd::floatv_mask& m, const six::sicd::ximd::floatv& v, typename six::sicd::ximd::floatv::value_type c) +{ + // phase = if_add(phase < 0.0, phase, std::numbers::pi_v * 2.0f); // Wrap from [0, 2PI] + const auto generate_add = [&](size_t i) { + return m[i] ? v[i] + c : v[i]; + }; + return six::sicd::ximd::floatv::generate(generate_add); +} + +template +static inline auto lookup(const six::sicd::ximd::intv& zindex, const std::array& phase_directions) +{ + // It seems that the "generator" constuctor is called with SIMD instructions. + // https://en.cppreference.com/w/cpp/experimental/simd/simd/simd + // > The calls to `generator` are unsequenced with respect to each other. + + const auto generate_real = [&](size_t i) { + const auto i_ = zindex[i]; + return phase_directions[i_].real(); + }; + const auto generate_imag = [&](size_t i) { + const auto i_ = zindex[i]; + return phase_directions[i_].imag(); + }; + return six::sicd::ximd::make_zfloatv(generate_real, generate_imag); +} + +static auto lookup(const six::sicd::ximd::intv& zindex, std::span magnitudes) +{ + const auto generate = [&](size_t i) { + const auto i_ = zindex[i]; + + // The index may be out of range. This is expected because `i` might be "don't care." + if ((i_ >= 0) && (i_ < std::ssize(magnitudes))) + { + return magnitudes[i_]; + } + return NAN; // propogate "don't care" + }; + return six::sicd::ximd::floatv::generate(generate); +} + +#endif // SIX_sicd_has_ximd + +template +static auto getPhase(const ZFloatV& v, float phase_delta) +{ + // Phase is determined via arithmetic because it's equally spaced. + // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around + // handles cases that are close to 2PI. + auto phase = arg(v); + phase = if_add(phase < 0.0, phase, std::numbers::pi_v * 2.0f); // Wrap from [0, 2PI] + return roundi(phase / phase_delta); +} + +// https://en.cppreference.com/w/cpp/algorithm/lower_bound +/* +template +ForwardIt lower_bound(ForwardIt first, ForwardIt last, const T& value) +{ + ForwardIt it; + typename std::iterator_traits::difference_type count, step; + count = std::distance(first, last); + + while (count > 0) + { + it = first; + step = count / 2; + std::advance(it, step); + + if (*it < value) + { + first = ++it; + count -= step + 1; + } + else + count = step; + } + + return first; +} +*/ +template +inline auto lower_bound_(std::span magnitudes, const FloatV& v) +{ + IntV first = 0; + const IntV last = gsl::narrow(magnitudes.size()); + + auto count = last - first; + while (any_of(count > 0)) + { + auto it = first; + const auto step = count / 2; + it += step; + + auto next = it; ++next; // ... ++it; + auto advance = count; advance -= step + 1; // ... -= step + 1; + + const auto c = lookup(it, magnitudes); // magnituides[it] + const auto test = c < v; + it = select(test, next, it); // ... ++it + first = select(test, it, first); // first = ... + count = select(test, advance, step); // count -= step + 1 <...OR...> count = step + } + return first; +} +template +inline auto lower_bound(std::span magnitudes, const FloatV& value) +{ + auto retval = lower_bound_(magnitudes, value); + + #if CODA_OSS_DEBUG + for (int i = 0; i < ssize(value); i++) + { + const auto it = std::lower_bound(magnitudes.begin(), magnitudes.end(), value[i]); + const auto result = gsl::narrow(std::distance(magnitudes.begin(), it)); + assert(retval[i] == result); + } + #endif + + return retval; +} + +template +static auto nearest(std::span magnitudes, const FloatV& value) +{ + assert(magnitudes.size() == six::AmplitudeTableSize); + + /* + const auto it = std::lower_bound(begin, end, value); + if (it == begin) return 0; + + const auto prev_it = std::prev(it); + const auto nearest_it = it == end ? prev_it : + (value - *prev_it <= *it - value ? prev_it : it); + const auto distance = std::distance(begin, nearest_it); + assert(distance <= std::numeric_limits::max()); + return gsl::narrow(distance); + */ + const auto it = ::lower_bound(magnitudes, value); + const auto prev_it = it - 1; // const auto prev_it = std::prev(it); + + const auto v0 = value - lookup(prev_it, magnitudes); // value - *prev_it + const auto v1 = lookup(it, magnitudes) - value; // *it - value + //const auto nearest_it = select(v0 <= v1, prev_it, it); // (value - *prev_it <= *it - value ? prev_it : it); + + const IntV end = gsl::narrow(magnitudes.size()); + //const auto end_test = select(it == end, prev_it, nearest_it); // it == end ? prev_it : ... + const IntV zero = 0; + auto retval = select(it == 0, zero, // if (it == begin) return 0; + select(it == end, prev_it, // it == end ? prev_it : ... + select(v0 <=v1, prev_it, it) // (value - *prev_it <= *it - value ? prev_it : it); + )); + return retval; +} + +template +static auto find_nearest(std::span magnitudes, + const FloatV& phase_direction_real, const FloatV& phase_direction_imag, + const ZFloatV& v) +{ + // We have to do a 1D nearest neighbor search for magnitude. + // But it's not the magnitude of the input complex value - it's the projection of + // the complex value onto the ray of candidate magnitudes at the selected phase. + // i.e. dot product. + const auto projection = (phase_direction_real * real(v)) + (phase_direction_imag * imag(v)); + //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ??? + return nearest(magnitudes, projection); +} + +#if SIX_sicd_has_VCL +static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter, + const six::sicd::vcl::intv& phase, const six::sicd::vcl::zfloatv& v) +{ + const auto& impl = converter.impl; + + const auto phase_direction_real = lookup(phase, impl.phase_directions_real); + const auto phase_direction_imag = lookup(phase, impl.phase_directions_imag); + return ::find_nearest(impl.magnitudes, phase_direction_real, phase_direction_imag, v); +} +#endif + +#if SIX_sicd_has_ximd +static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter, + const six::sicd::ximd::intv& phase, const six::sicd::ximd::zfloatv& v) +{ + const auto& impl = converter.impl; + + const auto phase_direction = lookup(phase, impl.phase_directions); + return ::find_nearest(impl.magnitudes, real(phase_direction), imag(phase_direction), v); +} +#endif + +#if SIX_sicd_ComplexToAMP8IPHS8I_unseq +template +void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_unseq_T(std::span p, std::span results) const +{ + ZFloatV v; + assert(p.size() == size(v)); + + copy_from(p, v); + #if CODA_OSS_DEBUG + for (int i = 0; i < ssize(v); i++) + { + const auto z = p[i]; + assert(real(v)[i] == z.real()); + assert(imag(v)[i] == z.imag()); + } + #endif + + const auto phase = ::getPhase(v, phase_delta); + #if CODA_OSS_DEBUG + for (int i = 0; i < ssize(phase); i++) + { + const auto phase_ = getPhase(p[i]); + assert(static_cast(phase[i]) == phase_); + } + #endif + + const auto amplitude = lookup_and_find_nearest(converter, phase, v); + #if CODA_OSS_DEBUG + for (int i = 0; i < ssize(amplitude); i++) + { + const auto i_ = phase[i]; + const auto a = find_nearest(phase_directions[i_], p[i]); + assert(a == amplitude[i]); + } + #endif + + // interleave() and store() is slower than an explicit loop. + auto dest = results.begin(); + for (int i = 0; i < ssize(v); i++) + { + dest->phase = gsl::narrow_cast(phase[i]); + dest->amplitude = gsl::narrow_cast(amplitude[i]); + + ++dest; + } +} + +template +void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_unseq(std::span inputs, std::span results) const +{ + auto first = inputs.begin(); + const auto last = inputs.end(); + auto dest = results.begin(); + + // The above code is simpler (no templates) if we use just a single VCL + // complex type: `zfloatv`. If there is any performance difference, + // it will only be for extreme edge cases since the smaller types are only used + // at the end of the loop. + // + // It also makes this loop simpler as we handle all non-multiples-of-8 in + // the same way. + + // Can do these checks one-time outside of the loop + const auto distance = std::distance(first, last); + + // First, do multiples of 8 + const auto distance_ = distance - (distance % elements_per_iteration); + const auto last_ = first + distance_; + for (; first != last_; first += elements_per_iteration, dest += elements_per_iteration) + { + const auto f = sys::make_span(&(*first), elements_per_iteration); + const auto d = sys::make_span(&(*dest), elements_per_iteration); + + nearest_neighbors_unseq_T(f, d); + } + + // Then finish off anything left + assert(std::distance(first, last) < elements_per_iteration); + for (; first != last; ++first, ++dest) + { + const auto f = sys::make_span(&(*first), 1); + const auto d = sys::make_span(&(*dest), 1); + nearest_neighbors_seq(f, d); + } +} +#endif // SIX_sicd_ComplexToAMP8IPHS8I_unseq + +#if SIX_sicd_has_VCL +std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq_vcl( + std::span inputs, const six::AmplitudeTable* pAmplitudeTable) +{ + // make a structure to quickly find the nearest neighbor + const auto& converter = make_(pAmplitudeTable); + + std::vector retval(inputs.size()); + converter.impl.nearest_neighbors_unseq(inputs, sys::make_span(retval)); + return retval; +} +#endif // SIX_sicd_has_VCL + +#if SIX_sicd_has_ximd +std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq_ximd( + std::span inputs, const six::AmplitudeTable* pAmplitudeTable) +{ + // make a structure to quickly find the nearest neighbor + const auto& converter = make_(pAmplitudeTable); + + std::vector retval(inputs.size()); + converter.impl.nearest_neighbors_unseq(inputs, sys::make_span(retval)); + return retval; +} +#endif // SIX_sicd_has_ximd + +/********************************************************************** + +// This is here (instead of **ComplexToAMP8IPHS8I.cpp**) because par_unseq() might +// need to know implementation details of _unseq() +using input_it = std::span::iterator; +using output_it = std::span::iterator; + +struct const_iterator final +{ + using Type = input_it::value_type; + + using iterator_category = std::random_access_iterator_tag; + using value_type = std::remove_cv_t; + using difference_type = std::ptrdiff_t; + using pointer = Type*; + using reference = Type&; + using const_reference = const Type&; + + input_it current_; + + const_iterator() = default; + const_iterator(input_it it) : current_(it) {} + + const_reference operator*() const noexcept + { + return *current_; + } + + const_iterator& operator++() noexcept + { + for (ptrdiff_t i = 0; i < 8; i++) + { + ++current_; + } + return *this; + } + + const_iterator& operator--() noexcept + { + --current_; + return *this; + } + + const_iterator& operator-=(const difference_type n) noexcept + { + current_ -= n; + return *this; + } + const_iterator operator-(const difference_type n) const noexcept + { + auto ret = *this; + ret -= n; + return ret; + } + difference_type operator-(const const_iterator& rhs) const noexcept + { + return current_ - rhs.current_; + } + + bool operator!=(const const_iterator& rhs) const noexcept + { + return current_ != rhs.current_; + } +}; + +struct result_wrapper final +{ + output_it current_; + + result_wrapper& operator=(const std::vector& values) + { + for (auto& v : values) + { + *current_ = v; + ++current_; + } + return *this; + } +}; + +struct iterator final +{ + using Type = output_it::value_type; + + using iterator_category = std::random_access_iterator_tag; + using value_type = std::remove_cv_t; + using difference_type = std::ptrdiff_t; + using pointer = Type*; + using reference = Type&; + + output_it current_; + + iterator() = default; + iterator(output_it it) : current_(it) {} + + result_wrapper operator*() noexcept + { + return result_wrapper{ current_}; + } + + iterator& operator++() noexcept + { + for (ptrdiff_t i = 0; i < 8; i++) + { + ++current_; + } + return *this; + } + + iterator& operator--() noexcept + { + --current_; + return *this; + } + + iterator& operator-=(const difference_type n) noexcept + { + current_ -= n; + return *this; + } + + bool operator!=(const iterator& rhs) const noexcept + { + return current_ != rhs.current_; + } +}; + +void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_par_unseq(std::span inputs, std::span results) const +{ + const auto first = inputs.begin(); + const auto last = inputs.end(); + auto dest = results.begin(); + + const auto func = [&](const auto& v) { + std::span values(&v, 8); + + std::vector retval(values.size()); + nearest_neighbors_unseq(values, sys::make_span(retval)); + return retval; + }; + + std::ignore = std::transform(std::execution::seq, + const_iterator{ first }, const_iterator{ last }, iterator{ dest }, func); +} +#if SIX_sicd_ComplexToAMP8IPHS8I_unseq +std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par_unseq( + std::span inputs, const six::AmplitudeTable* pAmplitudeTable) +{ + // make a structure to quickly find the nearest neighbor + const auto& converter = make_(pAmplitudeTable); + + std::vector retval(inputs.size()); + converter.impl.nearest_neighbors_par_unseq(inputs, sys::make_span(retval)); + return retval; +} +#endif // SIX_sicd_ComplexToAMP8IPHS8I_unseq + +**********************************************************************/ diff --git a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_vcl.cpp b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_vcl.cpp deleted file mode 100644 index 74fa23225..000000000 --- a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_vcl.cpp +++ /dev/null @@ -1,239 +0,0 @@ -/* ========================================================================= -* This file is part of six.sicd-c++ -* ========================================================================= -* -* (C) Copyright 2021, Maxar Technologies, Inc. -* -* six.sicd-c++ is free software; you can redistribute it and/or modify -* it under the terms of the GNU Lesser General Public License as published by -* the Free Software Foundation; either version 3 of the License, or -* (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Lesser General Public License for more details. -* -* You should have received a copy of the GNU Lesser General Public -* License along with this program; If not, -* see . -* -*/ -#include "six/AmplitudeTable.h" - -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "six/sicd/Utilities.h" - -#undef min -#undef max - -#if SIX_sicd_has_VCL - -#define VCL_NAMESPACE vcl -#if _MSC_VER -#pragma warning(disable: 4100) // '...': unreferenced formal parameter -#endif -#include "six/sicd/vectorclass/version2/vectorclass.h" -#include "six/sicd/vectorclass/version2/vectormath_trig.h" -#include "six/sicd/vectorclass/complex/complexvec1.h" - -// https://en.cppreference.com/w/cpp/experimental/simd -using zfloatv = vcl::Complex8f; -using floatv = vcl::Vec8f; -using intv = vcl::Vec8i; - -// https://en.cppreference.com/w/cpp/numeric/complex/arg -// > `std::atan2(std::imag(z), std::real(z))` -inline auto arg(const floatv& real, const floatv& imag) -{ - return atan2(imag, real); // arg() -} -inline auto arg(const zfloatv& z) -{ - return arg(z.real(), z.imag()); -} - -static inline auto interleave(const intv& a, const intv& b) -{ - // The blend() indicies are based on one large array - auto index0 = vcl::blend8<0, 8, 1, 9, 2, 10, 3, 11>(a, b); // i.e., a[0], b[0], a[1], b[1], ... - auto index1 = vcl::blend8<4, 12, 5, 13, 6, 14, 7, 15>(a, b); - return vcl::Vec16i(std::move(index0), std::move(index1)); -} - -// There's no `vcl::lookup()` for `std::complex*`, implement using floats -template -static auto lookup(const intv& zindex, const std::array& table_) -{ - const auto table = reinterpret_cast(table_.data()); - constexpr auto size_as_floats = N * 2; // table_t is six::zfloat - - // A `six::zfloat` at *n* is at *2n* when viewed as `float`. - const auto real_index = zindex * 2; - // The imaginary part is after the real part - const auto imag_index = real_index + 1; // [n] is real, [n+1] is imag - const auto index = interleave(real_index, imag_index); - - auto lookup = vcl::lookup(index, table); - return zfloatv(std::move(lookup)); -} - -static auto getPhase(const zfloatv& v, float phase_delta) -{ - // Phase is determined via arithmetic because it's equally spaced. - // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around - // handles cases that are close to 2PI. - auto phase = arg(v); - phase = if_add(phase < 0.0, phase, std::numbers::pi_v * 2.0f); // Wrap from [0, 2PI] - return roundi(phase / phase_delta); -} - -inline auto lower_bound(std::span magnitudes, const floatv& value) -{ - const auto begin = magnitudes.begin(); - const auto end = magnitudes.end(); - - intv retval; - for (int i = 0; i < value.size(); i++) - { - const auto it = std::lower_bound(begin, end, value[i]); - const auto result = std::distance(begin, it); - retval.insert(i, gsl::narrow(result)); - } - return retval; -} -static auto nearest(std::span magnitudes, const floatv& value) -{ - assert(magnitudes.size() == six::AmplitudeTableSize); - - /* - const auto it = std::lower_bound(begin, end, value); - if (it == begin) return 0; - - const auto prev_it = std::prev(it); - const auto nearest_it = it == end ? prev_it : - (value - *prev_it <= *it - value ? prev_it : it); - const auto distance = std::distance(begin, nearest_it); - assert(distance <= std::numeric_limits::max()); - return gsl::narrow(distance); - */ - const auto it = ::lower_bound(magnitudes, value); - const auto prev_it = it - 1; // const auto prev_it = std::prev(it); - - const auto v0 = value - vcl::lookup(prev_it, magnitudes.data()); // value - *prev_it - const auto v1 = vcl::lookup(it, magnitudes.data()) - value; // *it - value - //const auto nearest_it = select(v0 <= v1, prev_it, it); // (value - *prev_it <= *it - value ? prev_it : it); - - const intv end = gsl::narrow(magnitudes.size()); - //const auto end_test = select(it == end, prev_it, nearest_it); // it == end ? prev_it : ... - const intv zero = 0; - auto retval = select(it == 0, zero, // if (it == begin) return 0; - select(it == end, prev_it, // it == end ? prev_it : ... - select(v0 <=v1, prev_it, it) // (value - *prev_it <= *it - value ? prev_it : it); - )); - return retval; -} - -static inline auto find_nearest(std::span magnitudes, const zfloatv& phase_direction, const zfloatv& v) -{ - // We have to do a 1D nearest neighbor search for magnitude. - // But it's not the magnitude of the input complex value - it's the projection of - // the complex value onto the ray of candidate magnitudes at the selected phase. - // i.e. dot product. - const auto projection = (phase_direction.real() * v.real()) + (phase_direction.imag() * v.imag()); - //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ??? - return nearest(magnitudes, projection); -} -static inline auto find_nearest(std::span magnitudes, const floatv& phase_direction_real, const floatv& phase_direction_imag, - const zfloatv& v) -{ - // We have to do a 1D nearest neighbor search for magnitude. - // But it's not the magnitude of the input complex value - it's the projection of - // the complex value onto the ray of candidate magnitudes at the selected phase. - // i.e. dot product. - const auto projection = (phase_direction_real * v.real()) + (phase_direction_imag * v.imag()); - //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ??? - return nearest(magnitudes, projection); -} - -template< typename TOutputIter> -void six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq_(const six::zfloat* p, TOutputIter dest) const -{ - // https://en.cppreference.com/w/cpp/numeric/complex - // > For any pointer to an element of an array of `std::complex` named `p` and any valid array index `i`, ... - // > is the real part of the complex number `p[i]`, ... - zfloatv v; - v.load(reinterpret_cast(p)); - - const auto phase = ::getPhase(v, phase_delta); - - //const auto phase_direction = lookup(phase, phase_directions); - //const auto amplitude = ::find_nearest(magnitudes, phase_direction, v); - const auto phase_direction_real = vcl::lookup(phase, phase_directions_real.data()); - const auto phase_direction_imag = vcl::lookup(phase, phase_directions_imag.data()); - //const auto amplitude = ::find_nearest(magnitudes, phase_direction_real, phase_direction_imag, v); - - // interleave() and store() is slower than an explicit loop. - for (int i = 0; i < v.size(); i++) - { - dest->phase = gsl::narrow_cast(phase[i]); - - //dest->amplitude = find_nearest(phase_directions[dest->phase], p[i]); - //const auto phase_direction_ = phase_direction.extract(i); - //dest->amplitude = find_nearest(six::zfloat(phase_direction_.real(), phase_direction_.imag()), p[i]); - dest->amplitude = find_nearest(six::zfloat(phase_direction_real[i], phase_direction_imag[i]), p[i]); - //dest->amplitude = gsl::narrow_cast(amplitude[i]); - - ++dest; - } -} - -template -void six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq(TInputIt first, TInputIt last, TOutputIt dest) const -{ - // The above code is simpler (no templates) if we use just a single VCL - // complex type: `zfloatv`. If there is any performance differ4ence, - // it will only be for extreme edge cases since the smaller types are only used - // at the end of the loop. - // - // It also makes this loop simpler as we handle all non-multiples-of-8 in - // the same way. - for (; first != last; ++first, ++dest) - { - const auto distance = std::distance(first, last); - assert(distance > 0); // should be out of the loop! - if (distance % 8 == 0) - { - nearest_neighbors_unseq_(&(*first), dest); - first += 7; dest += 7; - } - else - { - nearest_neighbors_seq(first, last, dest); - } - } -} -std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq( - std::span inputs, const six::AmplitudeTable* pAmplitudeTable) -{ - // make a structure to quickly find the nearest neighbor - const auto& converter = make_(pAmplitudeTable); - - std::vector retval(inputs.size()); - converter.nearest_neighbors_unseq(inputs.begin(), inputs.end(), retval.begin()); - return retval; -} - -#endif // SIX_sicd_have_VCL \ No newline at end of file diff --git a/six/modules/c++/six/include/six/AmplitudeTable.h b/six/modules/c++/six/include/six/AmplitudeTable.h index 8fc034402..966a160a9 100644 --- a/six/modules/c++/six/include/six/AmplitudeTable.h +++ b/six/modules/c++/six/include/six/AmplitudeTable.h @@ -35,6 +35,7 @@ #include #include +#include #include #include @@ -171,7 +172,7 @@ struct SIX_SIX_API AMP8I_PHS8I_t final uint8_t phase; }; -// Control a few details of the ComplexToAMP8IPHS8I implementation, espeically "unseq" (i.e., SIMD). +// Control a few details of the ComplexToAMP8IPHS8I implementation, especially "unseq" (i.e., SIMD). #ifndef SIX_sicd_has_VCL // Do we have the "vectorclass" library? https://github.com/vectorclass/version2 #if !CODA_OSS_cpp17 // VCL needs C++17 @@ -180,35 +181,36 @@ struct SIX_SIX_API AMP8I_PHS8I_t final // __has_include is part of C++17 #if __has_include("../../../six.sicd/include/six/sicd/vectorclass/version2/vectorclass.h") || \ __has_include("six.sicd/include/six/sicd/vectorclass/version2/vectorclass.h") - #define SIX_sicd_has_VCL 1 + #define SIX_sicd_has_VCL 1 #else #define SIX_sicd_has_VCL 0 #endif // __has_include #endif // C++17 #endif -#ifndef SIX_sicd_has_experimental_simd +#ifndef SIX_sicd_has_simd // Do we have the `std::experimental::simd? https://en.cppreference.com/w/cpp/experimental/simd - #if (__GNUC__ >= 11) && CODA_OSS_cpp20 + #if (__GNUC__ >= 999) && CODA_OSS_cpp20 // TODO: 11 instead of 999 // https://github.com/VcDevel/std-simd "... shipping with GCC since version 11." - #include - #define SIX_sicd_has_experimental_simd 1 + #define SIX_sicd_has_simd 1 #else - #define SIX_sicd_has_experimental_simd 0 + #define SIX_sicd_has_simd 0 #endif // __GNUC__ #endif -#if SIX_sicd_has_VCL && SIX_sicd_has_experimental_simd - // This is because there is a single ComplexToAMP8IPHS8I::nearest_neighbors_unseq() method. - // Other than that, it should be possible to use both VCL and std::experimental::simd - #error "Can't enable both VCL and std::experimental::simd at the same time'" +#ifndef SIX_sicd_has_ximd + // This is a "hacked up" version of std::experimental::simd using std::array. + // It's primarily for development and testing: VCL needs C++17 and + // std::experimental::simd is G++11/C++20. + #define SIX_sicd_has_ximd CODA_OSS_DEBUG #endif + #ifndef SIX_sicd_ComplexToAMP8IPHS8I_unseq - #if SIX_sicd_has_VCL || SIX_sicd_has_experimental_simd + #if SIX_sicd_has_VCL || SIX_sicd_has_simd || SIX_sicd_has_ximd #define SIX_sicd_ComplexToAMP8IPHS8I_unseq 1 #else #define SIX_sicd_ComplexToAMP8IPHS8I_unseq 0 - #endif // SIX_sicd_have_VCL || SIX_sicd_have_experimental_simd + #endif // SIX_sicd_have_VCL || SIX_sicd_has_simd #endif // SIX_sicd_ComplexToAMP8IPHS8I_unseq @@ -247,38 +249,65 @@ class ComplexToAMP8IPHS8I final AMP8I_PHS8I_t nearest_neighbor_(const six::zfloat& v) const; static std::vector nearest_neighbors_par(std::span inputs, const six::AmplitudeTable*); static std::vector nearest_neighbors_seq(std::span inputs, const six::AmplitudeTable*); + #if SIX_sicd_ComplexToAMP8IPHS8I_unseq static std::vector nearest_neighbors_unseq(std::span inputs, const six::AmplitudeTable*); static std::vector nearest_neighbors_par_unseq(std::span inputs, const six::AmplitudeTable*); + + #if SIX_sicd_has_VCL + static std::vector nearest_neighbors_unseq_vcl(std::span inputs, const six::AmplitudeTable*); + #endif + #if SIX_sicd_has_simd + static std::vector nearest_neighbors_unseq_simd(std::span inputs, const six::AmplitudeTable*); #endif + #if SIX_sicd_has_ximd + static std::vector nearest_neighbors_unseq_ximd(std::span inputs, const six::AmplitudeTable*); + #endif + #endif + static std::vector nearest_neighbors(std::span inputs, const six::AmplitudeTable*); // one of the above private: - template - void nearest_neighbors_seq(TInputIt first, TInputIt last, TOutputIt dest) const; - template - void nearest_neighbors_par(TInputIt first, TInputIt last, TOutputIt dest) const; - template - void nearest_neighbors_unseq(TInputIt first, TInputIt last, TOutputIt dest) const; - template - void nearest_neighbors_par_unseq(TInputIt first, TInputIt last, TOutputIt dest) const; - - template< typename TOutputIter> - void nearest_neighbors_unseq_(const six::zfloat* p, TOutputIter dest) const; - - //! The sorted set of possible magnitudes order from small to large. - std::array uncached_magnitudes; // Order is important! This must be ... - std::span magnitudes; // ... before this. - uint8_t find_nearest(six::zfloat phase_direction, six::zfloat v) const; - - //! The difference in phase angle between two UINT phase values. - float phase_delta; - uint8_t getPhase(six::zfloat) const; - - //! Unit vector rays that represent each direction that phase can point. - std::array phase_directions; // interleaved, std::complex - std::array phase_directions_real; - std::array phase_directions_imag; + struct Impl final + { + const ComplexToAMP8IPHS8I& converter; + Impl(const ComplexToAMP8IPHS8I& c) : converter(c) {} + ~Impl() = default; + Impl(const Impl&) = delete; + Impl& operator=(const Impl&) = delete; + Impl(Impl&&) = delete; // implicitly deleted because of =delete for copy + Impl& operator=(Impl&&) = delete; // implicitly deleted because of =delete for copy + + void nearest_neighbors_seq(std::span inputs, std::span results) const; + void nearest_neighbors_par(std::span inputs, std::span results) const; + + #if SIX_sicd_ComplexToAMP8IPHS8I_unseq + template + void nearest_neighbors_unseq(std::span inputs, std::span results) const; + void nearest_neighbors_par_unseq(std::span inputs, std::span results) const; + + template + void nearest_neighbors_unseq_T(std::span, std::span) const; + #endif + + //! The sorted set of possible magnitudes order from small to large. + std::array uncached_magnitudes; + std::span magnitudes; + uint8_t find_nearest(zfloat phase_direction, zfloat v) const; + + //! The difference in phase angle between two UINT phase values. + float phase_delta; + uint8_t getPhase(zfloat) const; + + //! Unit vector rays that represent each direction that phase can point. + std::array phase_directions; // interleaved, std::complex + #ifdef SIX_sicd_has_VCL + std::array phase_directions_real; + std::array phase_directions_imag; + #endif + }; +public: + Impl impl; }; } }