diff --git a/six/modules/c++/six.sicd/include/six/sicd/NearestNeighbors.h b/six/modules/c++/six.sicd/include/six/sicd/NearestNeighbors.h index ea5aaf159..e9629551e 100644 --- a/six/modules/c++/six.sicd/include/six/sicd/NearestNeighbors.h +++ b/six/modules/c++/six.sicd/include/six/sicd/NearestNeighbors.h @@ -68,15 +68,15 @@ struct NearestNeighbors final uint8_t find_nearest(six::zfloat phase_direction, six::zfloat v) const; uint8_t getPhase(six::zfloat) const; -#if SIX_sicd_ComplexToAMP8IPHS8I_unseq - template - auto nearest_neighbors_unseq_T(const std::array&) const; // TODO: std::span ... ? - template - void nearest_neighbors_unseq_(std::span inputs, std::span results) const; + #if SIX_sicd_ComplexToAMP8IPHS8I_unseq + void nearest_neighbors_(execution_policy, std::span inputs, std::span results) const; + + template + auto unseq_nearest_neighbors(std::span p) const; // TODO: std::span ... ? template - void nearest_neighbors_par_unseq_T(std::span inputs, std::span results) const; -#endif + void nearest_neighbors_T(execution_policy, std::span inputs, std::span results) const; + #endif }; } diff --git a/six/modules/c++/six.sicd/source/NearestNeighbors.cpp b/six/modules/c++/six.sicd/source/NearestNeighbors.cpp index f1ed35ccb..71f9fe62b 100644 --- a/six/modules/c++/six.sicd/source/NearestNeighbors.cpp +++ b/six/modules/c++/six.sicd/source/NearestNeighbors.cpp @@ -151,8 +151,9 @@ void six::sicd::NearestNeighbors::nearest_neighbors(execution_policy policy, std // > all standard execution policies can fall back to sequential execution. #if CODA_OSS_DEBUG throw std::logic_error("Unhandled execution_policy value."); - #endif + #else return nearest_neighbors(inputs, results); // no policy specified, "default policy" + #endif } AMP8I_PHS8I six::sicd::nearest_neighbor(const details::ComplexToAMP8IPHS8I& converter_, const zfloat& v) diff --git a/six/modules/c++/six.sicd/source/NearestNeighbors_unseq.cpp b/six/modules/c++/six.sicd/source/NearestNeighbors_unseq.cpp index 1c57eeb47..0de69a6fc 100644 --- a/six/modules/c++/six.sicd/source/NearestNeighbors_unseq.cpp +++ b/six/modules/c++/six.sicd/source/NearestNeighbors_unseq.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -679,18 +680,7 @@ inline auto lower_bound_(std::span magnitudes, const FloatV& v) template inline auto lower_bound(std::span magnitudes, const FloatV& value) { - auto retval = lower_bound_(magnitudes, value); - - #if CODA_OSS_DEBUG - for (int i = 0; i < ssize(value); i++) - { - const auto it = std::lower_bound(magnitudes.begin(), magnitudes.end(), value[i]); - const auto result = gsl::narrow(std::distance(magnitudes.begin(), it)); - assert(retval[i] == result); - } - #endif - - return retval; + return lower_bound_(magnitudes, value); } template @@ -786,6 +776,8 @@ static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8 #if SIX_sicd_ComplexToAMP8IPHS8I_unseq +/******************************************************************************************************/ + template struct AMP8I_PHS8I_unseq final { @@ -793,92 +785,132 @@ struct AMP8I_PHS8I_unseq final IntV phase; }; -// Cast a blob of T* into "chunks" of T[N]; "undecay" an array. -// This isn't quite kosher, see: https://stackoverflow.com/questions/47924103/pointer-interconvertibility-vs-having-the-same-address -template -static inline auto array_cast(std::span data) +// This isn't someplace more widely available because: +// 1. there is no standard iteration for `mdspan` (not even in C++23), and +// 2. the particular needs are quite varied and require a lot of parameterization. +// +// Keep things simple for now and do this "in place." +template> +struct mdspan_iterator final +{ + using difference_type = std::ptrdiff_t; + using size_type = size_t; + using value_type = TValueType; + using pointer = TValueType*; + using const_pointer = const TValueType*; + using reference = TValueType; + using const_reference = const TValueType; + using iterator_category = std::random_access_iterator_tag; + + mdspan_iterator() = default; + mdspan_iterator(coda_oss::mdspan md, difference_type row = 0) : md_(md), row_(row) {} + + value_type operator*() const + { + return make_span(); + } + + mdspan_iterator& operator++() noexcept + { + ++row_; + return *this; + } + + mdspan_iterator& operator+=(const difference_type n) noexcept + { + row_ += n; + return *this; + } + mdspan_iterator operator+(const difference_type n) const noexcept + { + auto ret = *this; + ret += n; + return ret; + } + + difference_type operator-(const mdspan_iterator& rhs) const noexcept + { + return row_ - rhs.row_; + } + + bool operator!=(const mdspan_iterator& rhs) const noexcept + { + return row_ != rhs.row_; + } + +private: + coda_oss::mdspan md_; + difference_type row_ = 0; + + auto make_span() const + { + // We know our mdspan is contiguous, so this is OK + auto&& v = md_(row_, 0); // beginning of the the current row + return std::span(&v, md_.extent(1)); // span for the whole row + } +}; +template +auto begin(coda_oss::mdspan md) +{ + return mdspan_iterator(md, 0); +} +template +auto end(coda_oss::mdspan md) +{ + return mdspan_iterator(md, md.extent(0)); +} + +template +using const_mdspan_iterator = mdspan_iterator, TExtents>; +template +auto cbegin(coda_oss::mdspan md) +{ + return const_mdspan_iterator(md, 0); +} +template +auto cend(coda_oss::mdspan md) { - using chunk_t = std::array; - const auto number_of_chunks = data.size() / N; - const void* const pData = data.data(); - return sys::make_span(static_cast(pData), number_of_chunks); + return const_mdspan_iterator(md, md.extent(0)); } -// Inside of `std::transform()`, there is a copy; something like +// Inside of `std::transform()`, there is an assignment; something like // ``` // *dest = func(*first); // ``` -// By returning our own class from `func()`, we can take control of the assignment operator -// and use that to copy from `AMP8I_PHS8I_unseq` to `std::array&`. +// By returning our own class from `func()`, we can take control of the assignment operator. // (Unlike most other operators, `operator=()` *must* be a member-function.) -// -// Using inheritance to avoid padding at the end of the `struct`. ... needed? -template -struct AMP8I_PHS8I_array final : public std::array +struct mdspan_iterator_value final { - AMP8I_PHS8I_array& operator=(const AMP8I_PHS8I_unseq&) = delete; // should only be using move-assignment - AMP8I_PHS8I_array& operator=(AMP8I_PHS8I_unseq&& other) - { - for (int i = 0; i < gsl::narrow(N); i++) + std::span p_; + + mdspan_iterator_value(std::span s) : p_(s) {} + template + mdspan_iterator_value& operator=(const AMP8I_PHS8I_unseq& other) { + //assert(p_.size() <= size(other.amplitude)); + for (size_t i = 0; i < p_.size(); i++) { - (*this)[i].phase = gsl::narrow(other.phase[i]); - (*this)[i].amplitude = gsl::narrow(other.amplitude[i]); + const auto i_ = gsl::narrow(i); + p_[i].amplitude = gsl::narrow(other.amplitude[i_]); + p_[i].phase = gsl::narrow(other.phase[i_]); } return *this; } }; -template -static inline auto AMP8I_PHS8I_array_cast(std::span data) -{ - // This isn't quite kosher, see `array_cast()` above. - using chunk_t = AMP8I_PHS8I_array; - const auto number_of_chunks = data.size() / N; - void* const pDest = data.data(); - return std::span(static_cast(pDest), number_of_chunks); -} template using IntV = decltype(::getPhase(ZFloatV{}, 0.0f)); -// The compiler can sometimes do better optimization with fixed-size structures. -template -auto six::sicd::NearestNeighbors::nearest_neighbors_unseq_T(const std::array& p) const // TODO: std::span ... ? +template +auto six::sicd::NearestNeighbors::unseq_nearest_neighbors(std::span p) const // TODO: std::span ... ? The compiler can sometimes do better optimization with fixed-size structures. { ZFloatV v; assert(p.size() == size(v)); - copy_from(p, v); - #if CODA_OSS_DEBUG - for (int i = 0; i < ssize(v); i++) - { - //const auto z = p[i]; - //assert(real(v)[i] == z.real()); - //assert(imag(v)[i] == z.imag()); - } - #endif using intv_t = IntV; AMP8I_PHS8I_unseq retval; - retval.phase = ::getPhase(v, converter_.phase_delta()); - #if CODA_OSS_DEBUG - for (int i = 0; i < ssize(retval.phase); i++) - { - const auto phase_ = converter_.getPhase(p[i]); - assert(static_cast(retval.phase[i]) == phase_); - } - #endif - retval.amplitude = lookup_and_find_nearest(converter_, retval.phase, v); - #if CODA_OSS_DEBUG - for (int i = 0; i < ssize(retval.amplitude); i++) - { - const auto i_ = retval.phase[i]; - const auto& phase_directions = converter_.get_phase_directions().value; - const auto a = find_nearest(phase_directions[i_], p[i]); - assert(a == retval.amplitude[i]); - } - #endif return retval; } @@ -899,46 +931,41 @@ static void finish_nearest_neighbors_unseq(const six::sicd::NearestNeighbors& im } template -void six::sicd::NearestNeighbors::nearest_neighbors_unseq_(std::span inputs, std::span results) const +void six::sicd::NearestNeighbors::nearest_neighbors_T(execution_policy policy, + std::span inputs, std::span results) const { - using intv_t = IntV; - // View the data as chunks of *elements_per_iteration*. This allows iterating // to go *elements_per_iteration* at a time; and each chunk can be processed // using `nearest_neighbors_unseq_T()`, above. - // - // This isn't quite kosher, see: https://stackoverflow.com/questions/47924103/pointer-interconvertibility-vs-having-the-same-address - auto const first = array_cast(inputs); - auto const dest = AMP8I_PHS8I_array_cast(results); + using extents_t = coda_oss::dextents; // two dimensions: M×N + const extents_t extents{ inputs.size() / elements_per_iteration, elements_per_iteration }; + const coda_oss::mdspan md_inputs(inputs.data(), extents); + assert(md_inputs.size() <= inputs.size()); + auto const b = cbegin(md_inputs); + auto const e = cend(md_inputs); + + const coda_oss::mdspan md_results(results.data(), extents); + assert(md_results.size() <= results.size()); + auto const d = begin(md_results); const auto func = [&](const auto& v) { - return nearest_neighbors_unseq_T(v); + return unseq_nearest_neighbors(v); }; - std::transform(first.begin(), first.end(), dest.begin(), func); - - // Then finish off anything left - finish_nearest_neighbors_unseq(*this, inputs, results); -} - -template -void six::sicd::NearestNeighbors::nearest_neighbors_par_unseq_T(std::span inputs, std::span results) const -{ - using intv_t = IntV; - - // View the data as chunks of *elements_per_iteration*. This allows iterating - // to go *elements_per_iteration* at a time; and each chunk can be processed - // using `nearest_neighbors_unseq_T()`, above. - // - // This isn't quite kosher, see: https://stackoverflow.com/questions/47924103/pointer-interconvertibility-vs-having-the-same-address - auto const first = array_cast(inputs); - auto const dest = AMP8I_PHS8I_array_cast(results); - const auto func = [&](const auto& v) + if (policy == execution_policy::unseq) { - return nearest_neighbors_unseq_T(v); - }; - mt::Transform_par(first.begin(), first.end(), dest.begin(), func); + std::transform(/*std::execution::unseq,*/ b, e, d, func); + } + else if (policy == execution_policy::par_unseq) + { + //std::transform(std::execution::par_unseq, b, e, d, func); + mt::Transform_par(b, e, d, func); + } + else + { + throw std::logic_error("Unsupported execution_policy"); + } // Then finish off anything left finish_nearest_neighbors_unseq(*this, inputs, results); @@ -977,74 +1004,51 @@ std::string SIX_SICD_API six_sicd_set_nearest_neighbors_unseq(std::string unseq) return retval; } -void six::sicd::NearestNeighbors::nearest_neighbors_unseq(std::span inputs, std::span results) const +void six::sicd::NearestNeighbors::nearest_neighbors_(execution_policy policy, + std::span inputs, std::span results) const { // TODO: there could be more complicated logic here to determine which UNSEQ // implementation to use. - // This is very simple as it's only used for unit-testing const auto& unseq = ::nearest_neighbors_unseq_; #if SIX_sicd_has_simd if (unseq == unseq_simd) { - return nearest_neighbors_unseq_(inputs, results); + return nearest_neighbors_T(policy, inputs, results); } #endif #if SIX_sicd_has_VCL if (unseq == unseq_vcl) { - return nearest_neighbors_unseq_(inputs, results); + return nearest_neighbors_T(policy, inputs, results); } #endif #if SIX_sicd_has_valarray if (unseq == unseq_valarray) { - return nearest_neighbors_unseq_(inputs, results); + return nearest_neighbors_T(policy, inputs, results); } #endif #if SIX_sicd_has_ximd if (unseq == unseq_ximd) { - return nearest_neighbors_unseq_(inputs, results); + return nearest_neighbors_T(policy, inputs, results); } #endif - throw std::logic_error("Don't know how to implement nearest_neighbors_unseq() for unseq=" + unseq); + throw std::logic_error("Don't know how to implement nearest_neighbors_() for unseq=" + unseq); +} +void six::sicd::NearestNeighbors::nearest_neighbors_unseq(std::span inputs, std::span results) const +{ + // TODO: there could be more complicated logic here to determine which UNSEQ + // implementation to use. + nearest_neighbors_(execution_policy::unseq, inputs, results); } - void six::sicd::NearestNeighbors::nearest_neighbors_par_unseq(std::span inputs, std::span results) const { // TODO: there could be more complicated logic here to determine which UNSEQ // implementation to use. - - // This is very simple as it's only used for unit-testing - const auto& unseq = ::nearest_neighbors_unseq_; - #if SIX_sicd_has_simd - if (unseq == unseq_simd) - { - return nearest_neighbors_par_unseq_T(inputs, results); - } - #endif - #if SIX_sicd_has_VCL - if (unseq == unseq_vcl) - { - return nearest_neighbors_par_unseq_T(inputs, results); - } - #endif - #if SIX_sicd_has_valarray - if (unseq == unseq_valarray) - { - return nearest_neighbors_par_unseq_T(inputs, results); - } - #endif - #if SIX_sicd_has_ximd - if (unseq == unseq_ximd) - { - return nearest_neighbors_par_unseq_T(inputs, results); - } - #endif - - throw std::logic_error("Don't know how to implement nearest_neighbors_par_unseq() for unseq=" + unseq); + nearest_neighbors_(execution_policy::par_unseq, inputs, results); } #endif // SIX_sicd_ComplexToAMP8IPHS8I_unseq