diff --git a/UnitTest/TestCase.h b/UnitTest/TestCase.h index 225afd7bd..05a867c12 100644 --- a/UnitTest/TestCase.h +++ b/UnitTest/TestCase.h @@ -88,8 +88,8 @@ inline void assert_almost_eq(const std::string& testName, long double X1, long d #undef TEST_ASSERT_EQ_MSG #define TEST_ASSERT_EQ_MSG(msg, X1, X2) testName, Microsoft::VisualStudio::CppUnitTestFramework::Logger::WriteMessage(msg.c_str()); TEST_ASSERT_EQ(X1, X2) -#undef TEST_FAIL -#define TEST_FAIL(msg) { (void)testName; const auto vw(str::details::to_wstring(msg)); Microsoft::VisualStudio::CppUnitTestFramework::Assert::Fail(vw.c_str()); } +#undef TEST_FAIL_MSG +#define TEST_FAIL_MSG(msg) { (void)testName; const auto vw(str::details::to_wstring(msg)); Microsoft::VisualStudio::CppUnitTestFramework::Assert::Fail(vw.c_str()); } #undef TEST_EXCEPTION #undef TEST_THROWS diff --git a/externals/coda-oss/UnitTest/mt.cpp b/externals/coda-oss/UnitTest/mt.cpp index 6be7aa933..8bbcd4356 100644 --- a/externals/coda-oss/UnitTest/mt.cpp +++ b/externals/coda-oss/UnitTest/mt.cpp @@ -1,6 +1,8 @@ #include "pch.h" #include "CppUnitTest.h" +#include + #include #include #include @@ -11,6 +13,7 @@ #include #include #include +#include namespace mt { diff --git a/externals/coda-oss/cmake/CodaBuild.cmake b/externals/coda-oss/cmake/CodaBuild.cmake index 5ac3ed962..a74502f3c 100644 --- a/externals/coda-oss/cmake/CodaBuild.cmake +++ b/externals/coda-oss/cmake/CodaBuild.cmake @@ -146,6 +146,17 @@ macro(coda_initialize_build) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) + # Turn on AVX2 by default ... it's from 2013. + # Well, no :-( ... it seems to cause crashes w/older + # compilers on build servers. :-( + set(ENABLE_AVX2 false) + set(ENABLE_AVX512F false) + #if (NOT ENABLE_AVX512F) + # if (NOT DISABLE_AVX2) + # set(ENABLE_AVX2 true) + # endif() + #endif() + # MSVC-specific flags and options. if (MSVC) set_property(GLOBAL PROPERTY USE_FOLDERS ON) diff --git a/externals/coda-oss/modules/c++/CMakeLists.txt b/externals/coda-oss/modules/c++/CMakeLists.txt index 694e037aa..f8e98accb 100644 --- a/externals/coda-oss/modules/c++/CMakeLists.txt +++ b/externals/coda-oss/modules/c++/CMakeLists.txt @@ -11,14 +11,14 @@ if (MSVC) elseif (UNIX) # https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html - add_compile_options(-Werror) # Make all warnings into errors + #add_compile_options(-Werror) # Make all warnings into errors add_compile_options(-Wall -Wextra -Wpedantic -pedantic-errors -Wunused) add_compile_options(-Wzero-as-null-pointer-constant) - add_compile_options(-Wsuggest-final-types -Wsuggest-final-methods) + #add_compile_options(-Wsuggest-final-types -Wsuggest-final-methods) add_compile_options(-Wsuggest-override) add_compile_options(-Woverloaded-virtual) - add_compile_options(-Warray-bounds) + #add_compile_options(-Warray-bounds) add_compile_options(-Wduplicated-branches -Wduplicated-cond) add_compile_options(-Wtrampolines) add_compile_options(-Wshadow) @@ -29,6 +29,9 @@ elseif (UNIX) add_compile_options(-Wno-double-promotion) # implicit conversion of `float` to `double` is fine + add_compile_options(-Wno-array-bounds) # TODO: fix the code! + add_compile_options(-Wno-maybe-uninitialized) # TODO: fix the code! + # Need a newer compiler than GCC 9 #add_compile_options(-Wnrvo) endif() diff --git a/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h b/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h index 8e311c9e3..7bed73917 100644 --- a/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h +++ b/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h @@ -26,6 +26,7 @@ #include #include +#include "config/compiler_extensions.h" #include "coda_oss/CPlusPlus.h" #if CODA_OSS_cpp17 // is broken with the older version of GCC we're using @@ -83,10 +84,11 @@ inline OutputIt Transform_par(InputIt first1, InputIt last1, OutputIt d_first, U Transform_par_settings settings = Transform_par_settings{}) { #if CODA_OSS_mt_Algorithm_has_execution + CODA_OSS_mark_symbol_unused(settings); return std::transform(std::execution::par, first1, last1, d_first, unary_op); #else return Transform_par_(first1, last1, d_first, unary_op, settings); #endif // CODA_OSS_mt_Algorithm_has_execution } -} \ No newline at end of file +} diff --git a/externals/coda-oss/modules/c++/sys/source/AbstractOS.cpp b/externals/coda-oss/modules/c++/sys/source/AbstractOS.cpp index 436e12bb6..ebe072ff7 100644 --- a/externals/coda-oss/modules/c++/sys/source/AbstractOS.cpp +++ b/externals/coda-oss/modules/c++/sys/source/AbstractOS.cpp @@ -273,9 +273,7 @@ void AbstractOS::appendEnv(const std::string& envVar, const std::vector #include #include -#include -#include #include +#include #include #include "six/AmplitudeTable.h" @@ -21,184 +20,78 @@ using namespace six; -static void toComplex(six::Amp8iPhs8iLookup_t values, std::span inputs, std::span results) +static const six::sicd::ImageData imageData; +static std::vector fromComplex_seq(std::span inputs) { - const auto toComplex_ = [&values](const auto& v) - { - return values(v.amplitude, v.phase); - }; - - std::transform(inputs.begin(), inputs.end(), results.begin(), toComplex_); - //std::transform(std::execution::par, inputs.begin(), inputs.end(), results.begin(), toComplex_); + return imageData.fromComplex(six::execution_policy::seq, inputs); +} +static std::vector fromComplex_par(std::span inputs) +{ + return imageData.fromComplex(six::execution_policy::par, inputs); +} +static std::vector fromComplex_unseq(std::span inputs) +{ + return imageData.fromComplex(six::execution_policy::unseq, inputs); } -void toComplex(std::span inputs, std::span results) +static std::vector fromComplex_par_unseq(std::span inputs) { - const auto values = six::sicd::ImageData::getLookup(nullptr); - toComplex(values, inputs, results); + return imageData.fromComplex(six::execution_policy::par_unseq, inputs); } -auto make_inputs(size_t count) +static auto make_cxinputs(size_t count) { - std::vector retval; + std::vector retval; retval.reserve(count); for (size_t i = 0; i < count; i++) { - const auto amplitude = static_cast(i * i); - const auto phase = static_cast(~amplitude); - retval.push_back(AMP8I_PHS8I_t{ amplitude, phase }); + float f = static_cast(i); + retval.emplace_back(f, f); + retval.emplace_back(-f, f); + retval.emplace_back(f, -f); + retval.emplace_back(-f, -f); } return retval; } -static std::vector fromComplex_nearest_neighbors(std::span inputs) -{ - static const six::sicd::ImageData imageData; - assert(imageData.amplitudeTable.get() == nullptr); - return imageData.fromComplex(inputs); -} -//static std::vector fromComplex_nearest_neighbors2(std::span inputs) -//{ -// return six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors2(inputs, nullptr); -//} -// -//static std::vector fromComplex_nearest_neighbors_par(std::span inputs) -//{ -// return six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors(std::execution::par, inputs, nullptr); -//} -//static std::vector fromComplex_nearest_neighbors_par2(std::span inputs) -//{ -// return six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par2(inputs, nullptr); -//} - -//static std::vector fromComplex_nearest_neighbors_unseq(std::span inputs) -//{ -// return six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors(std::execution::unseq, inputs, nullptr); -//} -//static std::vector fromComplex_nearest_neighbors_par_unseq(std::span inputs) -//{ -// return six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors(std::execution::par_unseq, inputs, nullptr); -//} - -//static void fromComplex_nearest_neighbors_threaded(std::span inputs, std::span results) -//{ -// // make a structure to quickly find the nearest neighbor -// auto& converter = six::sicd::details::ComplexToAMP8IPHS8I::make(nullptr); -// converter.nearest_neighbors_threaded(inputs, results); -//} -//static void fromComplex_nearest_neighbors_simd(std::span inputs, std::span results) -//{ -// // make a structure to quickly find the nearest neighbor -// auto& converter = six::sicd::details::ComplexToAMP8IPHS8I::make(nullptr); -// converter.nearest_neighbors_simd(inputs, results); -//} - #ifdef NDEBUG constexpr auto iterations = 10; #else constexpr auto iterations = 1; #endif template -static std::chrono::duration test(TFunc f, const std::vector& inputs) +static std::chrono::duration test(TFunc func, const std::vector& inputs_) { - auto ap_results = f(inputs); + const auto inputs = sys::make_span(inputs_); + + std::ignore = func(inputs); auto start = std::chrono::high_resolution_clock::now(); for (int i = 0; i < iterations; i++) { - ap_results = f(inputs); + std::ignore = func(inputs); } auto end = std::chrono::high_resolution_clock::now(); return end - start; } +#define TEST(name) diff = test(name, inputs); \ +std::cout << "Time (" #name "): " << std::setw(9) << diff.count() << "\n" + int main() { + assert(imageData.amplitudeTable.get() == nullptr); + #ifdef NDEBUG - constexpr auto inputs_size = 25000000; + constexpr auto inputs_size = 5'000'000; #else constexpr auto inputs_size = 100; #endif - const auto inputs = make_inputs(inputs_size * 4); - std::vector results(inputs.size()); - - toComplex(inputs, results); + const auto inputs = make_cxinputs(inputs_size); /*********************************************************************************/ - auto diff = test(fromComplex_nearest_neighbors, results); - std::cout << "Time (nearest_neighbors): " << std::setw(9) << diff.count() << "\n"; - - //diff = test(fromComplex_nearest_neighbors2, results); - //std::cout << "Time (nearest_neighbors2): " << std::setw(9) << diff.count() << "\n"; - - //diff = test(fromComplex_nearest_neighbors_par, results); - //std::cout << "Time (nearest_neighbors_par): " << std::setw(9) << diff.count() << "\n"; - - //diff = test(fromComplex_nearest_neighbors_par2, results); - //std::cout << "Time (nearest_neighbors_par2): " << std::setw(9) << diff.count() << "\n"; - - //diff = test(fromComplex_nearest_neighbors_unseq, results); - //std::cout << "Time (nearest_neighbors_unseq): " << std::setw(9) << diff.count() << "\n"; - - //diff = test(fromComplex_nearest_neighbors_par_unseq, results); - //std::cout << "Time (nearest_neighbors_par_unseq): " << std::setw(9) << diff.count() << "\n"; - - //fromComplex_transform(results, ap_results); - //start = std::chrono::high_resolution_clock::now(); - //for (int i = 0; i < iterations; i++) - //{ - // fromComplex_transform(results, ap_results); - //} - //end = std::chrono::high_resolution_clock::now(); - //diff = end - start; - //std::cout << "Time (transform): " << std::setw(9) << diff.count() << "\n"; - //const auto transform_results = ap_results; - - ///*********************************************************************************/ - - //fromComplex_nearest_neighbors_threaded(results, ap_results); - //start = std::chrono::high_resolution_clock::now(); - //for (int i = 0; i < iterations; i++) - //{ - // fromComplex_nearest_neighbors_threaded(results, ap_results); - //} - //end = std::chrono::high_resolution_clock::now(); - //diff = end - start; - //std::cout << "Time (nearest_neighbors_threaded): " << std::setw(9) << diff.count() << "\n"; - //const auto nearest_neighbors_threaded_results = ap_results; - - //fromComplex_transform_par(results, ap_results); - //start = std::chrono::high_resolution_clock::now(); - //for (int i = 0; i < iterations; i++) - //{ - // fromComplex_transform_par(results, ap_results); - //} - //end = std::chrono::high_resolution_clock::now(); - //diff = end - start; - //std::cout << "Time (transform_par): " << std::setw(9) << diff.count() << "\n"; - //const auto transform_par_results = ap_results; - - ///*********************************************************************************/ - - //fromComplex_nearest_neighbors_simd(results, ap_results); - //start = std::chrono::high_resolution_clock::now(); - //for (int i = 0; i < iterations; i++) - //{ - // fromComplex_nearest_neighbors_simd(results, ap_results); - //} - //end = std::chrono::high_resolution_clock::now(); - //diff = end - start; - //std::cout << "Time (nearest_neighbors_simd): " << std::setw(9) << diff.count() << "\n"; - //const auto nearest_neighbors_threaded_simd = ap_results; - - //fromComplex_transform_unseq(results, ap_results); - //start = std::chrono::high_resolution_clock::now(); - //for (int i = 0; i < iterations; i++) - //{ - // fromComplex_transform_unseq(results, ap_results); - //} - //end = std::chrono::high_resolution_clock::now(); - //diff = end - start; - //std::cout << "Time (transform_unseq): " << std::setw(9) << diff.count() << "\n"; - //const auto transform_unseq_results = ap_results; + std::chrono::duration diff; + TEST(fromComplex_seq); + TEST(fromComplex_par); + TEST(fromComplex_unseq); + TEST(fromComplex_par_unseq); } - diff --git a/six/modules/c++/samples/8AMPI_PHSI.dir/8AMPI_PHSI.dir.vcxproj b/six/modules/c++/samples/8AMPI_PHSI.dir/8AMPI_PHSI.dir.vcxproj index 1cd611727..b20b04052 100644 --- a/six/modules/c++/samples/8AMPI_PHSI.dir/8AMPI_PHSI.dir.vcxproj +++ b/six/modules/c++/samples/8AMPI_PHSI.dir/8AMPI_PHSI.dir.vcxproj @@ -66,8 +66,6 @@ true MultiThreadedDebugDLL true - stdcpp17 - stdc11 Console @@ -93,8 +91,6 @@ Speed Guard true - stdcpp17 - stdc11 Console diff --git a/six/modules/c++/six.sicd/CMakeLists.txt b/six/modules/c++/six.sicd/CMakeLists.txt index 31373160e..0adcb9528 100644 --- a/six/modules/c++/six.sicd/CMakeLists.txt +++ b/six/modules/c++/six.sicd/CMakeLists.txt @@ -8,7 +8,6 @@ coda_add_module( source/ComplexData.cpp source/ComplexDataBuilder.cpp source/ComplexToAMP8IPHS8I.cpp - source/ComplexToAMP8IPHS8I_unseq.cpp source/ComplexXMLControl.cpp source/ComplexXMLParser.cpp source/ComplexXMLParser040.cpp @@ -26,6 +25,8 @@ coda_add_module( source/Grid.cpp source/ImageData.cpp source/ImageFormation.cpp + source/NearestNeighbors.cpp + source/NearestNeighbors_unseq.cpp source/NITFReadComplexXMLControl.cpp source/PFA.cpp source/Position.cpp diff --git a/six/modules/c++/six.sicd/include/six/sicd/ImageData.h b/six/modules/c++/six.sicd/include/six/sicd/ImageData.h index 9a31e5767..bb5f29fb6 100644 --- a/six/modules/c++/six.sicd/include/six/sicd/ImageData.h +++ b/six/modules/c++/six.sicd/include/six/sicd/ImageData.h @@ -103,7 +103,9 @@ struct SIX_SICD_API ImageData static void toComplex(six::Amp8iPhs8iLookup_t lookup, std::span, std::span); std::vector toComplex(std::span) const; + std::vector fromComplex(std::span) const; + std::vector fromComplex(six::execution_policy, std::span) const; /*! * Create a lookup table for converting from AMP8I_PHS8I to complex. @@ -113,6 +115,7 @@ struct SIX_SICD_API ImageData static six::Amp8iPhs8iLookup_t getLookup(const six::AmplitudeTable* pAmplitudeTable); }; +// for unit-testing SIX_SICD_API const details::ComplexToAMP8IPHS8I& make_ComplexToAMP8IPHS8I(const six::AmplitudeTable*); SIX_SICD_API AMP8I_PHS8I_t nearest_neighbor(const details::ComplexToAMP8IPHS8I&, const six::zfloat& v); diff --git a/six/modules/c++/six.sicd/include/six/sicd/NearestNeighbors.h b/six/modules/c++/six.sicd/include/six/sicd/NearestNeighbors.h new file mode 100644 index 000000000..ea5aaf159 --- /dev/null +++ b/six/modules/c++/six.sicd/include/six/sicd/NearestNeighbors.h @@ -0,0 +1,85 @@ +/* ========================================================================= + * This file is part of six.sicd-c++ + * ========================================================================= + * + * (C) Copyright 2004 - 2014, MDA Information Systems LLC + * © Copyright 2024, Maxar Technologies, Inc. + * + * six.sicd-c++ is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; If not, + * see . + * + */ +#pragma once + +#include +#include + +#include + +#include "six/AmplitudeTable.h" + +namespace six +{ +namespace sicd +{ +/*! + * \brief A utility that's used to convert complex values into 8-bit amplitude and phase values. + */ +struct NearestNeighbors final +{ + NearestNeighbors(const details::ComplexToAMP8IPHS8I&); + ~NearestNeighbors() = default; + NearestNeighbors(const NearestNeighbors&) = delete; + NearestNeighbors& operator=(const NearestNeighbors&) = delete; + NearestNeighbors(NearestNeighbors&&) = delete; // implicitly deleted because of =delete for copy + NearestNeighbors& operator=(NearestNeighbors&&) = delete; // implicitly deleted because of =delete for copy + + /*! + * Get the nearest amplitude and phase value given a complex value + * @param v complex value to query with + * @return nearest amplitude and phase value + */ + AMP8I_PHS8I_t nearest_neighbor(const six::zfloat& v) const; + void nearest_neighbors(execution_policy, std::span inputs, std::span results) const; + void nearest_neighbors(std::span inputs, std::span results) const; // system picks execution_policy + + void nearest_neighbors_seq(std::span inputs, std::span results) const; + void nearest_neighbors_par(std::span inputs, std::span results) const; + + #if SIX_sicd_ComplexToAMP8IPHS8I_unseq + void nearest_neighbors_unseq(std::span inputs, std::span results) const; + void nearest_neighbors_par_unseq(std::span inputs, std::span results) const; + #endif + +private: + const details::ComplexToAMP8IPHS8I& converter_; + + uint8_t find_nearest(six::zfloat phase_direction, six::zfloat v) const; + uint8_t getPhase(six::zfloat) const; + +#if SIX_sicd_ComplexToAMP8IPHS8I_unseq + template + auto nearest_neighbors_unseq_T(const std::array&) const; // TODO: std::span ... ? + template + void nearest_neighbors_unseq_(std::span inputs, std::span results) const; + + template + void nearest_neighbors_par_unseq_T(std::span inputs, std::span results) const; +#endif + +}; +} +} + + diff --git a/six/modules/c++/six.sicd/six.sicd.vcxproj b/six/modules/c++/six.sicd/six.sicd.vcxproj index 6d58ad83c..17b2d7d91 100644 --- a/six/modules/c++/six.sicd/six.sicd.vcxproj +++ b/six/modules/c++/six.sicd/six.sicd.vcxproj @@ -128,6 +128,7 @@ + @@ -154,7 +155,7 @@ - + @@ -172,6 +173,7 @@ + diff --git a/six/modules/c++/six.sicd/six.sicd.vcxproj.filters b/six/modules/c++/six.sicd/six.sicd.vcxproj.filters index f7ec2e4b0..6cc1c134f 100644 --- a/six/modules/c++/six.sicd/six.sicd.vcxproj.filters +++ b/six/modules/c++/six.sicd/six.sicd.vcxproj.filters @@ -135,6 +135,9 @@ Header Files + + Header Files + @@ -248,7 +251,10 @@ Source Files - + + Source Files + + Source Files diff --git a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp index 1bdef6f87..367f851ac 100644 --- a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp +++ b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp @@ -37,6 +37,7 @@ #include #include "six/sicd/Utilities.h" +#include "six/sicd/Exports.h" #undef min #undef max @@ -76,11 +77,11 @@ inline auto GetPhase(std::complex v) if (phase < 0.0) phase += std::numbers::pi * 2.0; // Wrap from [0, 2PI] return phase; } -uint8_t six::sicd::details::ComplexToAMP8IPHS8I::Impl::getPhase(six::zfloat v) const +uint8_t six::sicd::details::ComplexToAMP8IPHS8I::getPhase(six::zfloat v) const { // Phase is determined via arithmetic because it's equally spaced. const auto phase = GetPhase(v); - return gsl::narrow_cast(std::round(phase / phase_delta)); + return gsl::narrow_cast(std::round(phase / phase_delta_)); } template @@ -139,184 +140,43 @@ static std::span get_magnitudes(const six::AmplitudeTable* pAmplitu six::sicd::details::ComplexToAMP8IPHS8I::ComplexToAMP8IPHS8I(const six::AmplitudeTable *pAmplitudeTable) - :impl(*this) { - impl.magnitudes = get_magnitudes(pAmplitudeTable, impl.uncached_magnitudes); + magnitudes_ = get_magnitudes(pAmplitudeTable, uncached_magnitudes_); const auto p0 = GetPhase(Utilities::toComplex(1, 0, pAmplitudeTable)); const auto p1 = GetPhase(Utilities::toComplex(1, 1, pAmplitudeTable)); assert(p0 == 0.0); assert(p1 > p0); - impl.phase_delta = gsl::narrow_cast(p1 - p0); - for(size_t i = 0; i < 256; i++) + phase_delta_ = gsl::narrow_cast(p1 - p0); + for(size_t i = 0; i < six::AmplitudeTableSize; i++) { - const units::Radians angle{ static_cast(p0) + i * impl.phase_delta }; + const units::Radians angle{ static_cast(p0) + i * phase_delta_ }; float y, x; SinCos(angle, y, x); - impl.phase_directions[i] = { x, y }; + phase_directions_.value[i] = { x, y }; // Only need the parallel array when using the "vectorclass" library. #ifdef SIX_sicd_has_VCL - impl.phase_directions_real[i] = impl.phase_directions[i].real(); - impl.phase_directions_imag[i] = impl.phase_directions[i].imag(); + phase_directions_.real[i] = phase_directions_.value[i].real(); + phase_directions_.imag[i] = phase_directions_.value[i].imag(); #endif } } -/*! - * Find the nearest element given an iterator range. - * @param value query value - * @return index of nearest value within the iterator range. - */ -static uint8_t nearest(std::span magnitudes, float value) -{ - const auto begin = magnitudes.begin(); - const auto end = magnitudes.end(); - - const auto it = std::lower_bound(begin, end, value); - if (it == begin) return 0; - - const auto prev_it = std::prev(it); - const auto nearest_it = it == end ? prev_it : - (value - *prev_it <= *it - value ? prev_it : it); - const auto distance = std::distance(begin, nearest_it); - assert(distance <= std::numeric_limits::max()); - return gsl::narrow(distance); -} -uint8_t six::sicd::details::ComplexToAMP8IPHS8I::Impl::find_nearest(six::zfloat phase_direction, six::zfloat v) const -{ - // We have to do a 1D nearest neighbor search for magnitude. - // But it's not the magnitude of the input complex value - it's the projection of - // the complex value onto the ray of candidate magnitudes at the selected phase. - // i.e. dot product. - const auto projection = (phase_direction.real() * v.real()) + (phase_direction.imag() * v.imag()); - //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ??? - return nearest(magnitudes, projection); -} -six::AMP8I_PHS8I_t six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbor_(const six::zfloat &v) const -{ - six::AMP8I_PHS8I_t retval; - retval.phase = impl.getPhase(v); - - auto&& phase_direction = impl.phase_directions[retval.phase]; - retval.amplitude = impl.find_nearest(phase_direction, v); - return retval; -} -void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_seq(std::span inputs, std::span results) const -{ - std::transform(inputs.begin(), inputs.end(), results.begin(), - [&](const auto& v) { return converter.nearest_neighbor_(v); }); -} -std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_seq( - std::span inputs, const six::AmplitudeTable* pAmplitudeTable) -{ - // make a structure to quickly find the nearest neighbor - const auto& converter = make_(pAmplitudeTable); - - std::vector retval(inputs.size()); - converter.impl.nearest_neighbors_seq(inputs, retval); - return retval; -} - -six::AMP8I_PHS8I_t six::sicd::nearest_neighbor(const details::ComplexToAMP8IPHS8I& i, const six::zfloat& v) -{ - return i.nearest_neighbor_(v); -} - - -void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_par(std::span inputs, std::span results) const -{ - const auto nearest_neighbor = [&](const auto& v) - { - return converter.nearest_neighbor_(v); - }; - std::ignore = mt::Transform_par(inputs.begin(), inputs.end(), results.begin(), nearest_neighbor); -} -std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par( - std::span inputs, const six::AmplitudeTable* pAmplitudeTable) +std::span six::sicd::details::ComplexToAMP8IPHS8I::magnitudes() const { - // make a structure to quickly find the nearest neighbor - const auto& converter = make_(pAmplitudeTable); - - std::vector retval(inputs.size()); - converter.impl.nearest_neighbors_par(inputs, sys::make_span(retval)); - return retval; + return magnitudes_; } -std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors( - std::span inputs, const six::AmplitudeTable* pAmplitudeTable) +float six::sicd::details::ComplexToAMP8IPHS8I::phase_delta() const { - // TODO: there could be more complicated logic here to decide between - // _seq, _par, _unseq, and _par_unseq - #if SIX_sicd_ComplexToAMP8IPHS8I_unseq - - // None of our SIMD implementations are dramatically slower when - // running unittests; that's mostly because there's lots of IO. So that - // the more interesting code-path is tested, use UNSEQ in debugging. - #if CODA_OSS_DEBUG - return nearest_neighbors_unseq(inputs, pAmplitudeTable); // TODO: - #else - return nearest_neighbors_par(inputs, pAmplitudeTable); - #endif - - #else - return nearest_neighbors_par(inputs, pAmplitudeTable); - -#endif + return phase_delta_; } -#if SIX_sicd_ComplexToAMP8IPHS8I_unseq -std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq( - std::span inputs, const six::AmplitudeTable* pAmplitudeTable) +const six::sicd::details::ComplexToAMP8IPHS8I::phase_directions& six::sicd::details::ComplexToAMP8IPHS8I::get_phase_directions() const { - // TODO: there could be more complicated logic here to determine which UNSEQ - // implementation to use. - - #if SIX_sicd_has_simd - return nearest_neighbors_unseq_simd(inputs, pAmplitudeTable); - - #elif SIX_sicd_has_VCL - return nearest_neighbors_unseq_vcl(inputs, pAmplitudeTable); - - #elif SIX_sicd_has_ximd - return nearest_neighbors_unseq_ximd(inputs, pAmplitudeTable); - - #else - #error "Don't know how to implement nearest_neighbors_unseq()" - throw std::logic_error("Don't know how to implement nearest_neighbors_unseq()"); - - #endif + return phase_directions_; } -#endif // SIX_sicd_ComplexToAMP8IPHS8I_unseq - -//template -//void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_par_unseq(TInputIt first, TInputIt last, TOutputIt dest) const -//{ -// constexpr ptrdiff_t cutoff_ = 0; // too slow w/o multi-threading -// // The value of "default_cutoff" was determined by testing; there is nothing special about it, feel free to change it. -// constexpr auto default_cutoff = (256 * 256) * 2; -// const auto cutoff = cutoff_ == 0 ? default_cutoff : cutoff_; -// -// const auto transform_f = [&](const TInputIt first1, const TInputIt last1, TOutputIt d_first) -// { -// nearest_neighbors_unseq(first1, last1, d_first); -// static const TOutputIt retval; -// return retval; -// }; -// std::ignore = transform_async(first, last, dest, transform_f, cutoff); -//} -//#if SIX_sicd_ComplexToAMP8IPHS8I_unseq -//std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par_unseq( -// std::span inputs, const six::AmplitudeTable* pAmplitudeTable) -//{ -// // make a structure to quickly find the nearest neighbor -// const auto& converter = make_(pAmplitudeTable); -// -// std::vector retval(inputs.size()); -// converter.impl.nearest_neighbors_par_unseq(inputs.begin(), inputs.end(), retval.begin()); -// return retval; -//} -//#endif // SIX_sicd_ComplexToAMP8IPHS8I_unseq const six::sicd::details::ComplexToAMP8IPHS8I& six::sicd::details::ComplexToAMP8IPHS8I::make_(const six::AmplitudeTable* pAmplitudeTable) { @@ -344,4 +204,4 @@ const six::sicd::details::ComplexToAMP8IPHS8I& six::sicd::details::ComplexToAMP8 const six::sicd::details::ComplexToAMP8IPHS8I& six::sicd::make_ComplexToAMP8IPHS8I(const six::AmplitudeTable* pAmplitudeTable) { return six::sicd::details::ComplexToAMP8IPHS8I::make_(pAmplitudeTable); -} \ No newline at end of file +} diff --git a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_unseq.cpp b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_unseq.cpp deleted file mode 100644 index 6b546a7d1..000000000 --- a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_unseq.cpp +++ /dev/null @@ -1,748 +0,0 @@ -/* ========================================================================= -* This file is part of six.sicd-c++ -* ========================================================================= -* -* (C) Copyright 2021, Maxar Technologies, Inc. -* -* six.sicd-c++ is free software; you can redistribute it and/or modify -* it under the terms of the GNU Lesser General Public License as published by -* the Free Software Foundation; either version 3 of the License, or -* (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Lesser General Public License for more details. -* -* You should have received a copy of the GNU Lesser General Public -* License along with this program; If not, -* see . -* -*/ -#include "six/AmplitudeTable.h" - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "six/sicd/Utilities.h" - -#undef min -#undef max - -using zfloat = six::zfloat; -using AMP8I_PHS8I_t = six::AMP8I_PHS8I_t; - -#if SIX_sicd_has_VCL - -#define VCL_NAMESPACE vcl -#if _MSC_VER -#pragma warning(push) -#pragma warning(disable: 4100) // '...': unreferenced formal parameter -#pragma warning(disable: 4127) // conditional expression is constant -#pragma warning(disable: 4244) // '...': conversion from '...' to '...', possible loss of data -#pragma warning(disable: 4723) // potential divide by 0 -#endif -#include "six/sicd/vectorclass/version2/vectorclass.h" -#include "six/sicd/vectorclass/version2/vectormath_trig.h" -#include "six/sicd/vectorclass/complex/complexvec1.h" -#if _MSC_VER -#pragma warning(pop) -#endif - -using vcl_intv = vcl::Vec4i; -using vcl_floatv = vcl::Vec4f; -constexpr auto vcl_elements_per_iteration = vcl_floatv::size(); - -inline int ssize(const vcl_intv& z) noexcept -{ - return z.size(); -} -inline int ssize(const vcl_floatv& z) noexcept -{ - return z.size(); -} - -using vcl_zfloatv = vcl::Complex4f; -auto real(const vcl_zfloatv& z) -{ - return z.real(); -} -auto imag(const vcl_zfloatv& z) -{ - return z.imag(); -} -inline size_t size(const vcl_zfloatv& z) noexcept -{ - return z.size(); -} -inline int ssize(const vcl_zfloatv& z) noexcept -{ - return z.size(); -} - -inline auto arg(const vcl_zfloatv& z) -{ - // https://en.cppreference.com/w/cpp/numeric/complex/arg - // > `std::atan2(std::imag(z), std::real(z))` - return atan2(z.imag(), z.real()); // arg() -} - -template -inline auto if_add(const T& f, const vcl_floatv& a, float b) -{ - return vcl::if_add(f, a, b); -} - -inline bool any_of(const vcl_intv& m) -{ - return horizontal_or(m); -} - -inline void copy_from(std::span p, vcl_floatv& result) -{ - assert(p.size() == result.size()); - result.load(p.data()); -} -inline auto copy_from(std::span p, vcl_zfloatv& result) -{ - // https://en.cppreference.com/w/cpp/numeric/complex - // > For any pointer to an element of an array of `std::complex` named `p` and any valid array index `i`, ... - // > is the real part of the complex number `p[i]`, ... - result.load(reinterpret_cast(p.data())); -} - -template -inline auto lookup(const vcl_intv& zindex, const std::array& phase_directions) -{ - return vcl::lookup(zindex, phase_directions.data()); -} -inline auto lookup(const vcl_intv& zindex, std::span magnitudes) -{ - assert(magnitudes.size() == six::AmplitudeTableSize); - return vcl::lookup(zindex, magnitudes.data()); -} - -#endif // SIX_sicd_has_VCL - -#if SIX_sicd_has_ximd -#ifndef CODA_OSS_Ximd_ENABLE -#define CODA_OSS_Ximd_ENABLE 1 -#endif -#include - -template -using ximd = sys::ximd::Ximd; - -using ximd_intv = ximd; -using ximd_intv_mask = sys::ximd::ximd_mask; - -using ximd_floatv = ximd; -using ximd_floatv_mask = sys::ximd::ximd_mask; - -constexpr auto ximd_elements_per_iteration = ximd_floatv::size(); - -template -static inline auto ssize(const ximd& v) noexcept -{ - return gsl::narrow(v.size()); -} - -// Manage a SIMD complex as an array of two SIMDs -using ximd_zfloatv = std::array; -inline auto& real(ximd_zfloatv& z) noexcept -{ - return z[0]; -} -inline const auto& real(const ximd_zfloatv& z) noexcept -{ - return z[0]; -} -inline auto& imag(ximd_zfloatv& z) noexcept -{ - return z[1]; -} -inline const auto& imag(const ximd_zfloatv& z) noexcept -{ - return z[1]; -} -inline size_t size(const ximd_zfloatv& z) noexcept -{ - auto retval = real(z).size(); - assert(retval == imag(z).size()); - return retval; -} -inline auto ssize(const ximd_zfloatv& z) noexcept -{ - return gsl::narrow(size(z)); -} - -inline auto arg(const ximd_zfloatv& z) -{ - // https://en.cppreference.com/w/cpp/numeric/complex/arg - // > `std::atan2(std::imag(z), std::real(z))` - return atan2(imag(z), real(z)); // arg() -} - -template -static inline auto make_ximd_zfloatv(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag) -{ - ximd_zfloatv retval; - for (size_t i = 0; i < size(retval); i++) - { - real(retval)[i] = generate_real(i); - imag(retval)[i] = generate_imag(i); - } - return retval; -} - -template -static inline auto ximd_select_(const TTest& test, const TResult& t, const TResult& f) -{ - TResult retval; - for (size_t i = 0; i < test.size(); i++) - { - retval[i] = test[i] ? t[i] : f[i]; - } - return retval; -} - -static inline auto copy_from(std::span p, ximd_floatv& result) -{ - assert(p.size() == result.size()); - result.copy_from(p.data()); -} -static inline auto copy_from(std::span p, ximd_zfloatv& result) -{ - const auto generate_real = [&](size_t i) { return p[i].real(); }; - const auto generate_imag = [&](size_t i) { return p[i].imag(); }; - result = make_ximd_zfloatv(generate_real, generate_imag); -} - -static inline auto roundi(const ximd_floatv& v) // match vcl::roundi() -{ - const auto rounded = round(v); - const auto generate_roundi = [&](size_t i) - { return static_cast(rounded[i]); }; - return ximd_intv::generate(generate_roundi); -} - -static inline auto select(const ximd_floatv_mask& test, const ximd_floatv& t, const ximd_floatv& f) -{ - return ximd_select_(test, t, f); -} -static inline auto select(const ximd_intv_mask& test, const ximd_intv& t, const ximd_intv& f) -{ - return ximd_select_(test, t, f); -} - -static inline auto if_add(const ximd_floatv_mask& m, const ximd_floatv& v, typename ximd_floatv::value_type c) -{ - // phase = if_add(phase < 0.0, phase, std::numbers::pi_v * 2.0f); // Wrap from [0, 2PI] - const auto generate_add = [&](size_t i) { - return m[i] ? v[i] + c : v[i]; - }; - return ximd_floatv::generate(generate_add); -} - -template -static inline auto lookup(const ximd_intv& zindex, const std::array& phase_directions) -{ - // It seems that the "generator" constuctor is called with SIMD instructions. - // https://en.cppreference.com/w/cpp/experimental/simd/simd/simd - // > The calls to `generator` are unsequenced with respect to each other. - - const auto generate_real = [&](size_t i) { - const auto i_ = zindex[i]; - return phase_directions[i_].real(); - }; - const auto generate_imag = [&](size_t i) { - const auto i_ = zindex[i]; - return phase_directions[i_].imag(); - }; - return make_ximd_zfloatv(generate_real, generate_imag); -} - -static auto lookup(const ximd_intv& zindex, std::span magnitudes) -{ - const auto generate = [&](size_t i) { - const auto i_ = zindex[i]; - - // The index may be out of range. This is expected because `i` might be "don't care." - if ((i_ >= 0) && (i_ < std::ssize(magnitudes))) - { - return magnitudes[i_]; - } - return NAN; // propogate "don't care" - }; - return ximd_floatv::generate(generate); -} - -#endif // SIX_sicd_has_ximd - -#if SIX_sicd_has_simd - -#include - -// https://en.cppreference.com/w/cpp/experimental/simd/simd_cast -// > The TS specification is missing `simd_cast` and `static_simd_cast` overloads for `simd_mask`. ... -namespace stdx -{ - using namespace std::experimental; - using namespace std::experimental::__proposed; -} - -template -using simd = stdx::simd; - -using simd_intv = simd; -using simd_intv_mask = typename simd_intv::mask_type; - -using simd_floatv = simd; -using simd_floatv_mask = typename simd_floatv::mask_type; - -constexpr auto simd_elements_per_iteration = simd_floatv::size(); - -template -static inline auto ssize(const simd& v) noexcept -{ - return gsl::narrow(v.size()); -} - -// Manage a SIMD complex as an array of two SIMDs -using simd_zfloatv = std::array; -inline auto& real(simd_zfloatv& z) noexcept -{ - return z[0]; -} -inline const auto& real(const simd_zfloatv& z) noexcept -{ - return z[0]; -} -inline auto& imag(simd_zfloatv& z) noexcept -{ - return z[1]; -} -inline const auto& imag(const simd_zfloatv& z) noexcept -{ - return z[1]; -} -inline size_t size(const simd_zfloatv& z) noexcept -{ - auto retval = real(z).size(); - assert(retval == imag(z).size()); - return retval; -} -inline auto ssize(const simd_zfloatv& z) noexcept -{ - return gsl::narrow(size(z)); -} - -inline auto arg(const simd_zfloatv& z) -{ - // https://en.cppreference.com/w/cpp/numeric/complex/arg - // > `std::atan2(std::imag(z), std::real(z))` - return atan2(imag(z), real(z)); // arg() -} - -template -static inline auto make_simd_zfloatv(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag) -{ - simd_zfloatv retval; - for (size_t i = 0; i < size(retval); i++) - { - real(retval)[i] = generate_real(i); - imag(retval)[i] = generate_imag(i); - } - return retval; -} - -template -static inline auto simd_select_(const TTest& test, const TResult& t, const TResult& f) -{ - // https://en.cppreference.com/w/cpp/experimental/simd/where_expression - // > ... All other elements are left unchanged. - TResult retval; - where(test, retval) = t; - where(!test, retval) = f; - return retval; -} - -static inline auto copy_from(std::span p, simd_floatv& result) -{ - assert(p.size() == result.size()); - result.copy_from(p.data(), stdx::element_aligned); -} -static inline auto copy_from(std::span p, simd_zfloatv& result) -{ - const auto generate_real = [&](size_t i) { return p[i].real(); }; - const auto generate_imag = [&](size_t i) { return p[i].imag(); }; - result = make_simd_zfloatv(generate_real, generate_imag); -} - -static inline auto roundi(const simd_floatv& v) // match vcl::roundi() -{ - const auto rounded = round(v); - const auto generate_roundi = [&](size_t i) - { return static_cast(rounded[i]); }; - return simd_intv(generate_roundi); -} - -template -static inline auto select(const TMask& test_, const simd_floatv& t, const simd_floatv& f) -{ - //const auto test = test_.__cvt(); // https://github.com/VcDevel/std-simd/issues/41 - const auto test = stdx::static_simd_cast(test_); // https://github.com/VcDevel/std-simd/issues/41 - return simd_select_(test, t, f); -} -template -static inline auto select(const TMask& test_, const simd_intv& t, const simd_intv& f) -{ - //const auto test = test_.__cvt(); // https://github.com/VcDevel/std-simd/issues/41 - const auto test = stdx::static_simd_cast(test_); // https://github.com/VcDevel/std-simd/issues/41 - return simd_select_(test, t, f); -} - -static inline auto if_add(const simd_floatv_mask& m, const simd_floatv& v, typename simd_floatv::value_type c) -{ - // phase = if_add(phase < 0.0, phase, std::numbers::pi_v * 2.0f); // Wrap from [0, 2PI] - const auto generate_add = [&](size_t i) { - return m[i] ? v[i] + c : v[i]; - }; - return simd_floatv(generate_add); -} - -template -static inline auto lookup(const simd_intv& zindex, const std::array& phase_directions) -{ - // It seems that the "generator" constuctor is called with SIMD instructions. - // https://en.cppreference.com/w/cpp/experimental/simd/simd/simd - // > The calls to `generator` are unsequenced with respect to each other. - - const auto generate_real = [&](size_t i) { - const auto i_ = zindex[i]; - return phase_directions[i_].real(); - }; - const auto generate_imag = [&](size_t i) { - const auto i_ = zindex[i]; - return phase_directions[i_].imag(); - }; - return make_simd_zfloatv(generate_real, generate_imag); -} - -static auto lookup(const simd_intv& zindex, std::span magnitudes) -{ - const auto generate = [&](size_t i) { - const auto i_ = zindex[i]; - - // The index may be out of range. This is expected because `i` might be "don't care." - if ((i_ >= 0) && (i_ < std::ssize(magnitudes))) - { - return magnitudes[i_]; - } - return NAN; // propogate "don't care" - }; - return simd_floatv(generate); -} - -#endif // SIX_sicd_has_simd - -/******************************************************************************************************/ - -template -static auto getPhase(const ZFloatV& v, float phase_delta) -{ - // Phase is determined via arithmetic because it's equally spaced. - // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around - // handles cases that are close to 2PI. - auto phase = arg(v); - phase = if_add(phase < 0.0f, phase, std::numbers::pi_v * 2.0f); // Wrap from [0, 2PI] - return roundi(phase / phase_delta); -} - -// https://en.cppreference.com/w/cpp/algorithm/lower_bound -/* -template -ForwardIt lower_bound(ForwardIt first, ForwardIt last, const T& value) -{ - ForwardIt it; - typename std::iterator_traits::difference_type count, step; - count = std::distance(first, last); - - while (count > 0) - { - it = first; - step = count / 2; - std::advance(it, step); - - if (*it < value) - { - first = ++it; - count -= step + 1; - } - else - count = step; - } - - return first; -} -*/ -template -inline auto lower_bound_(std::span magnitudes, const FloatV& v) -{ - IntV first = 0; - const IntV last = gsl::narrow(magnitudes.size()); - - auto count = last - first; - while (any_of(count > 0)) - { - auto it = first; - const auto step = count / 2; - it += step; - - auto next = it; ++next; // ... ++it; - auto advance = count; advance -= step + 1; // ... -= step + 1; - - const auto c = lookup(it, magnitudes); // magnituides[it] - const auto test = c < v; - it = select(test, next, it); // ... ++it - first = select(test, it, first); // first = ... - count = select(test, advance, step); // count -= step + 1 <...OR...> count = step - } - return first; -} -template -inline auto lower_bound(std::span magnitudes, const FloatV& value) -{ - auto retval = lower_bound_(magnitudes, value); - - #if CODA_OSS_DEBUG - for (int i = 0; i < ssize(value); i++) - { - const auto it = std::lower_bound(magnitudes.begin(), magnitudes.end(), value[i]); - const auto result = gsl::narrow(std::distance(magnitudes.begin(), it)); - assert(retval[i] == result); - } - #endif - - return retval; -} - -template -static auto nearest(std::span magnitudes, const FloatV& value) -{ - assert(magnitudes.size() == six::AmplitudeTableSize); - - /* - const auto it = std::lower_bound(begin, end, value); - if (it == begin) return 0; - - const auto prev_it = std::prev(it); - const auto nearest_it = it == end ? prev_it : - (value - *prev_it <= *it - value ? prev_it : it); - const auto distance = std::distance(begin, nearest_it); - assert(distance <= std::numeric_limits::max()); - return gsl::narrow(distance); - */ - const auto it = ::lower_bound(magnitudes, value); - const auto prev_it = it - 1; // const auto prev_it = std::prev(it); - - const auto v0 = value - lookup(prev_it, magnitudes); // value - *prev_it - const auto v1 = lookup(it, magnitudes) - value; // *it - value - //const auto nearest_it = select(v0 <= v1, prev_it, it); // (value - *prev_it <= *it - value ? prev_it : it); - - const IntV end = gsl::narrow(magnitudes.size()); - //const auto end_test = select(it == end, prev_it, nearest_it); // it == end ? prev_it : ... - const IntV zero = 0; - auto retval = select(it == 0, zero, // if (it == begin) return 0; - select(it == end, prev_it, // it == end ? prev_it : ... - select(v0 <= v1, prev_it, it) // (value - *prev_it <= *it - value ? prev_it : it); - )); - return retval; -} - -template -static auto find_nearest(std::span magnitudes, - const FloatV& phase_direction_real, const FloatV& phase_direction_imag, - const ZFloatV& v) -{ - // We have to do a 1D nearest neighbor search for magnitude. - // But it's not the magnitude of the input complex value - it's the projection of - // the complex value onto the ray of candidate magnitudes at the selected phase. - // i.e. dot product. - const auto projection = (phase_direction_real * real(v)) + (phase_direction_imag * imag(v)); - //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ??? - return nearest(magnitudes, projection); -} - -#if SIX_sicd_has_VCL -static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter, - const vcl_intv& phase, const vcl_zfloatv& v) -{ - const auto& impl = converter.impl; - - const auto phase_direction_real = lookup(phase, impl.phase_directions_real); - const auto phase_direction_imag = lookup(phase, impl.phase_directions_imag); - return ::find_nearest(impl.magnitudes, phase_direction_real, phase_direction_imag, v); -} -#endif - -#if SIX_sicd_has_ximd -static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter, - const ximd_intv& phase, const ximd_zfloatv& v) -{ - const auto& impl = converter.impl; - - const auto phase_direction = lookup(phase, impl.phase_directions); - return ::find_nearest(impl.magnitudes, real(phase_direction), imag(phase_direction), v); -} -#endif - -#if SIX_sicd_has_simd -static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter, - const simd_intv& phase, const simd_zfloatv& v) -{ - const auto& impl = converter.impl; - - const auto phase_direction = lookup(phase, impl.phase_directions); - return ::find_nearest(impl.magnitudes, real(phase_direction), imag(phase_direction), v); -} -#endif - -#if SIX_sicd_ComplexToAMP8IPHS8I_unseq -template -void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_unseq_T(std::span p, std::span results) const -{ - ZFloatV v; - assert(p.size() == size(v)); - - copy_from(p, v); - #if CODA_OSS_DEBUG - for (int i = 0; i < ssize(v); i++) - { - //const auto z = p[i]; - //assert(real(v)[i] == z.real()); - //assert(imag(v)[i] == z.imag()); - } - #endif - - const auto phase = ::getPhase(v, phase_delta); - #if CODA_OSS_DEBUG - for (int i = 0; i < ssize(phase); i++) - { - const auto phase_ = getPhase(p[i]); - assert(static_cast(phase[i]) == phase_); - } - #endif - - const auto amplitude = lookup_and_find_nearest(converter, phase, v); - #if CODA_OSS_DEBUG - for (int i = 0; i < ssize(amplitude); i++) - { - const auto i_ = phase[i]; - const auto a = find_nearest(phase_directions[i_], p[i]); - assert(a == amplitude[i]); - } - #endif - - // interleave() and store() is slower than an explicit loop. - auto dest = results.begin(); - for (int i = 0; i < ssize(v); i++) - { - dest->phase = gsl::narrow_cast(phase[i]); - dest->amplitude = gsl::narrow_cast(amplitude[i]); - - ++dest; - } -} - -template -void six::sicd::details::ComplexToAMP8IPHS8I::Impl::nearest_neighbors_unseq(std::span inputs, std::span results) const -{ - auto first = inputs.begin(); - const auto last = inputs.end(); - auto dest = results.begin(); - - // The above code is simpler (no templates) if we use just a single VCL - // complex type: `zfloatv`. If there is any performance difference, - // it will only be for extreme edge cases since the smaller types are only used - // at the end of the loop. - // - // It also makes this loop simpler as we handle all non-multiples-of-8 in - // the same way. - - // Can do these checks one-time outside of the loop - const auto distance = std::distance(first, last); - - // First, do multiples of 8 - const auto distance_ = distance - (distance % elements_per_iteration); - const auto last_ = first + distance_; - for (; first != last_; first += elements_per_iteration, dest += elements_per_iteration) - { - const auto f = sys::make_span(&(*first), elements_per_iteration); - const auto d = sys::make_span(&(*dest), elements_per_iteration); - - nearest_neighbors_unseq_T(f, d); - } - - // Then finish off anything left - assert(std::distance(first, last) < elements_per_iteration); - for (; first != last; ++first, ++dest) - { - const auto f = sys::make_span(&(*first), 1); - const auto d = sys::make_span(&(*dest), 1); - nearest_neighbors_seq(f, d); - } -} -#endif // SIX_sicd_ComplexToAMP8IPHS8I_unseq - -#if SIX_sicd_has_VCL -std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq_vcl( - std::span inputs, const six::AmplitudeTable* pAmplitudeTable) -{ - // make a structure to quickly find the nearest neighbor - const auto& converter = make_(pAmplitudeTable); - - std::vector retval(inputs.size()); - converter.impl.nearest_neighbors_unseq(inputs, sys::make_span(retval)); - return retval; -} -#endif // SIX_sicd_has_VCL - -#if SIX_sicd_has_ximd -std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq_ximd( - std::span inputs, const six::AmplitudeTable* pAmplitudeTable) -{ - // make a structure to quickly find the nearest neighbor - const auto& converter = make_(pAmplitudeTable); - - std::vector retval(inputs.size()); - converter.impl.nearest_neighbors_unseq(inputs, sys::make_span(retval)); - return retval; -} -#endif // SIX_sicd_has_ximd - -#if SIX_sicd_has_simd -std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq_simd( - std::span inputs, const six::AmplitudeTable* pAmplitudeTable) -{ - // make a structure to quickly find the nearest neighbor - const auto& converter = make_(pAmplitudeTable); - - std::vector retval(inputs.size()); - converter.impl.nearest_neighbors_unseq(inputs, sys::make_span(retval)); - return retval; -} -#endif // SIX_sicd_has_simd diff --git a/six/modules/c++/six.sicd/source/ImageData.cpp b/six/modules/c++/six.sicd/source/ImageData.cpp index 1e761cd41..62760c6e5 100644 --- a/six/modules/c++/six.sicd/source/ImageData.cpp +++ b/six/modules/c++/six.sicd/source/ImageData.cpp @@ -37,6 +37,8 @@ #include "six/AmplitudeTable.h" #include "six/sicd/GeoData.h" #include "six/sicd/Utilities.h" +#include "six/sicd/NearestNeighbors.h" + using namespace six; using namespace six::sicd; @@ -218,6 +220,21 @@ std::vector ImageData::toComplex(std::span inp std::vector ImageData::fromComplex(std::span inputs) const { - return six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors(inputs, amplitudeTable.get()); + // make a structure to quickly find the nearest neighbor + const auto& converter_ = six::sicd::details::ComplexToAMP8IPHS8I::make_(amplitudeTable.get()); + const six::sicd::NearestNeighbors converter(converter_); + + std::vector retval(inputs.size()); + converter.nearest_neighbors(inputs, retval); + return retval; } +std::vector ImageData::fromComplex(execution_policy policy, std::span inputs) const +{ + // make a structure to quickly find the nearest neighbor + const auto& converter_ = six::sicd::details::ComplexToAMP8IPHS8I::make_(amplitudeTable.get()); + const six::sicd::NearestNeighbors converter(converter_); + std::vector retval(inputs.size()); + converter.nearest_neighbors(policy, inputs, retval); + return retval; +} diff --git a/six/modules/c++/six.sicd/source/NearestNeighbors.cpp b/six/modules/c++/six.sicd/source/NearestNeighbors.cpp new file mode 100644 index 000000000..f1ed35ccb --- /dev/null +++ b/six/modules/c++/six.sicd/source/NearestNeighbors.cpp @@ -0,0 +1,162 @@ +/* ========================================================================= + * This file is part of six.sicd-c++ + * ========================================================================= + * + * (C) Copyright 2004 - 2014, MDA Information Systems LLC + * © Copyright 2024, Maxar Technologies, Inc. + * + * six.sicd-c++ is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; If not, + * see . + * + */ +#include "six/sicd/NearestNeighbors.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "six/sicd/ImageData.h" + +#undef min +#undef max + +using zfloat = six::zfloat; +using AMP8I_PHS8I = six::AMP8I_PHS8I_t; + +six::sicd::NearestNeighbors::NearestNeighbors(const details::ComplexToAMP8IPHS8I& converter) : converter_(converter) {} + +uint8_t six::sicd::NearestNeighbors::getPhase(zfloat z) const +{ + return converter_.getPhase(z); +} + +/*! + * Find the nearest element given an iterator range. + * @param value query value + * @return index of nearest value within the iterator range. + */ +static uint8_t nearest(std::span magnitudes, float value) +{ + const auto begin = magnitudes.begin(); + const auto end = magnitudes.end(); + + const auto it = std::lower_bound(begin, end, value); + if (it == begin) return 0; + + const auto prev_it = std::prev(it); + const auto nearest_it = it == end ? prev_it : + (value - *prev_it <= *it - value ? prev_it : it); + const auto distance = std::distance(begin, nearest_it); + assert(distance <= std::numeric_limits::max()); + return gsl::narrow(distance); +} +uint8_t six::sicd::NearestNeighbors::find_nearest(zfloat phase_direction, zfloat v) const +{ + // We have to do a 1D nearest neighbor search for magnitude. + // But it's not the magnitude of the input complex value - it's the projection of + // the complex value onto the ray of candidate magnitudes at the selected phase. + // i.e. dot product. + const auto projection = (phase_direction.real() * v.real()) + (phase_direction.imag() * v.imag()); + //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ??? + return nearest(converter_.magnitudes(), projection); +} +AMP8I_PHS8I six::sicd::NearestNeighbors::nearest_neighbor(const zfloat &v) const +{ + AMP8I_PHS8I retval; + retval.phase = getPhase(v); + + auto&& phase_direction = converter_.get_phase_directions().value[retval.phase]; + retval.amplitude = find_nearest(phase_direction, v); + return retval; +} +void six::sicd::NearestNeighbors::nearest_neighbors_seq(std::span inputs, std::span results) const +{ + std::transform(inputs.begin(), inputs.end(), results.begin(), + [&](const auto& v) { return nearest_neighbor(v); }); +} + +void six::sicd::NearestNeighbors::nearest_neighbors_par(std::span inputs, std::span results) const +{ + const auto func = [&](const auto& v) + { + return nearest_neighbor(v); + }; + std::ignore = mt::Transform_par(inputs.begin(), inputs.end(), results.begin(), func); +} + +void six::sicd::NearestNeighbors::nearest_neighbors(std::span inputs, std::span results) const +{ + // TODO: there could be more complicated logic here to decide between + // _seq, _par, _unseq, and _par_unseq + #if SIX_sicd_ComplexToAMP8IPHS8I_unseq + + // None of our SIMD implementations are dramatically slower when + // running unittests; that's mostly because there's lots of IO. So that + // the more interesting code-path is tested, use UNSEQ in debugging. + #if CODA_OSS_DEBUG + return nearest_neighbors_unseq(inputs, results); // TODO: + #else + return nearest_neighbors_par(inputs, results); + #endif + + #else + return nearest_neighbors_par(inputs, results); + + #endif +} +void six::sicd::NearestNeighbors::nearest_neighbors(execution_policy policy, std::span inputs, std::span results) const +{ + #if SIX_sicd_ComplexToAMP8IPHS8I_unseq + if (policy == execution_policy::par_unseq) + { + return nearest_neighbors_par_unseq(inputs, results); + } + if (policy == execution_policy::unseq) + { + return nearest_neighbors_unseq(inputs, results); + } + #endif + + if (policy == execution_policy::par) + { + return nearest_neighbors_par(inputs, results); + } + if (policy == execution_policy::seq) + { + return nearest_neighbors_seq(inputs, results); + } + + // https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag_t + // > If the implementation cannot parallelize or vectorize (e.g. due to lack of resources), + // > all standard execution policies can fall back to sequential execution. + #if CODA_OSS_DEBUG + throw std::logic_error("Unhandled execution_policy value."); + #endif + return nearest_neighbors(inputs, results); // no policy specified, "default policy" +} + +AMP8I_PHS8I six::sicd::nearest_neighbor(const details::ComplexToAMP8IPHS8I& converter_, const zfloat& v) +{ + const NearestNeighbors converter(converter_); + return converter.nearest_neighbor(v); +} diff --git a/six/modules/c++/six.sicd/source/NearestNeighbors_unseq.cpp b/six/modules/c++/six.sicd/source/NearestNeighbors_unseq.cpp new file mode 100644 index 000000000..1c57eeb47 --- /dev/null +++ b/six/modules/c++/six.sicd/source/NearestNeighbors_unseq.cpp @@ -0,0 +1,1050 @@ +/* ========================================================================= +* This file is part of six.sicd-c++ +* ========================================================================= +* +* (C) Copyright 2021, Maxar Technologies, Inc. +* +* six.sicd-c++ is free software; you can redistribute it and/or modify +* it under the terms of the GNU Lesser General Public License as published by +* the Free Software Foundation; either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Lesser General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public +* License along with this program; If not, +* see . +* +*/ +#include "six/sicd/NearestNeighbors.h" + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "six/sicd/Exports.h" + +#undef min +#undef max + +using zfloat = six::zfloat; +using AMP8I_PHS8I = six::AMP8I_PHS8I_t; + +#if SIX_sicd_has_VCL + +#define VCL_NAMESPACE vcl +#if _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4100) // '...': unreferenced formal parameter +#pragma warning(disable: 4127) // conditional expression is constant +#pragma warning(disable: 4244) // '...': conversion from '...' to '...', possible loss of data +#pragma warning(disable: 4723) // potential divide by 0 +#endif +#include "six/sicd/vectorclass/version2/vectorclass.h" +#include "six/sicd/vectorclass/version2/vectormath_trig.h" +#include "six/sicd/vectorclass/complex/complexvec1.h" +#if _MSC_VER +#pragma warning(pop) +#endif + +using vcl_intv = vcl::Vec8i; +using vcl_floatv = vcl::Vec8f; +constexpr auto vcl_elements_per_iteration = vcl_floatv::size(); + +inline int ssize(const vcl_intv& z) noexcept +{ + return z.size(); +} +inline int ssize(const vcl_floatv& z) noexcept +{ + return z.size(); +} + +using vcl_zfloatv = vcl::Complex8f; +auto real(const vcl_zfloatv& z) +{ + return z.real(); +} +auto imag(const vcl_zfloatv& z) +{ + return z.imag(); +} +inline size_t size(const vcl_zfloatv& z) noexcept +{ + return z.size(); +} +inline int ssize(const vcl_zfloatv& z) noexcept +{ + return z.size(); +} + +inline auto arg(const vcl_zfloatv& z) +{ + // https://en.cppreference.com/w/cpp/numeric/complex/arg + // > `std::atan2(std::imag(z), std::real(z))` + return atan2(z.imag(), z.real()); // arg() +} + +template +inline auto if_add(const T& f, const vcl_floatv& a, float b) +{ + return vcl::if_add(f, a, b); +} + +inline bool any_of(const vcl_intv& m) +{ + return horizontal_or(m); +} + +inline void copy_from(std::span p, vcl_floatv& result) +{ + assert(p.size() == result.size()); + result.load(p.data()); +} +inline auto copy_from(std::span p, vcl_zfloatv& result) +{ + // https://en.cppreference.com/w/cpp/numeric/complex + // > For any pointer to an element of an array of `std::complex` named `p` and any valid array index `i`, ... + // > is the real part of the complex number `p[i]`, ... + result.load(reinterpret_cast(p.data())); +} + +template +inline auto lookup(const vcl_intv& zindex, const std::array& phase_directions) +{ + return vcl::lookup(zindex, phase_directions.data()); +} +inline auto lookup(const vcl_intv& zindex, std::span magnitudes) +{ + assert(magnitudes.size() == six::AmplitudeTableSize); + return vcl::lookup(zindex, magnitudes.data()); +} + +#endif // SIX_sicd_has_VCL + +#if SIX_sicd_has_valarray +#include + +using valarray_intv = std::valarray; +using valarray_intv_mask = std::valarray; + +using valarray_floatv = std::valarray; +using valarray_floatv_mask = std::valarray; + +constexpr auto valarray_elements_per_iteration = 4; // valarray is not fixed size + +template +static inline auto ssize(const std::valarray& v) noexcept +{ + return gsl::narrow(v.size()); +} + +template +static auto valarray_generate(G&& generator) noexcept +{ + TValArray retval(valarray_elements_per_iteration); + for (size_t i = 0; i < retval.size(); i++) + { + retval[i] = generator(i); + } + return retval; +} +template +static inline auto generate(TGenerator&& generator, valarray_intv) +{ + return valarray_generate(generator); +} +template +static inline auto generate(TGenerator&& generator, valarray_floatv) +{ + return valarray_generate(generator); +} + +// Manage a SIMD complex as an array of two SIMDs +using valarray_zfloatv = std::array; +inline auto& real(valarray_zfloatv& z) noexcept +{ + return z[0]; +} +inline const auto& real(const valarray_zfloatv& z) noexcept +{ + return z[0]; +} +inline auto& imag(valarray_zfloatv& z) noexcept +{ + return z[1]; +} +inline const auto& imag(const valarray_zfloatv& z) noexcept +{ + return z[1]; +} +inline size_t size(const valarray_zfloatv& z) noexcept +{ + auto retval = real(z).size(); + assert(retval == imag(z).size()); + return retval; +} +inline auto ssize(const valarray_zfloatv& z) noexcept +{ + return gsl::narrow(size(z)); +} + +inline auto arg(const valarray_zfloatv& z) +{ + // https://en.cppreference.com/w/cpp/numeric/complex/arg + // > `std::atan2(std::imag(z), std::real(z))` + return std::atan2(imag(z), real(z)); // arg() +} + +template +static inline auto generate(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag, valarray_zfloatv) +{ + valarray_zfloatv retval; + real(retval) = generate(generate_real, valarray_floatv{}); + imag(retval) = generate(generate_imag, valarray_floatv{}); + return retval; +} + +static inline auto copy_from(std::span p, valarray_floatv& result) +{ + assert(p.size() == result.size()); + result = valarray_floatv(p.data(), p.size()); +} +static inline auto copy_from(std::span p, valarray_zfloatv& result) +{ + const auto generate_real = [&](size_t i) { return p[i].real(); }; + const auto generate_imag = [&](size_t i) { return p[i].imag(); }; + result = generate(generate_real, generate_imag, valarray_zfloatv{}); +} + +template +static auto valarray_select_(const TTest& test, const TResult& t, const TResult& f) +{ + TResult retval; + for (size_t i = 0; i < test.size(); i++) + { + retval[i] = test[i] ? t[i] : f[i]; + } + return retval; +} +static inline auto select(const valarray_floatv_mask& test, const valarray_floatv& t, const valarray_floatv& f) +{ + return valarray_select_(test, t, f); +} +static inline auto select(const valarray_intv_mask& test, const valarray_intv& t, const valarray_intv& f) +{ + return valarray_select_(test, t, f); +} + +static auto if_add(const valarray_floatv_mask& m, const valarray_floatv& v, typename valarray_floatv::value_type c) +{ + const auto generate_add = [&](size_t i) { + return m[i] ? v[i] + c : v[i]; + }; + return generate(generate_add, valarray_floatv{}); +} + +#endif // SIX_sicd_has_valarray + +#if SIX_sicd_has_ximd +#ifndef CODA_OSS_Ximd_ENABLE +#define CODA_OSS_Ximd_ENABLE 1 +#endif +#include + +template +using ximd = sys::ximd::Ximd; + +using ximd_intv = ximd; +using ximd_intv_mask = sys::ximd::ximd_mask; + +using ximd_floatv = ximd; +using ximd_floatv_mask = sys::ximd::ximd_mask; + +constexpr auto ximd_elements_per_iteration = ximd_floatv::size(); + +template +static inline auto ssize(const ximd& v) noexcept +{ + return gsl::narrow(v.size()); +} + +template +static inline auto generate(TGenerator&& generator, ximd_intv) +{ + return ximd_intv::generate(generator); +} +template +static inline auto generate(TGenerator&& generator, ximd_floatv) +{ + return ximd_floatv::generate(generator); +} + +// Manage a SIMD complex as an array of two SIMDs +using ximd_zfloatv = std::array; +inline auto& real(ximd_zfloatv& z) noexcept +{ + return z[0]; +} +inline const auto& real(const ximd_zfloatv& z) noexcept +{ + return z[0]; +} +inline auto& imag(ximd_zfloatv& z) noexcept +{ + return z[1]; +} +inline const auto& imag(const ximd_zfloatv& z) noexcept +{ + return z[1]; +} +inline size_t size(const ximd_zfloatv& z) noexcept +{ + auto retval = real(z).size(); + assert(retval == imag(z).size()); + return retval; +} +inline auto ssize(const ximd_zfloatv& z) noexcept +{ + return gsl::narrow(size(z)); +} + +inline auto arg(const ximd_zfloatv& z) +{ + // https://en.cppreference.com/w/cpp/numeric/complex/arg + // > `std::atan2(std::imag(z), std::real(z))` + return atan2(imag(z), real(z)); // arg() +} + +template +static inline auto generate(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag, ximd_zfloatv) +{ + ximd_zfloatv retval; + real(retval) = generate(generate_real, ximd_floatv{}); + imag(retval) = generate(generate_imag, ximd_floatv{}); + return retval; +} + +static inline void copy_from(std::span p, ximd_floatv& result) +{ + assert(p.size() == result.size()); + result.copy_from(p.data()); +} +static inline void copy_from(std::span p, ximd_zfloatv& result) +{ + const auto generate_real = [&](size_t i) { return p[i].real(); }; + const auto generate_imag = [&](size_t i) { return p[i].imag(); }; + result = generate(generate_real, generate_imag, ximd_zfloatv{}); +} + +template +static auto ximd_select_(const TTest& test, const TResult& t, const TResult& f) +{ + TResult retval; + for (size_t i = 0; i < test.size(); i++) + { + retval[i] = test[i] ? t[i] : f[i]; + } + return retval; +} +static inline auto select(const ximd_floatv_mask& test, const ximd_floatv& t, const ximd_floatv& f) +{ + return ximd_select_(test, t, f); +} +static inline auto select(const ximd_intv_mask& test, const ximd_intv& t, const ximd_intv& f) +{ + return ximd_select_(test, t, f); +} + +#endif // SIX_sicd_has_ximd + +#if SIX_sicd_has_simd + +#include + +// https://en.cppreference.com/w/cpp/experimental/simd/simd_cast +// > The TS specification is missing `simd_cast` and `static_simd_cast` overloads for `simd_mask`. ... +namespace stdx +{ + using namespace std::experimental; + using namespace std::experimental::__proposed; +} + +template +using simd = stdx::simd; + +using simd_intv = simd; +using simd_intv_mask = typename simd_intv::mask_type; + +using simd_floatv = simd; +using simd_floatv_mask = typename simd_floatv::mask_type; + +constexpr auto simd_elements_per_iteration = simd_floatv::size(); + +template +static inline auto ssize(const simd& v) noexcept +{ + return gsl::narrow(v.size()); +} + +template +static inline auto generate(TGenerator&& generator, simd_intv) +{ + return simd_intv(generator); +} +template +static inline auto generate(TGenerator&& generator, simd_floatv) +{ + return simd_floatv(generator); +} + +// Manage a SIMD complex as an array of two SIMDs +using simd_zfloatv = std::array; +inline auto& real(simd_zfloatv& z) noexcept +{ + return z[0]; +} +inline const auto& real(const simd_zfloatv& z) noexcept +{ + return z[0]; +} +inline auto& imag(simd_zfloatv& z) noexcept +{ + return z[1]; +} +inline const auto& imag(const simd_zfloatv& z) noexcept +{ + return z[1]; +} +inline size_t size(const simd_zfloatv& z) noexcept +{ + auto retval = real(z).size(); + assert(retval == imag(z).size()); + return retval; +} +inline auto ssize(const simd_zfloatv& z) noexcept +{ + return gsl::narrow(size(z)); +} + +inline auto arg(const simd_zfloatv& z) +{ + // https://en.cppreference.com/w/cpp/numeric/complex/arg + // > `std::atan2(std::imag(z), std::real(z))` + return atan2(imag(z), real(z)); // arg() +} + +template +static inline auto generate(TGeneratorReal&& generate_real, TGeneratorImag&& generate_imag, simd_zfloatv) +{ + simd_zfloatv retval; + for (size_t i = 0; i < size(retval); i++) + { + real(retval)[i] = generate_real(i); + imag(retval)[i] = generate_imag(i); + } + return retval; +} + +static inline auto copy_from(std::span p, simd_floatv& result) +{ + assert(p.size() == result.size()); + result.copy_from(p.data(), stdx::element_aligned); +} +static inline auto copy_from(std::span p, simd_zfloatv& result) +{ + const auto generate_real = [&](size_t i) { return p[i].real(); }; + const auto generate_imag = [&](size_t i) { return p[i].imag(); }; + result = generate(generate_real, generate_imag, simd_zfloatv{}); +} + +template +static auto simd_select_(const TTest& test, const TResult& t, const TResult& f) +{ + // https://en.cppreference.com/w/cpp/experimental/simd/where_expression + // > ... All other elements are left unchanged. + TResult retval; + where(test, retval) = t; + where(!test, retval) = f; + return retval; +} +template +static inline auto select(const TMask& test_, const simd_floatv& t, const simd_floatv& f) +{ + //const auto test = test_.__cvt(); // https://github.com/VcDevel/std-simd/issues/41 + const auto test = stdx::static_simd_cast(test_); // https://github.com/VcDevel/std-simd/issues/41 + return simd_select_(test, t, f); +} +template +static inline auto select(const TMask& test_, const simd_intv& t, const simd_intv& f) +{ + //const auto test = test_.__cvt(); // https://github.com/VcDevel/std-simd/issues/41 + const auto test = stdx::static_simd_cast(test_); // https://github.com/VcDevel/std-simd/issues/41 + return simd_select_(test, t, f); +} + +#endif // SIX_sicd_has_simd + +#if SIX_sicd_has_ximd || SIX_sicd_has_simd + +// There are slicker ways of doing this, but `std::enable_if` is complex. This +// is simple and easy to understand. Simplify with concedpts in C++20? + +template +static auto roundi_(const FloatV& v) // match vcl::roundi() +{ + const auto rounded = round(v); + const auto generate_roundi = [&](size_t i) + { return static_cast(rounded[i]); }; + return generate(generate_roundi, IntV{}); +} +#if SIX_sicd_has_ximd +static inline auto roundi(const ximd_floatv& v) // match vcl::roundi() +{ + return roundi_(v); +} +#endif +#if SIX_sicd_has_simd +static inline auto roundi(const simd_floatv& v) // match vcl::roundi() +{ + return roundi_(v); +} +#endif + +template +static auto if_add_(const TFloatVMask& m, const TFloatV& v, typename TFloatV::value_type c) +{ + const auto generate_add = [&](size_t i) { + return m[i] ? v[i] + c : v[i]; + }; + return generate(generate_add, TFloatV{}); +} +#if SIX_sicd_has_ximd +static inline auto if_add(const ximd_floatv_mask& m, const ximd_floatv& v, typename ximd_floatv::value_type c) +{ + return if_add_(m, v, c); +} +#endif +#if SIX_sicd_has_simd +static inline auto if_add(const simd_floatv_mask& m, const simd_floatv& v, typename simd_floatv::value_type c) +{ + return if_add_(m, v, c); +} +#endif + +template +static auto lookup_(const IntV& zindex, const std::array& phase_directions) +{ + // It seems that the "generator" constuctor is called with SIMD instructions. + // https://en.cppreference.com/w/cpp/experimental/simd/simd/simd + // > The calls to `generator` are unsequenced with respect to each other. + + const auto generate_real = [&](size_t i) { + const auto i_ = zindex[i]; + return phase_directions[i_].real(); + }; + const auto generate_imag = [&](size_t i) { + const auto i_ = zindex[i]; + return phase_directions[i_].imag(); + }; + return generate(generate_real, generate_imag, ZFloatV{}); +} +#if SIX_sicd_has_valarray +template +static inline auto lookup(const valarray_intv& zindex, const std::array& phase_directions) +{ + return lookup_(zindex, phase_directions); +} +#endif +#if SIX_sicd_has_ximd +template +static inline auto lookup(const ximd_intv& zindex, const std::array& phase_directions) +{ + return lookup_(zindex, phase_directions); +} +#endif +#if SIX_sicd_has_simd +template +static inline auto lookup(const simd_intv& zindex, const std::array& phase_directions) +{ + return lookup_(zindex, phase_directions); +} +#endif + +template +static auto lookup_(const IntV& zindex, std::span magnitudes) +{ + const auto lookup_f = [&](size_t i) { + const auto i_ = zindex[i]; + + // The index may be out of range. This is expected because `i` might be "don't care." + if ((i_ >= 0) && (i_ < std::ssize(magnitudes))) + { + return magnitudes[i_]; + } + return NAN; // propogate "don't care" + }; + return generate(lookup_f, FloatV{}); +} +#if SIX_sicd_has_ximd +static inline auto lookup(const ximd_intv& zindex, std::span magnitudes) +{ + return lookup_(zindex, magnitudes); +} +#endif +#if SIX_sicd_has_simd +static inline auto lookup(const simd_intv& zindex, std::span magnitudes) +{ + return lookup_(zindex, magnitudes); +} +#endif +#endif // SIX_sicd_has_ximd || SIX_sicd_has_simd + +/******************************************************************************************************/ + +template +static auto getPhase(const ZFloatV& v, float phase_delta) +{ + // Phase is determined via arithmetic because it's equally spaced. + // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around + // handles cases that are close to 2PI. + auto phase = arg(v); + phase = if_add(phase < 0.0f, phase, std::numbers::pi_v * 2.0f); // Wrap from [0, 2PI] + return roundi(phase / phase_delta); +} + +// https://en.cppreference.com/w/cpp/algorithm/lower_bound +/* +template +ForwardIt lower_bound(ForwardIt first, ForwardIt last, const T& value) +{ + ForwardIt it; + typename std::iterator_traits::difference_type count, step; + count = std::distance(first, last); + + while (count > 0) + { + it = first; + step = count / 2; + std::advance(it, step); + + if (*it < value) + { + first = ++it; + count -= step + 1; + } + else + count = step; + } + + return first; +} +*/ +template +inline auto lower_bound_(std::span magnitudes, const FloatV& v) +{ + IntV first = 0; + const IntV last = gsl::narrow(magnitudes.size()); + + auto count = last - first; + while (any_of(count > 0)) + { + auto it = first; + const auto step = count / 2; + it += step; + + auto next = it; ++next; // ... ++it; + auto advance = count; advance -= step + 1; // ... -= step + 1; + + const auto c = lookup(it, magnitudes); // magnituides[it] + const auto test = c < v; + it = select(test, next, it); // ... ++it + first = select(test, it, first); // first = ... + count = select(test, advance, step); // count -= step + 1 <...OR...> count = step + } + return first; +} +template +inline auto lower_bound(std::span magnitudes, const FloatV& value) +{ + auto retval = lower_bound_(magnitudes, value); + + #if CODA_OSS_DEBUG + for (int i = 0; i < ssize(value); i++) + { + const auto it = std::lower_bound(magnitudes.begin(), magnitudes.end(), value[i]); + const auto result = gsl::narrow(std::distance(magnitudes.begin(), it)); + assert(retval[i] == result); + } + #endif + + return retval; +} + +template +static auto nearest(std::span magnitudes, const FloatV& value) +{ + assert(magnitudes.size() == six::AmplitudeTableSize); + + /* + const auto it = std::lower_bound(begin, end, value); + if (it == begin) return 0; + + const auto prev_it = std::prev(it); + const auto nearest_it = it == end ? prev_it : + (value - *prev_it <= *it - value ? prev_it : it); + const auto distance = std::distance(begin, nearest_it); + assert(distance <= std::numeric_limits::max()); + return gsl::narrow(distance); + */ + const auto it = ::lower_bound(magnitudes, value); + const auto prev_it = it - 1; // const auto prev_it = std::prev(it); + + const auto v0 = value - lookup(prev_it, magnitudes); // value - *prev_it + const auto v1 = lookup(it, magnitudes) - value; // *it - value + //const auto nearest_it = select(v0 <= v1, prev_it, it); // (value - *prev_it <= *it - value ? prev_it : it); + + const IntV end = gsl::narrow(magnitudes.size()); + //const auto end_test = select(it == end, prev_it, nearest_it); // it == end ? prev_it : ... + const IntV zero = 0; + auto retval = select(it == 0, zero, // if (it == begin) return 0; + select(it == end, prev_it, // it == end ? prev_it : ... + select(v0 <= v1, prev_it, it) // (value - *prev_it <= *it - value ? prev_it : it); + )); + return retval; +} + +template +static auto find_nearest(std::span magnitudes, + const FloatV& phase_direction_real, const FloatV& phase_direction_imag, + const ZFloatV& v) +{ + // We have to do a 1D nearest neighbor search for magnitude. + // But it's not the magnitude of the input complex value - it's the projection of + // the complex value onto the ray of candidate magnitudes at the selected phase. + // i.e. dot product. + const auto projection = (phase_direction_real * real(v)) + (phase_direction_imag * imag(v)); + //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ??? + return nearest(magnitudes, projection); +} + +#if SIX_sicd_has_VCL +static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter, + const vcl_intv& phase, const vcl_zfloatv& v) +{ + const auto phase_direction_real = lookup(phase, converter.get_phase_directions().real); + const auto phase_direction_imag = lookup(phase, converter.get_phase_directions().imag); + return ::find_nearest(converter.magnitudes(), phase_direction_real, phase_direction_imag, v); +} +#endif + +#if SIX_sicd_has_valarray +static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter, + const valarray_intv& phase, const valarray_zfloatv& v) +{ + const auto phase_direction = lookup(phase, converter.get_phase_directions().value); + return ::find_nearest(converter.magnitudes(), real(phase_direction), imag(phase_direction), v); +} +#endif + +#if SIX_sicd_has_ximd || SIX_sicd_has_simd +template +static auto lookup_and_find_nearest_(const six::sicd::details::ComplexToAMP8IPHS8I& converter, + const IntV& phase, const ZFloatV& v) +{ + const auto& phase_directions = converter.get_phase_directions().value; + const auto phase_direction = lookup(phase, phase_directions); + return ::find_nearest(converter.magnitudes(), real(phase_direction), imag(phase_direction), v); +} +#if SIX_sicd_has_ximd +static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter, + const ximd_intv& phase, const ximd_zfloatv& v) +{ + return lookup_and_find_nearest_(converter, phase, v); +} +#endif +#if SIX_sicd_has_simd +static auto lookup_and_find_nearest(const six::sicd::details::ComplexToAMP8IPHS8I& converter, + const simd_intv& phase, const simd_zfloatv& v) +{ + return lookup_and_find_nearest_(converter, phase, v); +} +#endif +#endif + +#if SIX_sicd_ComplexToAMP8IPHS8I_unseq + +template +struct AMP8I_PHS8I_unseq final +{ + IntV amplitude; + IntV phase; +}; + +// Cast a blob of T* into "chunks" of T[N]; "undecay" an array. +// This isn't quite kosher, see: https://stackoverflow.com/questions/47924103/pointer-interconvertibility-vs-having-the-same-address +template +static inline auto array_cast(std::span data) +{ + using chunk_t = std::array; + const auto number_of_chunks = data.size() / N; + const void* const pData = data.data(); + return sys::make_span(static_cast(pData), number_of_chunks); +} + +// Inside of `std::transform()`, there is a copy; something like +// ``` +// *dest = func(*first); +// ``` +// By returning our own class from `func()`, we can take control of the assignment operator +// and use that to copy from `AMP8I_PHS8I_unseq` to `std::array&`. +// (Unlike most other operators, `operator=()` *must* be a member-function.) +// +// Using inheritance to avoid padding at the end of the `struct`. ... needed? +template +struct AMP8I_PHS8I_array final : public std::array +{ + AMP8I_PHS8I_array& operator=(const AMP8I_PHS8I_unseq&) = delete; // should only be using move-assignment + AMP8I_PHS8I_array& operator=(AMP8I_PHS8I_unseq&& other) + { + for (int i = 0; i < gsl::narrow(N); i++) + { + (*this)[i].phase = gsl::narrow(other.phase[i]); + (*this)[i].amplitude = gsl::narrow(other.amplitude[i]); + } + return *this; + } +}; +template +static inline auto AMP8I_PHS8I_array_cast(std::span data) +{ + // This isn't quite kosher, see `array_cast()` above. + using chunk_t = AMP8I_PHS8I_array; + const auto number_of_chunks = data.size() / N; + void* const pDest = data.data(); + return std::span(static_cast(pDest), number_of_chunks); +} + +template +using IntV = decltype(::getPhase(ZFloatV{}, 0.0f)); + +// The compiler can sometimes do better optimization with fixed-size structures. +template +auto six::sicd::NearestNeighbors::nearest_neighbors_unseq_T(const std::array& p) const // TODO: std::span ... ? +{ + ZFloatV v; + assert(p.size() == size(v)); + + copy_from(p, v); + #if CODA_OSS_DEBUG + for (int i = 0; i < ssize(v); i++) + { + //const auto z = p[i]; + //assert(real(v)[i] == z.real()); + //assert(imag(v)[i] == z.imag()); + } + #endif + + using intv_t = IntV; + AMP8I_PHS8I_unseq retval; + + retval.phase = ::getPhase(v, converter_.phase_delta()); + #if CODA_OSS_DEBUG + for (int i = 0; i < ssize(retval.phase); i++) + { + const auto phase_ = converter_.getPhase(p[i]); + assert(static_cast(retval.phase[i]) == phase_); + } + #endif + + retval.amplitude = lookup_and_find_nearest(converter_, retval.phase, v); + #if CODA_OSS_DEBUG + for (int i = 0; i < ssize(retval.amplitude); i++) + { + const auto i_ = retval.phase[i]; + const auto& phase_directions = converter_.get_phase_directions().value; + const auto a = find_nearest(phase_directions[i_], p[i]); + assert(a == retval.amplitude[i]); + } + #endif + + return retval; +} + +template +static void finish_nearest_neighbors_unseq(const six::sicd::NearestNeighbors& impl, + std::span inputs, std::span results) +{ + // Then finish off anything left + const auto remaining_count = inputs.size() % elements_per_iteration; + if (remaining_count > 0) + { + const auto remaining_index = inputs.size() - remaining_count; + const auto remaining_inputs = sys::make_span(&(inputs[remaining_index]), remaining_count); + const auto remaining_results = sys::make_span(&(results[remaining_index]), remaining_count); + impl.nearest_neighbors_seq(remaining_inputs, remaining_results); + } +} + +template +void six::sicd::NearestNeighbors::nearest_neighbors_unseq_(std::span inputs, std::span results) const +{ + using intv_t = IntV; + + // View the data as chunks of *elements_per_iteration*. This allows iterating + // to go *elements_per_iteration* at a time; and each chunk can be processed + // using `nearest_neighbors_unseq_T()`, above. + // + // This isn't quite kosher, see: https://stackoverflow.com/questions/47924103/pointer-interconvertibility-vs-having-the-same-address + auto const first = array_cast(inputs); + auto const dest = AMP8I_PHS8I_array_cast(results); + + const auto func = [&](const auto& v) + { + return nearest_neighbors_unseq_T(v); + }; + std::transform(first.begin(), first.end(), dest.begin(), func); + + // Then finish off anything left + finish_nearest_neighbors_unseq(*this, inputs, results); +} + +template +void six::sicd::NearestNeighbors::nearest_neighbors_par_unseq_T(std::span inputs, std::span results) const +{ + using intv_t = IntV; + + // View the data as chunks of *elements_per_iteration*. This allows iterating + // to go *elements_per_iteration* at a time; and each chunk can be processed + // using `nearest_neighbors_unseq_T()`, above. + // + // This isn't quite kosher, see: https://stackoverflow.com/questions/47924103/pointer-interconvertibility-vs-having-the-same-address + auto const first = array_cast(inputs); + auto const dest = AMP8I_PHS8I_array_cast(results); + + const auto func = [&](const auto& v) + { + return nearest_neighbors_unseq_T(v); + }; + mt::Transform_par(first.begin(), first.end(), dest.begin(), func); + + // Then finish off anything left + finish_nearest_neighbors_unseq(*this, inputs, results); +} + +#if SIX_sicd_has_simd +static const std::string unseq_simd = "simd"; +#endif +#if SIX_sicd_has_VCL +static const std::string unseq_vcl = "vcl"; +#endif +#if SIX_sicd_has_valarray +static const std::string unseq_valarray = "valarray"; +#endif +#if SIX_sicd_has_ximd +static const std::string unseq_ximd = "ximd"; +#endif +static std::string nearest_neighbors_unseq_ = +#if SIX_sicd_has_simd +unseq_simd; +#elif SIX_sicd_has_VCL +unseq_vcl; +#elif SIX_sicd_has_valarray +unseq_valarray; +#elif SIX_sicd_has_ximd +unseq_ximd; +#else +#error "Don't know how to implement six_sicd_set_nearest_neighbors_unseq()" +#endif +std::string SIX_SICD_API six_sicd_set_nearest_neighbors_unseq(std::string unseq) +{ + // We'll "validate" when the string is actually used; this minimizes + // the places that need updating when things change. + auto retval = nearest_neighbors_unseq_; + nearest_neighbors_unseq_ = std::move(unseq); + return retval; +} + +void six::sicd::NearestNeighbors::nearest_neighbors_unseq(std::span inputs, std::span results) const +{ + // TODO: there could be more complicated logic here to determine which UNSEQ + // implementation to use. + + + // This is very simple as it's only used for unit-testing + const auto& unseq = ::nearest_neighbors_unseq_; + #if SIX_sicd_has_simd + if (unseq == unseq_simd) + { + return nearest_neighbors_unseq_(inputs, results); + } + #endif + #if SIX_sicd_has_VCL + if (unseq == unseq_vcl) + { + return nearest_neighbors_unseq_(inputs, results); + } + #endif + #if SIX_sicd_has_valarray + if (unseq == unseq_valarray) + { + return nearest_neighbors_unseq_(inputs, results); + } + #endif + #if SIX_sicd_has_ximd + if (unseq == unseq_ximd) + { + return nearest_neighbors_unseq_(inputs, results); + } + #endif + + throw std::logic_error("Don't know how to implement nearest_neighbors_unseq() for unseq=" + unseq); +} + +void six::sicd::NearestNeighbors::nearest_neighbors_par_unseq(std::span inputs, std::span results) const +{ + // TODO: there could be more complicated logic here to determine which UNSEQ + // implementation to use. + + // This is very simple as it's only used for unit-testing + const auto& unseq = ::nearest_neighbors_unseq_; + #if SIX_sicd_has_simd + if (unseq == unseq_simd) + { + return nearest_neighbors_par_unseq_T(inputs, results); + } + #endif + #if SIX_sicd_has_VCL + if (unseq == unseq_vcl) + { + return nearest_neighbors_par_unseq_T(inputs, results); + } + #endif + #if SIX_sicd_has_valarray + if (unseq == unseq_valarray) + { + return nearest_neighbors_par_unseq_T(inputs, results); + } + #endif + #if SIX_sicd_has_ximd + if (unseq == unseq_ximd) + { + return nearest_neighbors_par_unseq_T(inputs, results); + } + #endif + + throw std::logic_error("Don't know how to implement nearest_neighbors_par_unseq() for unseq=" + unseq); +} +#endif // SIX_sicd_ComplexToAMP8IPHS8I_unseq diff --git a/six/modules/c++/six.sicd/unittests/test_AMP8I_PHS8I.cpp b/six/modules/c++/six.sicd/unittests/test_AMP8I_PHS8I.cpp index da3680a97..31357b77d 100644 --- a/six/modules/c++/six.sicd/unittests/test_AMP8I_PHS8I.cpp +++ b/six/modules/c++/six.sicd/unittests/test_AMP8I_PHS8I.cpp @@ -548,6 +548,12 @@ static std::vector testing_fromComplex(std::span testing_fromComplex(six::execution_policy policy, std::span inputs) +{ + static const six::sicd::ImageData imageData; + assert(imageData.amplitudeTable.get() == nullptr); + return imageData.fromComplex(policy, inputs); +} static void test_adjusted_values(const std::string& testName, const std::vector& values, const std::vector& expected, six::zfloat delta) @@ -566,7 +572,7 @@ static void test_adjusted_values(const std::string& testName, const std::vector< } } -TEST_CASE(test_nearest_neighbor) +static void test_nearest_neighbor_(const std::string& testName, const six::execution_policy* pPolicy = nullptr) { const std::vector values{ {0.0, 0.0}, {1.0, 1.0}, {10.0, -10.0}, {-100.0, 100.0}, {-1000.0, -1000.0} }; @@ -578,7 +584,8 @@ TEST_CASE(test_nearest_neighbor) {static_cast(141), static_cast(96)}, {static_cast(255), static_cast(160)} }; - const auto actual = testing_fromComplex(sys::make_span(values)); + const auto values_ = sys::make_span(values); + const auto actual = pPolicy ? testing_fromComplex(*pPolicy, values_) : testing_fromComplex(values_); for (size_t i = 0; i < expected.size(); i++) { TEST_ASSERT_EQ(expected[i].amplitude, actual[i].amplitude); @@ -614,6 +621,52 @@ TEST_CASE(test_nearest_neighbor) other_expected[0].phase += 32; TEST_ASSERT_EQ(other_expected[0].phase, expected[0].phase); } +static void test_nearest_neighbor_(const std::string& testName, six::execution_policy policy) +{ + test_nearest_neighbor_(testName, &policy); +} +TEST_CASE(test_nearest_neighbor) +{ + test_nearest_neighbor_(testName); + test_nearest_neighbor_(testName, six::execution_policy::seq); + test_nearest_neighbor_(testName, six::execution_policy::par); + test_nearest_neighbor_(testName, six::execution_policy::unseq); + test_nearest_neighbor_(testName, six::execution_policy::par_unseq); + + #if SIX_sicd_ComplexToAMP8IPHS8I_unseq + extern std::string six_sicd_set_nearest_neighbors_unseq(std::string unseq); + const auto default_unseq = six_sicd_set_nearest_neighbors_unseq("xyz"); + try + { + test_nearest_neighbor_(testName, six::execution_policy::unseq); + TEST_FAIL; + } + catch (const std::logic_error&) + { + TEST_SUCCESS; + } + + #if SIX_sicd_has_simd + six_sicd_set_nearest_neighbors_unseq("simd"); + test_nearest_neighbor_(testName, six::execution_policy::unseq); + test_nearest_neighbor_(testName, six::execution_policy::par_unseq); + #endif + + #if SIX_sicd_has_VCL + six_sicd_set_nearest_neighbors_unseq("vcl"); + test_nearest_neighbor_(testName, six::execution_policy::unseq); + test_nearest_neighbor_(testName, six::execution_policy::par_unseq); + #endif + + #if SIX_sicd_has_ximd + six_sicd_set_nearest_neighbors_unseq("ximd"); + test_nearest_neighbor_(testName, six::execution_policy::unseq); + test_nearest_neighbor_(testName, six::execution_policy::par_unseq); + #endif + + six_sicd_set_nearest_neighbors_unseq(default_unseq); // restore default + #endif // SIX_sicd_ComplexToAMP8IPHS8I_unseq +} TEST_CASE(test_verify_phase_uint8_ordering) { diff --git a/six/modules/c++/six/include/six/AmplitudeTable.h b/six/modules/c++/six/include/six/AmplitudeTable.h index 4f44bc76b..4003958ae 100644 --- a/six/modules/c++/six/include/six/AmplitudeTable.h +++ b/six/modules/c++/six/include/six/AmplitudeTable.h @@ -182,10 +182,11 @@ struct SIX_SIX_API AMP8I_PHS8I_t final #if __has_include("../../../six.sicd/include/six/sicd/vectorclass/version2/vectorclass.h") || \ __has_include("six/sicd/vectorclass/version2/vectorclass.h") #if _MSC_VER - #define SIX_sicd_has_VCL !CODA_OSS_cpp20 // TODO: MSVC works with C++17, but not C++20 ... ? + // Compiler error: bug in MSVC or VCL? + #define SIX_sicd_has_VCL !CODA_OSS_cpp20 // TODO: enable for C++20 #else - #define SIX_sicd_has_VCL 1 - #endif // _MSC_VER + #define SIX_sicd_has_VCL 1 + #endif #else #define SIX_sicd_has_VCL 0 #endif // __has_include @@ -208,21 +209,37 @@ struct SIX_SIX_API AMP8I_PHS8I_t final // std::experimental::simd is G++11/C++20. #define SIX_sicd_has_ximd CODA_OSS_DEBUG #endif +#ifndef SIX_sicd_has_valarray + // This is the "old way" of doing things. Note that GCC implements + // much of this using expresson templates which means writing + // generic routines must be done carefully. Turn this on by default + // so that "interesting" code is executed. + #define SIX_sicd_has_valarray 0 // TODO: finish implementing +#endif #ifndef SIX_sicd_ComplexToAMP8IPHS8I_unseq - #if SIX_sicd_has_VCL || SIX_sicd_has_simd || SIX_sicd_has_ximd + #if SIX_sicd_has_VCL || SIX_sicd_has_simd || SIX_sicd_has_valarray || SIX_sicd_has_ximd #define SIX_sicd_ComplexToAMP8IPHS8I_unseq 1 #else #define SIX_sicd_ComplexToAMP8IPHS8I_unseq 0 #endif // SIX_sicd_have_VCL || SIX_sicd_has_simd #endif // SIX_sicd_ComplexToAMP8IPHS8I_unseq +// We're still at C++14, so we don't have the types in +// https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag +// For now, our use is very limited; so don't try to +// mimic C++17 (these should be types, not `enum` values). +enum class execution_policy +{ + seq, par, par_unseq, unseq +}; struct AmplitudeTable; // forward namespace sicd { namespace details { + /*! * \brief A utility that's used to convert complex values into 8-bit amplitude and phase values. * @@ -245,73 +262,32 @@ class ComplexToAMP8IPHS8I final ComplexToAMP8IPHS8I(ComplexToAMP8IPHS8I&&) = delete; // implicitly deleted because of =delete for copy ComplexToAMP8IPHS8I& operator=(ComplexToAMP8IPHS8I&&) = delete; // implicitly deleted because of =delete for copy - /*! - * Get the nearest amplitude and phase value given a complex value - * @param v complex value to query with - * @return nearest amplitude and phase value - */ - AMP8I_PHS8I_t nearest_neighbor_(const six::zfloat& v) const; - static std::vector nearest_neighbors_par(std::span inputs, const six::AmplitudeTable*); - static std::vector nearest_neighbors_seq(std::span inputs, const six::AmplitudeTable*); - - #if SIX_sicd_ComplexToAMP8IPHS8I_unseq - static std::vector nearest_neighbors_unseq(std::span inputs, const six::AmplitudeTable*); - static std::vector nearest_neighbors_par_unseq(std::span inputs, const six::AmplitudeTable*); - - #if SIX_sicd_has_VCL - static std::vector nearest_neighbors_unseq_vcl(std::span inputs, const six::AmplitudeTable*); - #endif - #if SIX_sicd_has_simd - static std::vector nearest_neighbors_unseq_simd(std::span inputs, const six::AmplitudeTable*); - #endif - #if SIX_sicd_has_ximd - static std::vector nearest_neighbors_unseq_ximd(std::span inputs, const six::AmplitudeTable*); - #endif - #endif - - static std::vector nearest_neighbors(std::span inputs, const six::AmplitudeTable*); // one of the above + std::span magnitudes() const; + float phase_delta() const; -private: - struct Impl final + //! Unit vector rays that represent each direction that phase can point. + struct phase_directions final { - const ComplexToAMP8IPHS8I& converter; - Impl(const ComplexToAMP8IPHS8I& c) : converter(c) {} - ~Impl() = default; - Impl(const Impl&) = delete; - Impl& operator=(const Impl&) = delete; - Impl(Impl&&) = delete; // implicitly deleted because of =delete for copy - Impl& operator=(Impl&&) = delete; // implicitly deleted because of =delete for copy - - void nearest_neighbors_seq(std::span inputs, std::span results) const; - void nearest_neighbors_par(std::span inputs, std::span results) const; - #if SIX_sicd_ComplexToAMP8IPHS8I_unseq - template - void nearest_neighbors_unseq(std::span inputs, std::span results) const; - void nearest_neighbors_par_unseq(std::span inputs, std::span results) const; - - template - void nearest_neighbors_unseq_T(std::span, std::span) const; - - #endif - - //! The sorted set of possible magnitudes order from small to large. - std::array uncached_magnitudes; - std::span magnitudes; - uint8_t find_nearest(six::zfloat phase_direction, six::zfloat v) const; - - //! The difference in phase angle between two UINT phase values. - float phase_delta; - uint8_t getPhase(six::zfloat) const; - - //! Unit vector rays that represent each direction that phase can point. - std::array phase_directions; // interleaved, std::complex + std::array value; // interleaved, std::complex #ifdef SIX_sicd_has_VCL - std::array phase_directions_real; - std::array phase_directions_imag; + std::array real; + std::array imag; #endif }; -public: - Impl impl; + const phase_directions& get_phase_directions() const; + + uint8_t getPhase(six::zfloat v) const; + +private: + //! The sorted set of possible magnitudes order from small to large. + std::array uncached_magnitudes_; + std::span magnitudes_; + + //! The difference in phase angle between two UINT phase values. + float phase_delta_; + + //! Unit vector rays that represent each direction that phase can point. + phase_directions phase_directions_; }; } } @@ -417,7 +393,7 @@ struct SIX_SIX_API AmplitudeTable final : public LUT } private: - mutable std::vector lookup; + mutable std::vector lookup; mutable std::unique_ptr pFromComplex; };