diff --git a/six.sln b/six.sln index d4bf483ca..e60ebc356 100644 --- a/six.sln +++ b/six.sln @@ -48,6 +48,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = ".github-workflows", ".githu .github\workflows\frequent_check.yml = .github\workflows\frequent_check.yml EndProjectSection EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "8AMPI_PHSI", "six\modules\c++\samples\8AMPI_PHSI.dir\8AMPI_PHSI.dir.vcxproj", "{05C352A8-9F70-4006-B851-70A6904B58C1}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 @@ -114,6 +116,10 @@ Global {9997E895-5161-4DDF-8F3F-099894CB2F21}.Debug|x64.Build.0 = Debug|x64 {9997E895-5161-4DDF-8F3F-099894CB2F21}.Release|x64.ActiveCfg = Release|x64 {9997E895-5161-4DDF-8F3F-099894CB2F21}.Release|x64.Build.0 = Release|x64 + {05C352A8-9F70-4006-B851-70A6904B58C1}.Debug|x64.ActiveCfg = Debug|x64 + {05C352A8-9F70-4006-B851-70A6904B58C1}.Debug|x64.Build.0 = Debug|x64 + {05C352A8-9F70-4006-B851-70A6904B58C1}.Release|x64.ActiveCfg = Release|x64 + {05C352A8-9F70-4006-B851-70A6904B58C1}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/six/modules/c++/samples/8AMPI_PHSI.cpp b/six/modules/c++/samples/8AMPI_PHSI.cpp new file mode 100644 index 000000000..af79cc4d1 --- /dev/null +++ b/six/modules/c++/samples/8AMPI_PHSI.cpp @@ -0,0 +1,205 @@ +// 8AMPI_PHSI.cpp : This file contains the 'main' function. Program execution begins and ends there. +// + +#ifndef _USE_MATH_DEFINES +#define _USE_MATH_DEFINES +#endif +#include + +#include + +#include +#include +#include +#include +#include +#include +//#include +#include + +#include "six/AmplitudeTable.h" +#include "six/sicd/Utilities.h" + +using namespace six; + +static void toComplex(six::Amp8iPhs8iLookup_t values, std::span inputs, std::span results) +{ + const auto toComplex_ = [&values](const auto& v) + { + return values(v.amplitude, v.phase); + }; + + std::transform(inputs.begin(), inputs.end(), results.begin(), toComplex_); + //std::transform(std::execution::par, inputs.begin(), inputs.end(), results.begin(), toComplex_); +} +void toComplex(std::span inputs, std::span results) +{ + const auto values = six::sicd::ImageData::getLookup(nullptr); + toComplex(values, inputs, results); +} + +auto make_inputs(size_t count) +{ + std::vector retval; + retval.reserve(count); + for (size_t i = 0; i < count; i++) + { + const auto amplitude = static_cast(i * i); + const auto phase = static_cast(~amplitude); + retval.push_back(AMP8I_PHS8I_t{ amplitude, phase }); + } + return retval; +} + +static std::vector fromComplex_nearest_neighbors(std::span inputs) +{ + static const six::sicd::ImageData imageData; + assert(imageData.amplitudeTable.get() == nullptr); + return imageData.fromComplex(inputs); +} +//static std::vector fromComplex_nearest_neighbors2(std::span inputs) +//{ +// return six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors2(inputs, nullptr); +//} +// +//static std::vector fromComplex_nearest_neighbors_par(std::span inputs) +//{ +// return six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors(std::execution::par, inputs, nullptr); +//} +//static std::vector fromComplex_nearest_neighbors_par2(std::span inputs) +//{ +// return six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par2(inputs, nullptr); +//} + +//static std::vector fromComplex_nearest_neighbors_unseq(std::span inputs) +//{ +// return six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors(std::execution::unseq, inputs, nullptr); +//} +//static std::vector fromComplex_nearest_neighbors_par_unseq(std::span inputs) +//{ +// return six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors(std::execution::par_unseq, inputs, nullptr); +//} + +//static void fromComplex_nearest_neighbors_threaded(std::span inputs, std::span results) +//{ +// // make a structure to quickly find the nearest neighbor +// auto& converter = six::sicd::details::ComplexToAMP8IPHS8I::make(nullptr); +// converter.nearest_neighbors_threaded(inputs, results); +//} +//static void fromComplex_nearest_neighbors_simd(std::span inputs, std::span results) +//{ +// // make a structure to quickly find the nearest neighbor +// auto& converter = six::sicd::details::ComplexToAMP8IPHS8I::make(nullptr); +// converter.nearest_neighbors_simd(inputs, results); +//} + +#ifdef NDEBUG +constexpr auto iterations = 10; +#else +constexpr auto iterations = 1; +#endif +template +static std::chrono::duration test(TFunc f, const std::vector& inputs) +{ + auto ap_results = f(inputs); + auto start = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < iterations; i++) + { + ap_results = f(inputs); + } + auto end = std::chrono::high_resolution_clock::now(); + return end - start; +} + +int main() +{ + #ifdef NDEBUG + constexpr auto inputs_size = 25000000; + #else + constexpr auto inputs_size = 100; + #endif + const auto inputs = make_inputs(inputs_size * 4); + std::vector results(inputs.size()); + + toComplex(inputs, results); + + /*********************************************************************************/ + auto diff = test(fromComplex_nearest_neighbors, results); + std::cout << "Time (nearest_neighbors): " << std::setw(9) << diff.count() << "\n"; + + //diff = test(fromComplex_nearest_neighbors2, results); + //std::cout << "Time (nearest_neighbors2): " << std::setw(9) << diff.count() << "\n"; + + //diff = test(fromComplex_nearest_neighbors_par, results); + //std::cout << "Time (nearest_neighbors_par): " << std::setw(9) << diff.count() << "\n"; + + //diff = test(fromComplex_nearest_neighbors_par2, results); + //std::cout << "Time (nearest_neighbors_par2): " << std::setw(9) << diff.count() << "\n"; + + //diff = test(fromComplex_nearest_neighbors_unseq, results); + //std::cout << "Time (nearest_neighbors_unseq): " << std::setw(9) << diff.count() << "\n"; + + //diff = test(fromComplex_nearest_neighbors_par_unseq, results); + //std::cout << "Time (nearest_neighbors_par_unseq): " << std::setw(9) << diff.count() << "\n"; + + //fromComplex_transform(results, ap_results); + //start = std::chrono::high_resolution_clock::now(); + //for (int i = 0; i < iterations; i++) + //{ + // fromComplex_transform(results, ap_results); + //} + //end = std::chrono::high_resolution_clock::now(); + //diff = end - start; + //std::cout << "Time (transform): " << std::setw(9) << diff.count() << "\n"; + //const auto transform_results = ap_results; + + ///*********************************************************************************/ + + //fromComplex_nearest_neighbors_threaded(results, ap_results); + //start = std::chrono::high_resolution_clock::now(); + //for (int i = 0; i < iterations; i++) + //{ + // fromComplex_nearest_neighbors_threaded(results, ap_results); + //} + //end = std::chrono::high_resolution_clock::now(); + //diff = end - start; + //std::cout << "Time (nearest_neighbors_threaded): " << std::setw(9) << diff.count() << "\n"; + //const auto nearest_neighbors_threaded_results = ap_results; + + //fromComplex_transform_par(results, ap_results); + //start = std::chrono::high_resolution_clock::now(); + //for (int i = 0; i < iterations; i++) + //{ + // fromComplex_transform_par(results, ap_results); + //} + //end = std::chrono::high_resolution_clock::now(); + //diff = end - start; + //std::cout << "Time (transform_par): " << std::setw(9) << diff.count() << "\n"; + //const auto transform_par_results = ap_results; + + ///*********************************************************************************/ + + //fromComplex_nearest_neighbors_simd(results, ap_results); + //start = std::chrono::high_resolution_clock::now(); + //for (int i = 0; i < iterations; i++) + //{ + // fromComplex_nearest_neighbors_simd(results, ap_results); + //} + //end = std::chrono::high_resolution_clock::now(); + //diff = end - start; + //std::cout << "Time (nearest_neighbors_simd): " << std::setw(9) << diff.count() << "\n"; + //const auto nearest_neighbors_threaded_simd = ap_results; + + //fromComplex_transform_unseq(results, ap_results); + //start = std::chrono::high_resolution_clock::now(); + //for (int i = 0; i < iterations; i++) + //{ + // fromComplex_transform_unseq(results, ap_results); + //} + //end = std::chrono::high_resolution_clock::now(); + //diff = end - start; + //std::cout << "Time (transform_unseq): " << std::setw(9) << diff.count() << "\n"; + //const auto transform_unseq_results = ap_results; + +} + diff --git a/six/modules/c++/samples/8AMPI_PHSI.dir/8AMPI_PHSI.dir.vcxproj b/six/modules/c++/samples/8AMPI_PHSI.dir/8AMPI_PHSI.dir.vcxproj new file mode 100644 index 000000000..f65f3e8f7 --- /dev/null +++ b/six/modules/c++/samples/8AMPI_PHSI.dir/8AMPI_PHSI.dir.vcxproj @@ -0,0 +1,146 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 16.0 + Win32Proj + {05C352A8-9F70-4006-B851-70A6904B58C1} + checkvalidsix + 10.0 + 8AMPI_PHSI + + + + Application + true + v143 + + + Application + false + v143 + true + + + + + + + + + + + + + + + true + + + false + + + + true + _DEBUG;_CONSOLE;%(PreprocessorDefinitions);_SILENCE_NONFLOATING_COMPLEX_DEPRECATION_WARNING; CODA_OSS_LIBRARY_SHARED + true + $(SolutionDir)six\modules\c++\scene\include;$(SolutionDir)six\modules\c++\six\include;$(SolutionDir)six\modules\c++\six.sidd\include;$(SolutionDir)six\modules\c++\six.sicd\include;$(SolutionDir)six\modules\c++\cphd\include;$(SolutionDir)six\modules\c++\cphd03\include;$(SolutionDir)externals\nitro\modules\c\nrt\include;$(SolutionDir)externals\nitro\modules\c\nitf\include;$(SolutionDir)externals\nitro\modules\c++\nitf\include;$(SolutionDir)externals\coda-oss\out\install\$(Platform)-$(Configuration)\include;$(SolutionDir)out\install\$(Platform)-$(Configuration)\include + Use + pch.h + pch.h + true + true + AdvancedVectorExtensions2 + ProgramDatabase + Guard + Level4 + true + MultiThreadedDebugDLL + true + + + Console + true + $(SolutionDir)externals\coda-oss\out\install\$(Platform)-$(Configuration)\lib;$(SolutionDir)out\install\$(Platform)-$(Configuration)\lib;%(AdditionalLibraryDirectories) + + + + + Level3 + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions);_SILENCE_NONFLOATING_COMPLEX_DEPRECATION_WARNING; CODA_OSS_LIBRARY_SHARED + true + $(SolutionDir)six\modules\c++\scene\include;$(SolutionDir)six\modules\c++\six\include;$(SolutionDir)six\modules\c++\six.sidd\include;$(SolutionDir)six\modules\c++\six.sicd\include;$(SolutionDir)six\modules\c++\cphd\include;$(SolutionDir)six\modules\c++\cphd03\include;$(SolutionDir)externals\nitro\modules\c\nrt\include;$(SolutionDir)externals\nitro\modules\c\nitf\include;$(SolutionDir)externals\nitro\modules\c++\nitf\include;$(SolutionDir)externals\coda-oss\out\install\$(Platform)-$(Configuration)\include;$(SolutionDir)out\install\$(Platform)-$(Configuration)\include + Use + pch.h + pch.h + true + true + AdvancedVectorExtensions2 + Speed + Guard + true + + + Console + true + true + true + $(SolutionDir)externals\coda-oss\out\install\$(Platform)-$(Configuration)\lib;$(SolutionDir)out\install\$(Platform)-$(Configuration)\lib;%(AdditionalLibraryDirectories) + + + + + + Create + Create + + + + + + + + + + + + {9997e895-5161-4ddf-8f3f-099894cb2f21} + + + {8f357a19-799e-4971-850e-3f28485c130b} + + + {f06550ad-cfc7-40b8-8727-6c82c69a8982} + + + {78849481-d356-4cc7-b182-31c21f857ed1} + + + {1cfcde59-6410-4037-95eb-b37d31e10820} + + + {34ac2314-fbd1-42ad-aaf4-0cebc6bf737e} + + + {ddc587c2-53ba-44a9-94e7-07be52af3d0b} + + + {62aad4dd-35ba-41a0-8263-1f4dfd755689} + + + + + + \ No newline at end of file diff --git a/six/modules/c++/samples/8AMPI_PHSI.dir/8AMPI_PHSI.dir.vcxproj.filters b/six/modules/c++/samples/8AMPI_PHSI.dir/8AMPI_PHSI.dir.vcxproj.filters new file mode 100644 index 000000000..e6fdd59e6 --- /dev/null +++ b/six/modules/c++/samples/8AMPI_PHSI.dir/8AMPI_PHSI.dir.vcxproj.filters @@ -0,0 +1,30 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/six/modules/c++/samples/8AMPI_PHSI.dir/pch.cpp b/six/modules/c++/samples/8AMPI_PHSI.dir/pch.cpp new file mode 100644 index 000000000..1d9f38c57 --- /dev/null +++ b/six/modules/c++/samples/8AMPI_PHSI.dir/pch.cpp @@ -0,0 +1 @@ +#include "pch.h" diff --git a/six/modules/c++/samples/8AMPI_PHSI.dir/pch.h b/six/modules/c++/samples/8AMPI_PHSI.dir/pch.h new file mode 100644 index 000000000..c7a25e8a3 --- /dev/null +++ b/six/modules/c++/samples/8AMPI_PHSI.dir/pch.h @@ -0,0 +1,72 @@ +#pragma once + +#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers + +// We're building in Visual Studio ... used to control where we get a little bit of config info +#define NITRO_PCH 1 + +#pragma warning(disable: 4668) // '...' is not defined as a preprocessor macro, replacing with '...' for '...' +#pragma warning(disable: 4820) // '...': '...' bytes padding added after data member '...' +#pragma warning(disable: 4710) // '...': function not inlined + +#pragma warning(disable: 5045) // Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified + +#pragma warning(disable: 4619) // #pragma warning: there is no warning number '...' +#pragma warning(disable: 4625) // '...': copy constructor was implicitly defined as deleted +#pragma warning(disable: 4626) // '...': assignment operator was implicitly defined as deleted +#pragma warning(disable: 5026) // '...': move constructor was implicitly defined as deleted +#pragma warning(disable: 5027) // '...': move assignment operator was implicitly defined as deleted +#pragma warning(disable: 5264) // '...': '...' variable is not used + +// TODO: get rid of these someday? +#pragma warning(disable: 4774) // '...' : format string expected in argument 3 is not a string literal +#pragma warning(disable: 4267) // '...': conversion from '...' to '...', possible loss of data +#pragma warning(disable: 4244) // '...': conversion from '...' to '...', possible loss of data +#pragma warning(disable: 4242) // '...': conversion from '...' to '...', possible loss of data +#pragma warning(disable: 4018) // '...': signed / unsigned mismatch +#pragma warning(disable: 4389) // '...': signed / unsigned mismatch +#pragma warning(disable: 4365) // '...': conversion from '...' to '...', signed / unsigned mismatch +#pragma warning(disable: 5219) // implicit conversion from '...' to '...', possible loss of data +#pragma warning(disable: 4514) // '...': unreferenced inline function has been removed +#pragma warning(disable: 5039) // '...' : pointer or reference to potentially throwing function passed to 'extern "C"' function under -EHc. Undefined behavior may occur if this function throws an exception. +#pragma warning(disable: 5267) // definition of implicit copy constructor for '...' is deprecated because it has a user-provided destructor + +// TODO: get rid of these someday? ... from Visual Studio code-analysis +#pragma warning(disable: 26495) // Variable '...' is uninitialized. Always initialize a member variable(type.6). +#pragma warning(disable: 26451) // Arithmetic overflow : Using operator '...' on a 4 byte value and then casting the result to a 8 byte value. Cast the value to the wider type before calling operator '*' to avoid overflow(io.2). +#pragma warning(disable: 6385) // Reading invalid data from '...': the readable size is '...' bytes, but '...' bytes may be read. +#pragma warning(disable: 6386) // Buffer overrun while writing to '...': the writable size is '...' bytes, but '...' bytes might be written. + +#pragma warning(push) +#pragma warning(disable: 5220) // '...': a non - static data member with a volatile qualified type no longer implies +#pragma warning(disable: 5204) // 'Concurrency::details::_DefaultPPLTaskScheduler': class has virtual functions, but its trivial destructor is not virtual; instances of objects derived from this class may not be destructed correctly + +#pragma warning(disable: 4464) // relative include path contains '..' +#include "../../cpp_pch.h" + +#include + +#pragma warning(pop) + +#include +#include +#pragma warning(push) +#pragma warning(disable: 5039) // '...': pointer or reference to potentially throwing function passed to 'extern "C"' function under - EHc.Undefined behavior may occur if this function throws an exception. +#pragma warning(disable: 26493) // Don't use C-style casts (type.4). +#pragma warning(disable: 26473) // Don't cast between pointer types where the source type and the target type are the same (type.1). +#include +#include +#pragma warning(pop) +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + diff --git a/six/modules/c++/six.sicd/include/six/sicd/ImageData.h b/six/modules/c++/six.sicd/include/six/sicd/ImageData.h index 7121a0ba0..9a31e5767 100644 --- a/six/modules/c++/six.sicd/include/six/sicd/ImageData.h +++ b/six/modules/c++/six.sicd/include/six/sicd/ImageData.h @@ -30,6 +30,7 @@ #include #include #include +#include #include "logging/Logger.h" #include "types/Complex.h" @@ -100,17 +101,16 @@ struct SIX_SICD_API ImageData bool validate(const GeoData& geoData, logging::Logger& log) const; - static void toComplex(const six::Amp8iPhs8iLookup_t& lookup, std::span, std::span); + static void toComplex(six::Amp8iPhs8iLookup_t lookup, std::span, std::span); std::vector toComplex(std::span) const; std::vector fromComplex(std::span) const; - static std::vector testing_fromComplex_(std::span); // for unit-tests /*! * Create a lookup table for converting from AMP8I_PHS8I to complex. * @param pAmplitudeTable Input amplitude table. May be nullptr if no amplitude table is defined. * @return reference to the output lookup table. */ - static const six::Amp8iPhs8iLookup_t& getLookup(const six::AmplitudeTable* pAmplitudeTable); + static six::Amp8iPhs8iLookup_t getLookup(const six::AmplitudeTable* pAmplitudeTable); }; SIX_SICD_API const details::ComplexToAMP8IPHS8I& make_ComplexToAMP8IPHS8I(const six::AmplitudeTable*); diff --git a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp index 695592f83..0c08b0447 100644 --- a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp +++ b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #if CODA_OSS_cpp17 @@ -44,6 +45,9 @@ #include "six/sicd/Utilities.h" +#undef min +#undef max + // https://github.com/ngageoint/six-library/pull/537#issuecomment-1026453353 /* I can add more detail, but be warned that my powerpoint skills aren't amazing and this is best drawn on a whiteboard or graph paper. @@ -65,14 +69,16 @@ The complex value `V` is projected onto the nearest `phase_direction` via the do The resulting green point is then what's used to find the nearest magnitude via binary search (`std::lower_bound`). */ - /*! * Get the phase of a complex value. * @param v complex value * @return phase between [0, 2PI] */ + inline auto GetPhase(std::complex v) { + // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around + // handles cases that are close to 2PI. double phase = std::arg(v); if (phase < 0.0) phase += std::numbers::pi * 2.0; // Wrap from [0, 2PI] return phase; @@ -80,30 +86,32 @@ inline auto GetPhase(std::complex v) uint8_t six::sicd::details::ComplexToAMP8IPHS8I::getPhase(six::zfloat v) const { // Phase is determined via arithmetic because it's equally spaced. - // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around - // handles cases that are close to 2PI. - return gsl::narrow_cast(std::round(GetPhase(v) / phase_delta)); + const auto phase = GetPhase(v); + return gsl::narrow_cast(std::round(phase / phase_delta)); } template -static std::vector make_magnitudes_(TToComplexFunc toComplex) +static auto make_magnitudes_(TToComplexFunc toComplex) { - std::vector retval; - retval.reserve(UINT8_MAX + 1); + std::vector result; + result.reserve(six::AmplitudeTableSize); for (const auto amplitude : six::sicd::Utilities::iota_0_256()) { // AmpPhase -> Complex const auto phase = amplitude; const auto complex = toComplex(amplitude, phase); - retval.push_back(std::abs(complex)); + result.push_back(std::abs(complex)); } // I don't know if we can guarantee that the amplitude table is non-decreasing. // Check to verify property at runtime. - if (!std::is_sorted(retval.begin(), retval.end())) + if (!std::is_sorted(result.begin(), result.end())) { throw std::runtime_error("magnitudes must be sorted"); } + + std::array retval; + std::copy(result.begin(), result.end(), retval.begin()); return retval; } static inline auto make_magnitudes(const six::AmplitudeTable& amplitudeTable) @@ -123,17 +131,17 @@ static inline auto make_magnitudes() return make_magnitudes_(toComplex); } -static const std::vector& get_magnitudes(const six::AmplitudeTable* pAmplitudeTable, - std::vector& uncached_magnitudes) +static std::span get_magnitudes(const six::AmplitudeTable* pAmplitudeTable, + std::array& uncached_magnitudes) { if (pAmplitudeTable == nullptr) { static const auto magnitudes = make_magnitudes(); // OK to cache, won't change - return magnitudes; + return sys::make_span(magnitudes); } uncached_magnitudes = make_magnitudes(*pAmplitudeTable); - return uncached_magnitudes; + return sys::make_const_span(uncached_magnitudes); } six::sicd::details::ComplexToAMP8IPHS8I::ComplexToAMP8IPHS8I(const six::AmplitudeTable *pAmplitudeTable) @@ -144,14 +152,15 @@ six::sicd::details::ComplexToAMP8IPHS8I::ComplexToAMP8IPHS8I(const six::Amplitud assert(p0 == 0.0); assert(p1 > p0); phase_delta = gsl::narrow_cast(p1 - p0); - size_t i = 0; - for(const auto value : six::sicd::Utilities::iota_0_256()) + for(size_t i = 0; i < 256; i++) { - const units::Radians angle{ static_cast(p0) + value * phase_delta }; + const units::Radians angle{ static_cast(p0) + i * phase_delta }; float y, x; SinCos(angle, y, x); phase_directions[i] = { x, y }; - i++; + + phase_directions_real[i] = phase_directions[i].real(); + phase_directions_imag[i] = phase_directions[i].imag(); } } @@ -160,7 +169,7 @@ six::sicd::details::ComplexToAMP8IPHS8I::ComplexToAMP8IPHS8I(const six::Amplitud * @param value query value * @return index of nearest value within the iterator range. */ -static inline uint8_t nearest(const std::vector& magnitudes, float value) +static uint8_t nearest(std::span magnitudes, float value) { const auto begin = magnitudes.begin(); const auto end = magnitudes.end(); @@ -175,22 +184,41 @@ static inline uint8_t nearest(const std::vector& magnitudes, float value) assert(distance <= std::numeric_limits::max()); return gsl::narrow(distance); } - -six::AMP8I_PHS8I_t six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbor_(const six::zfloat &v) const +uint8_t six::sicd::details::ComplexToAMP8IPHS8I::find_nearest(six::zfloat phase_direction, six::zfloat v) const { - six::AMP8I_PHS8I_t retval; - retval.phase = getPhase(v); - // We have to do a 1D nearest neighbor search for magnitude. // But it's not the magnitude of the input complex value - it's the projection of // the complex value onto the ray of candidate magnitudes at the selected phase. // i.e. dot product. - auto&& phase_direction = phase_directions[retval.phase]; - const auto projection = phase_direction.real() * v.real() + phase_direction.imag() * v.imag(); + const auto projection = (phase_direction.real() * v.real()) + (phase_direction.imag() * v.imag()); //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ??? - retval.amplitude = nearest(magnitudes, projection); + return nearest(magnitudes, projection); +} +six::AMP8I_PHS8I_t six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbor_(const six::zfloat &v) const +{ + six::AMP8I_PHS8I_t retval; + retval.phase = getPhase(v); + + auto&& phase_direction = phase_directions[retval.phase]; + retval.amplitude = find_nearest(phase_direction, v); + return retval; +} +template +void six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_seq(TInputIt first, TInputIt last, TOutputIt dest) const +{ + std::transform(first, last, dest, [&](const auto& v) { return nearest_neighbor_(v); }); +} +std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_seq( + std::span inputs, const six::AmplitudeTable* pAmplitudeTable) +{ + // make a structure to quickly find the nearest neighbor + const auto& converter = make_(pAmplitudeTable); + + std::vector retval(inputs.size()); + converter.nearest_neighbors_seq(inputs.begin(), inputs.end(), retval.begin()); return retval; } + six::AMP8I_PHS8I_t six::sicd::nearest_neighbor(const details::ComplexToAMP8IPHS8I& i, const six::zfloat& v) { return i.nearest_neighbor_(v); @@ -203,54 +231,100 @@ six::AMP8I_PHS8I_t six::sicd::nearest_neighbor(const details::ComplexToAMP8IPHS8 // bit "half baked," and perhaps shouldn't be emulated. Then, C++17 added // parallel algorithms which might be a better way of satisfying our immediate // needs (below) ... although we're still at C++14. -template -static inline OutputIt transform_async(const InputIt first1, const InputIt last1, OutputIt d_first, TFunc f, +template +static inline OutputIt transform_async(const InputIt first1, const InputIt last1, OutputIt d_first, TTransformFunc transform_f, typename std::iterator_traits::difference_type cutoff) { // https://en.cppreference.com/w/cpp/thread/async const auto len = std::distance(first1, last1); if (len < cutoff) { - return std::transform(first1, last1, d_first, f); + return transform_f(first1, last1, d_first); } - constexpr auto policy = std::launch::async; - const auto mid1 = first1 + len / 2; const auto d_mid = d_first + len / 2; - auto handle = std::async(policy, transform_async, mid1, last1, d_mid, f, cutoff); - transform_async(first1, mid1, d_first, f, cutoff); + auto handle = std::async(transform_async, mid1, last1, d_mid, transform_f, cutoff); + transform_async(first1, mid1, d_first, transform_f, cutoff); return handle.get(); } -template -static inline void transform(std::span inputs, std::span results, TFunc f) + +template +void six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par(TInputIt first, TInputIt last, TOutputIt dest) const { + const auto nearest_neighbor = [&](const auto& v) + { + return this->nearest_neighbor_(v); + }; + #if SIX_six_sicd_ComplexToAMP8IPHS8I_has_execution - std::ignore = std::transform(std::execution::par_unseq, inputs.begin(), inputs.end(), results.begin(), f); + std::ignore = std::transform(std::execution::par, first, last, dest, nearest_neighbor); #else constexpr ptrdiff_t cutoff_ = 0; // too slow w/o multi-threading // The value of "default_cutoff" was determined by testing; there is nothing special about it, feel free to change it. - constexpr auto dimension = 128 * 8; - constexpr auto default_cutoff = dimension * dimension; - const auto cutoff = cutoff_ == 0 ? default_cutoff : cutoff_; - std::ignore = transform_async(inputs.begin(), inputs.end(), results.begin(), f, cutoff); -#endif // CODA_OSS_cpp17 -} + constexpr auto dimension = 256; + constexpr auto default_cutoff = (dimension * dimension) * 4; + const auto cutoff = cutoff_ == 0 ? default_cutoff : cutoff_; -std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors( + const auto transform_f = [&](const TInputIt first1, const TInputIt last1, TOutputIt d_first) + { + return std::transform(first1, last1, d_first, nearest_neighbor); + }; + std::ignore = transform_async(first, last, dest, transform_f, cutoff); +#endif // SIX_six_sicd_ComplexToAMP8IPHS8I_has_execution +} +std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par( std::span inputs, const six::AmplitudeTable* pAmplitudeTable) { // make a structure to quickly find the nearest neighbor const auto& converter = make_(pAmplitudeTable); - const auto nearest_neighbor = [&converter](const auto& v) + + std::vector retval(inputs.size()); + converter.nearest_neighbors_par(inputs.begin(), inputs.end(), retval.begin()); + return retval; +} + +std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors( + std::span inputs, const six::AmplitudeTable* pAmplitudeTable) +{ + // TODO: there could be more complicated logic here to decide between + // _seq, _par, _unseq, and _par_unseq + #if SIX_sicd_have_VCL || SIX_sicd_have_experimental_simd + return nearest_neighbors_par_unseq(inputs, pAmplitudeTable); + #else + return nearest_neighbors_par(inputs, pAmplitudeTable); + #endif +} + + +template +void six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par_unseq(TInputIt first, TInputIt last, TOutputIt dest) const +{ + constexpr ptrdiff_t cutoff_ = 0; // too slow w/o multi-threading + // The value of "default_cutoff" was determined by testing; there is nothing special about it, feel free to change it. + constexpr auto default_cutoff = (256 * 256) * 2; + const auto cutoff = cutoff_ == 0 ? default_cutoff : cutoff_; + + const auto transform_f = [&](const TInputIt first1, const TInputIt last1, TOutputIt d_first) { - return converter.nearest_neighbor_(v); + nearest_neighbors_unseq(first1, last1, d_first); + static const TOutputIt retval; + return retval; }; + std::ignore = transform_async(first, last, dest, transform_f, cutoff); +} +#if SIX_sicd_ComplexToAMP8IPHS8I_unseq +std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_par_unseq( + std::span inputs, const six::AmplitudeTable* pAmplitudeTable) +{ + // make a structure to quickly find the nearest neighbor + const auto& converter = make_(pAmplitudeTable); std::vector retval(inputs.size()); - transform(sys::make_const_span(inputs), sys::make_span(retval), nearest_neighbor); + converter.nearest_neighbors_par_unseq(inputs.begin(), inputs.end(), retval.begin()); return retval; } +#endif // SIX_sicd_have_VCL || SIX_sicd_have_experimental_simd const six::sicd::details::ComplexToAMP8IPHS8I& six::sicd::details::ComplexToAMP8IPHS8I::make_(const six::AmplitudeTable* pAmplitudeTable) { diff --git a/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_vcl.cpp b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_vcl.cpp new file mode 100644 index 000000000..74fa23225 --- /dev/null +++ b/six/modules/c++/six.sicd/source/ComplexToAMP8IPHS8I_vcl.cpp @@ -0,0 +1,239 @@ +/* ========================================================================= +* This file is part of six.sicd-c++ +* ========================================================================= +* +* (C) Copyright 2021, Maxar Technologies, Inc. +* +* six.sicd-c++ is free software; you can redistribute it and/or modify +* it under the terms of the GNU Lesser General Public License as published by +* the Free Software Foundation; either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Lesser General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public +* License along with this program; If not, +* see . +* +*/ +#include "six/AmplitudeTable.h" + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "six/sicd/Utilities.h" + +#undef min +#undef max + +#if SIX_sicd_has_VCL + +#define VCL_NAMESPACE vcl +#if _MSC_VER +#pragma warning(disable: 4100) // '...': unreferenced formal parameter +#endif +#include "six/sicd/vectorclass/version2/vectorclass.h" +#include "six/sicd/vectorclass/version2/vectormath_trig.h" +#include "six/sicd/vectorclass/complex/complexvec1.h" + +// https://en.cppreference.com/w/cpp/experimental/simd +using zfloatv = vcl::Complex8f; +using floatv = vcl::Vec8f; +using intv = vcl::Vec8i; + +// https://en.cppreference.com/w/cpp/numeric/complex/arg +// > `std::atan2(std::imag(z), std::real(z))` +inline auto arg(const floatv& real, const floatv& imag) +{ + return atan2(imag, real); // arg() +} +inline auto arg(const zfloatv& z) +{ + return arg(z.real(), z.imag()); +} + +static inline auto interleave(const intv& a, const intv& b) +{ + // The blend() indicies are based on one large array + auto index0 = vcl::blend8<0, 8, 1, 9, 2, 10, 3, 11>(a, b); // i.e., a[0], b[0], a[1], b[1], ... + auto index1 = vcl::blend8<4, 12, 5, 13, 6, 14, 7, 15>(a, b); + return vcl::Vec16i(std::move(index0), std::move(index1)); +} + +// There's no `vcl::lookup()` for `std::complex*`, implement using floats +template +static auto lookup(const intv& zindex, const std::array& table_) +{ + const auto table = reinterpret_cast(table_.data()); + constexpr auto size_as_floats = N * 2; // table_t is six::zfloat + + // A `six::zfloat` at *n* is at *2n* when viewed as `float`. + const auto real_index = zindex * 2; + // The imaginary part is after the real part + const auto imag_index = real_index + 1; // [n] is real, [n+1] is imag + const auto index = interleave(real_index, imag_index); + + auto lookup = vcl::lookup(index, table); + return zfloatv(std::move(lookup)); +} + +static auto getPhase(const zfloatv& v, float phase_delta) +{ + // Phase is determined via arithmetic because it's equally spaced. + // There's an intentional conversion to zero when we cast 256 -> uint8. That wrap around + // handles cases that are close to 2PI. + auto phase = arg(v); + phase = if_add(phase < 0.0, phase, std::numbers::pi_v * 2.0f); // Wrap from [0, 2PI] + return roundi(phase / phase_delta); +} + +inline auto lower_bound(std::span magnitudes, const floatv& value) +{ + const auto begin = magnitudes.begin(); + const auto end = magnitudes.end(); + + intv retval; + for (int i = 0; i < value.size(); i++) + { + const auto it = std::lower_bound(begin, end, value[i]); + const auto result = std::distance(begin, it); + retval.insert(i, gsl::narrow(result)); + } + return retval; +} +static auto nearest(std::span magnitudes, const floatv& value) +{ + assert(magnitudes.size() == six::AmplitudeTableSize); + + /* + const auto it = std::lower_bound(begin, end, value); + if (it == begin) return 0; + + const auto prev_it = std::prev(it); + const auto nearest_it = it == end ? prev_it : + (value - *prev_it <= *it - value ? prev_it : it); + const auto distance = std::distance(begin, nearest_it); + assert(distance <= std::numeric_limits::max()); + return gsl::narrow(distance); + */ + const auto it = ::lower_bound(magnitudes, value); + const auto prev_it = it - 1; // const auto prev_it = std::prev(it); + + const auto v0 = value - vcl::lookup(prev_it, magnitudes.data()); // value - *prev_it + const auto v1 = vcl::lookup(it, magnitudes.data()) - value; // *it - value + //const auto nearest_it = select(v0 <= v1, prev_it, it); // (value - *prev_it <= *it - value ? prev_it : it); + + const intv end = gsl::narrow(magnitudes.size()); + //const auto end_test = select(it == end, prev_it, nearest_it); // it == end ? prev_it : ... + const intv zero = 0; + auto retval = select(it == 0, zero, // if (it == begin) return 0; + select(it == end, prev_it, // it == end ? prev_it : ... + select(v0 <=v1, prev_it, it) // (value - *prev_it <= *it - value ? prev_it : it); + )); + return retval; +} + +static inline auto find_nearest(std::span magnitudes, const zfloatv& phase_direction, const zfloatv& v) +{ + // We have to do a 1D nearest neighbor search for magnitude. + // But it's not the magnitude of the input complex value - it's the projection of + // the complex value onto the ray of candidate magnitudes at the selected phase. + // i.e. dot product. + const auto projection = (phase_direction.real() * v.real()) + (phase_direction.imag() * v.imag()); + //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ??? + return nearest(magnitudes, projection); +} +static inline auto find_nearest(std::span magnitudes, const floatv& phase_direction_real, const floatv& phase_direction_imag, + const zfloatv& v) +{ + // We have to do a 1D nearest neighbor search for magnitude. + // But it's not the magnitude of the input complex value - it's the projection of + // the complex value onto the ray of candidate magnitudes at the selected phase. + // i.e. dot product. + const auto projection = (phase_direction_real * v.real()) + (phase_direction_imag * v.imag()); + //assert(std::abs(projection - std::abs(v)) < 1e-5); // TODO ??? + return nearest(magnitudes, projection); +} + +template< typename TOutputIter> +void six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq_(const six::zfloat* p, TOutputIter dest) const +{ + // https://en.cppreference.com/w/cpp/numeric/complex + // > For any pointer to an element of an array of `std::complex` named `p` and any valid array index `i`, ... + // > is the real part of the complex number `p[i]`, ... + zfloatv v; + v.load(reinterpret_cast(p)); + + const auto phase = ::getPhase(v, phase_delta); + + //const auto phase_direction = lookup(phase, phase_directions); + //const auto amplitude = ::find_nearest(magnitudes, phase_direction, v); + const auto phase_direction_real = vcl::lookup(phase, phase_directions_real.data()); + const auto phase_direction_imag = vcl::lookup(phase, phase_directions_imag.data()); + //const auto amplitude = ::find_nearest(magnitudes, phase_direction_real, phase_direction_imag, v); + + // interleave() and store() is slower than an explicit loop. + for (int i = 0; i < v.size(); i++) + { + dest->phase = gsl::narrow_cast(phase[i]); + + //dest->amplitude = find_nearest(phase_directions[dest->phase], p[i]); + //const auto phase_direction_ = phase_direction.extract(i); + //dest->amplitude = find_nearest(six::zfloat(phase_direction_.real(), phase_direction_.imag()), p[i]); + dest->amplitude = find_nearest(six::zfloat(phase_direction_real[i], phase_direction_imag[i]), p[i]); + //dest->amplitude = gsl::narrow_cast(amplitude[i]); + + ++dest; + } +} + +template +void six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq(TInputIt first, TInputIt last, TOutputIt dest) const +{ + // The above code is simpler (no templates) if we use just a single VCL + // complex type: `zfloatv`. If there is any performance differ4ence, + // it will only be for extreme edge cases since the smaller types are only used + // at the end of the loop. + // + // It also makes this loop simpler as we handle all non-multiples-of-8 in + // the same way. + for (; first != last; ++first, ++dest) + { + const auto distance = std::distance(first, last); + assert(distance > 0); // should be out of the loop! + if (distance % 8 == 0) + { + nearest_neighbors_unseq_(&(*first), dest); + first += 7; dest += 7; + } + else + { + nearest_neighbors_seq(first, last, dest); + } + } +} +std::vector six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors_unseq( + std::span inputs, const six::AmplitudeTable* pAmplitudeTable) +{ + // make a structure to quickly find the nearest neighbor + const auto& converter = make_(pAmplitudeTable); + + std::vector retval(inputs.size()); + converter.nearest_neighbors_unseq(inputs.begin(), inputs.end(), retval.begin()); + return retval; +} + +#endif // SIX_sicd_have_VCL \ No newline at end of file diff --git a/six/modules/c++/six.sicd/source/ImageData.cpp b/six/modules/c++/six.sicd/source/ImageData.cpp index c3c13a172..eaf993289 100644 --- a/six/modules/c++/six.sicd/source/ImageData.cpp +++ b/six/modules/c++/six.sicd/source/ImageData.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -188,11 +189,12 @@ bool ImageData::validate(const GeoData& geoData, logging::Logger& log) const return valid; } +constexpr std::array lookupDims{ AmplitudeTableSize, AmplitudeTableSize }; // size 256 x 256 matrix of complex values. template static auto createLookup(TToComplexFunc toComplex) { - auto retval = std::make_unique(); // too big for the stack - auto& values = *retval; + std::vector retval(lookupDims[0] * lookupDims[1]); + std::mdspan> values(retval.data(), lookupDims); // For all possible amp/phase values (there are "only" 256*256=65536), get and save the // complex value. @@ -200,7 +202,7 @@ static auto createLookup(TToComplexFunc toComplex) { for (const auto phase : Utilities::iota_0_256()) { - values[amplitude][phase] = toComplex(amplitude, phase); + values(amplitude, phase) = toComplex(amplitude, phase); } } @@ -221,19 +223,19 @@ static auto createLookup() return createLookup(toComplex); } -static const six::Amp8iPhs8iLookup_t* getCachedLookup(const six::AmplitudeTable* pAmplitudeTable) +static const std::vector* getCachedLookup(const six::AmplitudeTable* pAmplitudeTable) { if (pAmplitudeTable == nullptr) { static const auto lookup_no_table = createLookup(); - return lookup_no_table.get(); + return &lookup_no_table; } // Maybe one has already been created and stored on the table? return pAmplitudeTable->getLookup(); } -const six::Amp8iPhs8iLookup_t& ImageData::getLookup(const six::AmplitudeTable* pAmplitudeTable) +six::Amp8iPhs8iLookup_t ImageData::getLookup(const six::AmplitudeTable* pAmplitudeTable) { auto pLookup = getCachedLookup(pAmplitudeTable); if (pLookup == nullptr) @@ -245,14 +247,14 @@ const six::Amp8iPhs8iLookup_t& ImageData::getLookup(const six::AmplitudeTable* p pLookup = amplitudeTable.getLookup(); } assert(pLookup != nullptr); - return *pLookup; + return six::Amp8iPhs8iLookup_t(pLookup->data(), lookupDims); } -void ImageData::toComplex(const six::Amp8iPhs8iLookup_t& values, std::span inputs, std::span results) +void ImageData::toComplex(six::Amp8iPhs8iLookup_t values, std::span inputs, std::span results) { const auto toComplex_ = [&values](const auto& v) { - return values[v.amplitude][v.phase]; + return values(v.amplitude, v.phase); }; transform(inputs, results, toComplex_); } @@ -275,9 +277,3 @@ std::vector ImageData::fromComplex(std::span i return six::sicd::details::ComplexToAMP8IPHS8I::nearest_neighbors(inputs, amplitudeTable.get()); } -std::vector ImageData::testing_fromComplex_(std::span inputs) -{ - static const ImageData imageData; - assert(imageData.amplitudeTable.get() == nullptr); - return imageData.fromComplex(inputs); -} diff --git a/six/modules/c++/six.sicd/source/Utilities.cpp b/six/modules/c++/six.sicd/source/Utilities.cpp index 5dd9e3483..168bab6d2 100644 --- a/six/modules/c++/six.sicd/source/Utilities.cpp +++ b/six/modules/c++/six.sicd/source/Utilities.cpp @@ -220,7 +220,7 @@ class SICD_readerAndConverter final } const types::RowCol& offset; six::zfloat* buffer; - const six::Amp8iPhs8iLookup_t& lookup; + six::Amp8iPhs8iLookup_t lookup; public: SICD_readerAndConverter(six::NITFReadControl& reader, size_t imageNumber, diff --git a/six/modules/c++/six.sicd/unittests/test_AMP8I_PHS8I.cpp b/six/modules/c++/six.sicd/unittests/test_AMP8I_PHS8I.cpp index ddbdb2f2c..da3680a97 100644 --- a/six/modules/c++/six.sicd/unittests/test_AMP8I_PHS8I.cpp +++ b/six/modules/c++/six.sicd/unittests/test_AMP8I_PHS8I.cpp @@ -542,6 +542,13 @@ TEST_CASE(test_create_sicd_from_mem_8i) test_create_sicd_from_mem(testName, "test_create_sicd_from_mem_8i_noamp.sicd", six::PixelType::AMP8I_PHS8I, false /*makeAmplitudeTable*/); } +static std::vector testing_fromComplex(std::span inputs) +{ + static const six::sicd::ImageData imageData; + assert(imageData.amplitudeTable.get() == nullptr); + return imageData.fromComplex(inputs); +} + static void test_adjusted_values(const std::string& testName, const std::vector& values, const std::vector& expected, six::zfloat delta) { @@ -551,7 +558,7 @@ static void test_adjusted_values(const std::string& testName, const std::vector< v += delta; } std::span values_(adjusted_values.data(), adjusted_values.size()); - const auto actual = six::sicd::ImageData::testing_fromComplex_(values_); + const auto actual = testing_fromComplex(values_); for (size_t i = 0; i < expected.size(); i++) { TEST_ASSERT_EQ(expected[i].amplitude, actual[i].amplitude); @@ -571,7 +578,7 @@ TEST_CASE(test_nearest_neighbor) {static_cast(141), static_cast(96)}, {static_cast(255), static_cast(160)} }; - const auto actual = six::sicd::ImageData::testing_fromComplex_(sys::make_span(values)); + const auto actual = testing_fromComplex(sys::make_span(values)); for (size_t i = 0; i < expected.size(); i++) { TEST_ASSERT_EQ(expected[i].amplitude, actual[i].amplitude); diff --git a/six/modules/c++/six/include/six/AmplitudeTable.h b/six/modules/c++/six/include/six/AmplitudeTable.h index 3b70dfcc9..8fc034402 100644 --- a/six/modules/c++/six/include/six/AmplitudeTable.h +++ b/six/modules/c++/six/include/six/AmplitudeTable.h @@ -31,8 +31,10 @@ #include #include #include +#include #include +#include #include #include @@ -157,20 +159,59 @@ struct SIX_SIX_API LUT * double precision amplitude value */ -// Store the computed `six::zfloat` for every possible -// amp/phs pair, a total of 256*256 values. - //! Fixed size 256 element array of complex values. -using phase_values_t = std::array; -//! Fixed size 256 x 256 matrix of complex values. -using Amp8iPhs8iLookup_t = std::array; - -// More descriptive than std::pair + // Store the computed `six::zfloat` for every possible + // amp/phs pair, a total of 256*256 values. +static constexpr size_t AmplitudeTableSize = 256; // "This is a fixed size (256-element) LUT" +using Amp8iPhs8iLookup_t = std::mdspan>; + + // More descriptive than std::pair struct SIX_SIX_API AMP8I_PHS8I_t final { uint8_t amplitude; uint8_t phase; }; +// Control a few details of the ComplexToAMP8IPHS8I implementation, espeically "unseq" (i.e., SIMD). +#ifndef SIX_sicd_has_VCL + // Do we have the "vectorclass" library? https://github.com/vectorclass/version2 + #if !CODA_OSS_cpp17 // VCL needs C++17 + #define SIX_sicd_has_VCL 0 + #else + // __has_include is part of C++17 + #if __has_include("../../../six.sicd/include/six/sicd/vectorclass/version2/vectorclass.h") || \ + __has_include("six.sicd/include/six/sicd/vectorclass/version2/vectorclass.h") + #define SIX_sicd_has_VCL 1 + #else + #define SIX_sicd_has_VCL 0 + #endif // __has_include + #endif // C++17 +#endif + +#ifndef SIX_sicd_has_experimental_simd + // Do we have the `std::experimental::simd? https://en.cppreference.com/w/cpp/experimental/simd + #if (__GNUC__ >= 11) && CODA_OSS_cpp20 + // https://github.com/VcDevel/std-simd "... shipping with GCC since version 11." + #include + #define SIX_sicd_has_experimental_simd 1 + #else + #define SIX_sicd_has_experimental_simd 0 + #endif // __GNUC__ +#endif + +#if SIX_sicd_has_VCL && SIX_sicd_has_experimental_simd + // This is because there is a single ComplexToAMP8IPHS8I::nearest_neighbors_unseq() method. + // Other than that, it should be possible to use both VCL and std::experimental::simd + #error "Can't enable both VCL and std::experimental::simd at the same time'" +#endif +#ifndef SIX_sicd_ComplexToAMP8IPHS8I_unseq + #if SIX_sicd_has_VCL || SIX_sicd_has_experimental_simd + #define SIX_sicd_ComplexToAMP8IPHS8I_unseq 1 + #else + #define SIX_sicd_ComplexToAMP8IPHS8I_unseq 0 + #endif // SIX_sicd_have_VCL || SIX_sicd_have_experimental_simd +#endif // SIX_sicd_ComplexToAMP8IPHS8I_unseq + + struct AmplitudeTable; // forward namespace sicd { @@ -204,21 +245,40 @@ class ComplexToAMP8IPHS8I final * @return nearest amplitude and phase value */ AMP8I_PHS8I_t nearest_neighbor_(const six::zfloat& v) const; - static std::vector nearest_neighbors(std::span inputs, const six::AmplitudeTable*); + static std::vector nearest_neighbors_par(std::span inputs, const six::AmplitudeTable*); + static std::vector nearest_neighbors_seq(std::span inputs, const six::AmplitudeTable*); + #if SIX_sicd_ComplexToAMP8IPHS8I_unseq + static std::vector nearest_neighbors_unseq(std::span inputs, const six::AmplitudeTable*); + static std::vector nearest_neighbors_par_unseq(std::span inputs, const six::AmplitudeTable*); + #endif + static std::vector nearest_neighbors(std::span inputs, const six::AmplitudeTable*); // one of the above private: - std::vector nearest_neighbors(std::span inputs) const; + template + void nearest_neighbors_seq(TInputIt first, TInputIt last, TOutputIt dest) const; + template + void nearest_neighbors_par(TInputIt first, TInputIt last, TOutputIt dest) const; + template + void nearest_neighbors_unseq(TInputIt first, TInputIt last, TOutputIt dest) const; + template + void nearest_neighbors_par_unseq(TInputIt first, TInputIt last, TOutputIt dest) const; + + template< typename TOutputIter> + void nearest_neighbors_unseq_(const six::zfloat* p, TOutputIter dest) const; //! The sorted set of possible magnitudes order from small to large. - std::vector uncached_magnitudes; // Order is important! This must be ... - const std::vector& magnitudes; // ... before this. + std::array uncached_magnitudes; // Order is important! This must be ... + std::span magnitudes; // ... before this. + uint8_t find_nearest(six::zfloat phase_direction, six::zfloat v) const; //! The difference in phase angle between two UINT phase values. float phase_delta; uint8_t getPhase(six::zfloat) const; //! Unit vector rays that represent each direction that phase can point. - std::array phase_directions; + std::array phase_directions; // interleaved, std::complex + std::array phase_directions_real; + std::array phase_directions_imag; }; } } @@ -227,7 +287,7 @@ struct SIX_SIX_API AmplitudeTable final : public LUT { //! Constructor. Creates a 256-entry table AmplitudeTable(size_t elementSize) noexcept(false) : - LUT(UINT8_MAX + 1 /*i.e., 256*/, elementSize) + LUT(AmplitudeTableSize /*i.e., 256*/, elementSize) { } AmplitudeTable() noexcept(false) : AmplitudeTable(sizeof(double)) @@ -235,7 +295,7 @@ struct SIX_SIX_API AmplitudeTable final : public LUT } AmplitudeTable(const nitf::LookupTable& lookupTable) noexcept(false) : LUT(lookupTable) { - if (size() != 256) + if (size() != AmplitudeTableSize) { throw std::invalid_argument("lookupTable should have 256 elements."); } @@ -250,6 +310,10 @@ struct SIX_SIX_API AmplitudeTable final : public LUT { return numEntries; } + static constexpr auto ssize() noexcept // signed size() + { + return gsl::narrow(AmplitudeTableSize); + } bool operator==(const AmplitudeTable& rhs) const { @@ -300,13 +364,13 @@ struct SIX_SIX_API AmplitudeTable final : public LUT // This is a "cache" mostly because this is a convenient place to store the data; it // doesn't take that long to generate the lookup table. Note that existing code wants // to work with a `const AmplitudeTable &`, thus `mutable` ... . - void cacheLookup_(std::unique_ptr&& lookup) const + void cacheLookup_(std::vector&& lookup_) const { - pLookup = std::move(lookup); + lookup = std::move(lookup_); } - const Amp8iPhs8iLookup_t* getLookup() const + const std::vector* getLookup() const { - return pLookup.get(); + return lookup.empty() ? nullptr : &lookup; } // Again, this is a convenient place to store the data as it depends on an AmplitudeTable instance. @@ -320,7 +384,7 @@ struct SIX_SIX_API AmplitudeTable final : public LUT } private: - mutable std::unique_ptr pLookup; // to big for the stack + mutable std::vector lookup; mutable std::unique_ptr pFromComplex; };