From bb523aef88ed6dbdf7b067df3f5b847c6837bc79 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Mon, 18 Mar 2024 13:36:00 -0400 Subject: [PATCH] Squashed 'externals/nitro/' changes from 3118941cc..00c30348d 00c30348d NITRO-2.11.6 (#609) git-subtree-dir: externals/nitro git-subtree-split: 00c30348d90124ac53804e98236b198dd75ece65 --- ReleaseNotes.md | 6 + UnitTest/UnitTest.vcxproj | 1 + .../.github/workflows/build_unittest.yml | 24 +- externals/coda-oss/ReleaseNotes.md | 8 +- externals/coda-oss/UnitTest/mt.cpp | 3 + externals/coda-oss/UnitTest/sys.cpp | 2 + externals/coda-oss/cmake/CodaBuild.cmake | 11 + externals/coda-oss/modules/c++/CMakeLists.txt | 9 +- .../modules/c++/avx/unittests/test_m256.cpp | 28 +- .../modules/c++/coda-oss.vcxproj.filters | 6 +- .../c++/config/include/config/Version.h | 8 +- .../hdf5.lite/include/hdf5/lite/highfive.h | 2 + .../modules/c++/logging/source/Setup.cpp | 18 +- .../modules/c++/mt/include/mt/Algorithm.h | 76 +++- .../c++/mt/unittests/test_mt_byte_swap.cpp | 82 +++- .../modules/c++/str/include/str/Manip.h | 16 +- .../modules/c++/str/source/Encoding.cpp | 365 +++++++++--------- .../coda-oss/modules/c++/str/source/Manip.cpp | 102 ++++- .../modules/c++/sys/include/sys/AbstractOS.h | 54 ++- .../modules/c++/sys/source/AbstractOS.cpp | 22 +- modules/c++/nitf/include/nitf/Version.hpp | 3 +- modules/c/j2k/J2KCompress.vcxproj | 1 + modules/c/j2k/J2KDecompress.vcxproj | 1 + modules/c/nitf/TEST_DES.vcxproj | 1 + modules/c/nrt/include/nrt/Version.h | 2 +- 25 files changed, 573 insertions(+), 278 deletions(-) diff --git a/ReleaseNotes.md b/ReleaseNotes.md index 7111a792e..c2a39cf7e 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -1,5 +1,11 @@ # NITRO (NITF i/o) Release Notes +## [Version 2.11.6](https://github.com/mdaus/nitro/releases/tag/NITRO-2.11.6); March 18, 2024 +* [coda-oss](https://github.com/mdaus/coda-oss) release [2024-03-18](https://github.com/mdaus/coda-oss/releases/tag/2024-03-18) +* TREs can now be statically ["pre-loaded"](https://github.com/mdaus/nitro/pull/601), no need to set NITF_PLUGIN_PATH. +* Be sure TRE field padding is done properly. +* Allow shared-libraries (DLLs) to be build; disabled by default. + ## [Version 2.11.5](https://github.com/mdaus/nitro/releases/tag/NITRO-2.11.5); October 23, 2023 * [coda-oss](https://github.com/mdaus/coda-oss) release [2023-10-23](https://github.com/mdaus/coda-oss/releases/tag/2023-10-23) * Fix some ASAN diagnostics, still more to do. diff --git a/UnitTest/UnitTest.vcxproj b/UnitTest/UnitTest.vcxproj index 547527c2a..44bcf0351 100644 --- a/UnitTest/UnitTest.vcxproj +++ b/UnitTest/UnitTest.vcxproj @@ -93,6 +93,7 @@ true true true + Speed Windows diff --git a/externals/coda-oss/.github/workflows/build_unittest.yml b/externals/coda-oss/.github/workflows/build_unittest.yml index 4490ff194..3e0cb263a 100644 --- a/externals/coda-oss/.github/workflows/build_unittest.yml +++ b/externals/coda-oss/.github/workflows/build_unittest.yml @@ -11,9 +11,9 @@ jobs: name: ${{ matrix.os }}-${{ matrix.python-version }}-CMake runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 # https://github.com/marketplace/actions/setup-python with: python-version: ${{ matrix.python-version }} - name: Install python dependencies @@ -58,12 +58,12 @@ jobs: os: [windows-latest] platform: [x64] configuration: [Debug] # Debug turns on more compiler warnings - avx: [AVX2, AVX512F] + avx: [AVX512F] name: ${{ matrix.os }}-${{ matrix.avx }}-msbuild runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout - name: configure run: | ls env: @@ -76,7 +76,7 @@ jobs: cmake --build . --config ${{ matrix.configuration }} -j cmake --build . --config ${{ matrix.configuration }} --target install - name: Add msbuild to PATH - uses: microsoft/setup-msbuild@v1.0.2 # https://github.com/marketplace/actions/setup-msbuild + uses: microsoft/setup-msbuild@v1.1 # https://github.com/marketplace/actions/setup-msbuild with: msbuild-architecture: x64 - name: msbuild @@ -98,9 +98,9 @@ jobs: name: ${{ matrix.os }}-${{ matrix.python-version }}-CMake runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 # https://github.com/marketplace/actions/setup-python with: python-version: ${{ matrix.python-version }} - name: Install python dependencies @@ -132,15 +132,15 @@ jobs: matrix: os: [ubuntu-latest] configuration: [Debug, Release] - avx: [AVX2, AVX512F] + avx: [AVX512F] name: ${{ matrix.os }}-${{ matrix.configuration }}-${{ matrix.avx }}-CMake runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout - name: configure run: | mkdir out && cd out - cmake .. -DENABLE_PYTHON=OFF -DENABLE_ASAN=ON -DENABLE_${{ matrix.avx }}=ON + cmake .. -DENABLE_PYTHON=OFF -DENABLE_${{ matrix.avx }}=ON - name: build run: | cd out @@ -161,9 +161,9 @@ jobs: name: ${{ matrix.os }}-${{ matrix.python-version }}-waf${{ matrix.debugging }} runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # https://github.com/marketplace/actions/checkout - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 # https://github.com/marketplace/actions/setup-python with: python-version: ${{ matrix.python-version }} - name: configure_with_swig diff --git a/externals/coda-oss/ReleaseNotes.md b/externals/coda-oss/ReleaseNotes.md index bd37823dd..997db45b3 100644 --- a/externals/coda-oss/ReleaseNotes.md +++ b/externals/coda-oss/ReleaseNotes.md @@ -1,7 +1,13 @@ # coda-oss Release Notes -## [Release 202?-??-??](https://github.com/mdaus/coda-oss/releases/tag/202?-??-??) +## [Release 2024-03-18](https://github.com/mdaus/coda-oss/releases/tag/2024-03-18) * Update to [HighFive 2.8.0](https://github.com/BlueBrain/HighFive/releases/tag/v2.8.0). +* Use lookup tables for converting between character encodings and upper/lower-case. +* [`sys::Transform(std::execution)`](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag_t) for C++14. +* Infrastructure to support SIMD processing. +* Simple/incomplete [`std::mdspan`](https://en.cppreference.com/w/cpp/container/mdspan) implementation. +* Support for building as a shared library/DLL, disabled by default. +* Create/destroy Xerces just once per process. ## [Release 2023-10-23](https://github.com/mdaus/coda-oss/releases/tag/2023-10-23) * Tweaked **.gitattributes**. diff --git a/externals/coda-oss/UnitTest/mt.cpp b/externals/coda-oss/UnitTest/mt.cpp index 6be7aa933..8bbcd4356 100644 --- a/externals/coda-oss/UnitTest/mt.cpp +++ b/externals/coda-oss/UnitTest/mt.cpp @@ -1,6 +1,8 @@ #include "pch.h" #include "CppUnitTest.h" +#include + #include #include #include @@ -11,6 +13,7 @@ #include #include #include +#include namespace mt { diff --git a/externals/coda-oss/UnitTest/sys.cpp b/externals/coda-oss/UnitTest/sys.cpp index 7ac63c6a1..6561d2002 100644 --- a/externals/coda-oss/UnitTest/sys.cpp +++ b/externals/coda-oss/UnitTest/sys.cpp @@ -1,6 +1,8 @@ #include "pch.h" #include "CppUnitTest.h" +#include + #include #include #include diff --git a/externals/coda-oss/cmake/CodaBuild.cmake b/externals/coda-oss/cmake/CodaBuild.cmake index 5ac3ed962..a74502f3c 100644 --- a/externals/coda-oss/cmake/CodaBuild.cmake +++ b/externals/coda-oss/cmake/CodaBuild.cmake @@ -146,6 +146,17 @@ macro(coda_initialize_build) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) + # Turn on AVX2 by default ... it's from 2013. + # Well, no :-( ... it seems to cause crashes w/older + # compilers on build servers. :-( + set(ENABLE_AVX2 false) + set(ENABLE_AVX512F false) + #if (NOT ENABLE_AVX512F) + # if (NOT DISABLE_AVX2) + # set(ENABLE_AVX2 true) + # endif() + #endif() + # MSVC-specific flags and options. if (MSVC) set_property(GLOBAL PROPERTY USE_FOLDERS ON) diff --git a/externals/coda-oss/modules/c++/CMakeLists.txt b/externals/coda-oss/modules/c++/CMakeLists.txt index 694e037aa..f8e98accb 100644 --- a/externals/coda-oss/modules/c++/CMakeLists.txt +++ b/externals/coda-oss/modules/c++/CMakeLists.txt @@ -11,14 +11,14 @@ if (MSVC) elseif (UNIX) # https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html - add_compile_options(-Werror) # Make all warnings into errors + #add_compile_options(-Werror) # Make all warnings into errors add_compile_options(-Wall -Wextra -Wpedantic -pedantic-errors -Wunused) add_compile_options(-Wzero-as-null-pointer-constant) - add_compile_options(-Wsuggest-final-types -Wsuggest-final-methods) + #add_compile_options(-Wsuggest-final-types -Wsuggest-final-methods) add_compile_options(-Wsuggest-override) add_compile_options(-Woverloaded-virtual) - add_compile_options(-Warray-bounds) + #add_compile_options(-Warray-bounds) add_compile_options(-Wduplicated-branches -Wduplicated-cond) add_compile_options(-Wtrampolines) add_compile_options(-Wshadow) @@ -29,6 +29,9 @@ elseif (UNIX) add_compile_options(-Wno-double-promotion) # implicit conversion of `float` to `double` is fine + add_compile_options(-Wno-array-bounds) # TODO: fix the code! + add_compile_options(-Wno-maybe-uninitialized) # TODO: fix the code! + # Need a newer compiler than GCC 9 #add_compile_options(-Wnrvo) endif() diff --git a/externals/coda-oss/modules/c++/avx/unittests/test_m256.cpp b/externals/coda-oss/modules/c++/avx/unittests/test_m256.cpp index 920a0f9f8..251a86b8e 100644 --- a/externals/coda-oss/modules/c++/avx/unittests/test_m256.cpp +++ b/externals/coda-oss/modules/c++/avx/unittests/test_m256.cpp @@ -56,20 +56,24 @@ TEST_CASE(test_getSIMDInstructionSet) { // This is the reverse of getSIMDInstructionSet(): it uses the macros to generate a value. constexpr auto simdInstructionSet = sys::getSIMDInstructionSet(); - #if __AVX512F__ - static_assert(simdInstructionSet == sys::SIMDInstructionSet::AVX512F, "getSIMDInstructionSet()"); - #elif __AVX2__ - static_assert(simdInstructionSet == sys::SIMDInstructionSet::AVX2, "getSIMDInstructionSet()"); + #if CODA_OSS_ENABLE_SIMD + #if __AVX512F__ + static_assert(simdInstructionSet == sys::SIMDInstructionSet::AVX512F, "getSIMDInstructionSet()"); + #elif __AVX2__ + static_assert(simdInstructionSet == sys::SIMDInstructionSet::AVX2, "getSIMDInstructionSet()"); + #else + static_assert(simdInstructionSet == sys::SIMDInstructionSet::SSE2, "getSIMDInstructionSet()"); + #endif #else - static_assert(simdInstructionSet == sys::SIMDInstructionSet::SSE2, "getSIMDInstructionSet()"); - #endif + static_assert(simdInstructionSet == sys::SIMDInstructionSet::Disabled, "getSIMDInstructionSet()"); + #endif // CODA_OSS_ENABLE_SIMD CODA_OSS_disable_warning_push #if _MSC_VER #pragma warning(disable: 4127) // conditional expression is constant #endif - switch (sys::getSIMDInstructionSet()) // run-time value + switch (sys::getSIMDInstructionSet()) // run-time value (well, not really, but it could be) { case sys::SIMDInstructionSet::SSE2: { @@ -86,6 +90,16 @@ TEST_CASE(test_getSIMDInstructionSet) TEST_ASSERT(simdInstructionSet == sys::SIMDInstructionSet::AVX512F); break; } + case sys::SIMDInstructionSet::Disabled: + { + TEST_ASSERT(simdInstructionSet == sys::SIMDInstructionSet::Disabled); + break; + } + case sys::SIMDInstructionSet::Unknown: + { + TEST_ASSERT(simdInstructionSet == sys::SIMDInstructionSet::Unknown); + break; + } default: { TEST_FAIL; diff --git a/externals/coda-oss/modules/c++/coda-oss.vcxproj.filters b/externals/coda-oss/modules/c++/coda-oss.vcxproj.filters index a94b8aafd..04a9d4d66 100644 --- a/externals/coda-oss/modules/c++/coda-oss.vcxproj.filters +++ b/externals/coda-oss/modules/c++/coda-oss.vcxproj.filters @@ -1442,9 +1442,6 @@ {15f9b62f-d17e-4d84-bc34-de6fd5fbcb33} - - {f2544ccb-0933-44c7-af39-cd986982af3d} - {9050a469-23a5-4da0-92b1-a07a8e52e9fc} @@ -1499,6 +1496,9 @@ {59f3d9a1-06d3-4779-aef2-cc55223c3017} + + {f2544ccb-0933-44c7-af39-cd986982af3d} + diff --git a/externals/coda-oss/modules/c++/config/include/config/Version.h b/externals/coda-oss/modules/c++/config/include/config/Version.h index d14635575..e444b5a80 100644 --- a/externals/coda-oss/modules/c++/config/include/config/Version.h +++ b/externals/coda-oss/modules/c++/config/include/config/Version.h @@ -42,12 +42,12 @@ static_assert(CODA_OSS_MAKE_VERSION_MMPB(9999, 9999, 9999, 9999) <= UINT64_MAX, // Do this ala C++ ... we don't currently have major/minor/patch //#define CODA_OSS_VERSION_ 20210910L // c.f. __cplusplus -#define CODA_OSS_VERSION_ 2023 ## 0010 ## 0023 ## 0000 ## L +#define CODA_OSS_VERSION_ 2024 ## 0003 ## 0018 ## 0000 ## L // Use the same macros other projects might want to use; overkill for us. -#define CODA_OSS_VERSION_MAJOR 2023 -#define CODA_OSS_VERSION_MINOR 10 -#define CODA_OSS_VERSION_PATCH 23 // a.k.a. "point," but too similar to "patch." +#define CODA_OSS_VERSION_MAJOR 2024 +#define CODA_OSS_VERSION_MINOR 3 +#define CODA_OSS_VERSION_PATCH 18 // a.k.a. "point," but too similar to "patch." #define CODA_OSS_VERSION_BUILD 0 // a.k.a. "patch," but too similar to "point." #define CODA_OSS_VERSION CODA_OSS_MAKE_VERSION_MMPB(CODA_OSS_VERSION_MAJOR, CODA_OSS_VERSION_MINOR, CODA_OSS_VERSION_PATCH, CODA_OSS_VERSION_BUILD) diff --git a/externals/coda-oss/modules/c++/hdf5.lite/include/hdf5/lite/highfive.h b/externals/coda-oss/modules/c++/hdf5.lite/include/hdf5/lite/highfive.h index d5d91987b..c93d5a9a4 100644 --- a/externals/coda-oss/modules/c++/hdf5.lite/include/hdf5/lite/highfive.h +++ b/externals/coda-oss/modules/c++/hdf5.lite/include/hdf5/lite/highfive.h @@ -33,6 +33,8 @@ #include #include +#include "types/RowCol.h" + #include "H5_.h" #include "SpanRC.h" diff --git a/externals/coda-oss/modules/c++/logging/source/Setup.cpp b/externals/coda-oss/modules/c++/logging/source/Setup.cpp index 345c6b516..b8ff117bd 100644 --- a/externals/coda-oss/modules/c++/logging/source/Setup.cpp +++ b/externals/coda-oss/modules/c++/logging/source/Setup.cpp @@ -35,7 +35,7 @@ std::unique_ptr logging::setupLogger(const path& program_, const std::string& logLevel, - const path& logFile_, + const path& logFile, const std::string& logFormat, size_t logCount, size_t logBytes) @@ -44,29 +44,25 @@ logging::setupLogger(const path& program_, std::unique_ptr log(new logging::Logger(program)); // setup logging level - std::string lev = logLevel; - str::upper(lev); + auto lev = str::upper(logLevel); str::trim(lev); - logging::LogLevel level = (lev.empty()) ? logging::LogLevel::LOG_WARNING : - logging::LogLevel(lev); + const auto level = lev.empty() ? logging::LogLevel::LOG_WARNING : logging::LogLevel(lev); // setup logging formatter std::unique_ptr formatter; - const auto logFile = logFile_.string(); - const auto file = str::lower(logFile); + const auto file = str::lower(logFile.string()); if (str::endsWith(file, ".xml")) { - formatter.reset( - new logging::XMLFormatter("", "")); + formatter = std::make_unique("", ""); } else { - formatter.reset(new logging::StandardFormatter(logFormat)); + formatter = std::make_unique(logFormat); } // setup logging handler std::unique_ptr logHandler; - if (file.empty() || file == "console") + if (file.empty() || (file == "console") || (file == "-")) logHandler.reset(new logging::StreamHandler()); else { diff --git a/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h b/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h index ba17f39f5..83041d8c2 100644 --- a/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h +++ b/externals/coda-oss/modules/c++/mt/include/mt/Algorithm.h @@ -20,22 +20,80 @@ * */ -#ifndef CODA_OSS_mt_Algorithm_h_INCLUDED_ -#define CODA_OSS_mt_Algorithm_h_INCLUDED_ #pragma once #include #include #include +#include "config/compiler_extensions.h" +#include "coda_oss/CPlusPlus.h" +#if CODA_OSS_cpp17 + // is broken with the older version of GCC we're using + #if (__GNUC__ >= 10) || _MSC_VER + #include + #define CODA_OSS_mt_Algorithm_has_execution 1 + #endif +#endif + namespace mt { -// There was a transform_async() utility here, but I removed it. -// -// First of all, C++11's std::async() is now (in 2023) thought of as maybe a -// bit "half baked," and perhaps shouldn't be emulated. Then, C++17 added -// parallel algorithms which might be a better ... although we're still at C++14. -} +// "Roll our own" `std::transform(execution::par)` using std::async() +// https://en.cppreference.com/w/cpp/algorithm/transform + +// Our own `Transform_par_()` is built on `std::async()`; for that we need to control +// a couple of settings. +struct Transform_par_settings final +{ + Transform_par_settings() = default; + + Transform_par_settings(ptrdiff_t cutoff) : cutoff_(cutoff) { } + Transform_par_settings(std::launch policy) : policy_(policy) { } + Transform_par_settings(ptrdiff_t cutoff, std::launch policy) : cutoff_(cutoff), policy_(policy) { } + Transform_par_settings(std::launch policy, ptrdiff_t cutoff) : Transform_par_settings(cutoff, policy) { } + + // The value of "default_cutoff" was determined by testing; there is nothing + // special about it, feel free to change it. + static constexpr ptrdiff_t dimension = 128 * 8; + static constexpr ptrdiff_t default_cutoff = dimension * dimension; + ptrdiff_t cutoff_ = default_cutoff; -#endif // CODA_OSS_mt_Algorithm_h_INCLUDED_ + // https://en.cppreference.com/w/cpp/thread/launch + std::launch policy_ = std::launch::async; // "the task is executed on a different thread, potentially by creating and launching it first" +}; +template +inline OutputIt Transform_par_(InputIt first1, InputIt last1, OutputIt d_first, UnaryOperation unary_op, + const Transform_par_settings& settings) +{ + // https://en.cppreference.com/w/cpp/thread/async + const auto len = std::distance(first1, last1); + if (len < settings.cutoff_) + { + return std::transform(first1, last1, d_first, unary_op); + } + + const auto mid1 = first1 + len / 2; + const auto d_mid = d_first + len / 2; + auto handle = std::async(settings.policy_, Transform_par_, mid1, last1, d_mid, unary_op, settings); + Transform_par_(first1, mid1, d_first, unary_op, settings); + return handle.get(); +} +template +inline OutputIt Transform_par(InputIt first1, InputIt last1, OutputIt d_first, UnaryOperation unary_op, + Transform_par_settings settings = Transform_par_settings{}) +{ +#if CODA_OSS_mt_Algorithm_has_execution + #if __GNUC__ + // std::execution::par is dramatically slower w/GCC than using our own ... ??? + return Transform_par_(first1, last1, d_first, unary_op, settings); // TODO: std::execution::par + #else + CODA_OSS_mark_symbol_unused(settings); + return std::transform(std::execution::par, first1, last1, d_first, unary_op); + #endif // __GNUC__ +#else + return Transform_par_(first1, last1, d_first, unary_op, settings); +#endif // CODA_OSS_mt_Algorithm_has_execution +} + +} diff --git a/externals/coda-oss/modules/c++/mt/unittests/test_mt_byte_swap.cpp b/externals/coda-oss/modules/c++/mt/unittests/test_mt_byte_swap.cpp index d2099cf83..ccc2e92cc 100644 --- a/externals/coda-oss/modules/c++/mt/unittests/test_mt_byte_swap.cpp +++ b/externals/coda-oss/modules/c++/mt/unittests/test_mt_byte_swap.cpp @@ -28,14 +28,20 @@ #include // std::byte #include +#include + #include +#include + +#undef min +#undef max -static std::vector make_origValues(size_t NUM_PIXELS) +static std::vector make_origValues_(size_t count) { ::srand(334); - std::vector retval(NUM_PIXELS); - for (size_t ii = 0; ii < NUM_PIXELS; ++ii) + std::vector retval(count); + for (size_t ii = 0; ii < count; ++ii) { const auto value = static_cast(::rand()) / RAND_MAX * std::numeric_limits::max(); @@ -44,10 +50,16 @@ static std::vector make_origValues(size_t NUM_PIXELS) return retval; } +static constexpr size_t NUM_PIXELS = 10000; +static const std::vector& make_origValues() +{ + static const auto retval = make_origValues_(NUM_PIXELS); + return retval; +} + TEST_CASE(testThreadedByteSwap) { - constexpr size_t NUM_PIXELS = 10000; - const auto origValues = make_origValues(NUM_PIXELS); + const auto& origValues = make_origValues(); constexpr size_t numThreads = 4; @@ -59,14 +71,70 @@ TEST_CASE(testThreadedByteSwap) std::vector swappedValues2(origValues.size()); mt::threadedByteSwap(origValues.data(), sizeof(origValues[0]), NUM_PIXELS, numThreads, swappedValues2.data()); - // Everything should match - for (size_t ii = 0; ii < NUM_PIXELS; ++ii) + for (size_t ii = 0; ii < NUM_PIXELS; ++ii) // Everything should match { TEST_ASSERT_EQ(values1[ii], swappedValues2[ii]); } } +TEST_CASE(test_transform_ByteSwap) +{ + const auto& origValues = make_origValues(); + + // Byte swap the old-fashioned way + constexpr size_t numThreads = 4; + auto expected_(origValues); + constexpr auto elemSize = sizeof(expected_[0]); + mt::threadedByteSwap(expected_.data(), elemSize, NUM_PIXELS, numThreads); + const auto& expected = expected_; + + // Byte swap into output buffer + const auto byteSwap = [&](const auto& buffer_) { + auto buffer = buffer_; + sys::byteSwap(&buffer, elemSize, 1 /*numElements*/); + return buffer; + }; + + std::vector actual(origValues.size()); + std::transform(origValues.begin(), origValues.end(), actual.begin(), byteSwap); + for (size_t ii = 0; ii < NUM_PIXELS; ++ii) // Everything should match + { + TEST_ASSERT_EQ(expected[ii], actual[ii]); + } +} + +TEST_CASE(test_Transform_par_ByteSwap) +{ + const auto& origValues = make_origValues(); + + // Byte swap the old-fashioned way + constexpr size_t numThreads = 4; + auto expected_(origValues); + constexpr auto elemSize = sizeof(expected_[0]); + mt::threadedByteSwap(expected_.data(), elemSize, NUM_PIXELS, numThreads); + const auto& expected = expected_; + + // Byte swap into output buffer + const auto byteSwap = [&](const auto& buffer_) { + auto buffer = buffer_; + sys::byteSwap(&buffer, elemSize, 1 /*numElements*/); + return buffer; + }; + + // be sure we do something more than just call std::transform() + const mt::Transform_par_settings settings{ NUM_PIXELS / 4 /*cutoff*/ }; + + std::vector actual(origValues.size()); + mt::Transform_par(origValues.begin(), origValues.end(), actual.begin(), byteSwap, settings); + for (size_t ii = 0; ii < NUM_PIXELS; ++ii) // Everything should match + { + TEST_ASSERT_EQ(expected[ii], actual[ii]); + } +} + TEST_MAIN( TEST_CHECK(testThreadedByteSwap); + TEST_CHECK(test_transform_ByteSwap); + TEST_CHECK(test_Transform_par_ByteSwap); ) \ No newline at end of file diff --git a/externals/coda-oss/modules/c++/str/include/str/Manip.h b/externals/coda-oss/modules/c++/str/include/str/Manip.h index c9c96eb60..c50a37555 100644 --- a/externals/coda-oss/modules/c++/str/include/str/Manip.h +++ b/externals/coda-oss/modules/c++/str/include/str/Manip.h @@ -211,26 +211,30 @@ inline std::string upper(const std::string& s) // At this point, you might want to `lower()` and `upper()` for UTF-8 and/or // Windows-1252. That can be done, but ... our needs are mostly English (99.9%) -// with a very occassional smattering of French (Canada). We've gotten by this +// with a very occassional smattering of (Canadian-) French. We've gotten by this // long without being able to upper/lower 'ä' and 'Ä' and there's no current // requirement to do so. // // Furthermore, while Windows-1252 is easy as it's a single-byte encoding and -// covers many european languages, the standard is UTF-8. -// Upper/lower-casing in Unicode is quite a bit more complicated as there can be +// covers many european languages, the standard is UTF-8. Changing case +// with Unicode is quite a bit more complicated as there can be // numerous rules for various languages. For example, in German, the "old // rules" where that 'ß' was uppercased to "SS"; however, there is now a 'ẞ'. // And then there are semantics: in German, no word can begin with 'ß' (or 'ẞ') // making "ßanything" rather non-sensical. // // So for now (until there is a real use case), just "define these problems -// away" by not implementing `w1252_lower()`, `utf8_upper()`, etc. +// away" by not exposing `w1252_lower()`, `utf8_upper()`, etc. /* +// With Windows-1252 encoding, we can convert between 'ä' and 'Ä'. CODA_OSS_API void w1252_lower(std::string& s); CODA_OSS_API void w1252_upper(std::string& s); CODA_OSS_API void lower(str::W1252string& s); CODA_OSS_API void upper(str::W1252string& s); +// Hooking up UTF-8 for completeness and unit-testing. +// ** THESE ROUTINES ARE SLOW ** +// Performance improvements can be made, but nobody needs such right now. CODA_OSS_API void utf8_lower(std::string& s); CODA_OSS_API void utf8_upper(std::string& s); CODA_OSS_API void lower(coda_oss::u8string& s); @@ -244,6 +248,10 @@ CODA_OSS_API str::Windows1252_T to_w1252_lower(str::Windows1252_T); /***********************************************************************************/ +// Using std::transform() with ::toupper() is considerably slower than a lookup-table +CODA_OSS_API void ascii_lower(std::string& s); +CODA_OSS_API void ascii_upper(std::string& s); + /*! * Replaces any characters that are invalid in XML (&, <, >, ', ") with their * escaped counterparts diff --git a/externals/coda-oss/modules/c++/str/source/Encoding.cpp b/externals/coda-oss/modules/c++/str/source/Encoding.cpp index 80603dae9..c9833ea88 100644 --- a/externals/coda-oss/modules/c++/str/source/Encoding.cpp +++ b/externals/coda-oss/modules/c++/str/source/Encoding.cpp @@ -29,7 +29,6 @@ #endif #include -#include #include #include #include @@ -49,34 +48,24 @@ CODA_OSS_disable_warning(-Wshadow) #include "str/utf8.h" CODA_OSS_disable_warning_pop -//// "sys" depends on "str" so can't use sys::PlatformType -//enum class PlatformType -//{ -// Windows, -// Linux, -// // MacOS -//}; -#if _WIN32 -//static constexpr auto Platform = PlatformType::Windows; -#elif defined(_POSIX_C_SOURCE) -//static constexpr auto Platform = PlatformType::Linux; -#else -#error "Unknown platform" -#endif - // Need to look up characters from \x80 (EURO SIGN) to \x9F (LATIN CAPITAL LETTER Y WITH DIAERESIS) // in a map: http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT -inline coda_oss::u8string utf8_(char32_t i) +static inline coda_oss::u8string utf8_(char32_t i) { const auto ch = gsl::narrow(i); return str::to_u8string(std::u32string{ch}); } +// https://en.wikipedia.org/wiki/Windows-1252 +// > According to the information on Microsoft's and the Unicode Consortium's +// > websites, positions 81, 8D, 8F, 90, and 9D are unused; however, the +// > Windows API `MultiByteToWideChar` maps these to the corresponding +// > C1 control codes. The "best fit" mapping documents this behavior, too. static const auto& Windows1252_x80_x9F_to_u8string_() { static const std::map retval{ - {U'\x80', utf8_(U'\x20AC')} // EURO SIGN - // , {U'\x81, replacement_character } // UNDEFINED + {U'\x80', utf8_(U'\x20AC') } // EURO SIGN + , {U'\x81', utf8_(U'\x0081') } // UNDEFINED; _bstr_t just preserves these values, do the same // , {U'\x81', replacement_character } // UNDEFINED , {U'\x82', utf8_(U'\x201A') } // SINGLE LOW-9 QUOTATION MARK , {U'\x83', utf8_(U'\x0192') } // LATIN SMALL LETTER F WITH HOOK , {U'\x84', utf8_(U'\x201E') } // DOUBLE LOW-9 QUOTATION MARK @@ -88,10 +77,10 @@ static const auto& Windows1252_x80_x9F_to_u8string_() , {U'\x8A', utf8_(U'\x0160') } // LATIN CAPITAL LETTER S WITH CARON , {U'\x8B', utf8_(U'\x2039') } // SINGLE LEFT-POINTING ANGLE QUOTATION MARK , {U'\x8C', utf8_(U'\x0152') } // LATIN CAPITAL LIGATURE OE - //, {U'\x8D, replacement_character } // UNDEFINED + , {U'\x8D', utf8_(U'\x008D') } // UNDEFINED; _bstr_t just preserves these values, do the same // , {U'\x8D', replacement_character } // UNDEFINED , {U'\x8E', utf8_(U'\x017D') } // LATIN CAPITAL LETTER Z WITH CARON - //, {U'\x8F, replacement_character } // UNDEFINED - //, {U'\x90, replacement_character } // UNDEFINED + , {U'\x8F', utf8_(U'\x008F') } // UNDEFINED; _bstr_t just preserves these values, do the same // , {U'\x8F', replacement_character } // UNDEFINED + , {U'\x90', utf8_(U'\x0090') } // UNDEFINED; _bstr_t just preserves these values, do the same // , {U'\x90', replacement_character } // UNDEFINED , {U'\x91', utf8_(U'\x2018') } // LEFT SINGLE QUOTATION MARK , {U'\x92', utf8_(U'\x2019') } // RIGHT SINGLE QUOTATION MARK , {U'\x93', utf8_(U'\x201C') } // LEFT DOUBLE QUOTATION MARK @@ -104,7 +93,7 @@ static const auto& Windows1252_x80_x9F_to_u8string_() , {U'\x9A', utf8_(U'\x0161') } // LATIN SMALL LETTER S WITH CARON , {U'\x9B', utf8_(U'\x203A') } // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK , {U'\x9C', utf8_(U'\x0153') } // LATIN SMALL LIGATURE OE - //, {U'\x9D, replacement_character } // UNDEFINED + , {U'\x9D', utf8_(U'\x009D') } // UNDEFINED; _bstr_t just preserves these values, do the same // , {U'\x9D', replacement_character } // UNDEFINED , {U'\x9E', utf8_(U'\x017E') } // LATIN SMALL LETTER Z WITH CARON , {U'\x9F', utf8_(U'\x0178') } // LATIN CAPITAL LETTER Y WITH DIAERESIS }; @@ -114,19 +103,31 @@ static auto Windows1252_to_u8string() { auto retval = Windows1252_x80_x9F_to_u8string_(); - // Add the ISO8859-1 values to the map too. 1) We're already looking + using value_type = coda_oss::u8string::value_type; + // Add the ASCII values to the map too. 1) We're already looking // in the map anyway for Windows-1252 characters. 2) Need map // entires for conversion from UTF-8 to Windows-1252. + for (char32_t ch = U'\x00'; ch < U'\x80'; ch++) + { + assert(retval.find(ch) == retval.end()); // be sure we're not clobbering anything! + + coda_oss::u8string s {static_cast(ch)}; + retval[ch] = std::move(s); + } + + // Ditto for ISO8859-1 ... for (char32_t ch = U'\xA0'; ch <= U'\xff'; ch++) { + assert(retval.find(ch) == retval.end()); // be sure we're not clobbering anything! + // ISO8859-1 can be converted to UTF-8 with bit-twiddling - + // // https://stackoverflow.com/questions/4059775/convert-iso-8859-1-strings-to-utf-8-in-c-c // *out++=0xc2+(*in>0xbf), *out++=(*in++&0x3f)+0x80; const auto b1 = 0xc2 + (ch > 0xbf); const auto b2 = (ch & 0x3f) + 0x80; - coda_oss::u8string s {static_cast(b1)}; - s += coda_oss::u8string {static_cast(b2)}; + coda_oss::u8string s{static_cast(b1)}; + s += coda_oss::u8string{static_cast(b2)}; retval[ch] = std::move(s); } @@ -154,75 +155,72 @@ inline void append(std::u32string& result, const coda_oss::u8string& utf8) } template -static void fromWindows1252_(str::W1252string::value_type ch, std::basic_string& result, bool strict=false) +static void fromWindows1252_(str::W1252string::value_type ch, std::basic_string& result) { - // ASCII is the same in UTF-8 - if (ch < static_cast(0x80)) - { - using value_type = typename std::basic_string::value_type; - result += static_cast(ch); // ASCII - return; - } - static const auto map = Windows1252_to_u8string(); - const auto ch32 = static_cast(ch); + const auto ch32 = gsl::narrow(ch); const auto it = map.find(ch32); if (it != map.end()) { append(result, it->second); return; } - - switch (static_cast(ch)) + + // https://en.wikipedia.org/wiki/Windows-1252 + // > According to the information on Microsoft's and the Unicode + // Consortium's > websites, positions 81, 8D, 8F, 90, and 9D are unused; + // however, the > Windows API `MultiByteToWideChar` maps these to the + // corresponding > C1 control codes. The "best fit" mapping documents this + // behavior, too. + // static const auto replacement_character = utf8_(U'\xfffd'); + // append(result, replacement_character); + throw std::logic_error("Windows-1252 value not in map."); +} +template +class Windows1252_to_basic_string final +{ + static auto make_Windows1252_lookup() { - case 0x81: - case 0x8d: - case 0x8f: - case 0x90: - case 0x9d: - { - if (strict) + std::vector> retval(0xff + 1); + for (size_t i = 0; i <= 0xff; i++) { - // If the input text contains a character that isn't defined in Windows-1252; return a - // "replacement character." Yes, this will **corrupt** the input data as information is lost: - // https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character - // - // Or ... https://en.wikipedia.org/wiki/Windows-1252 - // > According to the information on Microsoft's and the Unicode - // > Consortium's websites, positions 81, 8D, 8F, 90, and 9D are - // > unused; however, the Windows API `MultiByteToWideChar` maps these - // > to the corresponding C1 control codes. The "best fit" mapping - // > documents this behavior, too. - static const coda_oss::u8string replacement_character = utf8_(U'\xfffd'); - append(result, replacement_character); + const auto ch = static_cast(i); + fromWindows1252_(ch, retval[i]); } - else - { - // _bstr_t just preserves these values, do the same - append(result, utf8_(ch32)); - } - break; + return retval; } - default: - throw std::invalid_argument("Invalid Windows-1252 character."); + +public: + static const auto& getLookup() + { + static const auto lookup = make_Windows1252_lookup(); + return lookup; } -} -template -inline void w1252_to_string(str::W1252string::const_pointer p, size_t sz, std::basic_string& result) -{ - for (size_t i = 0; i < sz; i++) + + Windows1252_to_basic_string() = default; + auto operator()(str::W1252string::const_pointer p, size_t sz) const { - fromWindows1252_(p[i], result); + static const auto& lookup = getLookup(); + + std::basic_string retval; + for (size_t i = 0; i < sz; i++) + { + const auto ch = gsl::narrow(p[i]); + retval += lookup[ch]; + } + return retval; } -} -template -inline void w1252to8(str::W1252string::const_pointer p, size_t sz, std::basic_string& result) +}; +template +static inline void w1252_to_basic_string(str::W1252string::const_pointer p, size_t sz, std::basic_string& result) { - w1252_to_string(p, sz, result); + static const Windows1252_to_basic_string convert; + result = convert(p, sz); } -inline void w1252to16(str::W1252string::const_pointer p, size_t sz, std::u16string& result) + +static inline void w1252to16(str::W1252string::const_pointer p, size_t sz, std::u16string& result) { - w1252_to_string(p, sz, result); + w1252_to_basic_string(p, sz, result); #if defined(_WIN32) && (!defined(_NDEBUG) || defined(DEBUG)) const _bstr_t bstr(std::string(str::details::cast(p), sz).c_str()); // no _bstr_t ctor taking sz @@ -230,20 +228,9 @@ inline void w1252to16(str::W1252string::const_pointer p, size_t sz, std::u16stri assert(result == str::str(wstr)); #endif } -inline void w1252to32(str::W1252string::const_pointer p, size_t sz, std::u32string& result) +static inline void w1252to32(str::W1252string::const_pointer p, size_t sz, std::u32string& result) { - w1252_to_string(p, sz, result); -} - -template -auto kv_to_vk(const std::map& kv) -{ - std::map retval; - for (const auto& p : kv) - { - retval[p.second] = p.first; - } - return retval; + w1252_to_basic_string(p, sz, result); } static void get_next_utf8_byte(coda_oss::u8string::const_pointer p, size_t sz, @@ -256,115 +243,127 @@ static void get_next_utf8_byte(coda_oss::u8string::const_pointer p, size_t sz, i++; // move to next byte // Bytes 2, 3 and 4 are always >= 0x80 (10xxxxxx), see https://en.wikipedia.org/wiki/UTF-8 - const auto b = static_cast(p[i]); - if (b < static_cast(0x80)) // 10xxxxxx + const auto b = gsl::narrow(p[i]); + if (b < gsl::narrow(0x80)) // 10xxxxxx { throw std::invalid_argument("Invalid next byte in UTF-8 encoding."); } utf8 += coda_oss::u8string{static_cast(b)}; } -template -static void utf8to1252(coda_oss::u8string::const_pointer p, size_t sz, std::basic_string& result, bool strict=false) +static void get_utf8_string(coda_oss::u8string::const_pointer p, size_t sz, size_t& i, coda_oss::u8string& utf8) { - using value_type = TChar; - for (size_t i = 0; i < sz; i++) + const auto b1 = gsl::narrow(p[i]); + if (b1 >= 0x80) // 0xxxxxxx { - const auto b1 = static_cast(p[i]); - - // ASCII is the same in UTF-8 - if (b1 < 0x80) // 0xxxxxxx - { - result += static_cast(b1); // ASCII - continue; - } - - auto utf8 = coda_oss::u8string{static_cast(b1)}; - get_next_utf8_byte(p, sz, i, utf8); if (b1 >= 0xE0) // 1110xxxx { // should be a 3- or 4-byte sequence - get_next_utf8_byte(p, sz, i, utf8); + get_next_utf8_byte(p, sz, i, utf8); if (b1 >= 0xF0) // 1111xxx { // should be a 4-byte sequence - get_next_utf8_byte(p, sz, i, utf8); + get_next_utf8_byte(p, sz, i, utf8); } } + } +} - static const auto map = kv_to_vk(Windows1252_to_u8string()); - const auto it = map.find(utf8); +template // may be stored in std::string or str::Windows1252 +class Utf_to_Windows1252 final +{ + template + void utf_to_1252(const TMap& map, const TUtf& utf, std::basic_string& result) const + { + auto w1252 = static_cast(0x7F); // + const auto it = map.find(utf); if (it != map.end()) { - result += static_cast(it->second); - } - else if (strict) - { - throw std::invalid_argument("UTF-8 sequence can't be converted to Windows-1252."); - //assert("UTF-8 sequence can't be converted to Windows-1252." && 0); - //result += static_cast(0x7F); // + w1252 = static_cast(it->second); } + #ifndef NDEBUG else { - // _bstr_t preserves these values - if (utf8.length() == 2) - { - result += static_cast(utf8[1]); - } - else - { - assert("UTF-8 sequence can't be converted to Windows-1252." && 0); - result += static_cast(0x7F); // - } + assert("UTF sequence can't be converted to Windows-1252." && 0); } + #endif // NDEBUG + result += w1252; } -} -static auto u16_to_Windows1252() -{ - // Find the corresponding UTF-16 value for every Windows-1252 input; - // obviously, most UTF-16 values can't be converted. Skip the first half - // as they're the same for ASCII. - std::map retval; - for (uint16_t i = 0x0080; i <= 0x00ff; i++) // **not** `uint8_t` to avoid wrap-around + static auto make_u16_map() { - const auto ch = static_cast(i); - const auto u16 = str::to_u16string(&ch, 1); - assert(u16.length() == 1); - retval[u16[0]] = ch; + // Find the corresponding UTF-16 value for every Windows-1252 input; + // obviously, most UTF-16 values can't be converted. + auto&& lookup = Windows1252_to_basic_string::getLookup(); + + std::map retval; + for (size_t i = 0; i <= 0xff; i++) // **not** `uint8_t` to avoid wrap-around + { + const auto u16 = lookup[i]; + assert(u16.length() == 1); // all values in Basic Multi-lingual Plane (BMP); no emojis, etc. + const auto ch = static_cast(i); + retval[u16[0]] = ch; + } + return retval; } - return retval; -} -static inline void utf16to1252(std::u16string::const_pointer p, size_t sz, std::string& result, bool strict=false) -{ - using value_type = std::string::value_type; - static const auto map = u16_to_Windows1252(); - for (size_t i = 0; i < sz; i++) + static auto make_utf8_map() { - const auto ch = p[i]; + // Find the corresponding UTF-8 value for every Windows-1252 input. + static const auto map = make_u16_map(); - if (ch < 0x0080) // ASCII + // Convert UTF-16 to UTF-8 + std::map retval; + for (auto&& kv : map) { - result += gsl::narrow(ch); - continue; + retval[utf8_(kv.first)] = kv.second; } + return retval; + } - const auto it = map.find(ch); - if (it != map.end()) - { - result += static_cast(it->second); - } - else if (strict) +public: + Utf_to_Windows1252() = default; + + auto operator()(std::u16string::const_pointer p, size_t sz) const + { + static const auto map = make_u16_map(); + + std::basic_string retval; + for (size_t i = 0; i < sz; i++) { - throw std::invalid_argument("UTF-16 sequence can't be converted to Windows-1252."); + const auto utf16 = p[i]; + utf_to_1252(map, utf16, retval); } - else + return retval; + } + + auto operator()(coda_oss::u8string::const_pointer p, size_t sz) const + { + static const auto map = make_utf8_map(); + + std::basic_string retval; + for (size_t i = 0; i < sz; i++) { - assert("UTF-16 sequence can't be converted to Windows-1252." && 0); - result += static_cast(0x7F); // + auto utf8 = coda_oss::u8string{p[i]}; + get_utf8_string(p, sz, i, utf8); + + utf_to_1252(map, utf8, retval); } + return retval; } +}; + +template +static inline void utf8to1252(coda_oss::u8string::const_pointer p, size_t sz, std::basic_string& result) +{ + static const Utf_to_Windows1252 convert; + result = convert(p, sz); +} + +static inline void utf16to1252(std::u16string::const_pointer p, size_t sz, std::string& result) +{ + static const Utf_to_Windows1252 convert; + result = convert(p, sz); } struct back_inserter final @@ -391,12 +390,30 @@ inline auto to_uXXstring(const std::basic_string& s) return str::to_u32string(p, s.length()); // assume std::wstring is UTF-32 everywhere except Windows #endif } -template -static std::wstring to_wstring_(const std::basic_string& s, bool is_utf8) + +template +struct basic_string_to_uXXstring_ final { }; +template +struct basic_string_to_uXXstring_ final +{ + auto operator()(const std::basic_string& s) const + { + return to_uXXstring(s); + } +}; +template +struct basic_string_to_uXXstring_ final { - - const auto result = is_utf8 ? to_uXXstring(s) - : to_uXXstring(s); + auto operator()(const std::basic_string& s) const + { + return to_uXXstring(s); + } +}; +template +inline auto to_wstring_(const std::basic_string&s) +{ + static const basic_string_to_uXXstring_ convert; + const auto result = convert(s); return str::str(result); } @@ -419,7 +436,7 @@ std::string str::testing::to_string(const str::W1252string& s) return str(s); #else std::string retval; - w1252to8(s.c_str(), s.length(), retval); + w1252_to_basic_string(s.c_str(), s.length(), retval); return retval; #endif } @@ -440,19 +457,19 @@ std::string str::details::to_string(const std::wstring& s) std::wstring str::details::to_wstring(const std::string& s) { - #if _WIN32 - return to_wstring_(s, false /*is_utf8*/); // Input is Windows-1252 on Windows + #if _WIN32 + return to_wstring_(s); // Input is Windows-1252 on Windows #else - return to_wstring_(s, true /*is_utf8*/); // Input is UTF-8 everywhere except Windows + return to_wstring_(s); // Input is UTF-8 everywhere except Windows #endif } std::wstring str::details::to_wstring(const coda_oss::u8string& s) { - return to_wstring_(s, true /*is_utf8*/); + return to_wstring_(s); } std::wstring str::testing::to_wstring(const str::W1252string& s) { - return to_wstring_(s, false /*is_utf8*/); + return to_wstring_(s); } /***********************************************************************************/ @@ -510,6 +527,6 @@ coda_oss::u8string str::to_u8string(std::u32string::const_pointer p, size_t sz) coda_oss::u8string str::to_u8string(W1252string::const_pointer p, size_t sz) { coda_oss::u8string retval; - w1252to8(p, sz, retval); + w1252_to_basic_string(p, sz, retval); return retval; } diff --git a/externals/coda-oss/modules/c++/str/source/Manip.cpp b/externals/coda-oss/modules/c++/str/source/Manip.cpp index 1d6bae1c2..87b06acdf 100644 --- a/externals/coda-oss/modules/c++/str/source/Manip.cpp +++ b/externals/coda-oss/modules/c++/str/source/Manip.cpp @@ -339,28 +339,31 @@ inline char to_w1252_upper_(char ch) } // See chart at: https://en.wikipedia.org/wiki/Windows-1252 + const auto u8 = static_cast(ch); + constexpr uint8_t s_with_caron = 0x9a /* š */; constexpr uint8_t oe = 0x9c /* œ */; constexpr uint8_t z_with_caron = 0x9e /* ž */; - constexpr uint8_t a_with_grave = 0xe0 /* à */; - constexpr uint8_t o_with_diaeresis = 0xf6 /* ö */; - constexpr uint8_t o_with_slash = 0xf8 /* ø */; - constexpr uint8_t small_thorn = 0xfe /* þ */; - constexpr uint8_t y_with_diaeresis = 0xff /* ÿ */; - - const auto u8 = static_cast(ch); if ((u8 == s_with_caron) || (u8 == oe) || (u8 == z_with_caron)) { return ch ^ 0x10; } + + constexpr uint8_t a_with_grave = 0xe0 /* à */; + constexpr uint8_t o_with_diaeresis = 0xf6 /* ö */; if ((u8 >= a_with_grave) && (u8 <= o_with_diaeresis)) { return ch ^ 0x20; } + // U+00F7 ÷ DIVISION SIGN + constexpr uint8_t o_with_slash = 0xf8 /* ø */; + constexpr uint8_t small_thorn = 0xfe /* þ */; if ((u8 >= o_with_slash) && (u8 <= small_thorn)) { return ch ^ 0x20; } + + constexpr uint8_t y_with_diaeresis = 0xff /* ÿ */; if (u8 == y_with_diaeresis) { constexpr uint8_t Y_with_diaeresis = 0x9f /* Ÿ */; @@ -382,33 +385,38 @@ inline char to_w1252_lower_(char ch) return ch | 0x20; } + // See chart at: https://en.wikipedia.org/wiki/Windows-1252 + const auto u8 = static_cast(ch); + constexpr uint8_t S_with_caron = 0x8a /* Š */; constexpr uint8_t OE = 0x8c /*Œ */; constexpr uint8_t Z_with_caron = 0x8e /* Ž */; - constexpr uint8_t Y_with_diaeresis = 0x9f /* Ÿ */; - constexpr uint8_t A_with_grave = 0xc0 /* À */; - constexpr uint8_t O_with_diaeresis = 0xd6 /* Ö */; - constexpr uint8_t O_with_slash = 0xd8 /* Ø */; - constexpr uint8_t capital_thorn = 0xde /* Þ */; - - const auto u8 = static_cast(ch); if ((u8 == S_with_caron) || (u8 == OE) || (u8 == Z_with_caron)) { return ch | 0x10; } + + constexpr uint8_t Y_with_diaeresis = 0x9f /* Ÿ */; if (u8 == Y_with_diaeresis) { constexpr uint8_t y_with_diaeresis = 0xff /* ÿ */; return y_with_diaeresis; } + + constexpr uint8_t A_with_grave = 0xc0 /* À */; + constexpr uint8_t O_with_diaeresis = 0xd6 /* Ö */; if ((u8 >= A_with_grave) && (u8 <= O_with_diaeresis)) { return ch | 0x20; } + // U+00D7 × MULTIPLICATION SIGN + constexpr uint8_t O_with_slash = 0xd8 /* Ø */; + constexpr uint8_t capital_thorn = 0xde /* Þ */; if ((u8 >= O_with_slash) && (u8 <= capital_thorn)) { return ch | 0x20; } + return ch; } str::Windows1252_T to_w1252_lower(str::Windows1252_T ch) @@ -417,6 +425,72 @@ str::Windows1252_T to_w1252_lower(str::Windows1252_T ch) return static_cast(retval); } +static const auto& w1252_upper_lookup() +{ + static std::array lookup_; + static const auto& lookup = make_lookup(lookup_, to_w1252_upper_); + return lookup; +} +void w1252_upper(std::string& w1252) +{ + do_lookup(w1252, w1252_upper_lookup()); +} +void upper(str::W1252string& s) +{ + do_lookup(s, w1252_upper_lookup()); +} + +static const auto& w1252_lower_lookup() +{ + static std::array lookup_; + static const auto& lookup = make_lookup(lookup_, to_w1252_lower_); + return lookup; +} +void w1252_lower(std::string& w1252) +{ + do_lookup(w1252, w1252_lower_lookup()); +} +void lower(str::W1252string& s) +{ + do_lookup(s, w1252_lower_lookup()); +} + +// These routines are SLOW ... yes, they can be made faster +// but nobody needs that right now. +inline auto utf8_convert(str::W1252string& w1252, void (*convert)(str::W1252string&)) +{ + convert(w1252); // upper() or lower() for Windows-1252 + return to_u8string(w1252); +} +inline void utf8_convert(std::string& strUtf8, void (*convert)(str::W1252string&)) +{ + auto w1252 = to_w1252string(str::str(strUtf8)); + const auto utf8 = utf8_convert(w1252, convert); + strUtf8 = str::str(utf8); +} +void utf8_upper(std::string& strUtf8) +{ + utf8_convert(strUtf8, upper); +} +void utf8_lower(std::string& strUtf8) +{ + utf8_convert(strUtf8, lower); +} + +inline void utf8_convert(coda_oss::u8string& s, void (*convert)(str::W1252string&)) +{ + auto w1252 = to_w1252string(s); + s = utf8_convert(w1252, convert); +} +void lower(coda_oss::u8string& s) +{ + utf8_convert(s, lower); +} +void upper(coda_oss::u8string& s) +{ + utf8_convert(s, upper); +} + void escapeForXML(std::string& str) { // & needs to be first or else it'll mess up the other characters that we replace diff --git a/externals/coda-oss/modules/c++/sys/include/sys/AbstractOS.h b/externals/coda-oss/modules/c++/sys/include/sys/AbstractOS.h index efd80de08..5eb2e10a1 100644 --- a/externals/coda-oss/modules/c++/sys/include/sys/AbstractOS.h +++ b/externals/coda-oss/modules/c++/sys/include/sys/AbstractOS.h @@ -55,27 +55,57 @@ namespace sys * Also see https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html * "... For the x86-64 compiler, these extensions [ -msse2 ] are enabled by default." * We're 64-bit only. +* +* Well ... it turns out third parties want to compile this code in different +* enviroments which we don't know about; SIMD support makes that +* more difficult. */ +#ifdef CODA_OSS_DISABLE_SIMD + #ifdef CODA_OSS_ENABLE_SIMD + #error "CODA_OSS_ENABLE_SIMD already #define'd'" + #endif + #define CODA_OSS_ENABLE_SIMD 0 +#endif // CODA_OSS_DISABLE_SIMD + +#ifndef CODA_OSS_ENABLE_SIMD + #if __AVX512F__ || __AVX2__ + #define CODA_OSS_ENABLE_SIMD 1 + #elif _MSC_VER && _M_X64 /*MSVC for SSE2*/ + #define CODA_OSS_ENABLE_SIMD 1 + #elif __GNUC__ && __SSE2__ + #define CODA_OSS_ENABLE_SIMD 1 + #else + #define CODA_OSS_ENABLE_SIMD 0 + #endif +#endif + enum class SIMDInstructionSet { + Disabled, // CODA_OSS_ENABLE_SIMD = 0 + Unknown, // CODA_OSS_ENABLE_SIMD = 1, but can't determine + SSE2, // https://en.wikipedia.org/wiki/SSE2 AVX2, // https://en.wikipedia.org/wiki/Advanced_Vector_Extensions AVX512F, // https://en.wikipedia.org/wiki/AVX-512 }; -constexpr auto getSIMDInstructionSet() { return SIMDInstructionSet:: - // https://learn.microsoft.com/en-us/cpp/preprocessor/predefined-macros?view=msvc-170 - - #if __AVX512F__ - AVX512F - #elif __AVX2__ - AVX2 - #elif _M_X64 /*MSVC*/ || __SSE2__ /*GCC*/ - SSE2 +constexpr auto getSIMDInstructionSet() { + #if !CODA_OSS_ENABLE_SIMD + return SIMDInstructionSet::Disabled; #else - #error "Can't determine SIMDInstructionSet'" - #endif -; } + // https://learn.microsoft.com/en-us/cpp/preprocessor/predefined-macros?view=msvc-170 + #if __AVX512F__ + return SIMDInstructionSet::AVX512F; + #elif __AVX2__ + return SIMDInstructionSet::AVX2; + #elif _M_X64 /*MSVC*/ || __SSE2__ /*GCC*/ + return SIMDInstructionSet::SSE2; + #else + #error "Can't determine SIMDInstructionSet'" + return SIMDInstructionSet::Unknown; + #endif + #endif // CODA_OSS_ENABLE_SIMD +} /*! * \class AbstractOS diff --git a/externals/coda-oss/modules/c++/sys/source/AbstractOS.cpp b/externals/coda-oss/modules/c++/sys/source/AbstractOS.cpp index 436e12bb6..ebe072ff7 100644 --- a/externals/coda-oss/modules/c++/sys/source/AbstractOS.cpp +++ b/externals/coda-oss/modules/c++/sys/source/AbstractOS.cpp @@ -273,9 +273,7 @@ void AbstractOS::appendEnv(const std::string& envVar, const std::vectorGuard true AdvancedVectorExtensions2 + Speed diff --git a/modules/c/j2k/J2KDecompress.vcxproj b/modules/c/j2k/J2KDecompress.vcxproj index 3fc0690be..a3abea9e0 100644 --- a/modules/c/j2k/J2KDecompress.vcxproj +++ b/modules/c/j2k/J2KDecompress.vcxproj @@ -100,6 +100,7 @@ Guard true AdvancedVectorExtensions2 + Speed diff --git a/modules/c/nitf/TEST_DES.vcxproj b/modules/c/nitf/TEST_DES.vcxproj index 1457b9fd3..893726fa7 100644 --- a/modules/c/nitf/TEST_DES.vcxproj +++ b/modules/c/nitf/TEST_DES.vcxproj @@ -100,6 +100,7 @@ Guard true AdvancedVectorExtensions2 + Speed diff --git a/modules/c/nrt/include/nrt/Version.h b/modules/c/nrt/include/nrt/Version.h index 46c83f80c..f53a42604 100644 --- a/modules/c/nrt/include/nrt/Version.h +++ b/modules/c/nrt/include/nrt/Version.h @@ -1,5 +1,5 @@ #pragma once #if !defined(NRT_LIB_VERSION) -#define NRT_LIB_VERSION "2.11.5" +#define NRT_LIB_VERSION "2.11.6" #endif