Skip to content

Commit

Permalink
Merge branch 'main' into feature/shared-library
Browse files Browse the repository at this point in the history
  • Loading branch information
Dan Smith committed Dec 12, 2023
2 parents 7174552 + 938fa48 commit 326ebe4
Show file tree
Hide file tree
Showing 31 changed files with 524 additions and 283 deletions.
3 changes: 1 addition & 2 deletions externals/coda-oss/UnitTest/UnitTest.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,13 @@
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
Expand Down Expand Up @@ -97,6 +95,7 @@
<ConformanceMode>true</ConformanceMode>
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
Expand Down
3 changes: 1 addition & 2 deletions externals/coda-oss/modules/c++/logging/source/Setup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,7 @@ logging::setupLogger(const path& program_,
// setup logging formatter
std::unique_ptr <logging::Formatter> formatter;
const auto logFile = logFile_.string();
auto file = logFile;
str::lower(file);
const auto file = str::lower(logFile);
if (str::endsWith(file, ".xml"))
{
formatter.reset(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class CODA_OSS_API ErrorHandler
virtual void onPluginError(except::Context& c) = 0;
};

class CODA_OSS_API DefaultErrorHandler final : public ErrorHandler
class CODA_OSS_API DefaultErrorHandler : public ErrorHandler
{
public:
DefaultErrorHandler(logging::LoggerPtr logger = logging::LoggerPtr());
Expand Down
68 changes: 65 additions & 3 deletions externals/coda-oss/modules/c++/str/include/str/Manip.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* =========================================================================
/* =========================================================================
* This file is part of str-c++
* =========================================================================
*
Expand Down Expand Up @@ -34,6 +34,7 @@
#include "coda_oss/CPlusPlus.h"
#include "coda_oss/string.h"
#include "str/Convert.h"
#include "str/W1252string.h"

namespace str
{
Expand Down Expand Up @@ -177,10 +178,71 @@ CODA_OSS_API std::vector<std::string> split(const std::string& s,
const std::string& splitter = " ",
size_t maxSplit = std::string::npos);

/***********************************************************************************/
//! Uses std::transform to convert all chars to lower case
//! Uses std::transform to convert all chars to upper case
CODA_OSS_API void lower(std::string& s);
CODA_OSS_API void upper(std::string& s);
//CODA_OSS_API void lower(std::string& s);
//CODA_OSS_API void upper(std::string& s);
//
// Using std::transform() with ::toupper() is considerably slower than a lookup-table
CODA_OSS_API void ascii_lower(std::string& s);
//! Converts `s` to lower case in place by forwarding to ascii_lower().
inline void lower(std::string& s)
{
    ascii_lower(s);
}
//! Returns a lower-cased copy of `s`; the argument itself is not modified.
inline std::string lower(const std::string& s)
{
    auto result = s;  // work on a private copy so the caller's string is untouched
    ascii_lower(result);
    return result;
}

CODA_OSS_API void ascii_upper(std::string& s);
//! Converts `s` to upper case in place by forwarding to ascii_upper().
inline void upper(std::string& s)
{
    ascii_upper(s);
}
//! Returns an upper-cased copy of `s`; the argument itself is not modified.
inline std::string upper(const std::string& s)
{
    auto result = s;  // work on a private copy so the caller's string is untouched
    ascii_upper(result);
    return result;
}

// At this point, you might want `lower()` and `upper()` for UTF-8 and/or
// Windows-1252. That can be done, but ... our needs are mostly English (99.9%)
// with a very occasional smattering of French (Canada). We've gotten by this
// long without being able to upper/lower 'ä' and 'Ä' and there's no current
// requirement to do so.
//
// Furthermore, while Windows-1252 is easy as it's a single-byte encoding and
// covers many European languages, the standard is UTF-8.
// Upper/lower-casing in Unicode is quite a bit more complicated as there can be
// numerous rules for various languages. For example, in German, the "old
// rules" were that 'ß' was uppercased to "SS"; however, there is now a 'ẞ'.
// And then there are semantics: in German, no word can begin with 'ß' (or 'ẞ')
// making "ßanything" rather nonsensical.
//
// So for now (until there is a real use case), just "define these problems
// away" by not implementing `w1252_lower()`, `utf8_upper()`, etc.
/*
CODA_OSS_API void w1252_lower(std::string& s);
CODA_OSS_API void w1252_upper(std::string& s);
CODA_OSS_API void lower(str::W1252string& s);
CODA_OSS_API void upper(str::W1252string& s);
CODA_OSS_API void utf8_lower(std::string& s);
CODA_OSS_API void utf8_upper(std::string& s);
CODA_OSS_API void lower(coda_oss::u8string& s);
CODA_OSS_API void upper(coda_oss::u8string& s);
*/

// I've already got these hooked up, keep the code around ... long ugly
// names to discourage use.
CODA_OSS_API str::Windows1252_T to_w1252_upper(str::Windows1252_T);
CODA_OSS_API str::Windows1252_T to_w1252_lower(str::Windows1252_T);

/***********************************************************************************/

/*!
* Replaces any characters that are invalid in XML (&, <, >, ', ") with their
Expand Down
89 changes: 48 additions & 41 deletions externals/coda-oss/modules/c++/str/source/Encoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <stdexcept>
#include <vector>
#include <iterator>
#include <string>

#include "gsl/gsl.h"
#include "config/compiler_extensions.h"
Expand Down Expand Up @@ -65,59 +66,58 @@ CODA_OSS_disable_warning_pop

// Need to look up characters from \x80 (EURO SIGN) to \x9F (LATIN CAPITAL LETTER Y WITH DIAERESIS)
// in a map: http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
inline coda_oss::u8string utf8_(uint32_t i)
inline coda_oss::u8string utf8_(char32_t i)
{
const auto ch = gsl::narrow<std::u32string::value_type>(i);
return str::to_u8string(std::u32string{ch});
}

static const auto& Windows1252_x80_x9F_to_u8string()
{
static const std::map<uint32_t, coda_oss::u8string> retval {
{0x80, utf8_(0x20AC) } // EURO SIGN
// , {0x81, replacement_character } // UNDEFINED
, {0x82, utf8_(0x201A) } // SINGLE LOW-9 QUOTATION MARK
, {0x83, utf8_(0x0192) } // LATIN SMALL LETTER F WITH HOOK
, {0x84, utf8_(0x201E) } // DOUBLE LOW-9 QUOTATION MARK
, {0x85, utf8_(0x2026) } // HORIZONTAL ELLIPSIS
, {0x86, utf8_(0x2020) } // DAGGER
, {0x87, utf8_(0x2021) } // DOUBLE DAGGER
, {0x88, utf8_(0x02C6) } // MODIFIER LETTER CIRCUMFLEX ACCENT
, {0x89, utf8_(0x2030) } // PER MILLE SIGN
, {0x8A, utf8_(0x0160) } // LATIN CAPITAL LETTER S WITH CARON
, {0x8B, utf8_(0x2039) } // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
, {0x8C, utf8_(0x0152) } // LATIN CAPITAL LIGATURE OE
//, {0x8D, replacement_character } // UNDEFINED
, {0x8E, utf8_(0x017D) } // LATIN CAPITAL LETTER Z WITH CARON
//, {0x8F, replacement_character } // UNDEFINED
//, {0x90, replacement_character } // UNDEFINED
, {0x91, utf8_(0x2018) } // LEFT SINGLE QUOTATION MARK
, {0x92, utf8_(0x2019) } // RIGHT SINGLE QUOTATION MARK
, {0x93, utf8_(0x201C) } // LEFT DOUBLE QUOTATION MARK
, {0x94, utf8_(0x201D) } // RIGHT DOUBLE QUOTATION MARK
, {0x95, utf8_(0x2022) } // BULLET
, {0x96, utf8_(0x2013) } // EN DASH
, {0x97, utf8_(0x2014) } // EM DASH
, {0x98, utf8_(0x02DC) } // SMALL TILDE
, {0x99, utf8_(0x2122) } // TRADE MARK SIGN
, {0x9A, utf8_(0x0161) } // LATIN SMALL LETTER S WITH CARON
, {0x9B, utf8_(0x203A) } // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
, {0x9C, utf8_(0x0153) } // LATIN SMALL LIGATURE OE
//, {0x9D, replacement_character } // UNDEFINED
, {0x9E, utf8_(0x017E) } // LATIN SMALL LETTER Z WITH CARON
, {0x9F, utf8_(0x0178) } // LATIN CAPITAL LETTER Y WITH DIAERESIS
static const auto& Windows1252_x80_x9F_to_u8string_()
{
static const std::map<char32_t, coda_oss::u8string> retval{
{U'\x80', utf8_(U'\x20AC')} // EURO SIGN
// , {U'\x81, replacement_character } // UNDEFINED
, {U'\x82', utf8_(U'\x201A') } // SINGLE LOW-9 QUOTATION MARK
, {U'\x83', utf8_(U'\x0192') } // LATIN SMALL LETTER F WITH HOOK
, {U'\x84', utf8_(U'\x201E') } // DOUBLE LOW-9 QUOTATION MARK
, {U'\x85', utf8_(U'\x2026') } // HORIZONTAL ELLIPSIS
, {U'\x86', utf8_(U'\x2020') } // DAGGER
, {U'\x87', utf8_(U'\x2021') } // DOUBLE DAGGER
, {U'\x88', utf8_(U'\x02C6') } // MODIFIER LETTER CIRCUMFLEX ACCENT
, {U'\x89', utf8_(U'\x2030') } // PER MILLE SIGN
, {U'\x8A', utf8_(U'\x0160') } // LATIN CAPITAL LETTER S WITH CARON
, {U'\x8B', utf8_(U'\x2039') } // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
, {U'\x8C', utf8_(U'\x0152') } // LATIN CAPITAL LIGATURE OE
//, {U'\x8D, replacement_character } // UNDEFINED
, {U'\x8E', utf8_(U'\x017D') } // LATIN CAPITAL LETTER Z WITH CARON
//, {U'\x8F, replacement_character } // UNDEFINED
//, {U'\x90, replacement_character } // UNDEFINED
, {U'\x91', utf8_(U'\x2018') } // LEFT SINGLE QUOTATION MARK
, {U'\x92', utf8_(U'\x2019') } // RIGHT SINGLE QUOTATION MARK
, {U'\x93', utf8_(U'\x201C') } // LEFT DOUBLE QUOTATION MARK
, {U'\x94', utf8_(U'\x201D') } // RIGHT DOUBLE QUOTATION MARK
, {U'\x95', utf8_(U'\x2022') } // BULLET
, {U'\x96', utf8_(U'\x2013') } // EN DASH
, {U'\x97', utf8_(U'\x2014') } // EM DASH
, {U'\x98', utf8_(U'\x02DC') } // SMALL TILDE
, {U'\x99', utf8_(U'\x2122') } // TRADE MARK SIGN
, {U'\x9A', utf8_(U'\x0161') } // LATIN SMALL LETTER S WITH CARON
, {U'\x9B', utf8_(U'\x203A') } // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
, {U'\x9C', utf8_(U'\x0153') } // LATIN SMALL LIGATURE OE
//, {U'\x9D, replacement_character } // UNDEFINED
, {U'\x9E', utf8_(U'\x017E') } // LATIN SMALL LETTER Z WITH CARON
, {U'\x9F', utf8_(U'\x0178') } // LATIN CAPITAL LETTER Y WITH DIAERESIS
};
return retval;
}

static auto Windows1252_to_u8string()
{
auto retval = Windows1252_x80_x9F_to_u8string();
auto retval = Windows1252_x80_x9F_to_u8string_();

// Add the ISO8859-1 values to the map too. 1) We're already looking
// in the map anyway for Windows-1252 characters. 2) Need map
// entries for conversion from UTF-8 to Windows-1252.
for (std::u32string::value_type ch = 0xA0; ch <= 0xff; ch++)
for (char32_t ch = U'\xA0'; ch <= U'\xff'; ch++)
{
// ISO8859-1 can be converted to UTF-8 with bit-twiddling

Expand Down Expand Up @@ -186,7 +186,14 @@ static void fromWindows1252_(str::W1252string::value_type ch, std::basic_string<
// If the input text contains a character that isn't defined in Windows-1252; return a
// "replacement character." Yes, this will **corrupt** the input data as information is lost:
// https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character
static const coda_oss::u8string replacement_character = utf8_(0xfffd);
//
// Or ... https://en.wikipedia.org/wiki/Windows-1252
// > According to the information on Microsoft's and the Unicode
// > Consortium's websites, positions 81, 8D, 8F, 90, and 9D are
// > unused; however, the Windows API `MultiByteToWideChar` maps these
// > to the corresponding C1 control codes. The "best fit" mapping
// > documents this behavior, too.
static const coda_oss::u8string replacement_character = utf8_(U'\xfffd');
append(result, replacement_character);
}
else
Expand Down Expand Up @@ -229,7 +236,7 @@ inline void w1252to32(str::W1252string::const_pointer p, size_t sz, std::u32stri
}

template<typename TKey, typename TValue>
std::map<TValue, TKey> kv_to_vk(const std::map<TKey, TValue>& kv)
auto kv_to_vk(const std::map<TKey, TValue>& kv)
{
std::map<TValue, TKey> retval;
for (const auto& p : kv)
Expand Down
Loading

0 comments on commit 326ebe4

Please sign in to comment.