diff --git a/UnitTest/str.cpp b/UnitTest/str.cpp index 5a64bc3619..a1987212ea 100644 --- a/UnitTest/str.cpp +++ b/UnitTest/str.cpp @@ -4,8 +4,6 @@ #include #include -#include -#include namespace str { diff --git a/modules/c++/cli/include/cli/Value.h b/modules/c++/cli/include/cli/Value.h index 6c72fa108d..571565cab5 100644 --- a/modules/c++/cli/include/cli/Value.h +++ b/modules/c++/cli/include/cli/Value.h @@ -106,10 +106,7 @@ class CODA_OSS_API Value { if (index >= mValues.size()) throw except::IndexOutOfRangeException( - Ctxt( - FmtX( - "Invalid index: %d", - index))); + Ctxt(FmtX("Invalid index: %d", index))); return str::toType(mValues[index]); } diff --git a/modules/c++/cli/source/ArgumentParser.cpp b/modules/c++/cli/source/ArgumentParser.cpp index be702484ef..3694e7ed31 100644 --- a/modules/c++/cli/source/ArgumentParser.cpp +++ b/modules/c++/cli/source/ArgumentParser.cpp @@ -317,8 +317,7 @@ std::unique_ptr cli::ArgumentParser::parse(const std::string& prog std::map& flagMap = (subOption ? shortOptionsFlags : shortFlags); if (flagMap.find(op) != flagMap.end()) - parseError(FmtX("Conflicting option: %c%s", mPrefixChar, - op.c_str())); + parseError(FmtX("Conflicting option: %c%s", mPrefixChar, op)); flagMap[op] = arg; } for (std::vector::const_iterator it = @@ -328,8 +327,7 @@ std::unique_ptr cli::ArgumentParser::parse(const std::string& prog std::map& flagMap = (subOption ? longOptionsFlags : longFlags); if (flagMap.find(op) != flagMap.end()) - parseError(FmtX("Conflicting option: %c%c%s", mPrefixChar, - mPrefixChar, op.c_str())); + parseError(FmtX("Conflicting option: %c%c%s", mPrefixChar, mPrefixChar, op)); flagMap[op] = arg; } } @@ -458,8 +456,7 @@ std::unique_ptr cli::ArgumentParser::parse(const std::string& prog } else { - throw except::Exception(Ctxt( - FmtX("Invalid option: [%s]", argStr.c_str()))); + throw except::Exception(Ctxt(FmtX("Invalid option: [%s]", argStr))); } } } @@ -500,8 +497,7 @@ std::unique_ptr cli::ArgumentParser::parse(const std::string& prog } else { - throw except::Exception(Ctxt( - FmtX("Invalid option: [%s]", argStr.c_str()))); + throw except::Exception(Ctxt(FmtX("Invalid option: [%s]", argStr))); } } @@ -549,10 +545,7 @@ std::unique_ptr cli::ArgumentParser::parse(const std::string& prog } if (!added) - parseError( - FmtX( - "option requires value or has exceeded its max: [%s]", - argVar.c_str())); + parseError(FmtX("option requires value or has exceeded its max: [%s]", argVar)); currentResults->put(argVar, v); break; @@ -677,11 +670,9 @@ std::unique_ptr cli::ArgumentParser::parse(const std::string& prog if (arg->isRequired() || numGiven > 0) { if (minArgs > 0 && numGiven < static_cast(minArgs)) - parseError(FmtX("not enough arguments, %d required: [%s]", - minArgs, argId.c_str())); + parseError(FmtX("not enough arguments, %d required: [%s]", minArgs, argId)); if (maxArgs >= 0 && numGiven > static_cast(maxArgs)) - parseError(FmtX("too many arguments, %d supported: [%s]", - maxArgs, argId.c_str())); + parseError(FmtX("too many arguments, %d supported: [%s]", maxArgs, argId)); } diff --git a/modules/c++/coda-oss.vcxproj b/modules/c++/coda-oss.vcxproj index 88a1d9a622..3b81ab3706 100644 --- a/modules/c++/coda-oss.vcxproj +++ b/modules/c++/coda-oss.vcxproj @@ -223,13 +223,12 @@ - - + @@ -422,8 +421,6 @@ - - diff --git a/modules/c++/coda-oss.vcxproj.filters b/modules/c++/coda-oss.vcxproj.filters index 366ad6b5c5..1a54be81bb 100644 --- a/modules/c++/coda-oss.vcxproj.filters +++ b/modules/c++/coda-oss.vcxproj.filters @@ -72,12 +72,6 @@ str - - str - - - str - str @@ -951,6 +945,9 @@ mem + + str + @@ -969,12 +966,6 @@ str - - str - - - str - str @@ -1412,9 +1403,6 @@ {619ad1da-f21c-4027-9b5d-23f08225b96f} - - {59f3d9a1-06d3-4779-aef2-cc55223c3017} - {3051f4b6-dad4-47ea-b4bd-d25d9e09f782} @@ -1499,6 +1487,9 @@ {79aa73a7-ed0d-4826-b7ed-c8d91d96998a} + + {59f3d9a1-06d3-4779-aef2-cc55223c3017} + diff --git a/modules/c++/include/UnitTest.h b/modules/c++/include/UnitTest.h index 751843c1f0..b99a2cde42 100644 --- a/modules/c++/include/UnitTest.h +++ b/modules/c++/include/UnitTest.h @@ -4,7 +4,6 @@ #include #include "TestCase.h" -#include "str/EncodedStringView.h" #if !_MSC_VER #error "This file is for use with a Visual Studio 'UnitTest' project." @@ -133,7 +132,7 @@ inline void assert_almost_eq(const std::string& testName, long double X1, long d #define TEST_ASSERT_EQ_MSG(msg, X1, X2) testName, Microsoft::VisualStudio::CppUnitTestFramework::Logger::WriteMessage(msg.c_str()); TEST_ASSERT_EQ(X1, X2) #undef TEST_FAIL_MSG -#define TEST_FAIL_MSG(msg) { (void)testName; Microsoft::VisualStudio::CppUnitTestFramework::Assert::Fail(str::EncodedStringView(msg).wstring().c_str()); } +#define TEST_FAIL_MSG(msg) { (void)testName; Microsoft::VisualStudio::CppUnitTestFramework::Assert::Fail(str::toWString(msg).c_str()); } #undef TEST_EXCEPTION #undef TEST_THROWS diff --git a/modules/c++/mem/unittests/test_unique_ptr.cpp b/modules/c++/mem/unittests/test_unique_ptr.cpp index 9c15781c8a..97fac431ce 100644 --- a/modules/c++/mem/unittests/test_unique_ptr.cpp +++ b/modules/c++/mem/unittests/test_unique_ptr.cpp @@ -45,17 +45,17 @@ TEST_CASE(testStdUniquePtr) TEST_ASSERT_NULL(fooCtor.get()); fooCtor.reset(new Foo(123)); - TEST_ASSERT_NOT_EQ(nullptr, fooCtor.get()); + TEST_ASSERT_NOT_NULL(fooCtor.get()); TEST_ASSERT_EQ(123, fooCtor->mVal); } { auto fooCtor = std::make_unique(123); - TEST_ASSERT_NOT_EQ(nullptr, fooCtor.get()); + TEST_ASSERT_NOT_NULL(fooCtor.get()); TEST_ASSERT_EQ(123, fooCtor->mVal); } { auto pFoos = std::make_unique(123); // 123 instances of Foo - TEST_ASSERT_NOT_EQ(nullptr, pFoos.get()); + TEST_ASSERT_NOT_NULL(pFoos.get()); TEST_ASSERT_EQ(0, pFoos[0].mVal); TEST_ASSERT_EQ(0, pFoos[122].mVal); } @@ -65,24 +65,24 @@ TEST_CASE(test_make_unique) { { auto fooCtor = std::make_unique(123); - TEST_ASSERT_NOT_EQ(nullptr, fooCtor.get()); + TEST_ASSERT_NOT_NULL(fooCtor.get()); TEST_ASSERT_EQ(123, fooCtor->mVal); } { auto pFoos = std::make_unique(123); // 123 instances of Foo - TEST_ASSERT_NOT_EQ(nullptr, pFoos.get()); + TEST_ASSERT_NOT_NULL(pFoos.get()); TEST_ASSERT_EQ(0, pFoos[0].mVal); TEST_ASSERT_EQ(0, pFoos[122].mVal); } { auto fooCtor = std::make_unique(123); - TEST_ASSERT_NOT_EQ(nullptr, fooCtor.get()); + TEST_ASSERT_NOT_NULL(fooCtor.get()); TEST_ASSERT_EQ(123, fooCtor->mVal); } { auto pFoos = std::make_unique(123); // 123 instances of Foo - TEST_ASSERT_NOT_EQ(nullptr, pFoos.get()); + TEST_ASSERT_NOT_NULL(pFoos.get()); TEST_ASSERT_EQ(0, pFoos[0].mVal); TEST_ASSERT_EQ(0, pFoos[122].mVal); } diff --git a/modules/c++/net/include/net/URL.h b/modules/c++/net/include/net/URL.h index ea9b4f8fe7..60e914e795 100644 --- a/modules/c++/net/include/net/URL.h +++ b/modules/c++/net/include/net/URL.h @@ -113,6 +113,13 @@ class URL std::string mFragment; }; + +inline std::ostream& operator<<(std::ostream& os, const URL& url) +{ + os << url.toString(); + return os; +} + } #endif diff --git a/modules/c++/pch.h b/modules/c++/pch.h index 236c1100ad..7952e6efa2 100644 --- a/modules/c++/pch.h +++ b/modules/c++/pch.h @@ -103,10 +103,10 @@ CODA_OSS_disable_warning_pop // change we want to rebuild everything anyway. #include "gsl/gsl.h" #include "config/Exports.h" +#include "mem/SharedPtr.h" +#include "sys/filesystem.h" #include "except/Throwable.h" #include "sys/Conf.h" -#include "sys/filesystem.h" -#include "mem/SharedPtr.h" #include "xml/lite/xerces_.h" #pragma comment(lib, "xerces-c") diff --git a/modules/c++/plugin/include/plugin/BasicPluginManager.h b/modules/c++/plugin/include/plugin/BasicPluginManager.h index c829446816..738dabe944 100644 --- a/modules/c++/plugin/include/plugin/BasicPluginManager.h +++ b/modules/c++/plugin/include/plugin/BasicPluginManager.h @@ -279,7 +279,7 @@ template class BasicPluginManager for (unsigned int i = 0; ops[i] != nullptr; i++) oss << ops[i] << ":"; eh->onPluginVersionUnsupported( - FmtX("For plugin supporting ops %s version [%d.%d] not supported (%d.%d)", + str::Format("For plugin supporting ops %s version [%d.%d] not supported (%d.%d)", oss.str().c_str(), majorVersion, minorVersion, mMajorVersion, mMinorVersion ) diff --git a/modules/c++/str/include/str/Convert.h b/modules/c++/str/include/str/Convert.h index 0b3698e49f..83afad5b7d 100644 --- a/modules/c++/str/include/str/Convert.h +++ b/modules/c++/str/include/str/Convert.h @@ -25,6 +25,7 @@ #define CODA_OSS_str_Convert_h_INCLUDED_ #include +#include #include #include #include @@ -33,7 +34,6 @@ #include #include #include -#include #include "config/Exports.h" #include "coda_oss/string.h" @@ -41,6 +41,8 @@ #include "coda_oss/cstddef.h" #include "types/Complex.h" #include "import/except.h" +#include "gsl/gsl.h" +#include "str/Encoding.h" namespace str { @@ -50,123 +52,28 @@ template int getPrecision(const std::complex&); template int getPrecision(const types::ComplexInteger&); #endif -namespace details -{ - // Templating (and then specializing) toString() creates all kinds of weird name-look - // problems; avoid trying to work-around all that by just not doing it. - // - // The preferred approach is to make a a toString() free function. - template - inline std::string default_toString(const T& value) - { - // Use operator<<() to generate a string value; this may not be quite - // 100% kosher, but it's been long-standing practice in this codebase. - // - // Note that std::to_string() doesn't necessarily generate the same - // output as writing to std::cout; see - // https://en.cppreference.com/w/cpp/string/basic_string/to_string - std::ostringstream buf; - buf.precision(getPrecision(value)); - buf << std::boolalpha << value; - return buf.str(); - } - - // https://stackoverflow.com/a/73594999/19912380 - template struct priority : priority {}; - template<> struct priority<0> {}; - - template - inline auto toString_imp(const T& obj, priority<2>) -> decltype(obj.toString(), std::string()) - { - return obj.toString(); // member-function - } - - template - inline auto toString_imp(const T& obj, priority<1>) -> decltype(toString(obj), std::string()) - { - return toString(obj); // free function - } - - template - inline auto toString_imp(const T& obj, priority<0>) -> decltype(default_toString(obj), std::string()) - { - return details::default_toString(obj); // our default utility which uses operator<<() - } - - // In order, try to call 1) obj.toString() (highest priority), 2) toString(obj), - // and finally 3) toString_(obj) (lowest priority). - template - inline auto toString_(const T& obj) -> decltype(toString_imp(obj, priority<2>{}), std::string()) - { - return details::toString_imp(obj, priority<2>{}); - } -} +// Note that std::to_string() doesn't necessarily generate the same output as writing +// to std::cout; see https://en.cppreference.com/w/cpp/string/basic_string/to_string template -inline std::string toString(const T& value) // no dectype() noise here, leave that in details::toString_() -{ - // This breaks the Windows-CMake build on GitHub (when building as an "external" in NITRO) - // ... different compilers or compile-options? - //return details::toString_(value); - - return details::default_toString(value); -} - -// C++11 has a bunch of overloads, do the same. -// https://en.cppreference.com/w/cpp/string/basic_string/to_string -inline std::string toString(int value) -{ - return details::default_toString(value); -} -inline std::string toString(long value) -{ - return details::default_toString(value); -} -inline std::string toString(long long value) -{ - return details::default_toString(value); -} -inline std::string toString(unsigned value) -{ - return details::default_toString(value); -} -inline std::string toString(unsigned long value) +std::string toString(const T& value) { - return details::default_toString(value); -} -inline std::string toString(unsigned long long value) -{ - return details::default_toString(value); -} -inline std::string toString(float value) -{ - return details::default_toString(value); -} -inline std::string toString(double value) -{ - return details::default_toString(value); -} -inline std::string oString(long double value) -{ - return details::default_toString(value); + std::ostringstream buf; + buf.precision(getPrecision(value)); + buf << std::boolalpha << value; + return buf.str(); } -// C++ doesn't have these ... -// https://en.cppreference.com/w/cpp/string/basic_string/to_string -inline std::string toString(bool value) -{ - return details::default_toString(value); -} inline std::string toString(uint8_t value) { - return toString(static_cast(value)); + return toString(gsl::narrow(value)); } inline std::string toString(int8_t value) { - return toString(static_cast(value)); + return toString(gsl::narrow(value)); } inline std::string toString(coda_oss::byte value) { - return toString(static_cast(value)); + return toString(gsl::narrow(value)); } inline std::string toString(std::nullptr_t) @@ -174,38 +81,58 @@ inline std::string toString(std::nullptr_t) return ""; } -CODA_OSS_API std::string toString(const coda_oss::u8string&); inline std::string toString(const std::string& value) { return value; } -inline std::string toString(char value) +// can't be a template; `bool` overload above is a better match +inline std::string toString(std::string::const_pointer pStr) { - return std::string(1, value); + return toString(std::string(pStr)); +} + +// The resultant `std::string`s have "native" encoding (which is lost) depending +// on the platform: UTF-8 on Linux and Windows-1252 on Windows. +CODA_OSS_API std::string toString(const coda_oss::u8string&); +CODA_OSS_API std::string toString(const str::W1252string&); +CODA_OSS_API std::string toString(const std::wstring&); // input is UTF-16 or UTF-32 depending on the platform +// can't be a template; `bool` overload above is a better match +std::string toString(std::wstring::const_pointer) = delete; // only used in unittests +std::string toString(std::u16string::const_pointer) = delete; // only used in unittests +std::string toString(std::u32string::const_pointer) = delete; // only used in unittests + +CODA_OSS_API coda_oss::u8string u8FromString(const std::string&); // platform determines Windows-1252 or UTF-8 input + +inline std::ostream& operator<<(std::ostream& os, const coda_oss::u8string& s) +{ + os << toString(s); + return os; } -inline std::string toString(const char* pStr) + +inline std::string toString(char value) { - return std::string(pStr); + return std::string(1, value); } template inline std::string toString(const coda_oss::optional& value) { // TODO: handle empty/NULL optional? - return details::default_toString(value.value()); + return toString(value.value()); } -template +template inline std::string toString(const T& real, const T& imag) { - return details::default_toString(std::complex(real, imag)); + return toString(std::complex(real, imag)); } -template -inline std::string toString(const T* ptr) -{ - return details::default_toString(ptr); -} +CODA_OSS_API std::wstring toWString(const std::string&); // platform determines Windows-1252 or UTF-8 input and output encoding +CODA_OSS_API std::wstring toWString(const coda_oss::u8string&); // platform determines UTF-16 or UTF-32 output encoding +CODA_OSS_API std::wstring toWString(const str::W1252string&); // platform determines UTF-16 or UTF-32 output encoding + +CODA_OSS_API coda_oss::u8string u8FromWString(const std::wstring&); // platform determines UTF16 or UTF-32 input + template T toType(const std::string& s) diff --git a/modules/c++/str/include/str/EncodedString.h b/modules/c++/str/include/str/EncodedString.h deleted file mode 100644 index 75cf3d1289..0000000000 --- a/modules/c++/str/include/str/EncodedString.h +++ /dev/null @@ -1,171 +0,0 @@ -/* ========================================================================= - * This file is part of str-c++ - * ========================================================================= - * - * (C) Copyright 2004 - 2014, MDA Information Systems LLC - * (C) Copyright 2022, Maxar Technologies, Inc. - * - * str-c++ is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; If not, - * see . - * - */ - -#ifndef CODA_OSS_str_EncodedString_h_INCLLUDED_ -#define CODA_OSS_str_EncodedString_h_INCLLUDED_ -#pragma once - -#include -#include - -#include "config/Exports.h" - #include "str/EncodedStringView.h" - -/*! - * \file EncodedString.h - * \brief A String that can be either UTF-8 or "native" (Windows-1252). - * Unlike EncodedStringView, this hangs onto the underlying string. - * - * On Linux, there is good support for UTF-8, so a std::string encoded - * as UTF-8 will display the "foreign" characters properly. On Windows, - * the preferred way to do that is by using UTF-16 (WCHAR, std::wstring), - * but little (none?) of our existing code bases do that. (On Linux, std::wstring - * is typically UTF-32.) - * - */ - -namespace str -{ -class CODA_OSS_API EncodedString final -{ - std::string s_; - // We can do most everything through the view, so keep one around. - EncodedStringView v_; - - // No "public" operator=() for these; this class is mostly for storage and/or conversion, - // not extensive manipulation. Create a new instance and assign/move that. - -public: - EncodedString(); - ~EncodedString() = default; - EncodedString(const EncodedString&); - EncodedString& operator=(const EncodedString&); - EncodedString(EncodedString&&); - EncodedString& operator=(EncodedString&&); - - EncodedString(coda_oss::u8string::const_pointer, coda_oss::u8string::size_type); - explicit EncodedString(coda_oss::u8string::const_pointer); - explicit EncodedString(const coda_oss::u8string& s); - - EncodedString(str::W1252string::const_pointer, str::W1252string::size_type); - explicit EncodedString(str::W1252string::const_pointer); - explicit EncodedString(const str::W1252string&); - - EncodedString(std::string::const_pointer, std::string::size_type); - explicit EncodedString(std::string::const_pointer); // Assume platform native encoding: UTF-8 on Linux, Windows-1252 on Windows - explicit EncodedString(const std::string&); // Assume platform native encoding: UTF-8 on Linux, Windows-1252 on Windows - - explicit EncodedString(const std::u16string&); // converted to UTF-8 for storage - explicit EncodedString(const std::u32string&); // converted to UTF-8 for storage - explicit EncodedString(const std::wstring&); // Assume platform native encoding: UTF-32 on Linux, UTF-16 on Windows - explicit EncodedString(std::wstring::const_pointer); // can call wcslen() - - // create from a view - EncodedString(const EncodedStringView&); - EncodedString& operator=(const EncodedStringView&); - - // Input is encoded as specified on all platforms. - //static EncodedString fromUtf16(const std::wstring&); // not currently implemetned, no need - //static EncodedString fromUtf32(const std::wstring&); // not currently implemetned, no need - - // For "complex" operatations, use the view. - const EncodedStringView& view() const - { - return v_; - } - - // Regardless of what string we're looking at, return a string in platform - // native encoding: UTF-8 on Linux, Windows-1252 on Windows; this - // might result in string conversion. - std::string native() const // c.f. std::filesystem::path::native() - { - return view().native(); - } - - // Convert (perhaps) whatever we're looking at to UTF-8 - coda_oss::u8string u8string() const // c.f. std::filesystem::path::u8string() - { - return view().u8string(); - } - //std::string& toUtf8(std::string&) const; // std::string is encoded as UTF-8, always. - //str::W1252string w1252string() const; // c.f. std::filesystem::path::u8string() - - // Convert whatever we're looking at to UTF-16 or UTF-32 - std::u16string u16string() const // c.f. std::filesystem::path::u8string() - { - return view().u16string(); - } - std::u32string u32string() const // c.f. std::filesystem::path::u8string() - { - return view().u32string(); - } - std::wstring wstring() const // UTF-16 on Windows, UTF-32 on Linux - { - return view().wstring(); - } - - bool empty() const - { - return s_.empty(); - } - - struct details final - { - static const std::string& string(const EncodedString& es) // for unit-testing - { - return es.s_; - } - }; -}; - -inline bool operator==(const EncodedString& lhs, const EncodedStringView& rhs) -{ - return lhs.view() == rhs; -} -inline bool operator!=(const EncodedString& lhs, const EncodedStringView& rhs) -{ - return !(lhs == rhs); -} - -inline bool operator==(const EncodedString& lhs, const EncodedString& rhs) -{ - return lhs == rhs.view(); -} -inline bool operator!=(const EncodedString& lhs, const EncodedString& rhs) -{ - return !(lhs == rhs); -} - -inline std::string toString(const EncodedString& es) -{ - return es.native(); -} - -inline std::ostream& operator<<(std::ostream& os, const EncodedString& es) -{ - os << toString(es); - return os; -} - -} -#endif // CODA_OSS_str_EncodedString_h_INCLLUDED_ diff --git a/modules/c++/str/include/str/EncodedStringView.h b/modules/c++/str/include/str/EncodedStringView.h deleted file mode 100644 index 69bfbebb93..0000000000 --- a/modules/c++/str/include/str/EncodedStringView.h +++ /dev/null @@ -1,204 +0,0 @@ -/* ========================================================================= - * This file is part of str-c++ - * ========================================================================= - * - * (C) Copyright 2004 - 2014, MDA Information Systems LLC - * (C) Copyright 2022, Maxar Technologies, Inc. - * - * xml.lite-c++ is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; If not, - * see . - * - */ - -#ifndef CODA_OSS_str_EncodedStringView_h_INCLLUDED_ -#define CODA_OSS_str_EncodedStringView_h_INCLLUDED_ -#pragma once - -#include -#include - -#include "coda_oss/span.h" -#include "config/Exports.h" - #include "str/Encoding.h" - -/*! - * \file EncodedStringView.h - * \brief A read-only "view" onto a string. - * - * On Linux, there is good support for UTF-8, so a std::string encoded - * as UTF-8 will display the "foreign" characters properly. On Windows, - * the preferred way to do that is by using UTF-16 (WCHAR, std::wstring), - * but little (none?) of our existing code bases do that. (On Linux, std::wstring - * is typically UTF-32.) - * - */ - -namespace str -{ -class EncodedString; // forward -class CODA_OSS_API EncodedStringView final -{ - // Since we only support two encodings--UTF-8 (native on Linux) and Windows-1252 - // (native on Windows)--both of which are 8-bits, a simple "bool" flag will do. - coda_oss::span mString; - explicit EncodedStringView(coda_oss::span); - explicit EncodedStringView(coda_oss::span); - - #ifdef _WIN32 - static constexpr bool mNativeIsUtf8 = false; // Windows-1252 - #else - static constexpr bool mNativeIsUtf8 = true; // !_WIN32, assume Linux - #endif - bool mIsUtf8 = mNativeIsUtf8; - - // Want to create an EncodedString from EncodedStringView. The public interface - // doesn't expose "mIsUtf8" so there's (intentinally) no way for clients to know the encoding. - friend EncodedString; - - str::W1252string w1252string() const; // c.f. std::filesystem::path::u8string() - -public: - EncodedStringView() = default; - ~EncodedStringView() = default; - EncodedStringView(const EncodedStringView&) = default; - EncodedStringView& operator=(const EncodedStringView&) = default; - EncodedStringView(EncodedStringView&&) = default; - EncodedStringView& operator=(EncodedStringView&&) = default; - - // Need the const char* overloads to avoid creating temporary std::basic_string<> instances. - // Routnes always return a copy, never a reference, so there's no additional overhead - // with storing a raw pointer rather than a pointer to std::basic_string<>. - EncodedStringView(coda_oss::u8string::const_pointer, coda_oss::u8string::size_type); - explicit EncodedStringView(coda_oss::u8string::const_pointer); - explicit EncodedStringView(const coda_oss::u8string&); - - EncodedStringView(str::W1252string::const_pointer, str::W1252string::size_type); - explicit EncodedStringView(str::W1252string::const_pointer); - explicit EncodedStringView(const str::W1252string&); - - EncodedStringView(std::string::const_pointer, std::string::size_type); - explicit EncodedStringView(std::string::const_pointer); // Assume platform native encoding: UTF-8 on Linux, Windows-1252 on Windows - explicit EncodedStringView(const std::string&); // Assume platform native encoding: UTF-8 on Linux, Windows-1252 on Windows - - // Can't "view" UTF-16 or UTF-32 data; we assume we're looking at an 8-bit encoding, - // either UTF-8 or Windows-1252. - - // Regardless of what string we're looking at, return a string in platform - // native encoding: UTF-8 on Linux, Windows-1252 on Windows; this - // might result in string conversion. - std::string native() const; // c.f. std::filesystem::path::native() - - // Convert (perhaps) whatever we're looking at to UTF-8 - coda_oss::u8string u8string() const; // c.f. std::filesystem::path::u8string() - - // Convert whatever we're looking at to UTF-16 or UTF-32 - std::u16string u16string() const; // c.f. std::filesystem::path::u8string() - std::u32string u32string() const; // c.f. std::filesystem::path::u8string() - // This is especially useful on Windows because the default for characters - // is WCHAR (char* is converted to UTF-16). - std::wstring wstring() const; // UTF-16 on Windows, UTF-32 on Linux - - // These are for "advanced" use, most "normal" code should use the routines above. - std::string::const_pointer c_str() const noexcept - { - return mString.data(); - } - coda_oss::u8string::const_pointer c_u8str() const - { - return mIsUtf8 ? cast(c_str()) : nullptr; - } - str::W1252string::const_pointer c_w1252str() const - { - return mIsUtf8 ? nullptr : cast(c_str()); - } - size_t size() const noexcept - { - return mString.size(); - } - - // Input is encoded as specified on all platforms. - static EncodedStringView fromUtf8(const std::string& utf8) - { - return EncodedStringView(str::c_str(utf8), utf8.length()); - } - static EncodedStringView fromUtf8(std::string::const_pointer pUtf8) - { - return EncodedStringView(str::cast(pUtf8)); - } - static EncodedStringView fromWindows1252(const std::string& w1252) - { - return EncodedStringView(str::c_str(w1252), w1252.length()); - } - static EncodedStringView fromWindows1252(std::string::const_pointer pW1252) - { - return EncodedStringView(str::cast(pW1252)); - } - - std::string asUtf8() const; - std::string asWindows1252() const; - - bool operator_eq(const EncodedStringView&) const; - - struct details final - { - // Convert (perhaps) whatever we're looking at to Windows-1252 - // Intended for unit-testing; normal use is native(). - static str::W1252string w1252string(const EncodedStringView& v) - { - return v.w1252string(); - } - }; -}; - -inline bool operator==(const EncodedStringView& lhs, const EncodedStringView& rhs) -{ - return lhs.operator_eq(rhs); -} -inline bool operator!=(const EncodedStringView& lhs, const EncodedStringView& rhs) -{ - return !(lhs == rhs); -} - -// Since we'd really like to "traffic" in UTF-8 strings (at least when encoding is a consideration) -// make that comparision easy. -inline bool operator==(const EncodedStringView& lhs, const coda_oss::u8string& rhs) -{ - return lhs == EncodedStringView(rhs); -} -inline bool operator!=(const EncodedStringView& lhs, const coda_oss::u8string& rhs) -{ - return !(lhs == rhs); -} -inline bool operator==(const coda_oss::u8string& lhs, const EncodedStringView& rhs) -{ - return rhs == lhs; -} -inline bool operator!=(const coda_oss::u8string& lhs, const EncodedStringView& rhs) -{ - return !(lhs == rhs); -} - -inline std::string toString(const EncodedStringView& esv) -{ - return esv.native(); -} - -inline std::ostream& operator<<(std::ostream& os, const EncodedStringView& esv) -{ - os << toString(esv); - return os; -} - -} -#endif // CODA_OSS_str_EncodedStringView_h_INCLLUDED_ diff --git a/modules/c++/str/include/str/Encoding.h b/modules/c++/str/include/str/Encoding.h index 0147306ecf..9709ff5c6b 100644 --- a/modules/c++/str/include/str/Encoding.h +++ b/modules/c++/str/include/str/Encoding.h @@ -21,9 +21,9 @@ * */ +#pragma once #ifndef CODA_OSS_str_Encoding_h_INCLUDED_ #define CODA_OSS_str_Encoding_h_INCLUDED_ -#pragma once #include #include @@ -36,76 +36,88 @@ #include "coda_oss/string.h" #include "gsl/gsl.h" #include "config/Exports.h" +#include "str/W1252string.h" namespace str { template -inline TReturn cast(const TChar* s) +inline auto cast(const TChar* s) { // This is OK as UTF-8 can be stored in std::string // Note that casting between the string types will CRASH on some - // implementations. NO: reinterpret_cast(value) + // implementations. NO: reinterpret_cast(value). + // Instead, use c_str() or str(), below. const void* const pStr = s; auto const retval = static_cast(pStr); static_assert(sizeof(*retval) == sizeof(*s), "sizeof(*TReturn) != sizeof(*TChar)"); return retval; } template -inline typename TBasicStringT::const_pointer c_str(const std::basic_string& s) +inline auto c_str(const std::basic_string& s) { using return_t = typename TBasicStringT::const_pointer; return cast(s.c_str()); } - -// This is to make it difficult to get encodings mixed up; it's here (in a .h -// file) as we want to unit-test it. Windows1252_T for Windows-1252 characters -enum class Windows1252_T : unsigned char { }; // https://en.cppreference.com/w/cpp/language/types -using W1252string = std::basic_string; // https://en.cppreference.com/w/cpp/string - -////////////////////////////////////////////////////////////////////////////////////////// - -// We'll get strange errors, possibibly at link-time, if wchar_t is not a wchar_t type. -// MSVC has an option to control this: https://docs.microsoft.com/en-us/cpp/build/reference/zc-wchar-t-wchar-t-is-native-type -// https://en.cppreference.com/w/cpp/language/types -// "It has the same size, signedness, and alignment as one of the integer types, but is a distinct type." -static_assert(!std::is_same::value, "wchar_t should not be the same as uint16_t"); -static_assert(!std::is_same::value, "wchar_t should not be the same as int16_t"); -static_assert(!std::is_same::value, "wchar_t should not be the same as uint32_t"); -static_assert(!std::is_same::value, "wchar_t should not be the same as int32_t"); +template +inline TBasicStringT str(const std::basic_string& s) +{ + return TBasicStringT(c_str(s), s.length()); // avoid extra strlen() call +} +template +inline TBasicStringT make_string(TChar* p) +{ + using return_t = typename TBasicStringT::const_pointer; + return cast(p); // copy into RV +} // When the encoding is important, we want to "traffic" in coda_oss::u8string (UTF-8), not // str::W1252string (Windows-1252) or std::string (unknown). Make it easy to get those from other encodings. CODA_OSS_API coda_oss::u8string to_u8string(str::W1252string::const_pointer, size_t); -inline coda_oss::u8string to_u8string(coda_oss::u8string::const_pointer s, size_t sz) +CODA_OSS_API coda_oss::u8string to_u8string(std::u16string::const_pointer, size_t); +CODA_OSS_API coda_oss::u8string to_u8string(std::u32string::const_pointer, size_t); +inline coda_oss::u8string to_u8string(coda_oss::u8string::const_pointer p, size_t sz) { - return coda_oss::u8string(s, sz); + return coda_oss::u8string(p, sz); } -// UTF-16 is typically uses on Windows (where it is std::wstring::value_type); Linux prefers UTF-32. -CODA_OSS_API coda_oss::u8string to_u8string(std::u16string::const_pointer, size_t); - +// UTF-16 is the default on Windows. CODA_OSS_API std::u16string to_u16string(coda_oss::u8string::const_pointer, size_t); -std::u16string to_u16string(str::W1252string::const_pointer, size_t); +CODA_OSS_API std::u16string to_u16string(str::W1252string::const_pointer, size_t); // UTF-32 is convenient because each code-point is a single 32-bit integer. // It's typically std::wstring::value_type on Linux, but NOT Windows. -CODA_OSS_API coda_oss::u8string to_u8string(std::u32string::const_pointer, size_t); CODA_OSS_API std::u32string to_u32string(coda_oss::u8string::const_pointer, size_t); -std::u32string to_u32string(str::W1252string::const_pointer, size_t); +CODA_OSS_API std::u32string to_u32string(str::W1252string::const_pointer, size_t); -template -inline coda_oss::u8string to_u8string(const std::basic_string& s) +// Windows-1252 (almost the same as ISO8859-1) is the default single-byte encoding on Windows. +CODA_OSS_API str::W1252string to_w1252string(coda_oss::u8string::const_pointer p, size_t sz); +inline auto to_w1252string(const coda_oss::u8string& s) { - return to_u8string(s.c_str(), s.size()); + return to_w1252string(s.c_str(), s.length()); } -CODA_OSS_API str::W1252string to_w1252string(coda_oss::u8string::const_pointer p, size_t sz); +// These two routines are "dangerous" as they make it easy to convert +// a `char*` **already** in UTF-8 encoding to UTF-8; the result is garbage. +// Use u8FromString() or u8FromWString() which is a bit more explicit. +coda_oss::u8string to_u8string(std::string::const_pointer, size_t) = delete; +coda_oss::u8string to_u8string(std::wstring::const_pointer, size_t) = delete; -namespace details // YOU should use EncodedStringView +template +inline auto to_u8string(const std::basic_string& s) +{ + return to_u8string(s.c_str(), s.length()); +} +template +inline auto to_u16string(const std::basic_string& s) { -void w1252to8(str::W1252string::const_pointer p, size_t sz, std::string&); // encoding is lost -void utf8to1252(coda_oss::u8string::const_pointer p, size_t sz, std::string&); // encoding is lost + return to_u16string(s.c_str(), s.length()); } +template +inline auto to_u32string(const std::basic_string& s) +{ + return to_u32string(s.c_str(), s.length()); +} + } #endif // CODA_OSS_str_Encoding_h_INCLUDED_ diff --git a/modules/c++/str/include/str/Format.h b/modules/c++/str/include/str/Format.h index 2409438076..3ecb9e81a9 100644 --- a/modules/c++/str/include/str/Format.h +++ b/modules/c++/str/include/str/Format.h @@ -21,40 +21,148 @@ */ -#ifndef __STR_FORMAT_H__ -#define __STR_FORMAT_H__ +#pragma once +#ifndef CODA_OSS_str_Format_h_INCLUDED_ +#define CODA_OSS_str_Format_h_INCLUDED_ #include +#include +#include + #include #include "config/Exports.h" namespace str { + CODA_OSS_API std::string Format(const char* format, ...); +} /*! * \param format The format * \param ... Any printf like thing */ -CODA_OSS_API std::string format(const char* format, ...); +//CODA_OSS_API std::string format(const char* format, ...); -struct CODA_OSS_API Format final +inline auto FmtX(const char* format) { - Format(const char* format, ...); + return str::Format(format); +} - operator std::string() const - { - return mString; - } +inline auto FmtX(const char* format, const char* pStr) +{ + return str::Format(format, pStr); +} +inline auto FmtX(const char* format, const std::string& s) +{ + return str::Format(format, s.c_str()); +} +inline auto FmtX(const char* format, int i) +{ + return str::Format(format, i); +} +inline auto FmtX(const char* format, uint32_t i) +{ + return str::Format(format, i); +} +inline auto FmtX(const char* format, ptrdiff_t l) +{ + return str::Format(format, l); +} +inline auto FmtX(const char* format, size_t ul) +{ + return str::Format(format, ul); +} +inline auto FmtX(const char* format, float f) +{ + return str::Format(format, f); +} +inline auto FmtX(const char* format, double d) +{ + return str::Format(format, d); +} - operator std::string& () noexcept - { - return mString; - } +inline auto FmtX(const char* format, char ch, const char* pStr) +{ + return str::Format(format, ch, pStr); +} +inline auto FmtX(const char* format, char ch, const std::string& s) +{ + return str::Format(format, ch, s.c_str()); +} +inline auto FmtX(const char* format, const std::string& s1, const std::string& s2) +{ + return str::Format(format, s1.c_str(), s2.c_str()); +} +inline auto FmtX(const char* format, const std::string& s, size_t ul) +{ + return str::Format(format, s.c_str(), ul); +} +inline auto FmtX(const char* format, char ch1, char ch2) +{ + return str::Format(format, ch1, ch2); +} +inline auto FmtX(const char* format, long l1, long l2) +{ + return str::Format(format, l1, l2); +} +inline auto FmtX(const char* format, size_t ul1, size_t ul2) +{ + return str::Format(format, ul1, ul2); +} +inline auto FmtX(const char* format, double d1, double d2) +{ + return str::Format(format, d1, d2); +} +inline auto FmtX(const char* format, int i, const char* pStr) +{ + return str::Format(format, i, pStr); +} +inline auto FmtX(const char* fmt, int i, const std::string& s) +{ + return FmtX(fmt, i, s.c_str()); +} -protected: - std::string mString; -}; +inline auto FmtX(const char* format, char ch1, char ch2, const std::string& s) +{ + return str::Format(format, ch1, ch2, s.c_str()); +} +inline auto FmtX(const char* format, int i1, int i2, unsigned long ul) +{ + return str::Format(format, i1, i2, ul); +} +inline auto FmtX(const char* format, int i1, int i2, int i3) +{ + return str::Format(format, i1, i2, i3); +} +inline auto FmtX(const char* format, float f1, float f2, float f3) +{ + return str::Format(format, f1, f2, f3); +} +inline std::string FmtX(const char* format, const std::string& s1, int i2, int i3) +{ + return str::Format(format, s1.c_str(), i2, i3); +} +inline auto FmtX(const char* format, const std::string& s1, const std::string& s2, uint32_t ui) +{ + return str::Format(format, s1.c_str(), s2.c_str(), ui); +} +inline auto FmtX(const char* format, const std::string& s1, const std::string& s2, const std::string& s3) +{ + return str::Format(format, s1.c_str(), s2.c_str(), s3.c_str()); +} +inline auto FmtX(const char* format, int i1, int i2, int i3, int i4) +{ + return str::Format(format, i1, i2, i3, i4); +} +inline auto FmtX(const char* format, const char* pStr1, const std::string& s2, const char* pStr3, const std::string& s4) +{ + return str::Format(format, pStr1, s2.c_str(), pStr3, s4.c_str()); } -#endif +inline auto FmtX(const char* format, const std::string& s1, int i2, const std::string& s3, int i4) +{ + return str::Format(format, s1.c_str(), i2, s3.c_str(), i4); +} + +#endif // CODA_OSS_str_Format_h_INCLUDED_ diff --git a/modules/c++/str/include/str/W1252string.h b/modules/c++/str/include/str/W1252string.h new file mode 100644 index 0000000000..9459227002 --- /dev/null +++ b/modules/c++/str/include/str/W1252string.h @@ -0,0 +1,55 @@ +/* ========================================================================= + * This file is part of str-c++ + * ========================================================================= + * + * (C) Copyright 2004 - 2014, MDA Information Systems LLC + * (C) Copyright 2020, 2021, 2022, 2023, Maxar Technologies, Inc. + * + * str-c++ is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; If not, + * see . + * + */ + +#pragma once +#ifndef CODA_OSS_str_W1252string_h_INCLUDED_ +#define CODA_OSS_str_W1252string_h_INCLUDED_ + +#include +#include + +#include +#include + +namespace str +{ + +// This is to make it difficult to get encodings mixed up; it's here (in a .h +// file) as we want to unit-test it. Windows1252_T for Windows-1252 characters +enum class Windows1252_T : unsigned char { }; // https://en.cppreference.com/w/cpp/language/types +using W1252string = std::basic_string; // https://en.cppreference.com/w/cpp/string + +////////////////////////////////////////////////////////////////////////////////////////// + +// We'll get strange errors, possibibly at link-time, if wchar_t is not a wchar_t type. +// MSVC has an option to control this: https://docs.microsoft.com/en-us/cpp/build/reference/zc-wchar-t-wchar-t-is-native-type +// https://en.cppreference.com/w/cpp/language/types +// "It has the same size, signedness, and alignment as one of the integer types, but is a distinct type." +static_assert(!std::is_same::value, "wchar_t should not be the same as uint16_t"); +static_assert(!std::is_same::value, "wchar_t should not be the same as int16_t"); +static_assert(!std::is_same::value, "wchar_t should not be the same as uint32_t"); +static_assert(!std::is_same::value, "wchar_t should not be the same as int32_t"); + +} + +#endif // CODA_OSS_str_W1252string_h_INCLUDED_ diff --git a/modules/c++/str/source/EncodedString.cpp b/modules/c++/str/source/EncodedString.cpp deleted file mode 100644 index 82fe693af6..0000000000 --- a/modules/c++/str/source/EncodedString.cpp +++ /dev/null @@ -1,145 +0,0 @@ -/* ========================================================================= - * This file is part of str-c++ - * ========================================================================= - * - * (C) Copyright 2004 - 2014, MDA Information Systems LLC - * (C) Copyright 2022, Maxar Technologies, Inc. - * - * str-c++ is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; If not, - * see . - * - */ -#include "str/EncodedString.h" - -#include - -template -inline void assign_(const CharT* p, size_t c, std::string& s_, str::EncodedStringView& v_) -{ - using string_t = std::basic_string; - s_.assign(str::cast(p), c); // copy - v_ = str::EncodedStringView(str::c_str(s_), c); -} -template -inline void assign_(const CharT* p, std::string& s_, str::EncodedStringView& v_) -{ - using string_t = std::basic_string; - s_ = str::cast(p); // copy - v_ = str::EncodedStringView(str::c_str(s_), s_.length()); -} - -static str::EncodedStringView make_EncodedStringView(const std::string& s, bool isUtf8) -{ - if (isUtf8) - { - return str::EncodedStringView(str::c_str(s), s.length()); - } - - // not UTF-8, assume Windows-1252 - return str::EncodedStringView(str::c_str(s), s.length()); -} - -str::EncodedString::EncodedString(std::string::const_pointer p, std::string::size_type c) : s_(p, c) /*copy*/, v_(s_) { } -str::EncodedString::EncodedString(std::string::const_pointer p) : s_(p) /*copy*/, v_(s_) { } -str::EncodedString::EncodedString(const std::string& s) : s_(s) /*copy*/, v_(s_) { } - -str::EncodedString::EncodedString() : EncodedString(""){ } - -str::EncodedString::EncodedString(coda_oss::u8string::const_pointer p, coda_oss::u8string::size_type c) -{ - assign_(p, c, s_, v_); -} -str::EncodedString::EncodedString(coda_oss::u8string::const_pointer p) -{ - assign_(p, s_, v_); -} -str::EncodedString::EncodedString(const coda_oss::u8string& s) : EncodedString(s.c_str(), s.size()) { } - -str::EncodedString::EncodedString(str::W1252string::const_pointer p, str::W1252string::size_type c) -{ - assign_(p, c, s_, v_); -} -str::EncodedString::EncodedString(str::W1252string::const_pointer p) -{ - assign_(p, s_, v_); -} -str::EncodedString::EncodedString(const str::W1252string& s) : EncodedString(s.c_str(), s.size()) { } - -str::EncodedString::EncodedString(const std::u16string& s) : EncodedString(to_u8string(s)) { } -str::EncodedString::EncodedString(const std::u32string& s) : EncodedString(to_u8string(s)) { } - -static inline coda_oss::u8string to_u8string_(std::wstring::const_pointer p_, size_t sz) // std::wstring is UTF-16 or UTF-32 depending on platform -{ - const auto p = - // Need to use #ifdef's because str::cast() checks to be sure the sizes are correct. - #if _WIN32 - str::cast(p_); // std::wstring is UTF-16 on Windows - #endif - #if !_WIN32 - str::cast(p_); // std::wstring is UTF-32 on Linux - #endif - return str::to_u8string(p, sz); -} - -str::EncodedString::EncodedString(std::wstring::const_pointer s) : EncodedString(to_u8string_(s, wcslen(s))) { } -str::EncodedString::EncodedString(const std::wstring& s) : EncodedString(to_u8string_(s.c_str(), s.size())) { } - -// create from a view -str::EncodedString& str::EncodedString::operator=(const EncodedStringView& v) -{ - if (v.mIsUtf8) - { - assign_(v.c_u8str(), v.size(), s_, v_); - } - else - { - // not UTF-8, assume Windows-1252 - auto p = cast(v.c_str()); - assign_(p, v.size(), s_, v_); - } - - return *this; -} -str::EncodedString::EncodedString(const EncodedStringView& v) -{ - *this = v; -} - -str::EncodedString& str::EncodedString::operator=(const EncodedString& es) -{ - if (this != &es) - { - this->s_ = es.s_; // copy - this->v_ = make_EncodedStringView(s_, es.view().mIsUtf8); - } - return *this; -} -str::EncodedString::EncodedString(const EncodedString& es) -{ - *this = es; -} - -str::EncodedString& str::EncodedString::operator=(EncodedString&& es) -{ - if (this != &es) - { - this->s_ = std::move(es.s_); - this->v_ = make_EncodedStringView(s_, es.view().mIsUtf8); - } - return *this; -} -str::EncodedString::EncodedString(EncodedString&& es) -{ - *this = std::move(es); -} diff --git a/modules/c++/str/source/EncodedStringView.cpp b/modules/c++/str/source/EncodedStringView.cpp deleted file mode 100644 index e9d83df64b..0000000000 --- a/modules/c++/str/source/EncodedStringView.cpp +++ /dev/null @@ -1,218 +0,0 @@ -/* ========================================================================= - * This file is part of str-c++ - * ========================================================================= - * - * (C) Copyright 2004 - 2014, MDA Information Systems LLC - * (C) Copyright 2022, Maxar Technologies, Inc. - * - * str-c++ is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; If not, - * see . - * - */ - -#include "str/EncodedStringView.h" - -#include -#include - -#include -#include "coda_oss/span.h" - -#include "str/Convert.h" -#include "str/Encoding.h" -#include "str/EncodedString.h" - -enum class PlatformType -{ - Windows, - Linux, - // MacOS -}; - -#ifdef _WIN32 -static auto Platform = PlatformType::Windows; -#else -static auto Platform = PlatformType::Linux; -#endif - -inline std::u16string to_u16string(std::string::const_pointer s, size_t sz, bool is_utf8 /* is 's' UTF-8? */) -{ - if (is_utf8) - { - return str::to_u16string(str::cast(s), sz); - } - return str::to_u16string(str::cast(s), sz); -} - -static std::string to_native(coda_oss::u8string::const_pointer p, size_t sz) -{ - if (Platform == PlatformType::Windows) - { - std::string retval; - str::details::utf8to1252(p, sz, retval); - return retval; - } - if (Platform == PlatformType::Linux) - { - return std::string(str::cast(p), sz); - } - throw std::logic_error("Unknown platform."); -} - -static std::string to_native(str::W1252string::const_pointer p, size_t sz) -{ - if (Platform == PlatformType::Windows) - { - return std::string(str::cast(p), sz); - } - if (Platform == PlatformType::Linux) - { - std::string retval; - str::details::w1252to8(p, sz, retval); - return retval; - } - throw std::logic_error("Unknown platform."); -} - -template -inline auto make_span(const Char8T* s, size_t c) -{ - static_assert(sizeof(Char8T) == sizeof(char), "sizeof(Char8T) != sizeof(char)"); - return coda_oss::span(s, c); -} -template -inline auto make_span(const Char8T* s) -{ - auto const s_ = str::cast(s); - return make_span(s, strlen(s_)); -} -template -inline auto make_span(const std::basic_string& s) -{ - assert(strlen(str::c_str(s)) == s.length()); - return make_span(s.c_str(), s.length()); -} -template -inline auto make_span(coda_oss::span s) -{ - auto const s_ = str::cast(s.data()); - assert(strlen(s_) == s.size()); - return coda_oss::span(s_, s.size()); -} - -str::EncodedStringView::EncodedStringView(std::string::const_pointer p, std::string::size_type c) : mString(make_span(p, c)) { } -str::EncodedStringView::EncodedStringView(std::string::const_pointer p) : mString(make_span(p)) { } -str::EncodedStringView::EncodedStringView(const std::string& s) : mString(make_span(s)){ } - -str::EncodedStringView::EncodedStringView(coda_oss::span s) : mString(make_span(s)), mIsUtf8(true) {} -str::EncodedStringView::EncodedStringView(coda_oss::u8string::const_pointer p, coda_oss::u8string::size_type c) : EncodedStringView(make_span(p, c)) { } -str::EncodedStringView::EncodedStringView(coda_oss::u8string::const_pointer p) : EncodedStringView(make_span(p)) { } -str::EncodedStringView::EncodedStringView(const coda_oss::u8string& s) : EncodedStringView(make_span(s)) { } - -str::EncodedStringView::EncodedStringView(coda_oss::span s) : mString(make_span(s)), mIsUtf8(false) {} -str::EncodedStringView::EncodedStringView(str::W1252string::const_pointer p, str::W1252string::size_type c) : EncodedStringView(make_span(p, c)) { } -str::EncodedStringView::EncodedStringView(str::W1252string::const_pointer p) : EncodedStringView(make_span(p)) { } -str::EncodedStringView::EncodedStringView(const str::W1252string& s) : EncodedStringView(make_span(s)) { } - -std::string str::EncodedStringView::native() const -{ - const auto s = mString.data(); - const auto sz = mString.size(); - return mIsUtf8 ? to_native(str::cast(s), sz) - : to_native(str::cast(s), sz); -} - -coda_oss::u8string str::EncodedStringView::u8string() const -{ - return mIsUtf8 ? - str::cast(mString.data()) : // copy - str::to_u8string(str::cast(mString.data()), mString.size()); -} -std::string str::EncodedStringView::asUtf8() const -{ - const auto result = u8string(); - return str::c_str(result); // cast & copy -} - -std::u16string str::EncodedStringView::u16string() const -{ - return ::to_u16string(mString.data(), mString.size(), mIsUtf8); -} - -inline std::u32string to_u32string(std::string::const_pointer s, size_t sz, bool is_utf8 /* is 's' UTF-8? */) -{ - if (is_utf8) - { - return str::to_u32string(str::cast(s), sz); - } - return str::to_u32string(str::cast(s), sz); -} -std::u32string str::EncodedStringView::u32string() const -{ - return ::to_u32string(mString.data(), mString.size(), mIsUtf8); -} - -std::wstring str::EncodedStringView::wstring() const // UTF-16 on Windows, UTF-32 on Linux -{ - const auto p = mString.data(); - const auto sz = mString.size(); - const auto s = - // Need to use #ifdef's because str::cast() checks to be sure the sizes are correct. - #ifdef _WIN32 - ::to_u16string(p, sz, mIsUtf8); // std::wstring is UTF-16 on Windows - #else - ::to_u32string(p, sz, mIsUtf8); // std::wstring is UTF-32 on Linux - #endif - return str::c_str(s); // copy -} - -str::W1252string str::EncodedStringView::w1252string() const -{ - return mIsUtf8 ? - str::to_w1252string(str::cast(mString.data()), mString.size()) : - str::cast(mString.data()); // copy -} -std::string str::EncodedStringView::asWindows1252() const -{ - const auto result = w1252string(); - return str::c_str(result); // cast & copy -} - -bool str::EncodedStringView::operator_eq(const EncodedStringView& rhs) const -{ - auto& lhs = *this; - - // if encoding is the same, strcmp() will work - if (lhs.mIsUtf8 == rhs.mIsUtf8) // both are UTF-8 or both are Windows-1252 - { - // But we can avoid that call if the pointers are the same - const auto pLhs = lhs.mString.data(); - const auto pRhs = rhs.mString.data(); - if ((pLhs == pRhs) && (rhs.mString.size() == rhs.mString.size())) - { - return true; - } - return strcmp(pLhs, pRhs) == 0; - } - - // LHS and RHS have different encodings, but one must be UTF-8 - assert((lhs.mIsUtf8 && !rhs.mIsUtf8) || (!lhs.mIsUtf8 && rhs.mIsUtf8)); // should have used strcmp(), above - auto& utf8 = lhs.mIsUtf8 ? lhs : rhs; - auto& w1252 = !lhs.mIsUtf8 ? lhs : rhs; - - // If UTF-8 is native on this platform, convert to UTF-8; otherwise do a native comparision - return mNativeIsUtf8 ? utf8.c_u8str() == w1252.u8string() : utf8.native() == w1252.mString.data(); -} - - diff --git a/modules/c++/str/source/Encoding.cpp b/modules/c++/str/source/Encoding.cpp index 46ce45b738..98795b259a 100644 --- a/modules/c++/str/source/Encoding.cpp +++ b/modules/c++/str/source/Encoding.cpp @@ -20,6 +20,7 @@ * see . * */ +#include "str/Encoding.h" #include #include @@ -36,12 +37,8 @@ #include "gsl/gsl.h" #include "config/compiler_extensions.h" -#include "str/Encoding.h" #include "str/Manip.h" #include "str/Convert.h" -#include "str/EncodedStringView.h" - - CODA_OSS_disable_warning_push #if _MSC_VER #pragma warning(disable: 26818) // Switch statement does not cover all cases. Consider adding a '...' label (es.79). @@ -51,6 +48,21 @@ CODA_OSS_disable_warning(-Wshadow) #include "str/utf8.h" CODA_OSS_disable_warning_pop +//// "sys" depends on "str" so can't use sys::PlatformType +//enum class PlatformType +//{ +// Windows, +// Linux, +// // MacOS +//}; +#if _WIN32 +//static constexpr auto Platform = PlatformType::Windows; +#elif defined(_POSIX_C_SOURCE) +//static constexpr auto Platform = PlatformType::Linux; +#else +#error "Unknown platform" +#endif + // Need to look up characters from \x80 (EURO SIGN) to \x9F (LATIN CAPITAL LETTER Y WITH DIAERESIS) // in a map: http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT inline coda_oss::u8string utf8_(uint32_t i) @@ -123,7 +135,7 @@ static auto Windows1252_to_u8string() inline void append(std::string& result, const coda_oss::u8string& utf8) { - result += str::c_str(utf8); + result += str::str(utf8); } inline void append(coda_oss::u8string& result, const coda_oss::u8string& utf8) { @@ -189,42 +201,31 @@ static void fromWindows1252_(str::W1252string::value_type ch, std::basic_string< } } template -void windows1252_to_string_(str::W1252string::const_pointer p, size_t sz, std::basic_string& result) +inline void w1252_to_string(str::W1252string::const_pointer p, size_t sz, std::basic_string& result) { for (size_t i = 0; i < sz; i++) { fromWindows1252_(p[i], result); } } -template -inline TReturn to_Tstring(str::W1252string::const_pointer p, size_t sz) -{ - TReturn retval; - windows1252_to_string_(p, sz, retval); - return retval; -} - -inline void windows1252_to_string(str::W1252string::const_pointer p, size_t sz, coda_oss::u8string& result) -{ - windows1252_to_string_(p, sz, result); -} -void str::details::w1252to8(str::W1252string::const_pointer p, size_t sz, std::string& result) +template +inline void w1252to8(str::W1252string::const_pointer p, size_t sz, std::basic_string& result) { - result = to_Tstring(p, sz); + w1252_to_string(p, sz, result); } -std::u16string str::to_u16string(str::W1252string::const_pointer p, size_t sz) +inline void w1252to16(str::W1252string::const_pointer p, size_t sz, std::u16string& result) { - auto retval = to_Tstring(p, sz); + w1252_to_string(p, sz, result); + #if defined(_WIN32) && (!defined(_NDEBUG) || defined(DEBUG)) const _bstr_t bstr(std::string(str::cast(p), sz).c_str()); // no _bstr_t ctor taking sz const std::wstring wstr(static_cast(bstr)); - assert(retval == str::cast(wstr.c_str())); + assert(result == str::str(wstr)); #endif - return retval; } -std::u32string str::to_u32string(str::W1252string::const_pointer p, size_t sz) +inline void w1252to32(str::W1252string::const_pointer p, size_t sz, std::u32string& result) { - return to_Tstring(p, sz); + w1252_to_string(p, sz, result); } template @@ -258,7 +259,7 @@ static void get_next_utf8_byte(coda_oss::u8string::const_pointer p, size_t sz, template static void utf8to1252(coda_oss::u8string::const_pointer p, size_t sz, std::basic_string& result, bool strict=false) { - using value_type = typename std::basic_string::value_type; + using value_type = TChar; for (size_t i = 0; i < sz; i++) { const auto b1 = static_cast(p[i]); @@ -301,25 +302,62 @@ static void utf8to1252(coda_oss::u8string::const_pointer p, size_t sz, std::basi // _bstr_t preserves these values if (utf8.length() == 2) { - result += static_cast(utf8[1]); + result += static_cast(utf8[1]); } else { assert("UTF-8 sequence can't be converted to Windows-1252." && 0); - result += static_cast(0x7F); // + result += static_cast(0x7F); // } } } } -void str::details::utf8to1252(coda_oss::u8string::const_pointer p, size_t sz, std::string& result) + +static auto u16_to_Windows1252() { - ::utf8to1252(p, sz, result); + // Find the corresponding UTF-16 value for every Windows-1252 input; + // obviously, most UTF-16 values can't be converted. Skip the first half + // as they're the same for ASCII. + std::map retval; + for (uint16_t i = 0x0080; i <= 0x00ff; i++) // **not** `uint8_t` to avoid wrap-around + { + const auto ch = static_cast(i); + const auto u16 = str::to_u16string(&ch, 1); + assert(u16.length() == 1); + retval[u16[0]] = ch; + } + return retval; } -str::W1252string str::to_w1252string(coda_oss::u8string::const_pointer p, size_t sz) +static inline void utf16to1252(std::u16string::const_pointer p, size_t sz, std::string& result, bool strict=false) { - str::W1252string retval; - utf8to1252(p, sz, retval); - return retval; + using value_type = std::string::value_type; + + static const auto map = u16_to_Windows1252(); + for (size_t i = 0; i < sz; i++) + { + const auto ch = p[i]; + + if (ch < 0x0080) // ASCII + { + result += gsl::narrow(ch); + continue; + } + + const auto it = map.find(ch); + if (it != map.end()) + { + result += static_cast(it->second); + } + else if (strict) + { + throw std::invalid_argument("UTF-16 sequence can't be converted to Windows-1252."); + } + else + { + assert("UTF-16 sequence can't be converted to Windows-1252." && 0); + result += static_cast(0x7F); // + } + } } struct back_inserter final @@ -336,27 +374,134 @@ struct back_inserter final back_inserter operator++(int) noexcept { return *this; } }; -coda_oss::u8string str::to_u8string(std::u16string::const_pointer p, size_t sz) +template +inline auto to_uXXstring(const std::basic_string& s) +{ + const auto p = str::c_str(s); + #if _WIN32 + return str::to_u16string(p, s.length()); // std::wstring is UTF-16 on Windows + #else + return str::to_u32string(p, s.length()); // assume std::wstring is UTF-32 everywhere except Windows + #endif +} +template +static std::wstring toWString_(const std::basic_string& s, bool is_utf8) { - // http://utfcpp.sourceforge.net/#introsample - /* - // Convert it to utf-16 - vector utf16line; - utf8::utf8to16(line.begin(), end_it, back_inserter(utf16line)); + + const auto result = is_utf8 ? to_uXXstring(s) + : to_uXXstring(s); + return str::str(result); +} + +/***********************************************************************************/ - // And back to utf-8 - string utf8line; - utf8::utf16to8(utf16line.begin(), utf16line.end(), back_inserter(utf8line)); - */ +std::string str::toString(const coda_oss::u8string& s) +{ + #if _WIN32 + std::string retval; + utf8to1252(s.c_str(), s.length(), retval); + return retval; + #else + return str::str(s); + #endif +} + +std::string str::toString(const str::W1252string& s) +{ + #if _WIN32 + return str::str(s); + #else + std::string retval; + w1252to8(s.c_str(), s.length(), retval); + return retval; + #endif +} + +inline auto c_str_(const std::wstring& s) +{ + #if _WIN32 + return str::c_str(s); // std::wstring is UTF-16 on Windows + #else + return str::c_str(s); // assume std::wstring is UTF-32 on any non-Windows platform + #endif +} + +std::string str::toString(const std::wstring& s) +{ + const auto p = c_str_(s); + const auto sz = s.length(); + + std::string retval; + #if _WIN32 + utf16to1252(p, sz, retval); // UTF16 -> Windows-1252 on Windows. + #else + utf8::utf32to8(p, p + sz, std::back_inserter(retval)); // UTF32 -> UTF-8 everywhere else. + #endif + return retval; +} + +std::wstring str::toWString(const std::string& s) +{ + #if _WIN32 + return toWString_(s, false /*is_utf8*/); // Input is Windows-1252 on Windows + #else + return toWString_(s, true /*is_utf8*/); // Input is UTF-8 everywhere except Windows + #endif +} +std::wstring str::toWString(const coda_oss::u8string& s) +{ + return toWString_(s, true /*is_utf8*/); +} +std::wstring str::toWString(const str::W1252string& s) +{ + return toWString_(s, false /*is_utf8*/); +} + +inline auto c_str_(const std::string& s) +{ + #if _WIN32 + return str::c_str(s); // std::string is Windows-1252 on Windows + #else + return str::c_str(s); // assume std::string is UTF-8 on any non-Windows platform + #endif +} +coda_oss::u8string str::u8FromString(const std::string& s) +{ + return str::to_u8string(c_str_(s), s.length()); +} + +coda_oss::u8string str::u8FromWString(const std::wstring& s) +{ + return str::to_u8string(c_str_(s), s.length()); +} + +/***********************************************************************************/ + +std::u16string str::to_u16string(str::W1252string::const_pointer p, size_t sz) +{ + std::u16string retval; + w1252to16(p, sz, retval); + return retval; +} +std::u32string str::to_u32string(str::W1252string::const_pointer p, size_t sz) +{ + std::u32string retval; + w1252to32(p, sz, retval); + return retval; +} + +str::W1252string str::to_w1252string(coda_oss::u8string::const_pointer p, size_t sz) +{ + str::W1252string retval; + utf8to1252(p, sz, retval); + return retval; +} + +coda_oss::u8string str::to_u8string(std::u16string::const_pointer p, size_t sz) +{ coda_oss::u8string retval; utf8::utf16to8(p, p + sz, back_inserter(retval)); return retval; - - /* - std::vector utf16line; - auto begin = c_str(result); - utf8::utf8to16(begin, begin+result.size(), std::back_inserter(utf16line)); - */ } std::u16string str::to_u16string(coda_oss::u8string::const_pointer p_, size_t sz) @@ -385,11 +530,6 @@ coda_oss::u8string str::to_u8string(std::u32string::const_pointer p, size_t sz) coda_oss::u8string str::to_u8string(W1252string::const_pointer p, size_t sz) { coda_oss::u8string retval; - ::windows1252_to_string(p, sz, retval); + w1252to8(p, sz, retval); return retval; } - -std::string str::toString(const coda_oss::u8string& utf8) -{ - return str::EncodedStringView(utf8).native(); -} diff --git a/modules/c++/str/source/Format.cpp b/modules/c++/str/source/Format.cpp index b89c974e8f..8fc1205c70 100644 --- a/modules/c++/str/source/Format.cpp +++ b/modules/c++/str/source/Format.cpp @@ -51,20 +51,11 @@ inline void va_end_(va_list& args) CODA_OSS_disable_warning_pop } -std::string str::format(const char *format, ...) -{ - va_list args; - va_start(args, format); - auto retval = vformat(format, args); - va_end_(args); - return retval; -} - -str::Format::Format(const char* format, ...) +std::string str::Format(const char* format, ...) { va_list args; va_start(args, format); auto result = vformat(format, args); va_end_(args); - mString = std::move(result); + return result; } diff --git a/modules/c++/str/tests/VersionTest.cpp b/modules/c++/str/tests/VersionTest.cpp index a3e5503682..6b2c00b6d4 100644 --- a/modules/c++/str/tests/VersionTest.cpp +++ b/modules/c++/str/tests/VersionTest.cpp @@ -23,11 +23,7 @@ #include #include -#if defined(TEST_FUNTOR) -# define FORMAT_FUNC (std::string)str::Format -#else -# define FORMAT_FUNC str::format -#endif +#define FORMAT_FUNC str::Format int main() { diff --git a/modules/c++/str/unittests/test_base_convert.cpp b/modules/c++/str/unittests/test_base_convert.cpp index 51f3f55557..785cc327c0 100644 --- a/modules/c++/str/unittests/test_base_convert.cpp +++ b/modules/c++/str/unittests/test_base_convert.cpp @@ -30,9 +30,8 @@ #include #endif +#include #include -#include -#include #include #include #include @@ -76,12 +75,6 @@ TEST_CASE(testCharToString) TEST_ASSERT_EQ(str::toString(static_cast(65)), "A"); } -static std::u8string fromWindows1252(const std::string& s) -{ - // s is Windows-1252 on ALL platforms - return str::EncodedStringView::fromWindows1252(s).u8string(); -} - template static constexpr std::u8string::value_type cast8(T ch) { @@ -94,11 +87,16 @@ static inline constexpr std::u32string::value_type U(TChar ch) return static_cast(ch); } +static auto from_windows1252(const std::string& w1252) +{ + return to_u8string(str::c_str(w1252), w1252.length()); +} + TEST_CASE(test_string_to_u8string_ascii) { { const std::string input = "|\x00"; // ASCII, "|" - const auto actual = fromWindows1252(input); + const auto actual = from_windows1252(input); const std::u8string expected{cast8('|')}; // '\x00' is the end of the string in C/C++ TEST_ASSERT_EQ(actual, expected); } @@ -107,7 +105,7 @@ TEST_CASE(test_string_to_u8string_ascii) for (uint8_t ch = start_of_heading; ch <= delete_character; ch++) // ASCII { const std::string input { '|', static_cast(ch), '|'}; - const auto actual = fromWindows1252(input); + const auto actual = from_windows1252(input); const std::u8string expected8{cast8('|'), cast8(ch), cast8('|')}; TEST_ASSERT_EQ(actual, expected8); const std::u32string expected{U'|', U(ch), U'|'}; @@ -117,8 +115,8 @@ TEST_CASE(test_string_to_u8string_ascii) static void test_string_to_u8string_windows_1252_(const std::string& testName, const std::string& input_) { - const str::W1252string input(str::c_str(input_)); - const auto actual = to_u8string(input); + const auto input(str::str(input_)); + const auto actual = str::to_u8string(input); // No "expected" to test against as the UTF-8 values for these Windows-1252 characters // are mapped one-by-one. However, we can test that UTF-8 to Windows-1252 @@ -126,22 +124,13 @@ static void test_string_to_u8string_windows_1252_(const std::string& testName, c // for a single code-point. const auto w1252 = str::to_w1252string(actual.data(), actual.size()); TEST_ASSERT(input == w1252); - - // Can't compare the values with == because TEST_ASSERT_EQ() - // wants to do toString() and that doesn't work on Linux as the encoding - // is wrong (see above). - //const std::string w1252_ = str::c_str(w1252); - //TEST_ASSERT_EQ(input_, w1252_); - const str::EncodedStringView inputView(input); - const str::EncodedStringView w1252View(w1252); - TEST_ASSERT_EQ(inputView, w1252View); } TEST_CASE(test_string_to_u8string_windows_1252) { // Windows-1252 only characters must be mapped to UTF-8 { const std::string input = "|\x80|"; // Windows-1252, "|€|" - const auto actual = fromWindows1252(input); + const auto actual = from_windows1252(input); const std::u8string expected8{cast8('|'), cast8('\xE2'), cast8('\x82'), cast8('\xAC'), cast8('|')}; // UTF-8, "|€|" TEST_ASSERT_EQ(actual, expected8); const std::u32string expected{U"|\u20AC|"}; // UTF-32, "|€|" @@ -149,7 +138,7 @@ TEST_CASE(test_string_to_u8string_windows_1252) } { const std::string input = "|\x9F|"; // Windows-1252, "|Ÿ|" - const auto actual = fromWindows1252(input); + const auto actual = from_windows1252(input); const std::u8string expected8{cast8('|'), cast8('\xC5'), cast8('\xB8'), cast8('|')}; // UTF-8, "|Ÿ|" TEST_ASSERT_EQ(actual, expected8); const std::u32string expected{U"|\u0178|"}; // UTF-32, "|Ÿ|" @@ -160,13 +149,13 @@ TEST_CASE(test_string_to_u8string_windows_1252) for (const auto& ch : undefined) { const std::string input{'|', ch, '|'}; - const auto actual = fromWindows1252(input); + const auto actual = from_windows1252(input); TEST_ASSERT_TRUE(!actual.empty()); //const std::u8string expected8{cast8('|'), cast8('\xEF'), cast8('\xBF'), cast8('\xBD'), cast8('|')}; // UTF-8, "||" const std::u8string expected8{cast8('|'), cast8(194), cast8(ch), cast8('|')}; TEST_ASSERT_EQ(actual, expected8); //const std::u32string expected{U"|\ufffd|"}; // UTF-32, "||" - const auto expected = str::EncodedString(expected8).u32string(); + const auto expected = str::to_u32string(expected8); test_assert_eq(testName, actual, expected); } } @@ -226,20 +215,13 @@ TEST_CASE(test_string_to_u8string_iso8859_1) for (uint32_t ch = nobreak_space; ch <= latin_small_letter_y_with_diaeresis; ch++) // ISO8859-1 { const std::string input_ { '|', static_cast(ch), '|'}; - const str::W1252string input(str::c_str(input_)); + const auto input(str::str(input_)); const auto actual = to_u8string(input); const std::u32string expected{U'|', U(ch), U'|'}; test_assert_eq(testName, actual, expected); - // Can't compare the values with == because TEST_ASSERT_EQ() - // wants to do toString() and that doesn't work on Linux as the encoding - // is wrong (see above). - //std::string actual_; - //str::details::toString(actual.c_str(), actual_); - //TEST_ASSERT_EQ(input_, actual_); - const str::EncodedStringView inputView(input); - const str::EncodedStringView actualView(actual); - TEST_ASSERT_EQ(inputView, actualView); + TEST_ASSERT(str::to_u8string(input) == actual); + TEST_ASSERT(input == str::to_w1252string(actual)); } } @@ -272,83 +254,127 @@ TEST_CASE(test_change_case) //// Yes, this can really come up, "non classifié" is French (Canadian) for "unclassified". //const std::string DEF_1252{'D', '\xc9', 'F'}; // "DÉF" Windows-1252 - //const auto DEF8 = fromWindows1252(DEF_1252); + //const auto DEF8 = from_windows1252(DEF_1252); //const std::string def_1252{'d', '\xe9', 'f'}; // "déf" Windows-1252 - //const auto def8 = fromWindows1252(def_1252); + //const auto def8 = from_windows1252(def_1252); ////test_change_case_(testName, def, DEF); //test_change_case_(testName, def_1252, DEF_1252); } // https://en.wikipedia.org/wiki/%C3%89#Character_mappings -static const str::EncodedString& classificationText_utf_8() +static const coda_oss::u8string& classificationText_u8() { - static const str::EncodedString retval(str::cast("A\xc3\x89IOU")); // UTF-8 "AÉIOU" + static const auto retval(str::make_string("A\xc3\x89IOU")); // UTF-8 "AÉIOU" return retval; } -static const str::EncodedString& classificationText_iso8859_1() -{ - static const str::EncodedString retval(str::cast("A\xc9IOU")); // ISO8859-1 "AÉIOU" + +static const str::W1252string& classificationText_w1252() + { + static const auto retval(str::make_string("A\xc9IOU")); // ISO8859-1 "AÉIOU" return retval; } + // UTF-16 on Windows, UTF-32 on Linux static const wchar_t* classificationText_wide_() { return L"A\xc9IOU"; } // UTF-8 "AÉIOU" -static str::EncodedString classificationText_wide() { return str::EncodedString(classificationText_wide_()); } +static std::u16string classificationText_u16() { return u"A\xc9IOU"; } // UTF-16 "AÉIOU" +static std::u32string classificationText_u32() { return U"A\xc9IOU"; } // UTF-32 "AÉIOU" + static std::string classificationText_platform() { return - sys::Platform == sys::PlatformType::Linux ? classificationText_utf_8().native() : classificationText_iso8859_1().native(); } + sys::Platform == sys::PlatformType::Linux ? str::toString(classificationText_u8()) : str::toString(classificationText_w1252()); } TEST_CASE(test_u8string_to_string) { - { - const auto utf8 = classificationText_utf_8().u8string(); - const str::EncodedStringView utf8View(utf8); - const auto actual = utf8View.native(); - TEST_ASSERT_EQ(classificationText_platform(), actual); - } - { - const auto utf8 = classificationText_iso8859_1().u8string(); - const str::EncodedStringView utf8View(utf8); - const auto actual = utf8View.native(); - TEST_ASSERT_EQ(classificationText_platform(), actual); - } + auto actual = str::toString(classificationText_u8()); + TEST_ASSERT_EQ(classificationText_platform(), actual); + + actual = str::toString(classificationText_w1252()); + TEST_ASSERT_EQ(classificationText_platform(), actual); +} + +static auto to_w1252string(const std::wstring& s) +{ + return str::to_w1252string(str::u8FromWString(s)); +} +static auto to_u16string(const str::W1252string& s) +{ + return str::to_u16string(s.c_str(), s.length()); +} +static auto to_u32string(const str::W1252string& s) +{ + return str::to_u32string(s.c_str(), s.length()); } TEST_CASE(test_u8string_to_u16string) { + const auto actual = classificationText_u16(); + const std::wstring wide(classificationText_wide_()); #if _WIN32 - const auto actual = classificationText_utf_8().u16string(); - const std::wstring s = str::c_str(actual); // Windows: std::wstring == std::u16string - TEST_ASSERT(classificationText_wide_() == s); // _EQ wants to do toString() + const auto s = str::str(actual); // Windows: std::wstring == std::u16string + TEST_ASSERT(wide == s); // _EQ wants to do toString() #endif - TEST_ASSERT_EQ(classificationText_wide(), classificationText_utf_8()); - TEST_ASSERT_EQ(classificationText_wide(), classificationText_iso8859_1()); + const auto u8 = classificationText_u8(); + TEST_ASSERT(str::u8FromWString(wide) == u8); + TEST_ASSERT(wide == str::toWString(u8)); + + const auto w1252 = str::c_str(classificationText_w1252()); + TEST_ASSERT(to_w1252string(wide) == w1252); + TEST_ASSERT(wide == str::toWString(w1252)); - TEST_ASSERT(classificationText_wide().u16string() == classificationText_utf_8().u16string()); // _EQ wants to do toString() - TEST_ASSERT(classificationText_wide().u16string() == classificationText_iso8859_1().u16string()); // _EQ wants to do toString() + TEST_ASSERT(classificationText_u16() == actual); // _EQ wants to do toString() + TEST_ASSERT(classificationText_u16() == to_u16string(w1252)); // _EQ wants to do toString() } TEST_CASE(test_u8string_to_u32string) { - #if !_WIN32 - const auto actual = classificationText_utf_8().u32string(); - const std::wstring s = str::c_str(actual); // Linux: std::wstring == std::u32string + const auto actual = classificationText_u32(); +#if !_WIN32 + const auto s = str::str(actual); // Linux: std::wstring == std::u32string TEST_ASSERT(classificationText_wide_() == s); // _EQ wants to do toString() #endif + + const std::wstring wide(classificationText_wide_()); + const auto u8 = classificationText_u8(); + TEST_ASSERT(str::u8FromWString(wide) == u8); + TEST_ASSERT(wide == str::toWString(u8)); - TEST_ASSERT_EQ(classificationText_wide(), classificationText_utf_8()); - TEST_ASSERT_EQ(classificationText_wide(), classificationText_iso8859_1()); + const auto w1252 = str::c_str(classificationText_w1252()); + TEST_ASSERT(to_w1252string(wide) == w1252); + TEST_ASSERT(wide == str::toWString(w1252)); + + TEST_ASSERT(classificationText_u32() == actual); // _EQ wants to do toString() + TEST_ASSERT(classificationText_u32() == to_u32string(w1252)); // _EQ wants to do toString() +} - TEST_ASSERT(classificationText_wide().u32string() == classificationText_utf_8().u32string()); // _EQ wants to do toString() - TEST_ASSERT(classificationText_wide().u32string() == classificationText_iso8859_1().u32string()); // _EQ wants to do toString() +static auto as_windows1252(const coda_oss::u8string& s) +{ + const auto w1252 = str::to_w1252string(s.c_str(), s.length()); + return str::toString(w1252); +} +static auto from_utf8(const std::string& utf8) +{ + return str::str(utf8); +} +static auto as_utf8(const coda_oss::u8string& s) +{ + return str::str(s); +} +static auto toWString(const std::u16string& s) +{ + return str::toWString(str::to_u8string(s)); +} +static auto toString(const std::u16string& s) +{ + return str::toString(str::to_u8string(s)); } -static void test_wide_(const std::string& testName, const char* pStr, std::u16string::const_pointer pUtf16, const str::EncodedString& encoded) +static void test_wide_(const std::string& testName, const char* pStr, std::u16string::const_pointer pUtf16, + const std::wstring& wstring, const std::string& native, const str::W1252string& w1252) { // from UTF-16 back to Windows-1252 - const auto w1252 = str::EncodedStringView::details::w1252string(encoded.view()); - const std::string str_w1252 = str::c_str(w1252); + const auto str_w1252 = str::str(w1252); TEST_ASSERT_EQ(str_w1252, pStr); #if _WIN32 @@ -357,35 +383,40 @@ static void test_wide_(const std::string& testName, const char* pStr, std::u16st const _bstr_t str(pStr); const std::wstring std_wstr(static_cast(str)); // Windows-1252 -> UTF-16 - TEST_ASSERT(encoded.wstring() == std_wstr); + TEST_ASSERT(wstring == std_wstr); TEST_ASSERT(std_wstr == pWide); const _bstr_t wide_str(pWide); const std::string std_str(static_cast(wide_str)); // UTF-16 -> Windows-1252 - TEST_ASSERT_EQ(encoded.native(), std_str); + TEST_ASSERT_EQ(native, std_str); TEST_ASSERT_EQ(std_str, pStr); #else - pUtf16 = pUtf16; // avoid unused-parameter warning + CODA_OSS_mark_symbol_unused(pUtf16); + CODA_OSS_mark_symbol_unused(wstring); + CODA_OSS_mark_symbol_unused(native); #endif } - static void test_Windows1252_ascii(const std::string& testName, const char* pStr, std::u16string::const_pointer pUtf16) { // For both UTF-8 and Windows-1252, ASCII is the same (they only differ for 0x80-0xff). - const auto view8 = str::EncodedStringView::fromUtf8(pStr); - TEST_ASSERT_EQ(pStr, view8.native()); // native() is the same on all platforms/encodings for ASCII - const auto view1252 = str::EncodedStringView::fromWindows1252(pStr); - TEST_ASSERT_EQ(pStr, view1252.native()); // native() is the same on all platforms/encodings for ASCII - - const str::EncodedString encoded(pStr); - TEST_ASSERT(encoded.u16string() == pUtf16); - test_wide_(testName, pStr, pUtf16, encoded); + const auto u8 = as_utf8(from_utf8(pStr)); + TEST_ASSERT_EQ(pStr, u8); // native() is the same on all platforms/encodings for ASCII + { + const auto w1252 = as_windows1252(from_windows1252(pStr)); + TEST_ASSERT_EQ(pStr, w1252); // native() is the same on all platforms/encodings for ASCII + } - const str::EncodedString wide_encoded(pUtf16); - TEST_ASSERT_EQ(wide_encoded.native(), pStr); // native() is the same on all platforms/encodings for ASCII - TEST_ASSERT_EQ(view8, wide_encoded); - TEST_ASSERT_EQ(view1252, wide_encoded); - test_wide_(testName, pStr, pUtf16, wide_encoded); + const auto u16 = str::to_u16string(str::u8FromString(pStr)); + TEST_ASSERT(u16 == pUtf16); + auto wstring = str::toWString(pStr); + std::string native = pStr; + auto w1252 = str::cast(pStr); + test_wide_(testName, pStr, pUtf16, wstring, native, w1252); + + native = toString(pUtf16); + TEST_ASSERT_EQ(native, pStr); // native() is the same on all platforms/encodings for ASCII + wstring = toWString(pUtf16); + test_wide_(testName, pStr, pUtf16, wstring, native, w1252); } TEST_CASE(test_ASCII) { @@ -407,17 +438,24 @@ TEST_CASE(test_ASCII) test_Windows1252_ascii(testName, ascii, u16_ascii); } -static void test_Windows1252_(const std::string& testName, const char* pStr, std::u16string::const_pointer pUtf16) +static auto to_w1252string(const std::u16string& s) { - const auto view = str::EncodedStringView::fromWindows1252(pStr); - - const str::EncodedString encoded(view); - TEST_ASSERT(view.u16string() == pUtf16); - test_wide_(testName, pStr, pUtf16, encoded); + return str::to_w1252string(str::to_u8string(s)); +} - const str::EncodedString wide_encoded(pUtf16); - TEST_ASSERT_EQ(view, wide_encoded); - test_wide_(testName, pStr, pUtf16, wide_encoded); +static void test_Windows1252_(const std::string& testName, const char* pStr, std::u16string::const_pointer pUtf16) +{ + const auto u16 = str::to_u16string(from_windows1252(pStr)); + TEST_ASSERT(u16 == pUtf16); + auto wstring = str::toWString(from_windows1252(pStr)); + auto s = str::toString(from_windows1252(pStr)); + auto w1252 = str::make_string(pStr); + test_wide_(testName, pStr, pUtf16, wstring, s, w1252); + + wstring = toWString(pUtf16); + s = toString(pUtf16); + w1252 = to_w1252string(pUtf16); + test_wide_(testName, pStr, pUtf16, wstring, s, w1252); } TEST_CASE(test_Windows1252_WIN32) { @@ -505,101 +543,52 @@ TEST_CASE(test_Windows1252) running_utf16 += utf16; test_Windows1252_(testName, running_w1252.c_str(), running_utf16.c_str()); } -} - -static void test_EncodedStringView_(const std::string& testName, - const str::EncodedStringView& utf_8_view, const str::EncodedStringView& iso8859_1_view) -{ - (void)testName; - TEST_ASSERT_EQ(iso8859_1_view, iso8859_1_view); - TEST_ASSERT_EQ(utf_8_view, utf_8_view); - TEST_ASSERT_EQ(iso8859_1_view, utf_8_view); - TEST_ASSERT_EQ(utf_8_view, iso8859_1_view); - - TEST_ASSERT_EQ(iso8859_1_view.native(), utf_8_view.native()); - const auto native = classificationText_platform(); - TEST_ASSERT_EQ(iso8859_1_view.native(), native); - TEST_ASSERT_EQ(utf_8_view.native(), native); - - TEST_ASSERT(utf_8_view == classificationText_utf_8()); - TEST_ASSERT_EQ(utf_8_view, classificationText_utf_8()); - TEST_ASSERT(iso8859_1_view == classificationText_utf_8()); - TEST_ASSERT_EQ(iso8859_1_view, classificationText_utf_8()); - TEST_ASSERT(iso8859_1_view.u8string() == utf_8_view.u8string()); - const auto expected = str::EncodedString::details::string(classificationText_utf_8()); - { - const auto actual = utf_8_view.asUtf8(); - TEST_ASSERT_EQ(actual, expected); - } + #if _WIN32 + // This only works on Windows because the "assume encoding" APIs are used. + for (auto&& ch : w1252_to_utf16) { - const auto actual = iso8859_1_view.asUtf8(); - TEST_ASSERT_EQ(actual, expected); + const std::string expected(1, ch.first); + const std::wstring input(1, ch.second); // `std::wstring` is UTF-16 on Windows + const auto actual = str::toString(input); + TEST_ASSERT_EQ(expected, actual); } + #endif } -TEST_CASE(test_EncodedStringView) + +static void test_Encodeding_(const std::string& testName, const coda_oss::u8string& classificationText_u8, + const std::string& utf_8, const std::string& iso8859_1, + const coda_oss::u8string& utf_8_u8, const coda_oss::u8string& iso8859_1_u8, + const std::string& utf_8_view, const std::string& iso8859_1_view) { - str::EncodedStringView esv; - auto copy(esv); - copy = esv; // assignment + TEST_ASSERT_EQ(iso8859_1, utf_8); + static const auto s = classificationText_platform(); + TEST_ASSERT_EQ(iso8859_1, s); + TEST_ASSERT_EQ(utf_8, s); - { - auto utf_8_view(classificationText_utf_8().view()); - auto iso8859_1_view(classificationText_iso8859_1().view()); - test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); - - utf_8_view = classificationText_iso8859_1().view(); - iso8859_1_view = classificationText_utf_8().view(); - test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); - } - { - auto utf_8_view = classificationText_utf_8().view(); - auto iso8859_1_view = classificationText_iso8859_1().view(); - test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); + TEST_ASSERT(iso8859_1_u8 == utf_8_u8); - utf_8_view = classificationText_iso8859_1().view(); - iso8859_1_view = classificationText_utf_8().view(); - test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); - } - { - str::EncodedStringView utf_8_view; - utf_8_view = classificationText_iso8859_1().view(); - str::EncodedStringView iso8859_1_view; - iso8859_1_view = classificationText_utf_8().view(); - test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); - } + const auto expected = str::c_str(classificationText_u8); + TEST_ASSERT_EQ(utf_8_view, expected); + TEST_ASSERT_EQ(iso8859_1_view, expected); } - -TEST_CASE(test_EncodedString) +TEST_CASE(test_Encodeding) { - str::EncodedString es; - TEST_ASSERT_TRUE(es.empty()); - TEST_ASSERT_TRUE(es.native().empty()); - { - str::EncodedString es_copy(es); // copy - TEST_ASSERT_TRUE(es_copy.empty()); - TEST_ASSERT_TRUE(es_copy.native().empty()); - } - es = str::EncodedString("abc"); // assignment - TEST_ASSERT_EQ(es.native(), "abc"); - { - str::EncodedString es_copy(es); // copy, again; this time w/o default content - TEST_ASSERT_EQ(es_copy.native(), "abc"); - } - - str::EncodedString abc(es); // copy, for use below - TEST_ASSERT_EQ(abc.native(), "abc"); - - str::EncodedString es2; - es = std::move(es2); // move assignment - TEST_ASSERT_TRUE(es.empty()); - TEST_ASSERT_TRUE(es.native().empty()); - str::EncodedString abc_(abc); // copy - es = std::move(abc_); // move assignment, w/o default content - TEST_ASSERT_EQ(es.native(), "abc"); - - str::EncodedString es3(std::move(abc)); // move constructor - TEST_ASSERT_EQ(es3.native(), "abc"); + const auto utf_8 = str::toString(classificationText_u8()); + const auto iso8859_1 = str::toString(classificationText_w1252()); + const auto utf_8_u8 = classificationText_u8(); + const auto iso8859_1_u8 = str::to_u8string(classificationText_w1252()); + const auto utf_8_view = as_utf8(classificationText_u8()); + const auto iso8859_1_view = as_utf8(from_windows1252(str::c_str(classificationText_w1252()))); + + test_Encodeding_(testName, classificationText_u8(), + utf_8, iso8859_1, + utf_8_u8, iso8859_1_u8, + utf_8_view, iso8859_1_view); + test_Encodeding_(testName, classificationText_u8(), + iso8859_1, utf_8, + iso8859_1_u8, utf_8_u8, + iso8859_1_view, utf_8_view); } TEST_MAIN( @@ -617,6 +606,5 @@ TEST_MAIN( TEST_CHECK(test_ASCII); TEST_CHECK(test_Windows1252_WIN32); TEST_CHECK(test_Windows1252); - TEST_CHECK(test_EncodedStringView); - TEST_CHECK(test_EncodedString); + TEST_CHECK(test_Encodeding); ) diff --git a/modules/c++/str/unittests/test_str.cpp b/modules/c++/str/unittests/test_str.cpp index 05ea746a1e..8fd60b90dd 100644 --- a/modules/c++/str/unittests/test_str.cpp +++ b/modules/c++/str/unittests/test_str.cpp @@ -28,11 +28,6 @@ #include "TestCase.h" -inline std::string to_string(const std::string& value) -{ - return value; -} - TEST_CASE(testTrim) { std::string s = " test "; diff --git a/modules/c++/sys/include/sys/Conf.h b/modules/c++/sys/include/sys/Conf.h index 45cf7da292..6dd45ea28d 100644 --- a/modules/c++/sys/include/sys/Conf.h +++ b/modules/c++/sys/include/sys/Conf.h @@ -156,8 +156,6 @@ namespace sys #include "except/Exception.h" -#define FmtX str::format - #define SYS_FUNC NativeLayer_func__ #define Ctxt(MESSAGE) except::Context(__FILE__, __LINE__, SYS_FUNC, \ diff --git a/modules/c++/sys/source/CppUnitTestAssert_.cpp_ b/modules/c++/sys/source/CppUnitTestAssert_.cpp_ index 9b2594ac58..072d66db91 100644 --- a/modules/c++/sys/source/CppUnitTestAssert_.cpp_ +++ b/modules/c++/sys/source/CppUnitTestAssert_.cpp_ @@ -1,8 +1,6 @@ #include "pch.h" #include "TestCase.h" -#include "str/EncodedStringView.h" - using namespace Microsoft::VisualStudio::CppUnitTestFramework; // EQUALS_MESSAGE() wants ToString() specializations (or overloads) for our types, which is a nusiance. @@ -37,7 +35,5 @@ void test::Assert::FailOnCondition(bool condition, const unsigned short* message std::wstring GetAssertMessage(bool equality, const std::wstring& expected, const std::wstring& actual, const wchar_t *message); // declare caller std::wstring test::Assert::GetAssertMessage(bool equality, const std::string& expected, const std::string& actual, const wchar_t *message) { - const str::EncodedStringView vExpected(expected); - const str::EncodedStringView vActual(actual); - return ::GetAssertMessage(equality, vExpected.wstring(), vActual.wstring(), message); // and call! + return ::GetAssertMessage(equality, str::toWString(expected), str::toWString(actual), message); // and call! } diff --git a/modules/c++/sys/source/OSUnix.cpp b/modules/c++/sys/source/OSUnix.cpp index c9241b639f..e561186f57 100644 --- a/modules/c++/sys/source/OSUnix.cpp +++ b/modules/c++/sys/source/OSUnix.cpp @@ -143,7 +143,7 @@ std::string sys::OSUnix::getPlatformName() const if (uname(&name) == -1) throw sys::SystemException("Uname failed"); - return FmtX("%s (%s): %s [build: %s]", name.sysname, name.machine, + return str::Format("%s (%s): %s [build: %s]", name.sysname, name.machine, name.release, name.version); } diff --git a/modules/c++/sys/source/OSWin32.cpp b/modules/c++/sys/source/OSWin32.cpp index e381f2f917..e50dee5ec2 100644 --- a/modules/c++/sys/source/OSWin32.cpp +++ b/modules/c++/sys/source/OSWin32.cpp @@ -58,7 +58,7 @@ std::string sys::OSWin32::getPlatformName() const { platform = "Unknown Windows OS"; } - return FmtX("%s: %d.%d [build: %d], %s", platform.c_str(), + return str::Format("%s: %d.%d [build: %d], %s", platform.c_str(), info.dwMajorVersion, info.dwMinorVersion, info.dwBuildNumber, info.szCSDVersion); } diff --git a/modules/c++/sys/source/sys_filesystem.cpp b/modules/c++/sys/source/sys_filesystem.cpp index 95f9688962..13d77f7a91 100644 --- a/modules/c++/sys/source/sys_filesystem.cpp +++ b/modules/c++/sys/source/sys_filesystem.cpp @@ -16,7 +16,6 @@ #include "sys/Path.h" #include "gsl/gsl.h" -#include "str/EncodedString.h" namespace fs = sys::filesystem; @@ -49,7 +48,7 @@ fs::path::string_type fs::path::to_native(const std::string& s_) { #ifdef _WIN32 - return str::EncodedStringView(s_).wstring(); + return str::toWString(s_); #else return s_; #endif @@ -104,7 +103,7 @@ fs::path::operator string_type() const std::string fs::path::string() const { - return str::EncodedString(p_).native(); + return str::toString(p_); } fs::path fs::path::root_path() const diff --git a/modules/c++/xml.lite/include/xml/lite/Element.h b/modules/c++/xml.lite/include/xml/lite/Element.h index 14e23a900e..01c5aab0ce 100644 --- a/modules/c++/xml.lite/include/xml/lite/Element.h +++ b/modules/c++/xml.lite/include/xml/lite/Element.h @@ -34,7 +34,6 @@ #include #include #include -#include #include "sys/Conf.h" #include "mem/SharedPtr.h" diff --git a/modules/c++/xml.lite/include/xml/lite/MinidomHandler.h b/modules/c++/xml.lite/include/xml/lite/MinidomHandler.h index ac9590662b..1413b0fc58 100644 --- a/modules/c++/xml.lite/include/xml/lite/MinidomHandler.h +++ b/modules/c++/xml.lite/include/xml/lite/MinidomHandler.h @@ -50,8 +50,6 @@ #include "coda_oss/string.h" #include -#include "str/EncodedString.h" -#include "str/EncodedStringView.h" #include "XMLReader.h" #include "io/StandardStreams.h" #include "Document.h" diff --git a/modules/c++/xml.lite/source/Attributes.cpp b/modules/c++/xml.lite/source/Attributes.cpp index a70077ad1a..2628ca7c84 100644 --- a/modules/c++/xml.lite/source/Attributes.cpp +++ b/modules/c++/xml.lite/source/Attributes.cpp @@ -152,8 +152,7 @@ std::string xml::lite::Attributes::getValue(const QName& qname) const { const auto uri = qname.getUri().value; const auto localName = qname.getName(); - throw except::NoSuchKeyException(Ctxt(FmtX("(uri: %s, localName: %s", - uri.c_str(), localName.c_str()))); + throw except::NoSuchKeyException(Ctxt(FmtX("(uri: %s, localName: %s", uri, localName))); } return retval; } diff --git a/modules/c++/xml.lite/source/Element.cpp b/modules/c++/xml.lite/source/Element.cpp index 150588e5e4..80470ab029 100644 --- a/modules/c++/xml.lite/source/Element.cpp +++ b/modules/c++/xml.lite/source/Element.cpp @@ -31,7 +31,6 @@ #include #include #include -#include #include "xml/lite/Attributes.h" std::unique_ptr xml::lite::Element::create(const std::string& qname, const std::string& uri, const std::string& characterData) @@ -260,7 +259,7 @@ void xml::lite::Element::prettyConsoleOutput_(io::OutputStream& stream, std::string xml::lite::Element::getCharacterData() const { - return str::EncodedStringView(mCharacterData).native(); + return str::toString(mCharacterData); } coda_oss::u8string& xml::lite::Element::getCharacterData(coda_oss::u8string& result) const { @@ -279,7 +278,7 @@ static void writeCharacterData_utf8(io::OutputStream& stream, const std::u8strin } static void writeCharacterData_native(io::OutputStream& stream, const std::u8string& characterData) { - stream.write(str::EncodedStringView(characterData).native()); + stream.write(str::toString(characterData)); } static void depthPrint_(const xml::lite::Element& element, @@ -466,7 +465,7 @@ void xml::lite::Element::setNamespaceURI( void xml::lite::Element::setCharacterData(const std::string& characters) { - mCharacterData = str::EncodedStringView(characters).u8string(); + mCharacterData = str::u8FromString(characters); } xml::lite::Element& xml::lite::Element::operator=(const std::string& characterData) { @@ -507,7 +506,7 @@ xml::lite::Element& xml::lite::addChild(Element& e, const QName& qname, const co } xml::lite::Element& xml::lite::addChild(Element& e, const QName& qname, const std::string& characterData) { - return addChild(e, qname, str::EncodedStringView(characterData).u8string()); + return addChild(e, qname, str::u8FromString(characterData)); } xml::lite::Element& xml::lite::addChild(Element& e, const QName& qname) { diff --git a/modules/c++/xml.lite/source/MinidomHandler.cpp b/modules/c++/xml.lite/source/MinidomHandler.cpp index 3e2e1334cd..f895562c49 100644 --- a/modules/c++/xml.lite/source/MinidomHandler.cpp +++ b/modules/c++/xml.lite/source/MinidomHandler.cpp @@ -28,7 +28,6 @@ #include "str/Convert.h" #include "str/Encoding.h" #include "sys/OS.h" -#include "str/EncodedStringView.h" #include "xml/lite/MinidomHandler.h" @@ -74,8 +73,7 @@ void xml::lite::MinidomHandler::characters(const char *value, int length) // If we're still here despite use_char() being "false" then the // wide-character routine "failed." On Windows, that means the char* value // is encoded as Windows-1252 (more-or-less ISO8859-1). - const str::EncodedString chars(std::string(value, length)); - characters(chars.u8string()); + characters(str::u8FromString(std::string(value, length))); } bool xml::lite::MinidomHandler::vcharacters(const void /*XMLCh*/* chars_, size_t length) @@ -92,8 +90,7 @@ bool xml::lite::MinidomHandler::vcharacters(const void /*XMLCh*/* chars_, size_t static_assert(sizeof(XMLCh) == sizeof(char16_t), "XMLCh should be 16-bits."); auto pChars16 = static_cast(chars_); - auto chars = str::EncodedString(std::u16string(pChars16, length)).u8string(); - characters(std::move(chars)); + characters(str::to_u8string(pChars16, length)); return true; // vcharacters() processed } diff --git a/modules/c++/xml.lite/source/ValidatorXerces.cpp b/modules/c++/xml.lite/source/ValidatorXerces.cpp index cc1a1b516a..46d5c7bc8c 100644 --- a/modules/c++/xml.lite/source/ValidatorXerces.cpp +++ b/modules/c++/xml.lite/source/ValidatorXerces.cpp @@ -29,7 +29,6 @@ #include // std::ignore #include -#include CODA_OSS_disable_warning_push #ifndef _MSC_VER CODA_OSS_disable_warning(-Wshadow) @@ -206,18 +205,18 @@ static_assert(sizeof(XMLCh) == 2, "XMLCh should be two bytes for UTF-16."); // On other platforms, char16_t is used; only wchar_t on Windows. using XMLCh_t = wchar_t; static_assert(std::is_same<::XMLCh, XMLCh_t>::value, "XMLCh should be wchar_t"); -inline void reset(str::EncodedStringView xmlView, std::unique_ptr& pWString) +inline void reset(const std::u8string& xml, std::unique_ptr& pWString) { - pWString = std::make_unique(xmlView.wstring()); + pWString = std::make_unique(str::toWString(xml)); } #else using XMLCh_t = char16_t; static_assert(std::is_same<::XMLCh, XMLCh_t>::value, "XMLCh should be char16_t"); #endif -inline void reset(str::EncodedStringView xmlView, std::unique_ptr& pWString) +inline void reset(const std::u8string& xml, std::unique_ptr& pWString) { - pWString = std::make_unique(xmlView.u16string()); + pWString = std::make_unique(str::to_u16string(xml)); } using XMLCh_string = std::basic_string; @@ -225,7 +224,7 @@ static std::unique_ptr setStringData(xercesc::DOMLSInputImpl& inpu { // expand to the wide character data for use with xerces std::unique_ptr retval; - reset(str::EncodedStringView(xml), retval); + reset(xml, retval); input.setStringData(retval->c_str()); return retval; } @@ -264,7 +263,7 @@ bool ValidatorXerces::validate_(const std::u8string& xml, return (!mErrorHandler->getErrorLog().empty()); } -static str::EncodedStringView encodeXml(const std::string& xml) +static coda_oss::u8string encodeXml(const std::string& xml) { // The XML might contain a specific encoding, if it does; // we want to use it, otherwise we'll corrupt the data. @@ -274,34 +273,34 @@ static str::EncodedStringView encodeXml(const std::string& xml) std::cmatch m; if (std::regex_search(xml.c_str(), m, reUtf8)) { - return str::EncodedStringView::fromUtf8(xml); + return str::str(xml); } // Maybe this is poor XML with Windows-1252 encoding :-( const std::regex reWindows1252("<\?.*encoding=.*['\"]?.*windows-1252.*['\"]?.*\?>", std::regex::icase); if (std::regex_search(xml.c_str(), m, reWindows1252)) { - return str::EncodedStringView::fromWindows1252(xml); + return to_u8string(str::str(xml)); } - // No "... encoding= ..."; let EncodedStringView deal with it - return str::EncodedStringView(xml); + // No "... encoding= ..."; let u8FromString() deal with it + return str::u8FromString(xml); } bool ValidatorXerces::validate(const std::string& xml, const std::string& xmlID, std::vector& errors) const { - const auto view = encodeXml(xml); + const auto u8xml = encodeXml(xml); try { - return validate(view.u8string(), xmlID, errors); + return validate(u8xml, xmlID, errors); } catch (const utf8::invalid_utf8&) { } // Can't process as "native" (UTF-8 on Linux, Windows-1252 on Windows). // Must be Windows-1252 on Linux. - return validate(str::c_str(xml), xmlID, errors); + return validate(str::str(xml), xmlID, errors); } bool ValidatorXerces::validate(const coda_oss::u8string& xml, const std::string& xmlID, @@ -313,8 +312,7 @@ bool ValidatorXerces::validate(const str::W1252string& xml, const std::string& xmlID, std::vector& errors) const { - const str::EncodedStringView xmlView(xml); - return validate(xmlView.u8string(), xmlID, errors); + return validate(str::to_u8string(xml), xmlID, errors); } } diff --git a/modules/c++/xml.lite/unittests/test_xmlcreate.cpp b/modules/c++/xml.lite/unittests/test_xmlcreate.cpp index 7b60986569..7d982ef185 100644 --- a/modules/c++/xml.lite/unittests/test_xmlcreate.cpp +++ b/modules/c++/xml.lite/unittests/test_xmlcreate.cpp @@ -115,11 +115,11 @@ TEST_CASE(testXmlCreateWhitespace) xml::lite::MinidomParser xmlParser; auto& document = getDocument(xmlParser); - const auto text = str::EncodedStringView(" ").u8string(); + const auto text = str::u8FromString(" "); auto documents_ = document.createElement(xml::lite::QName(""_u, "text"), text); auto& documents = *documents_; - auto strXml = str::EncodedStringView(print(documents)).u8string(); - const auto expected = str::EncodedStringView("").u8string() + text + str::EncodedStringView("").u8string(); + auto strXml = str::u8FromString(print(documents)); + const auto expected = str::u8FromString("") + text + str::u8FromString(""); TEST_ASSERT(strXml == expected); { @@ -130,8 +130,8 @@ TEST_CASE(testXmlCreateWhitespace) const auto& root = getRootElement(getDocument(xmlParser)); std::u8string actual; root.getCharacterData(actual); - static const auto blank = str::EncodedStringView("").u8string(); - TEST_ASSERT(actual == blank); // preserveCharacterData == false + static const coda_oss::u8string empty; + TEST_ASSERT(actual == empty); // preserveCharacterData == false } { io::U8StringStream input; diff --git a/modules/c++/xml.lite/unittests/test_xmlparser.cpp b/modules/c++/xml.lite/unittests/test_xmlparser.cpp index b0e88e33b3..32deee263c 100644 --- a/modules/c++/xml.lite/unittests/test_xmlparser.cpp +++ b/modules/c++/xml.lite/unittests/test_xmlparser.cpp @@ -28,7 +28,6 @@ #include "io/FileInputStream.h" #include "str/Convert.h" #include "str/Encoding.h" -#include "str/EncodedString.h" #include "coda_oss/CPlusPlus.h" #include "sys/OS.h" #include "sys/FileFinder.h" @@ -38,11 +37,6 @@ #include "xml/lite/Validator.h" #include "xml/lite/QName.h" -static inline std::u8string fromUtf8(const std::string& utf8) -{ - return str::EncodedStringView::fromUtf8(utf8).u8string(); -} - static const std::string& text() { static const std::string retval("TEXT"); @@ -53,20 +47,19 @@ static const std::string& strXml() static const std::string retval = "" + text() + ""; return retval; } -static const std::u8string& text8() +static auto from_utf8(const std::string& utf8) { - static const auto retval = fromUtf8(text()); - return retval; + return str::str(utf8); } - -static const str::EncodedString& iso88591Text() +static const std::u8string& text8() { - static const str::EncodedString retval(str::cast("T\xc9XT")); // ISO8859-1, "TÉXT" + static const auto retval = from_utf8(text()); return retval; } + static const auto& iso88591Text1252() { - static const auto retval = str::EncodedStringView::details::w1252string(iso88591Text().view()); + static const str::W1252string retval = str::cast("T\xc9XT"); // ISO8859-1, "TÉXT" return retval; } static auto pIso88591Text_() @@ -75,15 +68,9 @@ static auto pIso88591Text_() return retval; } -static const str::EncodedString& utf8Text() -{ - static const str::EncodedString retval(str::cast("T\xc3\x89XT")); // UTF-8, "TÉXT" - return retval; -} - static const auto& utf8Text8() { - static const auto retval = utf8Text().u8string(); + static const coda_oss::u8string retval = str::cast("T\xc3\x89XT"); // UTF-8, "TÉXT" return retval; } static const auto pUtf8Text_() @@ -94,12 +81,12 @@ static const auto pUtf8Text_() static const auto& strUtf8Xml8() { - static const auto retval = fromUtf8("") + utf8Text8() + fromUtf8(""); + static const auto retval = from_utf8("") + utf8Text8() + from_utf8(""); return retval; } static const std::string& strUtf8Xml() { - static const std::string retval = str::c_str(strUtf8Xml8()); + static const auto retval = str::str(strUtf8Xml8()); return retval; } @@ -216,10 +203,9 @@ TEST_CASE(testXmlPrintSimple) TEST_ASSERT_EQ(actual, expected); } -static std::u8string fromWindows1252(const std::string& s) +static auto from_windows1252(const std::string& w1252) { - // s is Windows-1252 on ALL platforms - return str::EncodedStringView::fromWindows1252(s).u8string(); + return to_u8string(str::str(w1252)); } TEST_CASE(testXmlPrintUtf8) @@ -231,7 +217,7 @@ TEST_CASE(testXmlPrintUtf8) xml::lite::MinidomParser xmlParser; auto& document = getDocument(xmlParser); - const auto s8_w1252 = fromWindows1252(pIso88591Text_()); + const auto s8_w1252 = from_windows1252(pIso88591Text_()); const auto pRootElement = document.createElement(root, s8_w1252); io::StringStream output; @@ -272,7 +258,7 @@ TEST_CASE(testXmlConsoleOutput) xml::lite::MinidomParser xmlParser; auto& document = getDocument(xmlParser); - const auto s8_w1252 = fromWindows1252(pIso88591Text_()); + const auto s8_w1252 = from_windows1252(pIso88591Text_()); const auto pRootElement = document.createElement(root, s8_w1252); io::StringStream output; @@ -437,6 +423,11 @@ static bool find_string(io::FileInputStream& stream, const std::string& s) return false; } +static std::string as_utf8(const coda_oss::u8string& s) +{ + return str::str(s); +} + TEST_CASE(testReadEmbeddedXml) { // This is a binary file with XML burried in it somewhere @@ -457,11 +448,11 @@ TEST_CASE(testReadEmbeddedXml) const auto characterData = classificationXML.getCharacterData(); TEST_ASSERT_EQ(characterData, classificationText_platform); - const str::EncodedStringView expectedCharDataView(str::c_str(classificationText_utf_8), classificationText_utf_8.length()); + const auto expected = from_utf8(classificationText_utf_8); std::u8string u8_characterData; classificationXML.getCharacterData(u8_characterData); - TEST_ASSERT_EQ(u8_characterData, expectedCharDataView); - const auto u8_characterData_ = str::EncodedStringView(u8_characterData).asUtf8(); + TEST_ASSERT_EQ(u8_characterData, expected); + const auto u8_characterData_ = as_utf8(u8_characterData); TEST_ASSERT_EQ(classificationText_utf_8, u8_characterData_); }