From abac7becfb0a97163957f0edb4e24a7a77e18a34 Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sun, 24 Sep 2023 14:56:03 +0200 Subject: [PATCH 01/18] fix: use unified emoji for sending, & use both unified & nonQualified for parsing --- src/providers/emoji/Emojis.cpp | 118 +++++++++++++++++++++++---------- src/providers/emoji/Emojis.hpp | 3 + 2 files changed, 86 insertions(+), 35 deletions(-) diff --git a/src/providers/emoji/Emojis.cpp b/src/providers/emoji/Emojis.cpp index dbf22aa36e7..38ec8978b22 100644 --- a/src/providers/emoji/Emojis.cpp +++ b/src/providers/emoji/Emojis.cpp @@ -12,7 +12,6 @@ #include #include -#include #include namespace chatterino { @@ -27,7 +26,7 @@ namespace { const rapidjson::Value &unparsedEmoji, QString shortCode = QString()) { - std::array unicodeBytes{}; + std::vector unicodeBytes{}; struct { bool apple; @@ -75,31 +74,50 @@ namespace { emojiData->capabilities.insert("Facebook"); } - QStringList unicodeCharacters; - if (!emojiData->nonQualifiedCode.isEmpty()) - { - unicodeCharacters = - emojiData->nonQualifiedCode.toLower().split('-'); - } - else - { - unicodeCharacters = emojiData->unifiedCode.toLower().split('-'); - } - if (unicodeCharacters.length() < 1) - { - return; - } + QStringList nonQualifiedCharacters = + emojiData->nonQualifiedCode.toLower().split('-'); + QStringList unicodeCharacters = + emojiData->unifiedCode.toLower().split('-'); - int numUnicodeBytes = 0; + assert(unicodeCharacters.length() >= 1); for (const QString &unicodeCharacter : unicodeCharacters) { - unicodeBytes.at(numUnicodeBytes++) = - QString(unicodeCharacter).toUInt(nullptr, 16); + bool ok{false}; + unicodeBytes.push_back(QString(unicodeCharacter).toUInt(&ok, 16)); + if (!ok) + { + qCWarning(chatterinoEmoji) + << "Failed to parse emoji" << emojiData->shortCodes; + return; + } } - emojiData->value = - QString::fromUcs4(unicodeBytes.data(), numUnicodeBytes); + // We can safely do a narrowing static cast since unicodeBytes will never be a large number + emojiData->value = QString::fromUcs4( + unicodeBytes.data(), static_cast(unicodeBytes.size())); + + if (nonQualifiedCharacters.length() > 0) + { + std::vector nonQualifiedBytes{}; + for (const QString &unicodeCharacter : nonQualifiedCharacters) + { + bool ok{false}; + nonQualifiedBytes.push_back( + QString(unicodeCharacter).toUInt(&ok, 16)); + if (!ok) + { + qCWarning(chatterinoEmoji) + << "Failed to parse emoji" << emojiData->shortCodes; + return; + } + } + + // We can safely do a narrowing static cast since unicodeBytes will never be a large number + emojiData->nonQualified = + QString::fromUcs4(nonQualifiedBytes.data(), + static_cast(nonQualifiedBytes.size())); + } } // getToneNames takes a tones and returns their names in the same order @@ -296,31 +314,61 @@ std::vector> Emojis::parse( for (const std::shared_ptr &emoji : possibleEmojis) { + int emojiNonQualifiedExtraCharacters = + emoji->nonQualified.length() - 1; int emojiExtraCharacters = emoji->value.length() - 1; - if (emojiExtraCharacters > remainingCharacters) + if (remainingCharacters >= emojiExtraCharacters) { - // It cannot be this emoji, there's not enough space for it - continue; - } + // look in emoji->value + bool match = true; - bool match = true; + for (int j = 1; j < emoji->value.length(); ++j) + { + if (text.at(i + j) != emoji->value.at(j)) + { + match = false; - for (int j = 1; j < emoji->value.length(); ++j) - { - if (text.at(i + j) != emoji->value.at(j)) + break; + } + } + + if (match) { - match = false; + matchedEmoji = emoji; + matchedEmojiLength = emoji->value.length(); break; } } - - if (match) + else if (remainingCharacters >= emojiNonQualifiedExtraCharacters) { - matchedEmoji = emoji; - matchedEmojiLength = emoji->value.length(); + // This checking here relies on the fact that the nonQualified string + // always starts with the same byte as value (the unified string) + bool match = true; + + for (int j = 1; j < emoji->nonQualified.length(); ++j) + { + if (text.at(i + j) != emoji->nonQualified.at(j)) + { + match = false; + + break; + } + } + + if (match) + { + matchedEmoji = emoji; + matchedEmojiLength = emoji->nonQualified.length(); - break; + break; + } + // look in emoji->nonQualified + } + else + { + // It cannot be this emoji, there's not enough space for it + continue; } } diff --git a/src/providers/emoji/Emojis.hpp b/src/providers/emoji/Emojis.hpp index 217aa1f4ad3..51da6ab03bb 100644 --- a/src/providers/emoji/Emojis.hpp +++ b/src/providers/emoji/Emojis.hpp @@ -21,6 +21,9 @@ struct EmojiData { // :male:) QString value; + // actual byte-representation of the non qualified emoji + QString nonQualified; + // i.e. 204e-50a2 QString unifiedCode; QString nonQualifiedCode; From 8d6834fe9c30759892719ff395c3e959cb955be0 Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sun, 24 Sep 2023 14:56:28 +0200 Subject: [PATCH 02/18] refactor: rename parsed shortCodes to shortNames to avoid name clashes --- src/providers/emoji/Emojis.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/providers/emoji/Emojis.cpp b/src/providers/emoji/Emojis.cpp index 38ec8978b22..6189da1aa1e 100644 --- a/src/providers/emoji/Emojis.cpp +++ b/src/providers/emoji/Emojis.cpp @@ -41,10 +41,11 @@ namespace { } else { - const auto &shortCodes = unparsedEmoji["short_names"]; - for (const auto &_shortCode : shortCodes.GetArray()) + // Load short codes from the suggested short_names + const auto &shortNames = unparsedEmoji["short_names"]; + for (const auto &shortName : shortNames.GetArray()) { - emojiData->shortCodes.emplace_back(_shortCode.GetString()); + emojiData->shortCodes.emplace_back(shortName.GetString()); } } From 7d942a56f27a3727b20466fbd90e45ab57fba251 Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sun, 24 Sep 2023 14:56:43 +0200 Subject: [PATCH 03/18] refactor: mark emojis name as unused in the emoji set loading --- src/providers/emoji/Emojis.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/providers/emoji/Emojis.cpp b/src/providers/emoji/Emojis.cpp index 6189da1aa1e..63ff83768e8 100644 --- a/src/providers/emoji/Emojis.cpp +++ b/src/providers/emoji/Emojis.cpp @@ -238,6 +238,8 @@ void Emojis::loadEmojiSet() getSettings()->emojiSet.connect([this](const auto &emojiSet) { this->emojis.each([=](const auto &name, std::shared_ptr &emoji) { + (void)name; + QString emojiSetToUse = emojiSet; // clang-format off static std::map emojiSets = { From 62548c3cdee8ae89b70ee111a83a5b7da3d2a0a1 Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sun, 24 Sep 2023 14:56:53 +0200 Subject: [PATCH 04/18] refactor: use contains(..) instead of count(..) --- src/providers/emoji/Emojis.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/providers/emoji/Emojis.cpp b/src/providers/emoji/Emojis.cpp index 63ff83768e8..e4eeb1ae952 100644 --- a/src/providers/emoji/Emojis.cpp +++ b/src/providers/emoji/Emojis.cpp @@ -264,7 +264,7 @@ void Emojis::loadEmojiSet() }; // clang-format on - if (emoji->capabilities.count(emojiSetToUse) == 0) + if (!emoji->capabilities.contains(emojiSetToUse)) { emojiSetToUse = "Twitter"; } From af0a92bbdc357b31369bd816cc64aa0da0e9aa61 Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sun, 24 Sep 2023 14:57:06 +0200 Subject: [PATCH 05/18] refactor: const auto the emoji --- src/providers/emoji/Emojis.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/providers/emoji/Emojis.cpp b/src/providers/emoji/Emojis.cpp index e4eeb1ae952..163e56070d6 100644 --- a/src/providers/emoji/Emojis.cpp +++ b/src/providers/emoji/Emojis.cpp @@ -433,7 +433,7 @@ QString Emojis::replaceShortCodes(const QString &text) const continue; } - auto emojiData = emojiIt.value(); + const auto &emojiData = emojiIt.value(); ret.replace(offset + match.capturedStart(), match.capturedLength(), emojiData->value); From 424f0b78e0a02cc05f10ff56e638e77da4343ee3 Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sun, 24 Sep 2023 14:57:45 +0200 Subject: [PATCH 06/18] refactor: move anon namespace out to flatten in --- src/providers/emoji/Emojis.cpp | 199 +++++++++++++++++---------------- 1 file changed, 100 insertions(+), 99 deletions(-) diff --git a/src/providers/emoji/Emojis.cpp b/src/providers/emoji/Emojis.cpp index 163e56070d6..ca5465d374a 100644 --- a/src/providers/emoji/Emojis.cpp +++ b/src/providers/emoji/Emojis.cpp @@ -14,78 +14,97 @@ #include -namespace chatterino { namespace { - auto toneNames = std::map{ - {"1F3FB", "tone1"}, {"1F3FC", "tone2"}, {"1F3FD", "tone3"}, - {"1F3FE", "tone4"}, {"1F3FF", "tone5"}, - }; +using namespace chatterino; - void parseEmoji(const std::shared_ptr &emojiData, - const rapidjson::Value &unparsedEmoji, - QString shortCode = QString()) - { - std::vector unicodeBytes{}; +auto toneNames = std::map{ + {"1F3FB", "tone1"}, {"1F3FC", "tone2"}, {"1F3FD", "tone3"}, + {"1F3FE", "tone4"}, {"1F3FF", "tone5"}, +}; - struct { - bool apple; - bool google; - bool twitter; - bool facebook; - } capabilities{}; +void parseEmoji(const std::shared_ptr &emojiData, + const rapidjson::Value &unparsedEmoji, + QString shortCode = QString()) +{ + std::vector unicodeBytes{}; - if (!shortCode.isEmpty()) - { - emojiData->shortCodes.push_back(shortCode); - } - else + struct { + bool apple; + bool google; + bool twitter; + bool facebook; + } capabilities{}; + + if (!shortCode.isEmpty()) + { + emojiData->shortCodes.push_back(shortCode); + } + else + { + // Load short codes from the suggested short_names + const auto &shortNames = unparsedEmoji["short_names"]; + for (const auto &shortName : shortNames.GetArray()) { - // Load short codes from the suggested short_names - const auto &shortNames = unparsedEmoji["short_names"]; - for (const auto &shortName : shortNames.GetArray()) - { - emojiData->shortCodes.emplace_back(shortName.GetString()); - } + emojiData->shortCodes.emplace_back(shortName.GetString()); } + } - rj::getSafe(unparsedEmoji, "non_qualified", - emojiData->nonQualifiedCode); - rj::getSafe(unparsedEmoji, "unified", emojiData->unifiedCode); + rj::getSafe(unparsedEmoji, "non_qualified", emojiData->nonQualifiedCode); + rj::getSafe(unparsedEmoji, "unified", emojiData->unifiedCode); - rj::getSafe(unparsedEmoji, "has_img_apple", capabilities.apple); - rj::getSafe(unparsedEmoji, "has_img_google", capabilities.google); - rj::getSafe(unparsedEmoji, "has_img_twitter", capabilities.twitter); - rj::getSafe(unparsedEmoji, "has_img_facebook", capabilities.facebook); + rj::getSafe(unparsedEmoji, "has_img_apple", capabilities.apple); + rj::getSafe(unparsedEmoji, "has_img_google", capabilities.google); + rj::getSafe(unparsedEmoji, "has_img_twitter", capabilities.twitter); + rj::getSafe(unparsedEmoji, "has_img_facebook", capabilities.facebook); - if (capabilities.apple) - { - emojiData->capabilities.insert("Apple"); - } - if (capabilities.google) - { - emojiData->capabilities.insert("Google"); - } - if (capabilities.twitter) - { - emojiData->capabilities.insert("Twitter"); - } - if (capabilities.facebook) + if (capabilities.apple) + { + emojiData->capabilities.insert("Apple"); + } + if (capabilities.google) + { + emojiData->capabilities.insert("Google"); + } + if (capabilities.twitter) + { + emojiData->capabilities.insert("Twitter"); + } + if (capabilities.facebook) + { + emojiData->capabilities.insert("Facebook"); + } + + QStringList nonQualifiedCharacters = + emojiData->nonQualifiedCode.toLower().split('-'); + QStringList unicodeCharacters = emojiData->unifiedCode.toLower().split('-'); + + assert(unicodeCharacters.length() >= 1); + + for (const QString &unicodeCharacter : unicodeCharacters) + { + bool ok{false}; + unicodeBytes.push_back(QString(unicodeCharacter).toUInt(&ok, 16)); + if (!ok) { - emojiData->capabilities.insert("Facebook"); + qCWarning(chatterinoEmoji) + << "Failed to parse emoji" << emojiData->shortCodes; + return; } + } - QStringList nonQualifiedCharacters = - emojiData->nonQualifiedCode.toLower().split('-'); - QStringList unicodeCharacters = - emojiData->unifiedCode.toLower().split('-'); - - assert(unicodeCharacters.length() >= 1); + // We can safely do a narrowing static cast since unicodeBytes will never be a large number + emojiData->value = QString::fromUcs4(unicodeBytes.data(), + static_cast(unicodeBytes.size())); - for (const QString &unicodeCharacter : unicodeCharacters) + if (nonQualifiedCharacters.length() > 0) + { + std::vector nonQualifiedBytes{}; + for (const QString &unicodeCharacter : nonQualifiedCharacters) { bool ok{false}; - unicodeBytes.push_back(QString(unicodeCharacter).toUInt(&ok, 16)); + nonQualifiedBytes.push_back( + QString(unicodeCharacter).toUInt(&ok, 16)); if (!ok) { qCWarning(chatterinoEmoji) @@ -95,59 +114,41 @@ namespace { } // We can safely do a narrowing static cast since unicodeBytes will never be a large number - emojiData->value = QString::fromUcs4( - unicodeBytes.data(), static_cast(unicodeBytes.size())); - - if (nonQualifiedCharacters.length() > 0) - { - std::vector nonQualifiedBytes{}; - for (const QString &unicodeCharacter : nonQualifiedCharacters) - { - bool ok{false}; - nonQualifiedBytes.push_back( - QString(unicodeCharacter).toUInt(&ok, 16)); - if (!ok) - { - qCWarning(chatterinoEmoji) - << "Failed to parse emoji" << emojiData->shortCodes; - return; - } - } - - // We can safely do a narrowing static cast since unicodeBytes will never be a large number - emojiData->nonQualified = - QString::fromUcs4(nonQualifiedBytes.data(), - static_cast(nonQualifiedBytes.size())); - } + emojiData->nonQualified = + QString::fromUcs4(nonQualifiedBytes.data(), + static_cast(nonQualifiedBytes.size())); } +} - // getToneNames takes a tones and returns their names in the same order - // The format of the tones is: "1F3FB-1F3FB" or "1F3FB" - // The output of the tone names is: "tone1-tone1" or "tone1" - QString getToneNames(const QString &tones) +// getToneNames takes a tones and returns their names in the same order +// The format of the tones is: "1F3FB-1F3FB" or "1F3FB" +// The output of the tone names is: "tone1-tone1" or "tone1" +QString getToneNames(const QString &tones) +{ + auto toneParts = tones.split('-'); + QStringList toneNameResults; + for (const auto &tonePart : toneParts) { - auto toneParts = tones.split('-'); - QStringList toneNameResults; - for (const auto &tonePart : toneParts) + auto toneNameIt = toneNames.find(tonePart); + if (toneNameIt == toneNames.end()) { - auto toneNameIt = toneNames.find(tonePart); - if (toneNameIt == toneNames.end()) - { - qDebug() << "Tone with key" << tonePart - << "does not exist in tone names map"; - continue; - } - - toneNameResults.append(toneNameIt->second); + qDebug() << "Tone with key" << tonePart + << "does not exist in tone names map"; + continue; } - assert(!toneNameResults.isEmpty()); - - return toneNameResults.join('-'); + toneNameResults.append(toneNameIt->second); } + assert(!toneNameResults.isEmpty()); + + return toneNameResults.join('-'); +} + } // namespace +namespace chatterino { + void Emojis::load() { this->loadEmojis(); From 93618aa47e7fcb38dd61c56b033d1069a947f0e6 Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sun, 24 Sep 2023 14:58:10 +0200 Subject: [PATCH 07/18] refactor: constify & rename TONE_NAMES --- src/providers/emoji/Emojis.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/providers/emoji/Emojis.cpp b/src/providers/emoji/Emojis.cpp index ca5465d374a..ef4b4a0f151 100644 --- a/src/providers/emoji/Emojis.cpp +++ b/src/providers/emoji/Emojis.cpp @@ -18,7 +18,7 @@ namespace { using namespace chatterino; -auto toneNames = std::map{ +const std::map TONE_NAMES{ {"1F3FB", "tone1"}, {"1F3FC", "tone2"}, {"1F3FD", "tone3"}, {"1F3FE", "tone4"}, {"1F3FF", "tone5"}, }; @@ -129,8 +129,8 @@ QString getToneNames(const QString &tones) QStringList toneNameResults; for (const auto &tonePart : toneParts) { - auto toneNameIt = toneNames.find(tonePart); - if (toneNameIt == toneNames.end()) + auto toneNameIt = TONE_NAMES.find(tonePart); + if (toneNameIt == TONE_NAMES.end()) { qDebug() << "Tone with key" << tonePart << "does not exist in tone names map"; From a9736f5d7eed6be1b30dee4ce4aabb41d2370ab1 Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sun, 24 Sep 2023 14:58:23 +0200 Subject: [PATCH 08/18] refactor: parseEmoji make shortCode const ref --- src/providers/emoji/Emojis.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/providers/emoji/Emojis.cpp b/src/providers/emoji/Emojis.cpp index ef4b4a0f151..64e8c0abfd0 100644 --- a/src/providers/emoji/Emojis.cpp +++ b/src/providers/emoji/Emojis.cpp @@ -25,7 +25,7 @@ const std::map TONE_NAMES{ void parseEmoji(const std::shared_ptr &emojiData, const rapidjson::Value &unparsedEmoji, - QString shortCode = QString()) + const QString &shortCode = {}) { std::vector unicodeBytes{}; From ff9d123a30759835681afda163de5c332f7e262c Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sun, 24 Sep 2023 15:01:39 +0200 Subject: [PATCH 09/18] move map include --- src/providers/emoji/Emojis.cpp | 1 + src/providers/emoji/Emojis.hpp | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/providers/emoji/Emojis.cpp b/src/providers/emoji/Emojis.cpp index 64e8c0abfd0..a370430b9a6 100644 --- a/src/providers/emoji/Emojis.cpp +++ b/src/providers/emoji/Emojis.cpp @@ -12,6 +12,7 @@ #include #include +#include #include namespace { diff --git a/src/providers/emoji/Emojis.hpp b/src/providers/emoji/Emojis.hpp index 51da6ab03bb..d0f21c862c9 100644 --- a/src/providers/emoji/Emojis.hpp +++ b/src/providers/emoji/Emojis.hpp @@ -7,7 +7,6 @@ #include #include -#include #include #include From bc8e7317a50217f1053dcd56d1008f6290909f27 Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Thu, 5 Oct 2023 22:49:47 +0200 Subject: [PATCH 10/18] Do non-qualified emoji checking & remove duplicate comment Co-authored-by: nerix --- src/providers/emoji/Emojis.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/providers/emoji/Emojis.cpp b/src/providers/emoji/Emojis.cpp index a370430b9a6..20a44fe5851 100644 --- a/src/providers/emoji/Emojis.cpp +++ b/src/providers/emoji/Emojis.cpp @@ -345,7 +345,8 @@ std::vector> Emojis::parse( break; } } - else if (remainingCharacters >= emojiNonQualifiedExtraCharacters) + if (!emoji->nonQualified.isNull() && + remainingCharacters >= emojiNonQualifiedExtraCharacters) { // This checking here relies on the fact that the nonQualified string // always starts with the same byte as value (the unified string) @@ -368,12 +369,6 @@ std::vector> Emojis::parse( break; } - // look in emoji->nonQualified - } - else - { - // It cannot be this emoji, there's not enough space for it - continue; } } From 027dcf6674647d5bcc676530c222af5a87378570 Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Thu, 5 Oct 2023 23:00:45 +0200 Subject: [PATCH 11/18] Move byte parsing assert to right after the unified code is parsed --- src/providers/emoji/Emojis.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/providers/emoji/Emojis.cpp b/src/providers/emoji/Emojis.cpp index 20a44fe5851..70642163bf9 100644 --- a/src/providers/emoji/Emojis.cpp +++ b/src/providers/emoji/Emojis.cpp @@ -53,6 +53,7 @@ void parseEmoji(const std::shared_ptr &emojiData, rj::getSafe(unparsedEmoji, "non_qualified", emojiData->nonQualifiedCode); rj::getSafe(unparsedEmoji, "unified", emojiData->unifiedCode); + assert(!emojiData->unifiedCode.isEmpty()); rj::getSafe(unparsedEmoji, "has_img_apple", capabilities.apple); rj::getSafe(unparsedEmoji, "has_img_google", capabilities.google); @@ -80,8 +81,6 @@ void parseEmoji(const std::shared_ptr &emojiData, emojiData->nonQualifiedCode.toLower().split('-'); QStringList unicodeCharacters = emojiData->unifiedCode.toLower().split('-'); - assert(unicodeCharacters.length() >= 1); - for (const QString &unicodeCharacter : unicodeCharacters) { bool ok{false}; From 5198a287325719b7481357bd7d75b4140536e6b2 Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Thu, 5 Oct 2023 23:10:01 +0200 Subject: [PATCH 12/18] Parse non-qualified-code better better Co-authored-by: nerix --- src/providers/emoji/Emojis.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/providers/emoji/Emojis.cpp b/src/providers/emoji/Emojis.cpp index 70642163bf9..98cdd00ab60 100644 --- a/src/providers/emoji/Emojis.cpp +++ b/src/providers/emoji/Emojis.cpp @@ -77,8 +77,6 @@ void parseEmoji(const std::shared_ptr &emojiData, emojiData->capabilities.insert("Facebook"); } - QStringList nonQualifiedCharacters = - emojiData->nonQualifiedCode.toLower().split('-'); QStringList unicodeCharacters = emojiData->unifiedCode.toLower().split('-'); for (const QString &unicodeCharacter : unicodeCharacters) @@ -97,8 +95,10 @@ void parseEmoji(const std::shared_ptr &emojiData, emojiData->value = QString::fromUcs4(unicodeBytes.data(), static_cast(unicodeBytes.size())); - if (nonQualifiedCharacters.length() > 0) + if (!emojiData->nonQualifiedCode.isEmpty()) { + QStringList nonQualifiedCharacters = + emojiData->nonQualifiedCode.toLower().split('-'); std::vector nonQualifiedBytes{}; for (const QString &unicodeCharacter : nonQualifiedCharacters) { @@ -108,7 +108,8 @@ void parseEmoji(const std::shared_ptr &emojiData, if (!ok) { qCWarning(chatterinoEmoji) - << "Failed to parse emoji" << emojiData->shortCodes; + << "Failed to parse emoji nonQualified" + << emojiData->shortCodes; return; } } From c479a6cc229e9dea8cab8b7b35b7303de5113c63 Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Thu, 5 Oct 2023 23:19:37 +0200 Subject: [PATCH 13/18] unrelated change: add missing unordered_map include in XDGDirectory --- src/util/XDGDirectory.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/util/XDGDirectory.cpp b/src/util/XDGDirectory.cpp index 3bfef95b5ec..979e58170c8 100644 --- a/src/util/XDGDirectory.cpp +++ b/src/util/XDGDirectory.cpp @@ -3,6 +3,8 @@ #include "util/CombinePath.hpp" #include "util/Qt.hpp" +#include + namespace chatterino { #if defined(Q_OS_UNIX) and !defined(Q_OS_DARWIN) From 31026b3b85247e825ed4e86ca1fc747d309bf67d Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Thu, 5 Oct 2023 23:46:24 +0200 Subject: [PATCH 14/18] wip: work on benchmarks for emoji parsing --- benchmarks/src/Emojis.cpp | 184 +++++++++++++++++++++++++++++++++ src/providers/emoji/Emojis.cpp | 7 +- 2 files changed, 190 insertions(+), 1 deletion(-) diff --git a/benchmarks/src/Emojis.cpp b/benchmarks/src/Emojis.cpp index 7eb5106e3e3..bd87b150b8d 100644 --- a/benchmarks/src/Emojis.cpp +++ b/benchmarks/src/Emojis.cpp @@ -6,6 +6,28 @@ using namespace chatterino; +namespace { + +std::shared_ptr getEmojis() +{ + static std::shared_ptr emojis = []() { + auto *emojis = new Emojis(); + emojis->load(); + return std::make_shared(emojis); + }(); + + return emojis; +} + +EmotePtr penguin() +{ + std::shared_ptr penguin; + getEmojis()->getEmojis().tryGet("1F427", penguin); + return penguin->emote; +} + +} // namespace + static void BM_ShortcodeParsing(benchmark::State &state) { Emojis emojis; @@ -55,3 +77,165 @@ static void BM_ShortcodeParsing(benchmark::State &state) } BENCHMARK(BM_ShortcodeParsing); + +static void BM_EmojiParsing(benchmark::State &state) +{ + Emojis emojis; + + emojis.load(); + + struct TestCase { + QString input; + std::vector> expectedOutput; + }; + + const auto &emojiMap = emojis.getEmojis(); + std::shared_ptr penguin; + emojiMap.tryGet("1F427", penguin); + auto penguinEmoji = penguin->emote; + + std::vector tests{ + { + // 1 emoji + "foo 🐧 bar", + // expected output + { + "foo ", + penguinEmoji, + " bar", + }, + }, + { + // no emoji + "foo bar", + // expected output + { + "foo bar", + }, + }, + { + // many emoji + "foo 🐧 bar 🐧🐧🐧🐧🐧", + // expected output + { + "foo ", + penguinEmoji, + " bar ", + penguinEmoji, + penguinEmoji, + penguinEmoji, + penguinEmoji, + penguinEmoji, + }, + }, + }; + + for (auto _ : state) + { + for (const auto &test : tests) + { + auto output = emojis.parse(test.input); + + bool areEqual = std::equal(output.begin(), output.end(), + test.expectedOutput.begin()); + + if (!areEqual) + { + qDebug() << "BAD BENCH"; + for (const auto &v : output) + { + if (v.type() == typeid(QString)) + { + qDebug() << "output:" << boost::get(v); + } + } + } + } + } +} + +BENCHMARK(BM_EmojiParsing); + +template +static void BM_EmojiParsing2(benchmark::State &state, Args &&...args) +{ + Emojis emojis; + + emojis.load(); + + struct TestCase { + QString input; + std::vector> expectedOutput; + }; + + const auto &emojiMap = emojis.getEmojis(); + std::shared_ptr penguin; + emojiMap.tryGet("1F427", penguin); + auto penguinEmoji = penguin->emote; + + std::vector tests{ + { + // 1 emoji + "foo 🐧 bar", + // expected output + { + "foo ", + penguinEmoji, + " bar", + }, + }, + { + // no emoji + "foo bar", + // expected output + { + "foo bar", + }, + }, + { + // many emoji + "foo 🐧 bar 🐧🐧🐧🐧🐧", + // expected output + { + "foo ", + penguinEmoji, + " bar ", + penguinEmoji, + penguinEmoji, + penguinEmoji, + penguinEmoji, + penguinEmoji, + }, + }, + }; + + auto argsTuple = std::make_tuple(std::move(args)...); + auto input = std::get<0>(argsTuple); + auto expectedOutput = std::get<1>(argsTuple); + for (auto _ : state) + { + auto output = emojis.parse(input); + + bool areEqual = + std::equal(output.begin(), output.end(), expectedOutput.begin()); + + if (!areEqual) + { + qDebug() << "BAD BENCH"; + for (const auto &v : output) + { + if (v.type() == typeid(QString)) + { + qDebug() << "output:" << boost::get(v); + } + } + } + } +} + +BENCHMARK_CAPTURE(BM_EmojiParsing2, "foo 🐧 bar", + { + "foo ", + penguin(), + " bar", + }); diff --git a/src/providers/emoji/Emojis.cpp b/src/providers/emoji/Emojis.cpp index 98cdd00ab60..033ac178b18 100644 --- a/src/providers/emoji/Emojis.cpp +++ b/src/providers/emoji/Emojis.cpp @@ -350,8 +350,12 @@ std::vector> Emojis::parse( { // This checking here relies on the fact that the nonQualified string // always starts with the same byte as value (the unified string) - bool match = true; + // bool match = true; + bool match = QStringView{emoji->nonQualified}.mid(1) == + QStringView{text}.mid( + i + 1, emojiNonQualifiedExtraCharacters); + /* for (int j = 1; j < emoji->nonQualified.length(); ++j) { if (text.at(i + j) != emoji->nonQualified.at(j)) @@ -361,6 +365,7 @@ std::vector> Emojis::parse( break; } } + */ if (match) { From 92133d81daa4d15dcacd9915766e93bf44b1f01b Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sat, 7 Oct 2023 11:51:25 +0200 Subject: [PATCH 15/18] Add new benchmark function for QStringView Qt 6.5.3 results: 2023-10-07T11:48:49+02:00 Running ./bin/chatterino-benchmark Run on (32 X 5500 MHz CPU s) CPU Caches: L1 Data 48 KiB (x16) L1 Instruction 32 KiB (x16) L2 Unified 2048 KiB (x16) L3 Unified 36864 KiB (x1) Load Average: 5.93, 5.29, 2.99 -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- BM_EmojiParsing 10639 ns 10634 ns 65687 BM_EmojiParsing 10627 ns 10622 ns 65687 BM_EmojiParsing 10623 ns 10614 ns 65687 BM_EmojiParsing_mean 10630 ns 10623 ns 3 BM_EmojiParsing_median 10627 ns 10622 ns 3 BM_EmojiParsing_stddev 8.70 ns 10.1 ns 3 BM_EmojiParsing_cv 0.08 % 0.09 % 3 BM_EmojiParsing2/one_emoji 1525 ns 1524 ns 457581 BM_EmojiParsing2/one_emoji 1562 ns 1558 ns 457581 BM_EmojiParsing2/one_emoji 1555 ns 1553 ns 457581 BM_EmojiParsing2/one_emoji_mean 1547 ns 1545 ns 3 BM_EmojiParsing2/one_emoji_median 1555 ns 1553 ns 3 BM_EmojiParsing2/one_emoji_stddev 19.6 ns 18.4 ns 3 BM_EmojiParsing2/one_emoji_cv 1.27 % 1.19 % 3 BM_EmojiParsing2/two_emoji 2837 ns 2832 ns 246593 BM_EmojiParsing2/two_emoji 2836 ns 2832 ns 246593 BM_EmojiParsing2/two_emoji 2832 ns 2831 ns 246593 BM_EmojiParsing2/two_emoji_mean 2835 ns 2832 ns 3 BM_EmojiParsing2/two_emoji_median 2836 ns 2832 ns 3 BM_EmojiParsing2/two_emoji_stddev 2.46 ns 0.778 ns 3 BM_EmojiParsing2/two_emoji_cv 0.09 % 0.03 % 3 BM_EmojiParsing2/many_emoji 137515 ns 137179 ns 5090 BM_EmojiParsing2/many_emoji 137757 ns 137260 ns 5090 BM_EmojiParsing2/many_emoji 137436 ns 137216 ns 5090 BM_EmojiParsing2/many_emoji_mean 137570 ns 137218 ns 3 BM_EmojiParsing2/many_emoji_median 137515 ns 137216 ns 3 BM_EmojiParsing2/many_emoji_stddev 167 ns 40.7 ns 3 BM_EmojiParsing2/many_emoji_cv 0.12 % 0.03 % 3 BM_EmojiParsing2New/one_emoji 1528 ns 1526 ns 457211 BM_EmojiParsing2New/one_emoji 1530 ns 1529 ns 457211 BM_EmojiParsing2New/one_emoji 1532 ns 1530 ns 457211 BM_EmojiParsing2New/one_emoji_mean 1530 ns 1528 ns 3 BM_EmojiParsing2New/one_emoji_median 1530 ns 1529 ns 3 BM_EmojiParsing2New/one_emoji_stddev 1.72 ns 1.89 ns 3 BM_EmojiParsing2New/one_emoji_cv 0.11 % 0.12 % 3 BM_EmojiParsing2New/two_emoji 2834 ns 2830 ns 246832 BM_EmojiParsing2New/two_emoji 2843 ns 2839 ns 246832 BM_EmojiParsing2New/two_emoji 2829 ns 2827 ns 246832 BM_EmojiParsing2New/two_emoji_mean 2835 ns 2832 ns 3 BM_EmojiParsing2New/two_emoji_median 2834 ns 2830 ns 3 BM_EmojiParsing2New/two_emoji_stddev 7.28 ns 6.16 ns 3 BM_EmojiParsing2New/two_emoji_cv 0.26 % 0.22 % 3 BM_EmojiParsing2New/many_emoji 137688 ns 137630 ns 5095 BM_EmojiParsing2New/many_emoji 137594 ns 137443 ns 5095 BM_EmojiParsing2New/many_emoji 137601 ns 137541 ns 5095 BM_EmojiParsing2New/many_emoji_mean 137628 ns 137538 ns 3 BM_EmojiParsing2New/many_emoji_median 137601 ns 137541 ns 3 BM_EmojiParsing2New/many_emoji_stddev 52.5 ns 93.7 ns 3 BM_EmojiParsing2New/many_emoji_cv 0.04 % 0.07 % 3 Qt 5.12.12 results: 2023-10-07T11:44:36+02:00 Running ./bin/chatterino-benchmark Run on (32 X 5500 MHz CPU s) CPU Caches: L1 Data 48 KiB (x16) L1 Instruction 32 KiB (x16) L2 Unified 2048 KiB (x16) L3 Unified 36864 KiB (x1) Load Average: 1.41, 1.37, 1.35 -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- BM_EmojiParsing 12179 ns 12161 ns 57711 BM_EmojiParsing 12170 ns 12147 ns 57711 BM_EmojiParsing 12232 ns 12223 ns 57711 BM_EmojiParsing_mean 12193 ns 12177 ns 3 BM_EmojiParsing_median 12179 ns 12161 ns 3 BM_EmojiParsing_stddev 33.4 ns 40.2 ns 3 BM_EmojiParsing_cv 0.27 % 0.33 % 3 BM_EmojiParsing2/one_emoji 1774 ns 1771 ns 394534 BM_EmojiParsing2/one_emoji 1772 ns 1769 ns 394534 BM_EmojiParsing2/one_emoji 1774 ns 1773 ns 394534 BM_EmojiParsing2/one_emoji_mean 1773 ns 1771 ns 3 BM_EmojiParsing2/one_emoji_median 1774 ns 1771 ns 3 BM_EmojiParsing2/one_emoji_stddev 1.22 ns 2.07 ns 3 BM_EmojiParsing2/one_emoji_cv 0.07 % 0.12 % 3 BM_EmojiParsing2/two_emoji 3320 ns 3318 ns 210619 BM_EmojiParsing2/two_emoji 3316 ns 3311 ns 210619 BM_EmojiParsing2/two_emoji 3308 ns 3302 ns 210619 BM_EmojiParsing2/two_emoji_mean 3315 ns 3311 ns 3 BM_EmojiParsing2/two_emoji_median 3316 ns 3311 ns 3 BM_EmojiParsing2/two_emoji_stddev 6.31 ns 7.98 ns 3 BM_EmojiParsing2/two_emoji_cv 0.19 % 0.24 % 3 BM_EmojiParsing2/many_emoji 146008 ns 145938 ns 4786 BM_EmojiParsing2/many_emoji 146694 ns 146459 ns 4786 BM_EmojiParsing2/many_emoji 146554 ns 146329 ns 4786 BM_EmojiParsing2/many_emoji_mean 146418 ns 146242 ns 3 BM_EmojiParsing2/many_emoji_median 146554 ns 146329 ns 3 BM_EmojiParsing2/many_emoji_stddev 362 ns 271 ns 3 BM_EmojiParsing2/many_emoji_cv 0.25 % 0.19 % 3 BM_EmojiParsing2New/one_emoji 1775 ns 1774 ns 394159 BM_EmojiParsing2New/one_emoji 1772 ns 1772 ns 394159 BM_EmojiParsing2New/one_emoji 1775 ns 1774 ns 394159 BM_EmojiParsing2New/one_emoji_mean 1774 ns 1773 ns 3 BM_EmojiParsing2New/one_emoji_median 1775 ns 1774 ns 3 BM_EmojiParsing2New/one_emoji_stddev 1.45 ns 1.45 ns 3 BM_EmojiParsing2New/one_emoji_cv 0.08 % 0.08 % 3 BM_EmojiParsing2New/two_emoji 3308 ns 3307 ns 210242 BM_EmojiParsing2New/two_emoji 3316 ns 3314 ns 210242 BM_EmojiParsing2New/two_emoji 3304 ns 3302 ns 210242 BM_EmojiParsing2New/two_emoji_mean 3309 ns 3308 ns 3 BM_EmojiParsing2New/two_emoji_median 3308 ns 3307 ns 3 BM_EmojiParsing2New/two_emoji_stddev 6.05 ns 6.07 ns 3 BM_EmojiParsing2New/two_emoji_cv 0.18 % 0.18 % 3 BM_EmojiParsing2New/many_emoji 146275 ns 146050 ns 4790 BM_EmojiParsing2New/many_emoji 146473 ns 146406 ns 4790 BM_EmojiParsing2New/many_emoji 146438 ns 146219 ns 4790 BM_EmojiParsing2New/many_emoji_mean 146396 ns 146225 ns 3 BM_EmojiParsing2New/many_emoji_median 146438 ns 146219 ns 3 BM_EmojiParsing2New/many_emoji_stddev 106 ns 178 ns 3 BM_EmojiParsing2New/many_emoji_cv 0.07 % 0.12 % 3 --- benchmarks/src/Emojis.cpp | 136 +++++++++++++-------------------- src/providers/emoji/Emojis.cpp | 110 ++++++++++++++++++++++++-- src/providers/emoji/Emojis.hpp | 4 + 3 files changed, 162 insertions(+), 88 deletions(-) diff --git a/benchmarks/src/Emojis.cpp b/benchmarks/src/Emojis.cpp index bd87b150b8d..62abeb38dba 100644 --- a/benchmarks/src/Emojis.cpp +++ b/benchmarks/src/Emojis.cpp @@ -6,28 +6,6 @@ using namespace chatterino; -namespace { - -std::shared_ptr getEmojis() -{ - static std::shared_ptr emojis = []() { - auto *emojis = new Emojis(); - emojis->load(); - return std::make_shared(emojis); - }(); - - return emojis; -} - -EmotePtr penguin() -{ - std::shared_ptr penguin; - getEmojis()->getEmojis().tryGet("1F427", penguin); - return penguin->emote; -} - -} // namespace - static void BM_ShortcodeParsing(benchmark::State &state) { Emojis emojis; @@ -163,79 +141,73 @@ static void BM_EmojiParsing2(benchmark::State &state, Args &&...args) emojis.load(); - struct TestCase { - QString input; - std::vector> expectedOutput; - }; - - const auto &emojiMap = emojis.getEmojis(); - std::shared_ptr penguin; - emojiMap.tryGet("1F427", penguin); - auto penguinEmoji = penguin->emote; - - std::vector tests{ - { - // 1 emoji - "foo 🐧 bar", - // expected output - { - "foo ", - penguinEmoji, - " bar", - }, - }, + auto argsTuple = std::make_tuple(std::move(args)...); + auto input = std::get<0>(argsTuple); + auto expectedNumEmojis = std::get<1>(argsTuple); + for (auto _ : state) + { + auto output = emojis.parse(input); + int actualNumEmojis = 0; + for (const auto &part : output) { - // no emoji - "foo bar", - // expected output + if (part.type() == typeid(EmotePtr)) { - "foo bar", - }, - }, + ++actualNumEmojis; + } + } + + if (actualNumEmojis != expectedNumEmojis) { - // many emoji - "foo 🐧 bar 🐧🐧🐧🐧🐧", - // expected output - { - "foo ", - penguinEmoji, - " bar ", - penguinEmoji, - penguinEmoji, - penguinEmoji, - penguinEmoji, - penguinEmoji, - }, - }, - }; + qDebug() << "BAD BENCH, EXPECTED NUM EMOJIS IS WRONG" + << actualNumEmojis; + } + } +} + +template +static void BM_EmojiParsing2New(benchmark::State &state, Args &&...args) +{ + Emojis emojis; + + emojis.load(); auto argsTuple = std::make_tuple(std::move(args)...); auto input = std::get<0>(argsTuple); - auto expectedOutput = std::get<1>(argsTuple); + auto expectedNumEmojis = std::get<1>(argsTuple); for (auto _ : state) { auto output = emojis.parse(input); - - bool areEqual = - std::equal(output.begin(), output.end(), expectedOutput.begin()); - - if (!areEqual) + int actualNumEmojis = 0; + for (const auto &part : output) { - qDebug() << "BAD BENCH"; - for (const auto &v : output) + if (part.type() == typeid(EmotePtr)) { - if (v.type() == typeid(QString)) - { - qDebug() << "output:" << boost::get(v); - } + ++actualNumEmojis; } } + + if (actualNumEmojis != expectedNumEmojis) + { + qDebug() << "BAD BENCH, EXPECTED NUM EMOJIS IS WRONG" + << actualNumEmojis; + } } } -BENCHMARK_CAPTURE(BM_EmojiParsing2, "foo 🐧 bar", - { - "foo ", - penguin(), - " bar", - }); +BENCHMARK_CAPTURE(BM_EmojiParsing2, one_emoji, "foo 🐧 bar", 1); +BENCHMARK_CAPTURE(BM_EmojiParsing2, two_emoji, "foo 🐧 bar 🐧", 2); +BENCHMARK_CAPTURE( + BM_EmojiParsing2, many_emoji, + "😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 " + "😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 " + "😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 ", + 61); + +BENCHMARK_CAPTURE(BM_EmojiParsing2New, one_emoji, "foo 🐧 bar", 1); +BENCHMARK_CAPTURE(BM_EmojiParsing2New, two_emoji, "foo 🐧 bar 🐧", 2); +BENCHMARK_CAPTURE( + BM_EmojiParsing2New, many_emoji, + "😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 " + "😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 " + "😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 ", + 61); diff --git a/src/providers/emoji/Emojis.cpp b/src/providers/emoji/Emojis.cpp index 033ac178b18..66899e2c7e8 100644 --- a/src/providers/emoji/Emojis.cpp +++ b/src/providers/emoji/Emojis.cpp @@ -350,12 +350,8 @@ std::vector> Emojis::parse( { // This checking here relies on the fact that the nonQualified string // always starts with the same byte as value (the unified string) - // bool match = true; - bool match = QStringView{emoji->nonQualified}.mid(1) == - QStringView{text}.mid( - i + 1, emojiNonQualifiedExtraCharacters); + bool match = true; - /* for (int j = 1; j < emoji->nonQualified.length(); ++j) { if (text.at(i + j) != emoji->nonQualified.at(j)) @@ -365,7 +361,109 @@ std::vector> Emojis::parse( break; } } - */ + + if (match) + { + matchedEmoji = emoji; + matchedEmojiLength = emoji->nonQualified.length(); + + break; + } + } + } + + if (matchedEmojiLength == 0) + { + continue; + } + + int currentParsedEmojiFirstIndex = i; + int currentParsedEmojiEndIndex = i + (matchedEmojiLength); + + int charactersFromLastParsedEmoji = + currentParsedEmojiFirstIndex - lastParsedEmojiEndIndex; + + if (charactersFromLastParsedEmoji > 0) + { + // Add characters inbetween emojis + result.emplace_back(text.mid(lastParsedEmojiEndIndex, + charactersFromLastParsedEmoji)); + } + + // Push the emoji as a word to parsedWords + result.emplace_back(matchedEmoji->emote); + + lastParsedEmojiEndIndex = currentParsedEmojiEndIndex; + + i += matchedEmojiLength - 1; + } + + if (lastParsedEmojiEndIndex < text.length()) + { + // Add remaining characters + result.emplace_back(text.mid(lastParsedEmojiEndIndex)); + } + + return result; +} + +std::vector> Emojis::parse2( + const QString &text) const +{ + auto result = std::vector>(); + int lastParsedEmojiEndIndex = 0; + + for (auto i = 0; i < text.length(); ++i) + { + const QChar character = text.at(i); + + if (character.isLowSurrogate()) + { + continue; + } + + auto it = this->emojiFirstByte_.find(character); + if (it == this->emojiFirstByte_.end()) + { + // No emoji starts with this character + continue; + } + + const auto &possibleEmojis = it.value(); + + int remainingCharacters = text.length() - i - 1; + + std::shared_ptr matchedEmoji; + + int matchedEmojiLength = 0; + + for (const std::shared_ptr &emoji : possibleEmojis) + { + int emojiNonQualifiedExtraCharacters = + emoji->nonQualified.length() - 1; + int emojiExtraCharacters = emoji->value.length() - 1; + if (remainingCharacters >= emojiExtraCharacters) + { + // look in emoji->value + bool match = QStringView{emoji->value}.mid(1) == + QStringView{text}.mid(i + 1, emojiExtraCharacters); + + if (match) + { + matchedEmoji = emoji; + matchedEmojiLength = emoji->value.length(); + + break; + } + } + if (!emoji->nonQualified.isNull() && + remainingCharacters >= emojiNonQualifiedExtraCharacters) + { + // This checking here relies on the fact that the nonQualified string + // always starts with the same byte as value (the unified string) + bool match = QStringView{emoji->nonQualified}.mid(1) == + QStringView{text}.mid( + i + 1, emojiNonQualifiedExtraCharacters); if (match) { diff --git a/src/providers/emoji/Emojis.hpp b/src/providers/emoji/Emojis.hpp index d0f21c862c9..bd9b65d9859 100644 --- a/src/providers/emoji/Emojis.hpp +++ b/src/providers/emoji/Emojis.hpp @@ -46,6 +46,8 @@ class IEmojis virtual std::vector> parse( const QString &text) const = 0; + virtual std::vector> parse2( + const QString &text) const = 0; virtual const EmojiMap &getEmojis() const = 0; virtual const std::vector &getShortCodes() const = 0; virtual QString replaceShortCodes(const QString &text) const = 0; @@ -58,6 +60,8 @@ class Emojis : public IEmojis void load(); std::vector> parse( const QString &text) const override; + std::vector> parse2( + const QString &text) const override; EmojiMap emojis; std::vector shortCodes; From 9531758f38c4076956b98ea0d1daeb157825ee29 Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sat, 7 Oct 2023 11:52:47 +0200 Subject: [PATCH 16/18] Use parse2 --- benchmarks/src/Emojis.cpp | 39 ----------- src/providers/emoji/Emojis.cpp | 120 --------------------------------- src/providers/emoji/Emojis.hpp | 4 -- 3 files changed, 163 deletions(-) diff --git a/benchmarks/src/Emojis.cpp b/benchmarks/src/Emojis.cpp index 62abeb38dba..830a2941abe 100644 --- a/benchmarks/src/Emojis.cpp +++ b/benchmarks/src/Emojis.cpp @@ -164,36 +164,6 @@ static void BM_EmojiParsing2(benchmark::State &state, Args &&...args) } } -template -static void BM_EmojiParsing2New(benchmark::State &state, Args &&...args) -{ - Emojis emojis; - - emojis.load(); - - auto argsTuple = std::make_tuple(std::move(args)...); - auto input = std::get<0>(argsTuple); - auto expectedNumEmojis = std::get<1>(argsTuple); - for (auto _ : state) - { - auto output = emojis.parse(input); - int actualNumEmojis = 0; - for (const auto &part : output) - { - if (part.type() == typeid(EmotePtr)) - { - ++actualNumEmojis; - } - } - - if (actualNumEmojis != expectedNumEmojis) - { - qDebug() << "BAD BENCH, EXPECTED NUM EMOJIS IS WRONG" - << actualNumEmojis; - } - } -} - BENCHMARK_CAPTURE(BM_EmojiParsing2, one_emoji, "foo 🐧 bar", 1); BENCHMARK_CAPTURE(BM_EmojiParsing2, two_emoji, "foo 🐧 bar 🐧", 2); BENCHMARK_CAPTURE( @@ -202,12 +172,3 @@ BENCHMARK_CAPTURE( "😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 " "😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 ", 61); - -BENCHMARK_CAPTURE(BM_EmojiParsing2New, one_emoji, "foo 🐧 bar", 1); -BENCHMARK_CAPTURE(BM_EmojiParsing2New, two_emoji, "foo 🐧 bar 🐧", 2); -BENCHMARK_CAPTURE( - BM_EmojiParsing2New, many_emoji, - "😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 " - "😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 " - "😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂 ", - 61); diff --git a/src/providers/emoji/Emojis.cpp b/src/providers/emoji/Emojis.cpp index 66899e2c7e8..3c6e031474b 100644 --- a/src/providers/emoji/Emojis.cpp +++ b/src/providers/emoji/Emojis.cpp @@ -293,126 +293,6 @@ std::vector> Emojis::parse( auto result = std::vector>(); int lastParsedEmojiEndIndex = 0; - for (auto i = 0; i < text.length(); ++i) - { - const QChar character = text.at(i); - - if (character.isLowSurrogate()) - { - continue; - } - - auto it = this->emojiFirstByte_.find(character); - if (it == this->emojiFirstByte_.end()) - { - // No emoji starts with this character - continue; - } - - const auto &possibleEmojis = it.value(); - - int remainingCharacters = text.length() - i - 1; - - std::shared_ptr matchedEmoji; - - int matchedEmojiLength = 0; - - for (const std::shared_ptr &emoji : possibleEmojis) - { - int emojiNonQualifiedExtraCharacters = - emoji->nonQualified.length() - 1; - int emojiExtraCharacters = emoji->value.length() - 1; - if (remainingCharacters >= emojiExtraCharacters) - { - // look in emoji->value - bool match = true; - - for (int j = 1; j < emoji->value.length(); ++j) - { - if (text.at(i + j) != emoji->value.at(j)) - { - match = false; - - break; - } - } - - if (match) - { - matchedEmoji = emoji; - matchedEmojiLength = emoji->value.length(); - - break; - } - } - if (!emoji->nonQualified.isNull() && - remainingCharacters >= emojiNonQualifiedExtraCharacters) - { - // This checking here relies on the fact that the nonQualified string - // always starts with the same byte as value (the unified string) - bool match = true; - - for (int j = 1; j < emoji->nonQualified.length(); ++j) - { - if (text.at(i + j) != emoji->nonQualified.at(j)) - { - match = false; - - break; - } - } - - if (match) - { - matchedEmoji = emoji; - matchedEmojiLength = emoji->nonQualified.length(); - - break; - } - } - } - - if (matchedEmojiLength == 0) - { - continue; - } - - int currentParsedEmojiFirstIndex = i; - int currentParsedEmojiEndIndex = i + (matchedEmojiLength); - - int charactersFromLastParsedEmoji = - currentParsedEmojiFirstIndex - lastParsedEmojiEndIndex; - - if (charactersFromLastParsedEmoji > 0) - { - // Add characters inbetween emojis - result.emplace_back(text.mid(lastParsedEmojiEndIndex, - charactersFromLastParsedEmoji)); - } - - // Push the emoji as a word to parsedWords - result.emplace_back(matchedEmoji->emote); - - lastParsedEmojiEndIndex = currentParsedEmojiEndIndex; - - i += matchedEmojiLength - 1; - } - - if (lastParsedEmojiEndIndex < text.length()) - { - // Add remaining characters - result.emplace_back(text.mid(lastParsedEmojiEndIndex)); - } - - return result; -} - -std::vector> Emojis::parse2( - const QString &text) const -{ - auto result = std::vector>(); - int lastParsedEmojiEndIndex = 0; - for (auto i = 0; i < text.length(); ++i) { const QChar character = text.at(i); diff --git a/src/providers/emoji/Emojis.hpp b/src/providers/emoji/Emojis.hpp index bd9b65d9859..d0f21c862c9 100644 --- a/src/providers/emoji/Emojis.hpp +++ b/src/providers/emoji/Emojis.hpp @@ -46,8 +46,6 @@ class IEmojis virtual std::vector> parse( const QString &text) const = 0; - virtual std::vector> parse2( - const QString &text) const = 0; virtual const EmojiMap &getEmojis() const = 0; virtual const std::vector &getShortCodes() const = 0; virtual QString replaceShortCodes(const QString &text) const = 0; @@ -60,8 +58,6 @@ class Emojis : public IEmojis void load(); std::vector> parse( const QString &text) const override; - std::vector> parse2( - const QString &text) const override; EmojiMap emojis; std::vector shortCodes; From 1213173a88fc62a174c42253c3efcabc971ff50b Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sat, 7 Oct 2023 11:56:40 +0200 Subject: [PATCH 17/18] use QString::size_type or auto where possible in parse method --- src/providers/emoji/Emojis.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/providers/emoji/Emojis.cpp b/src/providers/emoji/Emojis.cpp index 3c6e031474b..f0a0f14d9f1 100644 --- a/src/providers/emoji/Emojis.cpp +++ b/src/providers/emoji/Emojis.cpp @@ -291,7 +291,7 @@ std::vector> Emojis::parse( const QString &text) const { auto result = std::vector>(); - int lastParsedEmojiEndIndex = 0; + QString::size_type lastParsedEmojiEndIndex = 0; for (auto i = 0; i < text.length(); ++i) { @@ -311,17 +311,17 @@ std::vector> Emojis::parse( const auto &possibleEmojis = it.value(); - int remainingCharacters = text.length() - i - 1; + auto remainingCharacters = text.length() - i - 1; std::shared_ptr matchedEmoji; - int matchedEmojiLength = 0; + QString::size_type matchedEmojiLength = 0; for (const std::shared_ptr &emoji : possibleEmojis) { - int emojiNonQualifiedExtraCharacters = + auto emojiNonQualifiedExtraCharacters = emoji->nonQualified.length() - 1; - int emojiExtraCharacters = emoji->value.length() - 1; + auto emojiExtraCharacters = emoji->value.length() - 1; if (remainingCharacters >= emojiExtraCharacters) { // look in emoji->value @@ -360,10 +360,10 @@ std::vector> Emojis::parse( continue; } - int currentParsedEmojiFirstIndex = i; - int currentParsedEmojiEndIndex = i + (matchedEmojiLength); + auto currentParsedEmojiFirstIndex = i; + auto currentParsedEmojiEndIndex = i + (matchedEmojiLength); - int charactersFromLastParsedEmoji = + auto charactersFromLastParsedEmoji = currentParsedEmojiFirstIndex - lastParsedEmojiEndIndex; if (charactersFromLastParsedEmoji > 0) From 8a1dfc2311705efd2dd29329b55395fa9ebfe8d6 Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sat, 7 Oct 2023 11:59:49 +0200 Subject: [PATCH 18/18] Add changelog entry --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 826b436d6ef..f32eac95b56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - Minor: The account switcher is now styled to match your theme. (#4817) - Minor: Add an invisible resize handle to the bottom of frameless user info popups and reply thread popups. (#4795) - Minor: The installer now checks for the VC Runtime version and shows more info when it's outdated. (#4847) +- Bugfix: Fixed an issue where certain emojis did not send to Twitch chat correctly. (#4840) - Bugfix: Fixed capitalized channel names in log inclusion list not being logged. (#4848) - Bugfix: Trimmed custom streamlink paths on all platforms making sure you don't accidentally add spaces at the beginning or end of its path. (#4834) - Bugfix: Fixed a performance issue when displaying replies to certain messages. (#4807)