From aaadd712c168b6cd3a6f7ef2694783def66ef36e Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 16 Oct 2025 15:49:09 -0500 Subject: [PATCH 1/5] [Prefetch] Use Regex class instead of pcre This is part of the effort to remove pcre dependency. --- plugins/prefetch/pattern.cc | 118 ++++++++---------------------------- plugins/prefetch/pattern.h | 14 +---- 2 files changed, 28 insertions(+), 104 deletions(-) diff --git a/plugins/prefetch/pattern.cc b/plugins/prefetch/pattern.cc index c6c8ddd47b3..ff1c74af08b 100644 --- a/plugins/prefetch/pattern.cc +++ b/plugins/prefetch/pattern.cc @@ -49,8 +49,6 @@ Pattern::Pattern() : _pattern(""), _replacement("") {} bool Pattern::init(const String &pattern, const String &replacement) { - pcreFree(); - _pattern.assign(pattern); _replacement.assign(replacement); @@ -58,7 +56,6 @@ Pattern::init(const String &pattern, const String &replacement) if (!compile()) { PrefetchDebug("failed to initialize pattern:'%s', replacement:'%s'", pattern.c_str(), replacement.c_str()); - pcreFree(); return false; } @@ -129,33 +126,13 @@ Pattern::init(const String &config) bool Pattern::empty() const { - return _pattern.empty() || nullptr == _re; -} - -/** - * @brief Frees PCRE library related resources. - */ -void -Pattern::pcreFree() -{ - if (_re) { - pcre_free(_re); - _re = nullptr; - } - - if (_extra) { - pcre_free(_extra); - _extra = nullptr; - } + return _pattern.empty() || _regex.empty(); } /** * @brief Destructor, frees PCRE related resources. */ -Pattern::~Pattern() -{ - pcreFree(); -} +Pattern::~Pattern() {} /** * @brief Capture or capture-and-replace depending on whether a replacement string is specified. 
@@ -204,22 +181,13 @@ Pattern::process(const String &subject, StringVector &result) bool Pattern::match(const String &subject) { - int matchCount; PrefetchDebug("matching '%s' to '%s'", _pattern.c_str(), subject.c_str()); - if (!_re) { + if (_regex.empty()) { return false; } - matchCount = pcre_exec(_re, _extra, subject.c_str(), subject.length(), 0, PCRE_NOTEMPTY, nullptr, 0); - if (matchCount < 0) { - if (matchCount != PCRE_ERROR_NOMATCH) { - PrefetchError("matching error %d", matchCount); - } - return false; - } - - return true; + return _regex.exec(subject); } /** @@ -230,31 +198,23 @@ Pattern::match(const String &subject) bool Pattern::capture(const String &subject, StringVector &result) { - int matchCount; - int ovector[OVECOUNT]; - PrefetchDebug("matching '%s' to '%s'", _pattern.c_str(), subject.c_str()); - if (!_re) { + if (_regex.empty()) { return false; } - matchCount = pcre_exec(_re, nullptr, subject.c_str(), subject.length(), 0, PCRE_NOTEMPTY, ovector, OVECOUNT); - if (matchCount < 0) { - if (matchCount != PCRE_ERROR_NOMATCH) { - PrefetchError("matching error %d", matchCount); - } + RegexMatches matches; + int matchCount = _regex.exec(subject, matches); + + if (matchCount <= 0) { return false; } for (int i = 0; i < matchCount; i++) { - int start = ovector[2 * i]; - int length = ovector[2 * i + 1] - ovector[2 * i]; - - String dst(subject, start, length); - - PrefetchDebug("capturing '%s' %d[%d,%d]", dst.c_str(), i, ovector[2 * i], ovector[2 * i + 1]); - result.push_back(dst); + std::string_view match = matches[i]; + result.emplace_back(match.data(), match.length()); + PrefetchDebug("capturing '%s' %d", result.back().c_str(), i); } return true; @@ -269,20 +229,16 @@ Pattern::capture(const String &subject, StringVector &result) bool Pattern::replace(const String &subject, String &result) { - int matchCount; - int ovector[OVECOUNT]; - PrefetchDebug("matching '%s' to '%s'", _pattern.c_str(), subject.c_str()); - if (!_re) { + if (_regex.empty()) { return 
false; } - matchCount = pcre_exec(_re, nullptr, subject.c_str(), subject.length(), 0, PCRE_NOTEMPTY, ovector, OVECOUNT); - if (matchCount < 0) { - if (matchCount != PCRE_ERROR_NOMATCH) { - PrefetchError("matching error %d", matchCount); - } + RegexMatches matches; + int matchCount = _regex.exec(subject, matches); + + if (matchCount <= 0) { return false; } @@ -296,17 +252,15 @@ Pattern::replace(const String &subject, String &result) int previous = 0; for (int i = 0; i < _tokenCount; i++) { - int replIndex = _tokens[i]; - int start = ovector[2 * replIndex]; - int length = ovector[2 * replIndex + 1] - ovector[2 * replIndex]; + int replIndex = _tokens[i]; + std::string_view dst = matches[replIndex]; String src(_replacement, _tokenOffset[i], 2); - String dst(subject, start, length); - PrefetchDebug("replacing '%s' with '%s'", src.c_str(), dst.c_str()); + PrefetchDebug("replacing '%s' with '%.*s'", src.c_str(), static_cast<int>(dst.length()), dst.data()); result.append(_replacement, previous, _tokenOffset[i] - previous); - result.append(dst); + result.append(dst.data(), dst.length()); previous = _tokenOffset[i] + 2; /* 2 is the size of $0 or $1 or $2, ... 
or $9 */ } @@ -325,30 +279,12 @@ Pattern::replace(const String &subject, String &result) bool Pattern::compile() { - const char *errPtr; /* PCRE error */ - int errOffset; /* PCRE error offset */ - PrefetchDebug("compiling pattern:'%s', replacement:'%s'", _pattern.c_str(), _replacement.c_str()); - _re = pcre_compile(_pattern.c_str(), /* the pattern */ - 0, /* options */ - &errPtr, /* for error message */ - &errOffset, /* for error offset */ - nullptr); /* use default character tables */ - - if (nullptr == _re) { - PrefetchError("compile of regex '%s' at char %d: %s", _pattern.c_str(), errOffset, errPtr); - - return false; - } - - _extra = pcre_study(_re, 0, &errPtr); - - if ((nullptr == _extra) && (nullptr != errPtr) && (0 != *errPtr)) { - PrefetchError("failed to study regex '%s': %s", _pattern.c_str(), errPtr); - - pcre_free(_re); - _re = nullptr; + std::string error; + int erroffset; + if (!_regex.compile(_pattern, error, erroffset)) { + PrefetchError("compile of regex '%s' at char %d: %s", _pattern.c_str(), erroffset, error.c_str()); return false; } @@ -384,10 +320,6 @@ Pattern::compile() } } - if (!success) { - pcreFree(); - } - return success; } diff --git a/plugins/prefetch/pattern.h b/plugins/prefetch/pattern.h index 5ee58873912..e1883ee5949 100644 --- a/plugins/prefetch/pattern.h +++ b/plugins/prefetch/pattern.h @@ -23,13 +23,8 @@ #pragma once -#ifdef HAVE_PCRE_PCRE_H -#include <pcre/pcre.h> -#else -#include <pcre.h> -#endif - #include "common.h" +#include "tsutil/Regex.h" /** * @brief PCRE matching, capturing and replacing @@ -37,8 +32,7 @@ class Pattern { public: - static const int TOKENCOUNT = 10; /**< @brief Capturing groups $0..$9 */ - static const int OVECOUNT = TOKENCOUNT * 3; /**< @brief pcre_exec() array count, handle 10 capture groups */ + static const int TOKENCOUNT = 10; /**< @brief Capturing groups $0..$9 */ Pattern(); virtual ~Pattern(); @@ -54,10 +48,8 @@ class Pattern private: bool compile(); bool failed(const String &subject) const; - void pcreFree(); - pcre *_re 
= nullptr; /**< @brief PCRE compiled info structure, computed during initialization */ - pcre_extra *_extra = nullptr; /**< @brief PCRE study data block, computed during initialization */ + Regex _regex; String _pattern; /**< @brief PCRE pattern string, containing PCRE patterns and capturing groups. */ String _replacement; /**< @brief PCRE replacement string, containing $0..$9 to be replaced with content of the capturing groups */ From 1f053fc9c7a99fd71b9973f14e8a590b52fc9a64 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 20 Oct 2025 10:08:09 -0500 Subject: [PATCH 2/5] Use default destructor for Pattern --- plugins/prefetch/pattern.cc | 5 ----- plugins/prefetch/pattern.h | 1 - 2 files changed, 6 deletions(-) diff --git a/plugins/prefetch/pattern.cc b/plugins/prefetch/pattern.cc index ff1c74af08b..8d5ef88e522 100644 --- a/plugins/prefetch/pattern.cc +++ b/plugins/prefetch/pattern.cc @@ -129,11 +129,6 @@ Pattern::empty() const return _pattern.empty() || _regex.empty(); } -/** - * @brief Destructor, frees PCRE related resources. - */ -Pattern::~Pattern() {} - /** * @brief Capture or capture-and-replace depending on whether a replacement string is specified. 
* @see replace() diff --git a/plugins/prefetch/pattern.h b/plugins/prefetch/pattern.h index e1883ee5949..1432cfbbfec 100644 --- a/plugins/prefetch/pattern.h +++ b/plugins/prefetch/pattern.h @@ -35,7 +35,6 @@ class Pattern static const int TOKENCOUNT = 10; /**< @brief Capturing groups $0..$9 */ Pattern(); - virtual ~Pattern(); bool init(const String &pattern, const String &replacement); bool init(const String &config); From 810500de9499e9e7c81dde4346cfa7320f215602 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 21 Oct 2025 14:05:03 -0500 Subject: [PATCH 3/5] Address code review - add missing flag to exec() --- plugins/prefetch/pattern.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/plugins/prefetch/pattern.cc b/plugins/prefetch/pattern.cc index 8d5ef88e522..5bebc395dae 100644 --- a/plugins/prefetch/pattern.cc +++ b/plugins/prefetch/pattern.cc @@ -23,6 +23,7 @@ */ #include "pattern.h" +#include "tsutil/Regex.h" static void replaceString(String &str, const String &from, const String &to) @@ -182,7 +183,7 @@ Pattern::match(const String &subject) return false; } - return _regex.exec(subject); + return _regex.exec(subject, RE_NOTEMPTY); } /** @@ -200,7 +201,7 @@ Pattern::capture(const String &subject, StringVector &result) } RegexMatches matches; - int matchCount = _regex.exec(subject, matches); + int matchCount = _regex.exec(subject, matches, RE_NOTEMPTY); if (matchCount <= 0) { return false; @@ -231,7 +232,7 @@ Pattern::replace(const String &subject, String &result) } RegexMatches matches; - int matchCount = _regex.exec(subject, matches); + int matchCount = _regex.exec(subject, matches, RE_NOTEMPTY); if (matchCount <= 0) { return false; From 727eb3d9ae1e04ec93a58d567aea91dbac57e53b Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 5 Nov 2025 14:31:38 -0600 Subject: [PATCH 4/5] Add back PrefetchError for regex errors. 
--- include/tsutil/Regex.h | 4 ++++ plugins/prefetch/pattern.cc | 17 ++++++++++++++++- src/tsutil/Regex.cc | 2 ++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/include/tsutil/Regex.h b/include/tsutil/Regex.h index 9ca0608f760..4cb6dc82c95 100644 --- a/include/tsutil/Regex.h +++ b/include/tsutil/Regex.h @@ -39,6 +39,10 @@ enum REFlags { RE_NOTEMPTY = 0x00000004u ///< Not empty (default: may match empty string). }; +enum REErrorC { + RE_ERROR_NOMATCH = -1, ///< No match. +}; + /// @brief Wrapper for PCRE2 match data. class RegexMatches { diff --git a/plugins/prefetch/pattern.cc b/plugins/prefetch/pattern.cc index 5bebc395dae..a03460ec868 100644 --- a/plugins/prefetch/pattern.cc +++ b/plugins/prefetch/pattern.cc @@ -183,7 +183,16 @@ Pattern::match(const String &subject) return false; } - return _regex.exec(subject, RE_NOTEMPTY); + RegexMatches matches; + int matchCount = _regex.exec(subject, matches, RE_NOTEMPTY); + if (matchCount < 0) { + if (matchCount != RE_ERROR_NOMATCH) { + PrefetchError("matching error %d", matchCount); + } + return false; + } + + return true; } /** @@ -204,6 +213,9 @@ Pattern::capture(const String &subject, StringVector &result) int matchCount = _regex.exec(subject, matches, RE_NOTEMPTY); if (matchCount <= 0) { + if (matchCount != RE_ERROR_NOMATCH) { + PrefetchError("matching error %d", matchCount); + } return false; } @@ -235,6 +247,9 @@ Pattern::replace(const String &subject, String &result) int matchCount = _regex.exec(subject, matches, RE_NOTEMPTY); if (matchCount <= 0) { + if (matchCount != RE_ERROR_NOMATCH) { + PrefetchError("matching error %d", matchCount); + } return false; } diff --git a/src/tsutil/Regex.cc b/src/tsutil/Regex.cc index 32604fb6ae5..dbe92885beb 100644 --- a/src/tsutil/Regex.cc +++ b/src/tsutil/Regex.cc @@ -36,6 +36,8 @@ static_assert(RE_UNANCHORED == PCRE2_MULTILINE, "Update RE_UNANCHORED for curren static_assert(RE_ANCHORED == PCRE2_ANCHORED, "Update RE_ANCHORED for current PCRE2 version."); 
static_assert(RE_NOTEMPTY == PCRE2_NOTEMPTY, "Update RE_NOTEMPTY for current PCRE2 version."); +static_assert(RE_ERROR_NOMATCH == PCRE2_ERROR_NOMATCH, "Update RE_ERROR_NOMATCH for current PCRE2 version."); + //---------------------------------------------------------------------------- namespace { From 27a68cc08d8d6743be210eaac113a1edd95fb8ad Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 5 Nov 2025 16:25:37 -0600 Subject: [PATCH 5/5] Remove redundant REErrorC enum from Regex.h Removed redundant REErrorC enum definition. --- include/tsutil/Regex.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/tsutil/Regex.h b/include/tsutil/Regex.h index 49637093eed..cd8d7c1cb49 100644 --- a/include/tsutil/Regex.h +++ b/include/tsutil/Regex.h @@ -47,10 +47,6 @@ enum REErrors { RE_ERROR_NULL = -51 ///< NULL code or subject was passed. }; -enum REErrorC { - RE_ERROR_NOMATCH = -1, ///< No match. -}; - /// @brief Wrapper for PCRE2 match data. class RegexMatches {