Skip to content

Commit 1949b3b

Browse files
authored
prefetch: use Regex instead of pcre (#12576)
1 parent 891d348 commit 1949b3b

File tree

2 files changed

+32
-98
lines changed

2 files changed

+32
-98
lines changed

plugins/prefetch/pattern.cc

Lines changed: 29 additions & 86 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
*/
2424

2525
#include "pattern.h"
26+
#include "tsutil/Regex.h"
2627

2728
static void
2829
replaceString(String &str, const String &from, const String &to)
@@ -49,16 +50,13 @@ Pattern::Pattern() : _pattern(""), _replacement("") {}
4950
bool
5051
Pattern::init(const String &pattern, const String &replacement)
5152
{
52-
pcreFree();
53-
5453
_pattern.assign(pattern);
5554
_replacement.assign(replacement);
5655

5756
_tokenCount = 0;
5857

5958
if (!compile()) {
6059
PrefetchDebug("failed to initialize pattern:'%s', replacement:'%s'", pattern.c_str(), replacement.c_str());
61-
pcreFree();
6260
return false;
6361
}
6462

@@ -129,32 +127,7 @@ Pattern::init(const String &config)
129127
bool
130128
Pattern::empty() const
131129
{
132-
return _pattern.empty() || nullptr == _re;
133-
}
134-
135-
/**
136-
* @brief Frees PCRE library related resources.
137-
*/
138-
void
139-
Pattern::pcreFree()
140-
{
141-
if (_re) {
142-
pcre_free(_re);
143-
_re = nullptr;
144-
}
145-
146-
if (_extra) {
147-
pcre_free(_extra);
148-
_extra = nullptr;
149-
}
150-
}
151-
152-
/**
153-
* @brief Destructor, frees PCRE related resources.
154-
*/
155-
Pattern::~Pattern()
156-
{
157-
pcreFree();
130+
return _pattern.empty() || _regex.empty();
158131
}
159132

160133
/**
@@ -204,16 +177,16 @@ Pattern::process(const String &subject, StringVector &result)
204177
bool
205178
Pattern::match(const String &subject)
206179
{
207-
int matchCount;
208180
PrefetchDebug("matching '%s' to '%s'", _pattern.c_str(), subject.c_str());
209181

210-
if (!_re) {
182+
if (_regex.empty()) {
211183
return false;
212184
}
213185

214-
matchCount = pcre_exec(_re, _extra, subject.c_str(), subject.length(), 0, PCRE_NOTEMPTY, nullptr, 0);
186+
RegexMatches matches;
187+
int matchCount = _regex.exec(subject, matches, RE_NOTEMPTY);
215188
if (matchCount < 0) {
216-
if (matchCount != PCRE_ERROR_NOMATCH) {
189+
if (matchCount != RE_ERROR_NOMATCH) {
217190
PrefetchError("matching error %d", matchCount);
218191
}
219192
return false;
@@ -230,31 +203,26 @@ Pattern::match(const String &subject)
230203
bool
231204
Pattern::capture(const String &subject, StringVector &result)
232205
{
233-
int matchCount;
234-
int ovector[OVECOUNT];
235-
236206
PrefetchDebug("matching '%s' to '%s'", _pattern.c_str(), subject.c_str());
237207

238-
if (!_re) {
208+
if (_regex.empty()) {
239209
return false;
240210
}
241211

242-
matchCount = pcre_exec(_re, nullptr, subject.c_str(), subject.length(), 0, PCRE_NOTEMPTY, ovector, OVECOUNT);
243-
if (matchCount < 0) {
244-
if (matchCount != PCRE_ERROR_NOMATCH) {
212+
RegexMatches matches;
213+
int matchCount = _regex.exec(subject, matches, RE_NOTEMPTY);
214+
215+
if (matchCount <= 0) {
216+
if (matchCount != RE_ERROR_NOMATCH) {
245217
PrefetchError("matching error %d", matchCount);
246218
}
247219
return false;
248220
}
249221

250222
for (int i = 0; i < matchCount; i++) {
251-
int start = ovector[2 * i];
252-
int length = ovector[2 * i + 1] - ovector[2 * i];
253-
254-
String dst(subject, start, length);
255-
256-
PrefetchDebug("capturing '%s' %d[%d,%d]", dst.c_str(), i, ovector[2 * i], ovector[2 * i + 1]);
257-
result.push_back(dst);
223+
std::string_view match = matches[i];
224+
result.emplace_back(match.data(), match.length());
225+
PrefetchDebug("capturing '%s' %d", result.back().c_str(), i);
258226
}
259227

260228
return true;
@@ -269,18 +237,17 @@ Pattern::capture(const String &subject, StringVector &result)
269237
bool
270238
Pattern::replace(const String &subject, String &result)
271239
{
272-
int matchCount;
273-
int ovector[OVECOUNT];
274-
275240
PrefetchDebug("matching '%s' to '%s'", _pattern.c_str(), subject.c_str());
276241

277-
if (!_re) {
242+
if (_regex.empty()) {
278243
return false;
279244
}
280245

281-
matchCount = pcre_exec(_re, nullptr, subject.c_str(), subject.length(), 0, PCRE_NOTEMPTY, ovector, OVECOUNT);
282-
if (matchCount < 0) {
283-
if (matchCount != PCRE_ERROR_NOMATCH) {
246+
RegexMatches matches;
247+
int matchCount = _regex.exec(subject, matches, RE_NOTEMPTY);
248+
249+
if (matchCount <= 0) {
250+
if (matchCount != RE_ERROR_NOMATCH) {
284251
PrefetchError("matching error %d", matchCount);
285252
}
286253
return false;
@@ -296,17 +263,15 @@ Pattern::replace(const String &subject, String &result)
296263

297264
int previous = 0;
298265
for (int i = 0; i < _tokenCount; i++) {
299-
int replIndex = _tokens[i];
300-
int start = ovector[2 * replIndex];
301-
int length = ovector[2 * replIndex + 1] - ovector[2 * replIndex];
266+
int replIndex = _tokens[i];
267+
std::string_view dst = matches[replIndex];
302268

303269
String src(_replacement, _tokenOffset[i], 2);
304-
String dst(subject, start, length);
305270

306-
PrefetchDebug("replacing '%s' with '%s'", src.c_str(), dst.c_str());
271+
PrefetchDebug("replacing '%s' with '%.*s'", src.c_str(), static_cast<int>(dst.length()), dst.data());
307272

308273
result.append(_replacement, previous, _tokenOffset[i] - previous);
309-
result.append(dst);
274+
result.append(dst.data(), dst.length());
310275

311276
previous = _tokenOffset[i] + 2; /* 2 is the size of $0 or $1 or $2, ... or $9 */
312277
}
@@ -325,30 +290,12 @@ Pattern::replace(const String &subject, String &result)
325290
bool
326291
Pattern::compile()
327292
{
328-
const char *errPtr; /* PCRE error */
329-
int errOffset; /* PCRE error offset */
330-
331293
PrefetchDebug("compiling pattern:'%s', replacement:'%s'", _pattern.c_str(), _replacement.c_str());
332294

333-
_re = pcre_compile(_pattern.c_str(), /* the pattern */
334-
0, /* options */
335-
&errPtr, /* for error message */
336-
&errOffset, /* for error offset */
337-
nullptr); /* use default character tables */
338-
339-
if (nullptr == _re) {
340-
PrefetchError("compile of regex '%s' at char %d: %s", _pattern.c_str(), errOffset, errPtr);
341-
342-
return false;
343-
}
344-
345-
_extra = pcre_study(_re, 0, &errPtr);
346-
347-
if ((nullptr == _extra) && (nullptr != errPtr) && (0 != *errPtr)) {
348-
PrefetchError("failed to study regex '%s': %s", _pattern.c_str(), errPtr);
349-
350-
pcre_free(_re);
351-
_re = nullptr;
295+
std::string error;
296+
int erroffset;
297+
if (!_regex.compile(_pattern, error, erroffset)) {
298+
PrefetchError("compile of regex '%s' at char %d: %s", _pattern.c_str(), erroffset, error.c_str());
352299
return false;
353300
}
354301

@@ -384,10 +331,6 @@ Pattern::compile()
384331
}
385332
}
386333

387-
if (!success) {
388-
pcreFree();
389-
}
390-
391334
return success;
392335
}
393336

plugins/prefetch/pattern.h

Lines changed: 3 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -23,25 +23,18 @@
2323

2424
#pragma once
2525

26-
#ifdef HAVE_PCRE_PCRE_H
27-
#include <pcre/pcre.h>
28-
#else
29-
#include <pcre.h>
30-
#endif
31-
3226
#include "common.h"
27+
#include "tsutil/Regex.h"
3328

3429
/**
3530
* @brief PCRE matching, capturing and replacing
3631
*/
3732
class Pattern
3833
{
3934
public:
40-
static const int TOKENCOUNT = 10; /**< @brief Capturing groups $0..$9 */
41-
static const int OVECOUNT = TOKENCOUNT * 3; /**< @brief pcre_exec() array count, handle 10 capture groups */
35+
static const int TOKENCOUNT = 10; /**< @brief Capturing groups $0..$9 */
4236

4337
Pattern();
44-
virtual ~Pattern();
4538

4639
bool init(const String &pattern, const String &replacement);
4740
bool init(const String &config);
@@ -54,10 +47,8 @@ class Pattern
5447
private:
5548
bool compile();
5649
bool failed(const String &subject) const;
57-
void pcreFree();
5850

59-
pcre *_re = nullptr; /**< @brief PCRE compiled info structure, computed during initialization */
60-
pcre_extra *_extra = nullptr; /**< @brief PCRE study data block, computed during initialization */
51+
Regex _regex;
6152

6253
String _pattern; /**< @brief PCRE pattern string, containing PCRE patterns and capturing groups. */
6354
String _replacement; /**< @brief PCRE replacement string, containing $0..$9 to be replaced with content of the capturing groups */

0 commit comments

Comments
 (0)