Skip to content

Commit 454ac52

Browse files
committed
[JRuby] Accept String as a pattern at non-head positions
It supports non-head match cases such as StringScanner#scan_until.

If we use a String as a pattern, we can improve match performance. Here is the result of the included benchmark. It shows that a String pattern is 2.11x faster than a Regexp pattern.

```
$ benchmark-driver benchmark/check_until.yaml
Warming up --------------------------------------
              regexp     7.591M i/s -      7.544M times in 0.993780s (131.74ns/i)
          regexp_var     6.143M i/s -      6.125M times in 0.997038s (162.77ns/i)
              string    14.135M i/s -     14.079M times in 0.996067s (70.75ns/i)
          string_var    14.079M i/s -     14.057M times in 0.998420s (71.03ns/i)
Calculating -------------------------------------
              regexp     9.409M i/s -     22.773M times in 2.420268s (106.28ns/i)
          regexp_var    10.116M i/s -     18.430M times in 1.821820s (98.85ns/i)
              string    21.389M i/s -     42.404M times in 1.982519s (46.75ns/i)
          string_var    20.897M i/s -     42.237M times in 2.021187s (47.85ns/i)

Comparison:
              string:  21389191.1 i/s
          string_var:  20897327.5 i/s - 1.02x slower
          regexp_var:  10116464.7 i/s - 2.11x slower
              regexp:   9409222.3 i/s - 2.27x slower
```

See: https://github.com/jruby/jruby/blob/be7815ec02356a58891c8727bb448f0c6a826d96/core/src/main/java/org/jruby/util/StringSupport.java#L1706-L1736
1 parent 800482a commit 454ac52

File tree

1 file changed

+13
-17
lines changed

1 file changed

+13
-17
lines changed

ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -262,17 +262,6 @@ private IRubyObject extractBegLen(Ruby runtime, int beg, int len) {
262262
// MRI: strscan_do_scan
263263
private IRubyObject scan(ThreadContext context, IRubyObject regex, boolean succptr, boolean getstr, boolean headonly) {
264264
final Ruby runtime = context.runtime;
265-
266-
if (headonly) {
267-
if (!(regex instanceof RubyRegexp)) {
268-
regex = regex.convertToString();
269-
}
270-
} else {
271-
if (!(regex instanceof RubyRegexp)) {
272-
throw runtime.newTypeError("wrong argument type " + regex.getMetaClass() + " (expected Regexp)");
273-
}
274-
}
275-
276265
check(context);
277266

278267
ByteList strBL = str.getByteList();
@@ -310,9 +299,9 @@ private IRubyObject scan(ThreadContext context, IRubyObject regex, boolean succp
310299
}
311300
if (ret < 0) return context.nil;
312301
} else {
313-
RubyString pattern = (RubyString) regex;
302+
RubyString pattern = regex.convertToString();
314303

315-
str.checkEncoding(pattern);
304+
Encoding patternEnc = str.checkEncoding(pattern);
316305

317306
if (restLen() < pattern.size()) {
318307
return context.nil;
@@ -321,11 +310,18 @@ private IRubyObject scan(ThreadContext context, IRubyObject regex, boolean succp
321310
ByteList patternBL = pattern.getByteList();
322311
int patternSize = patternBL.realSize();
323312

324-
if (ByteList.memcmp(strBL.unsafeBytes(), strBeg + curr, patternBL.unsafeBytes(), patternBL.begin(), patternSize) != 0) {
325-
return context.nil;
313+
if (headonly) {
314+
if (ByteList.memcmp(strBL.unsafeBytes(), strBeg + curr, patternBL.unsafeBytes(), patternBL.begin(), patternSize) != 0) {
315+
return context.nil;
316+
}
317+
setRegisters(patternSize);
318+
} else {
319+
int pos = StringSupport.index(strBL, patternBL, strBeg + curr, patternEnc);
320+
if (pos == -1) {
321+
return context.nil;
322+
}
323+
setRegisters(patternSize + pos - curr);
326324
}
327-
328-
setRegisters(patternSize);
329325
}
330326

331327
setMatched();

0 commit comments

Comments
 (0)