Skip to content

Commit 0646323

Browse files
committed
Ensure match prevents integer overflow
Integers can be either Fixnum or Bignum internally (and the exact value at which the type changes varies by platform), so support both by using `RB_INTEGER_TYPE_P` to test values and `NUM2INT` to convert them to a C `int` (which also raises `RangeError` for values that do not fit) instead of always assuming Fixnum integers. In the specs, dynamically calculate C's `INT_MAX` using Ruby's `RbConfig::SIZEOF` hash of C type byte sizes (see https://docs.ruby-lang.org/en/master/RbConfig.html#SIZEOF) rather than hard-coding the value to 2147483647.
1 parent a8a6cbd commit 0646323

File tree

2 files changed

+26
-11
lines changed

2 files changed

+26
-11
lines changed

ext/re2/re2.cc

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -417,8 +417,8 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
417417

418418
int id;
419419

420-
if (FIXNUM_P(idx)) {
421-
id = FIX2INT(idx);
420+
if (RB_INTEGER_TYPE_P(idx)) {
421+
id = NUM2INT(idx);
422422
} else if (SYMBOL_P(idx)) {
423423
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
424424
std::map<std::string, int>::const_iterator search = groups.find(rb_id2name(SYM2ID(idx)));
@@ -686,10 +686,10 @@ static VALUE re2_matchdata_aref(int argc, VALUE *argv, const VALUE self) {
686686
std::string(RSTRING_PTR(idx), RSTRING_LEN(idx)), self);
687687
} else if (SYMBOL_P(idx)) {
688688
return re2_matchdata_named_match(rb_id2name(SYM2ID(idx)), self);
689-
} else if (!NIL_P(rest) || !FIXNUM_P(idx) || FIX2INT(idx) < 0) {
689+
} else if (!NIL_P(rest) || !RB_INTEGER_TYPE_P(idx) || NUM2INT(idx) < 0) {
690690
return rb_ary_aref(argc, argv, re2_matchdata_to_a(self));
691691
} else {
692-
return re2_matchdata_nth_match(FIX2INT(idx), self);
692+
return re2_matchdata_nth_match(NUM2INT(idx), self);
693693
}
694694
}
695695

@@ -1426,7 +1426,7 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
14261426
RE2::Anchor anchor = RE2::UNANCHORED;
14271427

14281428
if (RTEST(options)) {
1429-
if (FIXNUM_P(options)) {
1429+
if (RB_INTEGER_TYPE_P(options)) {
14301430
n = NUM2INT(options);
14311431

14321432
if (n < 0) {
@@ -1440,8 +1440,6 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
14401440
VALUE endpos_option = rb_hash_aref(options, ID2SYM(id_endpos));
14411441
if (!NIL_P(endpos_option)) {
14421442
#ifdef HAVE_ENDPOS_ARGUMENT
1443-
Check_Type(endpos_option, T_FIXNUM);
1444-
14451443
endpos = NUM2INT(endpos_option);
14461444

14471445
if (endpos < 0) {
@@ -1470,8 +1468,6 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
14701468

14711469
VALUE submatches_option = rb_hash_aref(options, ID2SYM(id_submatches));
14721470
if (!NIL_P(submatches_option)) {
1473-
Check_Type(submatches_option, T_FIXNUM);
1474-
14751471
n = NUM2INT(submatches_option);
14761472

14771473
if (n < 0) {
@@ -1487,8 +1483,6 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
14871483

14881484
VALUE startpos_option = rb_hash_aref(options, ID2SYM(id_startpos));
14891485
if (!NIL_P(startpos_option)) {
1490-
Check_Type(startpos_option, T_FIXNUM);
1491-
14921486
startpos = NUM2INT(startpos_option);
14931487

14941488
if (startpos < 0) {
@@ -1520,6 +1514,10 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
15201514
#endif
15211515
return BOOL2RUBY(matched);
15221516
} else {
1517+
if (n == INT_MAX) {
1518+
rb_raise(rb_eRangeError, "number of matches should be < %d", INT_MAX);
1519+
}
1520+
15231521
/* Because match returns the whole match as well. */
15241522
n += 1;
15251523

spec/re2/regexp_spec.rb

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
# frozen_string_literal: true
22

3+
require "rbconfig/sizeof"
4+
35
RSpec.describe RE2::Regexp do
6+
INT_MAX = 2**(RbConfig::SIZEOF.fetch("int") * 8 - 1) - 1
7+
48
describe "#initialize" do
59
it "returns an instance given only a pattern" do
610
re = RE2::Regexp.new('woo')
@@ -566,6 +570,12 @@
566570
expect { re.match("one two three", submatches: :invalid) }.to raise_error(TypeError)
567571
end
568572

573+
it "raises an exception when given too large a number of submatches" do
574+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
575+
576+
expect { re.match("one two three", submatches: INT_MAX) }.to raise_error(RangeError, "number of matches should be < #{INT_MAX}")
577+
end
578+
569579
it "defaults to extracting all submatches when given nil", :aggregate_failures do
570580
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
571581
md = re.match("one two three", submatches: nil)
@@ -584,6 +594,13 @@
584594
expect(md[3]).to be_nil
585595
end
586596

597+
it "raises an exception if given too large a number of submatches instead of options" do
598+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
599+
md = re.match("one two three", 2)
600+
601+
expect { re.match("one two three", INT_MAX) }.to raise_error(RangeError, "number of matches should be < #{INT_MAX}")
602+
end
603+
587604
it "raises an exception when given invalid options" do
588605
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
589606

0 commit comments

Comments
 (0)