Skip to content

Commit

Permalink
refactor(ir): ignore crlf lines
Browse files Browse the repository at this point in the history
  • Loading branch information
LeoDog896 committed Mar 7, 2024
1 parent a868d7b commit 78ba4f7
Showing 1 changed file with 41 additions and 16 deletions.
57 changes: 41 additions & 16 deletions crates/redos/src/ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,26 @@ use fancy_regex::{Assertion, Expr as RegexExpr, LookAround};

use crate::vulnerability::VulnerabilityConfig;

#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum IrAssertion {
/// Start of input text
StartText,
/// End of input text
EndText,
/// Start of a line
StartLine,
/// End of a line
EndLine,
/// Left word boundary
LeftWordBoundary,
/// Right word boundary
RightWordBoundary,
/// Both word boundaries
WordBoundary,
/// Not word boundary
NotWordBoundary,
}

#[derive(Debug, PartialEq, Eq)]
pub enum ExprConditional {
Condition(Box<Expr>),
Expand All @@ -18,7 +38,7 @@ pub enum Expr {
/// Some token, whether its a character class, any character, etc.
Token,
/// An assertion
Assertion(Assertion),
Assertion(IrAssertion),
/// Concatenation of multiple expressions, must match in order, e.g. `a.` is a concatenation of
/// the literal `a` and `.` for any character
Concat(Vec<Expr>),
Expand All @@ -33,14 +53,10 @@ pub enum Expr {
/// Look-around (e.g. positive/negative look-ahead or look-behind) with an expression, e.g.
/// `(?=a)` means the next character must be `a` (but the match is not consumed)
LookAround(Box<Expr>, LookAround),
/// Repeat of an expression, e.g. `a*` or `a+` or `a{1,3}`
Repeat {
/// The expression that is being repeated
child: Box<Expr>,
/// Greedy means as much as possible is matched, e.g. `.*b` would match all of `abab`.
/// Non-greedy means as little as possible, e.g. `.*?b` would match only `ab` in `abab`.
greedy: bool,
},
/// Some large repeat of an expression.
// Implementation Note: Greedy does not matter as if it doesn't match (in the case of ReDoS abuse),
// greedy will not affect its matching because of the terminal token.
Repeat(Box<Expr>),
/// Optional expression, e.g. `a?` means `a` is optional
Optional(Box<Expr>),
/// Atomic non-capturing group, e.g. `(?>ab|a)` in text that contains `ab` will match `ab` and
Expand All @@ -66,7 +82,18 @@ pub fn to_expr(
match expr {
RegexExpr::Empty => None,
RegexExpr::Any { .. } => Some(Expr::Token),
RegexExpr::Assertion(a) => Some(Expr::Assertion(*a)),
RegexExpr::Assertion(a) => Some(Expr::Assertion(
match a {
Assertion::StartText => IrAssertion::StartText,
Assertion::EndText => IrAssertion::EndText,
Assertion::StartLine { .. } => IrAssertion::StartLine,
Assertion::EndLine { .. } => IrAssertion::EndLine,
Assertion::LeftWordBoundary => IrAssertion::LeftWordBoundary,
Assertion::RightWordBoundary => IrAssertion::RightWordBoundary,
Assertion::WordBoundary => IrAssertion::WordBoundary,
Assertion::NotWordBoundary => IrAssertion::NotWordBoundary,
},
)),
RegexExpr::Literal { .. } => Some(Expr::Token),
// TODO: propagate group increment
RegexExpr::Concat(list) => Some(Expr::Concat(
Expand Down Expand Up @@ -94,17 +121,15 @@ pub fn to_expr(
child,
lo,
hi,
greedy,
greedy: _,
} => {
let range = hi - lo;

let expression = to_expr(child, config, group_increment);
let expression = if range > config.max_quantifier {
to_expr(child, config, group_increment).map(|child| Expr::Repeat {
child: Box::new(child),
greedy: *greedy,
})
expression.map(|child| Expr::Repeat(Box::new(child)))
} else {
to_expr(child, config, group_increment)
expression
};

if *lo == 0 {
Expand Down

0 comments on commit 78ba4f7

Please sign in to comment.