Skip to content

Commit

Permalink
feat: attempt deep parsing of regex to early exit if not vulnerable, …
Browse files Browse the repository at this point in the history
…base root ilq
  • Loading branch information
LeoDog896 committed Mar 7, 2024
1 parent 69eb6db commit e54d536
Show file tree
Hide file tree
Showing 3 changed files with 203 additions and 73 deletions.
87 changes: 55 additions & 32 deletions crates/redos/src/ilq.rs
Original file line number Diff line number Diff line change
@@ -1,42 +1,65 @@
use crate::ir::{Expr, IrAssertion};
use crate::ir::Expr;

/// Scans an ilq. Assumes `expr` is the root expression of the tree.
pub fn scan_ilq(expr: &Expr) -> bool {
/// Outcome of running the ILQ scan over a regex tree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IlqReturn {
    /// `true` when the scan flagged the regex as containing an ilq vulnerability.
    pub is_present: bool,
}

impl IlqReturn {
    /// Builds an [`IlqReturn`] carrying the given `is_present` flag.
    fn new(is_present: bool) -> Self {
        IlqReturn { is_present }
    }
}

/// Scans a regex tree for an ilq 'vulnerability'. Assumes `expr` is the root expression of the tree.
pub fn scan_ilq(expr: &Expr) -> IlqReturn {
match expr {
// if we hit anything that isn't a Vec<Expr>, we're done
Expr::Token => false,
Expr::Assertion(_) => false,

Expr::Conditional { false_branch, .. } => scan_ilq_recursive(&false_branch).unwrap_or_else(|| false),
Expr::Token => IlqReturn::new(false),
Expr::Assertion(_) => IlqReturn::new(false),

// hit an alternation? scan_ilq on the children; we can simply pretend
// as if they're also roots of their own trees.
// let's find the first child that is an ilq vulnerability
Expr::Alt(list) => list.iter().fold(IlqReturn::new(false), |acc, e| {
if acc.is_present {
acc
} else {
scan_ilq(e)
}
}),

// hit an optional token? we're done! an optional token
// in the root immediately indicates that it matches an empty string,
// and thus will finish in a minimal amount of time
Expr::Optional(_) => IlqReturn::new(false),

// if we hit some combinations of tokens, let's scan the children
Expr::Conditional { false_branch, .. } => IlqReturn::new(scan_ilq_nested(false_branch)),
Expr::Concat(list) => IlqReturn::new(list.iter().any(scan_ilq_nested)),
Expr::Group(e, _) => IlqReturn::new(scan_ilq_nested(e)),

// a repeating token? interesting.. we'll need to scan the child
// luckily, we can just pretend as if the child is the root of its own tree
Expr::Repeat(e) => scan_ilq(e),

Expr::LookAround(e, _) => scan_ilq(e),

// TODO: atomic groups and lookarounds
_ => IlqReturn::new(true),
}
}


/// Returns Some(true) iff an ilq is present anywhere in the regex.
/// Returns Some(false) iff no ilq is present anywhere in the regex.
///
/// Returns None if an ilq higher up in the recursive chain can continue
/// looking through its Vec<Expr>
fn scan_ilq_recursive(expr: &Expr) -> Option<bool> {
/// Scans a regex tree for an ilq 'vulnerability'
fn scan_ilq_nested(expr: &Expr) -> bool {
match expr {
// if we hit a non-complex non-optional expression, we can stop
Expr::Token => Some(false),
// if we hit an odd assertion, we can stop
Expr::Assertion(assertion) => match assertion {
// initial large quantifier requires that the quantifier is first.
// if we hit this, it is not first
IrAssertion::Start => Some(false),
// odd that the end will be here, but regardless, not an ILQ
IrAssertion::End => Some(false),
// a word boundary linearizes any ilq
IrAssertion::WordBoundary => Some(false),
// TODO
_ => None
}
// explore every potential path for some ilq
Expr::Alt(list) => list.iter().find(|expr| scan_ilq(expr) == Some(false)),
// TODO
_ => None,
// if we hit a non-optional token, we're done
Expr::Token => false,

// TODO: finish?
_ => true,
}
}
26 changes: 12 additions & 14 deletions crates/redos/src/ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,21 +78,19 @@ pub fn to_expr(
match expr {
RegexExpr::Empty => None,
RegexExpr::Any { .. } => Some(Expr::Token),
RegexExpr::Assertion(a) => Some(Expr::Assertion(
match a {
// Since start and line only depend on the multiline flag,
// they don't particularly matter for ReDoS detection.
Assertion::StartText => IrAssertion::Start,
Assertion::EndText => IrAssertion::End,
Assertion::StartLine { .. } => IrAssertion::Start,
Assertion::EndLine { .. } => IrAssertion::End,
RegexExpr::Assertion(a) => Some(Expr::Assertion(match a {
// Since start and line only depend on the multiline flag,
// they don't particularly matter for ReDoS detection.
Assertion::StartText => IrAssertion::Start,
Assertion::EndText => IrAssertion::End,
Assertion::StartLine { .. } => IrAssertion::Start,
Assertion::EndLine { .. } => IrAssertion::End,

Assertion::LeftWordBoundary => IrAssertion::LeftWordBoundary,
Assertion::RightWordBoundary => IrAssertion::RightWordBoundary,
Assertion::WordBoundary => IrAssertion::WordBoundary,
Assertion::NotWordBoundary => IrAssertion::NotWordBoundary,
},
)),
Assertion::LeftWordBoundary => IrAssertion::LeftWordBoundary,
Assertion::RightWordBoundary => IrAssertion::RightWordBoundary,
Assertion::WordBoundary => IrAssertion::WordBoundary,
Assertion::NotWordBoundary => IrAssertion::NotWordBoundary,
})),
RegexExpr::Literal { .. } => Some(Expr::Token),
// TODO: propagate group increment
RegexExpr::Concat(list) => Some(Expr::Concat(
Expand Down
163 changes: 136 additions & 27 deletions crates/redos/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,42 +4,129 @@ pub mod vulnerability;
mod ilq;

use fancy_regex::parse::Parser;
use fancy_regex::{Expr as RegexExpr, Result};
use fancy_regex::Expr as RegexExpr;
use ir::{to_expr, Expr, ExprConditional};
use vulnerability::{Vulnerability, VulnerabilityConfig};

/// Returns true iff repeats are present anywhere in the regex
/// Summary of the structural features found while pre-scanning a regex tree.
///
/// The `Default` value has every flag cleared and is the identity for
/// [`RegexInfo::merge`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
struct RegexInfo {
    /// Set when a repeat was recorded during the scan.
    has_repeat: bool,
    /// Set when an alternation was recorded during the scan.
    has_alternation: bool,
}

impl RegexInfo {
    /// Combines two results field by field with a logical OR, so a flag is
    /// set in the output when it is set in either input.
    fn merge(self, other: RegexInfo) -> RegexInfo {
        RegexInfo {
            has_repeat: self.has_repeat || other.has_repeat,
            has_alternation: self.has_alternation || other.has_alternation,
        }
    }

    /// Returns a result with no flags set.
    ///
    /// Delegates to the derived `Default` so the "nothing found" value cannot
    /// drift out of sync if a field is added later.
    fn empty() -> RegexInfo {
        RegexInfo::default()
    }
}

/// Returns base information about regex
///
/// A regex must meet the following criteria to be even considered to be vulnerable:
/// - It must contain a repeat
/// - The repeat must have a bound size greater than `config.max_quantifier`
/// - The regex must have a terminating state (to allow for backtracking) (TODO: this is not implemented yet)
fn repeats_anywhere(expr: &Expr) -> bool {
fn regex_pre_scan(expr: &Expr) -> RegexInfo {
match expr {
Expr::Repeat { .. } => true,
// even though there is a repeat, since it is the root node,
// we must dig deeper to see if the repeat does matter,
// since else this will violate our terminating state criteria
Expr::Repeat(expr) => regex_pre_scan(expr.as_ref()),
Expr::Token => RegexInfo::empty(),
Expr::Assertion(_) => RegexInfo::empty(),

// propagate
Expr::Concat(list) => list.iter().fold(RegexInfo::empty(), |acc, e| {
acc.merge(regex_pre_scan_nested(e))
}),

// we use regex_pre_scan instead of nested because
// the alternations effectively act as different regexes
Expr::Alt(list) => list
.iter()
.fold(RegexInfo::empty(), |acc, e| acc.merge(regex_pre_scan(e)))
.merge(RegexInfo {
has_repeat: false,
has_alternation: true,
}),

// doesn't matter how many groups we nest it in,
// a group in the root node is as useful as
// not having a group at all
Expr::Group(e, _) => regex_pre_scan(e.as_ref()),
Expr::LookAround(e, _) => regex_pre_scan(e.as_ref()),
Expr::AtomicGroup(e) => regex_pre_scan(e.as_ref()),

// if the optional is in the root, it doesn't matter
// if it's nested or not, it will always match
Expr::Optional(e) => regex_pre_scan(e.as_ref()),

Expr::Conditional {
condition,
true_branch,
false_branch,
} => {
match condition {
// TODO: can we potentially skip the true_branch here if we know the group never matched
ExprConditional::BackrefExistsCondition(_) => {
regex_pre_scan_nested(true_branch.as_ref())
.merge(regex_pre_scan(false_branch.as_ref()))
}
ExprConditional::Condition(condition) => regex_pre_scan(condition.as_ref())
.merge(regex_pre_scan_nested(true_branch.as_ref()))
.merge(regex_pre_scan_nested(false_branch.as_ref())),
}
}
}
}

fn regex_pre_scan_nested(expr: &Expr) -> RegexInfo {
match expr {
Expr::Repeat(_) => RegexInfo {
has_repeat: true,
has_alternation: false,
},

// no nested expressions
Expr::Token => false,
Expr::Assertion(_) => false,
Expr::Token => RegexInfo::empty(),
Expr::Assertion(_) => RegexInfo::empty(),

// propagate
Expr::Concat(list) => list.iter().any(repeats_anywhere),
Expr::Alt(list) => list.iter().any(repeats_anywhere),
Expr::Group(e, _) => repeats_anywhere(e.as_ref()),
Expr::LookAround(e, _) => repeats_anywhere(e.as_ref()),
Expr::AtomicGroup(e) => repeats_anywhere(e.as_ref()),
Expr::Optional(e) => repeats_anywhere(e.as_ref()),
Expr::Concat(list) => list.iter().fold(RegexInfo::empty(), |acc, e| {
acc.merge(regex_pre_scan_nested(e))
}),
Expr::Alt(list) => list
.iter()
.fold(RegexInfo::empty(), |acc, e| {
acc.merge(regex_pre_scan_nested(e))
})
.merge(RegexInfo {
has_repeat: false,
has_alternation: true,
}),
Expr::Group(e, _) => regex_pre_scan_nested(e.as_ref()),
Expr::LookAround(e, _) => regex_pre_scan_nested(e.as_ref()),
Expr::AtomicGroup(e) => regex_pre_scan_nested(e.as_ref()),
Expr::Optional(e) => regex_pre_scan_nested(e.as_ref()),
Expr::Conditional {
condition,
true_branch,
false_branch,
} => match condition {
ExprConditional::BackrefExistsCondition(_) => false,
ExprConditional::Condition(condition) => {
repeats_anywhere(condition.as_ref())
|| repeats_anywhere(true_branch.as_ref())
|| repeats_anywhere(false_branch.as_ref())
}
ExprConditional::BackrefExistsCondition(_) => RegexInfo::empty(),
ExprConditional::Condition(condition) => regex_pre_scan_nested(condition.as_ref())
.merge(regex_pre_scan_nested(true_branch.as_ref()))
.merge(regex_pre_scan_nested(false_branch.as_ref())),
},
}
}
Expand All @@ -55,7 +142,10 @@ pub struct VulnerabilityResult {
}

/// Returns the list of vulnerabilities in a regex
pub fn vulnerabilities(regex: &str, config: &VulnerabilityConfig) -> Result<VulnerabilityResult> {
pub fn vulnerabilities(
regex: &str,
config: &VulnerabilityConfig,
) -> fancy_regex::Result<VulnerabilityResult> {
// attempt to parse the regex with rust's regex parser
let can_be_dfa = regex::Regex::new(regex).is_ok();

Expand All @@ -70,20 +160,39 @@ pub fn vulnerabilities(regex: &str, config: &VulnerabilityConfig) -> Result<Vuln
}

// second pass: turn AST into IR
let expr = to_expr(&tree.expr, config, nonzero_lit::usize!(1))
.expect("Failed to convert AST to IR; this is a bug");
let expr = match to_expr(&tree.expr, config, nonzero_lit::usize!(1)) {
Some(expr) => expr,
None => {
return Ok(VulnerabilityResult {
vulnerabilities: vec![],
dfa: can_be_dfa,
})
}
};

// third pass: exit early if there are no repeats
if !repeats_anywhere(&expr) {
let regex_info = regex_pre_scan(&expr);
if !regex_info.has_repeat {
return Ok(VulnerabilityResult {
vulnerabilities: vec![],
dfa: can_be_dfa,
});
}

// TODO: this is a fake placeholder
Ok(VulnerabilityResult {
vulnerabilities: vec![Vulnerability::InitialQuantifier],
dfa: can_be_dfa,
})
// scan for vulnerabilities
{
let mut vulnerabilities: Vec<Vulnerability> = vec![];

// first vulnerability scan: ILQ
let ilq = ilq::scan_ilq(&expr);

if ilq.is_present {
vulnerabilities.push(Vulnerability::InitialQuantifier);
}

Ok(VulnerabilityResult {
vulnerabilities,
dfa: can_be_dfa,
})
}
}

0 comments on commit e54d536

Please sign in to comment.