diff --git a/README.md b/README.md index 1a38b1a..7963b1e 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # redos -fast redos prevention in your codebase +fast ReDoS detection in your codebase. detects any superlinear (non `O(n)`) regex matching. ## TODO diff --git a/crates/redos/src/ilq.rs b/crates/redos/src/ilq.rs index 1177a0d..afa9536 100644 --- a/crates/redos/src/ilq.rs +++ b/crates/redos/src/ilq.rs @@ -1,13 +1,42 @@ -use crate::ir::Expr; +use crate::ir::{Expr, IrAssertion}; -/// Returns true iif an ilq is present anywhere in the regex +/// Scans an ilq. Assumes `expr` is the root expression of the tree. pub fn scan_ilq(expr: &Expr) -> bool { match expr { - // if we hit a non-complex non-optional expression, we can stop + // if we hit anything that isn't a Vec, we're done Expr::Token => false, + Expr::Assertion(_) => false, + + Expr::Conditional { false_branch, .. } => scan_ilq_recursive(&false_branch).unwrap_or_else(|| false), + + } +} + + +/// Returns Some(true) iff an ilq is present anywhere in the regex. +/// Returns Some(false) iff no ilq is present anywhere in the regex. +/// +/// Returns None if an ilq higher up in the recursive chain can continue +/// looking through its Vec +fn scan_ilq_recursive(expr: &Expr) -> Option { match expr { + // if we hit a non-complex non-optional expression, we can stop + Expr::Token => Some(false), + // if we hit an odd assertion, we can stop + Expr::Assertion(assertion) => match assertion { + // initial large quantifier requires that the quantifier is first. 
+ // if we hit this, it is not first + IrAssertion::Start => Some(false), + // odd that the end will be here, but regardless, not an ILQ + IrAssertion::End => Some(false), + // a word boundary linearizes any ilq + IrAssertion::WordBoundary => Some(false), + // TODO + _ => None + } // explore every potential path for some ilq - Expr::Alt(list) => list.iter().any(scan_ilq), + Expr::Alt(list) => list.iter().find(|expr| scan_ilq(expr) == Some(false)), // TODO - _ => false, + _ => None, } } diff --git a/crates/redos/src/ir.rs b/crates/redos/src/ir.rs index 69fbf44..4e23370 100644 --- a/crates/redos/src/ir.rs +++ b/crates/redos/src/ir.rs @@ -1,4 +1,4 @@ -//! Immediate representation of a regular expression. +//! Intermediate representation of a regular expression. //! Used to simplify the AST and make it easier to work with. use std::num::NonZeroUsize; @@ -10,13 +10,9 @@ use crate::vulnerability::VulnerabilityConfig; #[derive(Debug, PartialEq, Eq, Clone, Copy)] pub enum IrAssertion { /// Start of input text - StartText, + Start, /// End of input text - EndText, - /// Start of a line - StartLine, - /// End of a line - EndLine, + End, /// Left word boundary LeftWordBoundary, /// Right word boundary @@ -84,10 +80,13 @@ pub fn to_expr( RegexExpr::Any { .. } => Some(Expr::Token), RegexExpr::Assertion(a) => Some(Expr::Assertion( match a { - Assertion::StartText => IrAssertion::StartText, - Assertion::EndText => IrAssertion::EndText, - Assertion::StartLine { .. } => IrAssertion::StartLine, - Assertion::EndLine { .. } => IrAssertion::EndLine, + // Since text and line anchors differ only by the multiline flag, + // the distinction doesn't particularly matter for ReDoS detection. + Assertion::StartText => IrAssertion::Start, + Assertion::EndText => IrAssertion::End, + Assertion::StartLine { .. } => IrAssertion::Start, + Assertion::EndLine { .. 
} => IrAssertion::End, + Assertion::LeftWordBoundary => IrAssertion::LeftWordBoundary, Assertion::RightWordBoundary => IrAssertion::RightWordBoundary, Assertion::WordBoundary => IrAssertion::WordBoundary, diff --git a/website/src/app.html b/website/src/app.html index 6769ed5..3c914de 100644 --- a/website/src/app.html +++ b/website/src/app.html @@ -4,6 +4,7 @@ + redos %sveltekit.head%