Skip to content

Commit

Permalink
feat: base group increment
Browse files Browse the repository at this point in the history
  • Loading branch information
LeoDog896 committed Mar 6, 2024
1 parent 258372b commit a868d7b
Show file tree
Hide file tree
Showing 10 changed files with 422 additions and 148 deletions.
448 changes: 327 additions & 121 deletions Cargo.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions crates/redos-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ path = "src/main.rs"

[dependencies]
fancy-regex = { git = "https://github.com/LeoDog896/fancy-regex", rev = "edc7c1f" }
ruff_python_parser = { git = "https://github.com/astral-sh/ruff", rev = "af6ea2f" }
anyhow = "1.0"
clap = { version = "4.4", features = ["derive"] }
flate2 = "1.0"
Expand Down
1 change: 1 addition & 0 deletions crates/redos-cli/src/languages/mod.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pub mod javascript;
pub mod language;
pub mod python;
38 changes: 38 additions & 0 deletions crates/redos-cli/src/languages/python.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
use std::path::Path;

use anyhow::Result;

use ruff_python_parser::{lexer::lex, Mode};

use async_trait::async_trait;

use super::language::{Language, Location};

/// File extensions (without the leading dot) of the files this language scans.
///
/// Compared against [`Path::extension`] in `check_file`.
const EXTENSIONS: [&str; 1] = ["py"];

/// Unit type that implements [`Language`] for Python (`.py`) source files.
pub struct Python;

#[async_trait(?Send)]
impl Language for Python {
    /// Lexes the Python file at `path`.
    ///
    /// Regex extraction is still a TODO, so the returned list is currently
    /// always empty; every lexed token is printed to stdout for debugging.
    ///
    /// Returns `Ok(None)` when the path's extension is not listed in
    /// [`EXTENSIONS`]. This includes paths with no extension and paths whose
    /// extension is not valid UTF-8.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be read into a `String`.
    async fn check_file(path: &Path) -> Result<Option<Vec<(String, Location)>>> {
        // `to_str` yields `None` for a non-UTF-8 extension. Such a path can never
        // match an entry in `EXTENSIONS`, so skip it instead of panicking.
        let Some(ext) = path.extension().and_then(|ext| ext.to_str()) else {
            return Ok(None);
        };

        if !EXTENSIONS.contains(&ext) {
            return Ok(None);
        }

        let contents = std::fs::read_to_string(path)?;

        let lexer = lex(&contents, Mode::Module);

        // Nothing is pushed here yet; see the TODO below.
        let regexes: Vec<(String, Location)> = Vec::new();

        for token in lexer {
            // TODO: support regexes
            println!("{:?}", token);
        }

        Ok(Some(regexes))
    }
}
1 change: 1 addition & 0 deletions crates/redos-wasm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ edition = "2021"
wasm-bindgen = "0.2"
fancy-regex = { git = "https://github.com/LeoDog896/fancy-regex", rev = "edc7c1f" }
redos = { path = "../redos" }
nonzero_lit = "0.1.2"

[lib]
crate-type = ["cdylib", "rlib"]
6 changes: 5 additions & 1 deletion crates/redos-wasm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ pub fn ir(regex: &str) -> String {
let parser = Parser::parse(regex);
format!(
"{:#?}",
parser.map(|tree| redos::ir::to_expr(&tree.expr, &Default::default()))
parser.map(|tree| redos::ir::to_expr(
&tree.expr,
&Default::default(),
nonzero_lit::usize!(1)
))
)
}

Expand Down
1 change: 1 addition & 0 deletions crates/redos/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ edition = "2021"
[dependencies]
fancy-regex = { git = "https://github.com/LeoDog896/fancy-regex", rev = "edc7c1f" }
nom = "7.1.3"
nonzero_lit = "0.1.2"
regex = "1.10.3"
7 changes: 6 additions & 1 deletion crates/redos/src/ilq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ use crate::ir::Expr;
/// Returns true iff an ilq is present anywhere in the regex
pub fn scan_ilq(expr: &Expr) -> bool {
match expr {
// if we hit a non-complex non-optional expression, we can stop
Expr::Token => false,
// explore every potential path for some ilq
Expr::Alt(list) => list.iter().any(scan_ilq),
// TODO
_ => false,
}
}
}
49 changes: 33 additions & 16 deletions crates/redos/src/ir.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
//! Intermediate representation of a regular expression.
//! Used to simplify the AST and make it easier to work with.

use std::num::NonZeroUsize;

use fancy_regex::{Assertion, Expr as RegexExpr, LookAround};

use crate::vulnerability::VulnerabilityConfig;
Expand All @@ -25,7 +27,9 @@ pub enum Expr {
Alt(Vec<Expr>),
/// Capturing group of expression, e.g. `(a.)` matches `a` and any character and "captures"
/// (remembers) the match
Group(Box<Expr>),
///
/// The usize is the number of the capturing group, starting from 1
Group(Box<Expr>, usize),
/// Look-around (e.g. positive/negative look-ahead or look-behind) with an expression, e.g.
/// `(?=a)` means the next character must be `a` (but the match is not consumed)
LookAround(Box<Expr>, LookAround),
Expand Down Expand Up @@ -54,25 +58,37 @@ pub enum Expr {
}

/// Converts a fancy-regex AST to an IR AST
pub fn to_expr(expr: &RegexExpr, config: &VulnerabilityConfig) -> Option<Expr> {
pub fn to_expr(
expr: &RegexExpr,
config: &VulnerabilityConfig,
group_increment: NonZeroUsize,
) -> Option<Expr> {
match expr {
RegexExpr::Empty => None,
RegexExpr::Any { .. } => Some(Expr::Token),
RegexExpr::Assertion(a) => Some(Expr::Assertion(*a)),
RegexExpr::Literal { .. } => Some(Expr::Token),
// TODO: propagate group increment
RegexExpr::Concat(list) => Some(Expr::Concat(
list.iter()
.filter_map(|e| to_expr(e, config))
.filter_map(|e| to_expr(e, config, group_increment))
.collect(),
)),
RegexExpr::Alt(list) => Some(Expr::Alt(
list.iter()
.filter_map(|e| to_expr(e, config))
.filter_map(|e| to_expr(e, config, group_increment))
.collect(),
)),
RegexExpr::Group(e) => to_expr(e, config).map(|e| Expr::Group(Box::new(e))),
RegexExpr::Group(e) => to_expr(
e,
config,
group_increment
.checked_add(1)
.expect("group increment overflow"),
)
.map(|e| Expr::Group(Box::new(e), group_increment.into())),
RegexExpr::LookAround(e, la) => {
to_expr(e, config).map(|e| Expr::LookAround(Box::new(e), *la))
to_expr(e, config, group_increment).map(|e| Expr::LookAround(Box::new(e), *la))
}
RegexExpr::Repeat {
child,
Expand All @@ -83,12 +99,12 @@ pub fn to_expr(expr: &RegexExpr, config: &VulnerabilityConfig) -> Option<Expr> {
let range = hi - lo;

let expression = if range > config.max_quantifier {
to_expr(child, config).map(|child| Expr::Repeat {
to_expr(child, config, group_increment).map(|child| Expr::Repeat {
child: Box::new(child),
greedy: *greedy,
})
} else {
to_expr(child, config)
to_expr(child, config, group_increment)
};

if *lo == 0 {
Expand All @@ -104,7 +120,7 @@ pub fn to_expr(expr: &RegexExpr, config: &VulnerabilityConfig) -> Option<Expr> {
// false negatives
RegexExpr::Backref(_) => Some(Expr::Token),
RegexExpr::AtomicGroup(e) => {
to_expr(e, config).map(|e| Expr::AtomicGroup(Box::new(e)))
to_expr(e, config, group_increment).map(|e| Expr::AtomicGroup(Box::new(e)))
}
RegexExpr::KeepOut => None,
RegexExpr::ContinueFromPreviousMatchEnd => None,
Expand All @@ -114,14 +130,15 @@ pub fn to_expr(expr: &RegexExpr, config: &VulnerabilityConfig) -> Option<Expr> {
true_branch,
false_branch,
} => {
let true_branch = to_expr(true_branch, config);
let false_branch = to_expr(false_branch, config);
if let (Some(true_branch), Some(false_branch)) =
(true_branch, false_branch)
{
let true_branch = to_expr(true_branch, config, group_increment);
let false_branch = to_expr(false_branch, config, group_increment);
if let (Some(true_branch), Some(false_branch)) = (true_branch, false_branch) {
let condition: Option<ExprConditional> = match condition.as_ref() {
&RegexExpr::BackrefExistsCondition(number) => Some(ExprConditional::BackrefExistsCondition(number)),
expr => to_expr(expr, config).map(|x| ExprConditional::Condition(Box::new(x)))
&RegexExpr::BackrefExistsCondition(number) => {
Some(ExprConditional::BackrefExistsCondition(number))
}
expr => to_expr(expr, config, group_increment)
.map(|x| ExprConditional::Condition(Box::new(x))),
};

condition.map(|condition| Expr::Conditional {
Expand Down
18 changes: 9 additions & 9 deletions crates/redos/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ pub mod vulnerability;
mod ilq;

use fancy_regex::parse::Parser;
use fancy_regex::{Result, Expr as RegexExpr};
use fancy_regex::{Expr as RegexExpr, Result};
use ir::{to_expr, Expr, ExprConditional};
use vulnerability::{Vulnerability, VulnerabilityConfig};

Expand All @@ -25,22 +25,22 @@ fn repeats_anywhere(expr: &Expr) -> bool {
// propagate
Expr::Concat(list) => list.iter().any(repeats_anywhere),
Expr::Alt(list) => list.iter().any(repeats_anywhere),
Expr::Group(e) => repeats_anywhere(e.as_ref()),
Expr::Group(e, _) => repeats_anywhere(e.as_ref()),
Expr::LookAround(e, _) => repeats_anywhere(e.as_ref()),
Expr::AtomicGroup(e) => repeats_anywhere(e.as_ref()),
Expr::Optional(e) => repeats_anywhere(e.as_ref()),
Expr::Conditional {
condition,
true_branch,
false_branch,
} => {
match condition {
ExprConditional::BackrefExistsCondition(_) => false,
ExprConditional::Condition(condition) => repeats_anywhere(condition.as_ref())
} => match condition {
ExprConditional::BackrefExistsCondition(_) => false,
ExprConditional::Condition(condition) => {
repeats_anywhere(condition.as_ref())
|| repeats_anywhere(true_branch.as_ref())
|| repeats_anywhere(false_branch.as_ref())
}
}
},
}
}

Expand Down Expand Up @@ -70,8 +70,8 @@ pub fn vulnerabilities(regex: &str, config: &VulnerabilityConfig) -> Result<Vuln
}

// second pass: turn AST into IR
let expr =
to_expr(&tree.expr, config).expect("Failed to convert AST to IR; this is a bug");
let expr = to_expr(&tree.expr, config, nonzero_lit::usize!(1))
.expect("Failed to convert AST to IR; this is a bug");

// third pass: exit early if there are no repeats
if !repeats_anywhere(&expr) {
Expand Down

0 comments on commit a868d7b

Please sign in to comment.