diff --git a/src/lexer.rs b/src/lexer.rs index 9e69a6b..410686d 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -44,9 +44,9 @@ pub struct Lexer<'a> { } impl<'a> Lexer<'a> { - pub fn new(input: &'a str) -> Self { + pub fn new(input: &'a [u8]) -> Self { Lexer { - input: input.as_bytes(), + input, pos: 0, capture_depth: 0, set_depth: 0, diff --git a/src/lua/find.rs b/src/lua/find.rs index 33ab3f3..9eec783 100644 --- a/src/lua/find.rs +++ b/src/lua/find.rs @@ -6,12 +6,11 @@ use super::{ /// Corresponds to Lua 5.3 [`string.find`]. /// Returns 1-based or 0-based (see features [`1-based`] and [`0-based`]) indices (start, end) and captured strings. The [`init`] argument can be either 0-based or 1-based. pub fn find( - text: &str, - pattern: &str, + text_bytes: &[u8], + pattern: &[u8], init: Option, plain: bool, -) -> Result)>> { - let text_bytes = text.as_bytes(); +) -> Result>)>> { let byte_len = text_bytes.len(); let start_byte_index = calculate_start_index(byte_len, init); @@ -35,7 +34,7 @@ pub fn find( if let Some(rel_byte_pos) = text_bytes[start_byte_index..] .windows(pattern.len()) - .position(|window| window == pattern.as_bytes()) + .position(|window| window == pattern) { let zero_based_start_pos = start_byte_index + rel_byte_pos; let zero_based_end_pos = zero_based_start_pos + pattern.len(); @@ -65,12 +64,9 @@ pub fn find( }; let end_pos = match_byte_range.end; - let captured_strings: Vec = captures_byte_ranges + let captured_strings: Vec> = captures_byte_ranges .into_iter() - .filter_map(|maybe_range| { - maybe_range - .map(|range| String::from_utf8_lossy(&text_bytes[range]).into_owned()) - }) + .filter_map(|maybe_range| maybe_range.map(|range| text_bytes[range].to_owned())) .collect(); Ok(Some((start_pos, end_pos, captured_strings))) diff --git a/src/lua/gmatch.rs b/src/lua/gmatch.rs index 20e14f1..51aa647 100644 --- a/src/lua/gmatch.rs +++ b/src/lua/gmatch.rs @@ -5,10 +5,7 @@ mod iter; pub use iter::GMatchIterator; /// Corresponds to Lua 5.3 `string.gmatch` -pub fn gmatch( - text: &str, - pattern: &str, -) -> Result { +pub fn gmatch(text: &[u8], pattern: &[u8]) -> Result { let is_empty_pattern = pattern.is_empty(); let pattern_ast = if is_empty_pattern { @@ -19,7 +16,7 @@ pub fn gmatch( }; Ok(GMatchIterator { - bytes: text.as_bytes().to_vec(), + bytes: text.to_vec(), pattern_ast, current_pos: 0, is_empty_pattern, diff --git a/src/lua/gmatch/iter.rs b/src/lua/gmatch/iter.rs index 3cbeb07..b6cf132 100644 --- a/src/lua/gmatch/iter.rs +++ b/src/lua/gmatch/iter.rs @@ -8,7 +8,7 @@ pub struct GMatchIterator { } impl Iterator for GMatchIterator { - type Item = Result>; + type Item = Result>>; fn next(&mut self) -> Option { if self.current_pos > self.bytes.len() { @@ -16,7 +16,7 @@ impl Iterator for GMatchIterator { } if self.is_empty_pattern { - let result = Some(Ok(vec![String::new()])); + let result = Some(Ok(vec![vec![]])); self.current_pos += 1; @@ -34,20 +34,15 @@ impl Iterator for GMatchIterator { self.current_pos = match_range.end; } - let result: Vec = if captures.iter().any(|c| c.is_some()) { + let result: Vec> = if captures.iter().any(|c| c.is_some()) { captures .into_iter() .filter_map(|maybe_range| { - maybe_range.map(|range| { - String::from_utf8_lossy(&self.bytes[range]).into_owned() - }) + maybe_range.map(|range| self.bytes[range].to_owned()) }) .collect() } else { - vec![ - String::from_utf8_lossy(&self.bytes[match_range.start..match_range.end]) - .into_owned(), - ] + vec![self.bytes[match_range.start..match_range.end].to_owned()] }; Some(Ok(result)) diff --git a/src/lua/gsub.rs b/src/lua/gsub.rs index 0cab7b2..e23aff8 100644 --- a/src/lua/gsub.rs +++ b/src/lua/gsub.rs @@ -7,29 +7,28 @@ pub use repl::Repl; /// Corresponds to Lua 5.3 `string.gsub` pub fn gsub<'a>( - text: &'a str, - pattern: &str, + text: &'a [u8], + pattern: &[u8], repl: Repl<'a>, n: Option, -) -> Result<(String, usize)> { - let text_bytes = text.as_bytes(); - let byte_len = text_bytes.len(); +) -> Result<(Vec, usize)> { + let byte_len = text.len(); let mut parser = Parser::new(pattern)?; let pattern_ast = parser.parse()?; - let mut result = String::new(); + let mut result = Vec::new(); let mut last_pos = 0; let mut replacements = 0; let max_replacements = n.unwrap_or(usize::MAX); while replacements < max_replacements { - match find_first_match(&pattern_ast, text_bytes, last_pos)? { + match find_first_match(&pattern_ast, text, last_pos)? { Some((match_range, captures)) => { - result.push_str(&text[last_pos..match_range.start]); + result.extend(&text[last_pos..match_range.start]); let full_match = &text[match_range.start..match_range.end]; - let captures_str: Vec<&str> = captures + let captures_str: Vec<&[u8]> = captures .iter() .filter_map(|maybe_range| { maybe_range @@ -41,14 +40,14 @@ pub fn gsub<'a>( match &repl { Repl::String(repl_str) => { let replacement = process_replacement_string(repl_str, &captures_str)?; - result.push_str(&replacement); + result.extend(&replacement); } Repl::Function(f) => { let mut args = Vec::with_capacity(captures_str.len() + 1); args.push(full_match); args.extend(captures_str.iter()); let replacement = f(&args); - result.push_str(&replacement); + result.extend(&replacement); } Repl::Table(table) => { let key = if !captures_str.is_empty() { @@ -58,9 +57,9 @@ pub fn gsub<'a>( }; if let Some(replacement) = table.get(key) { - result.push_str(replacement); + result.extend(*replacement); } else { - result.push_str(full_match); + result.extend(full_match); } } } @@ -72,7 +71,7 @@ pub fn gsub<'a>( if last_pos >= byte_len { break; } - result.push_str(&text[last_pos..last_pos + 1]); + result.extend(&text[last_pos..last_pos + 1]); last_pos += 1; } } @@ -81,7 +80,7 @@ pub fn gsub<'a>( } if last_pos < byte_len { - result.push_str(&text[last_pos..]); + result.extend(&text[last_pos..]); } Ok((result, replacements)) diff --git a/src/lua/gsub/repl.rs b/src/lua/gsub/repl.rs index b00103d..f56afe4 100644 --- a/src/lua/gsub/repl.rs +++ b/src/lua/gsub/repl.rs @@ -2,9 +2,9 @@ use crate::Result; use std::collections::HashMap; pub enum Repl<'a> { - String(&'a str), - Function(Box String + 'a>), - Table(&'a HashMap), + String(&'a [u8]), + Function(Box Vec + 'a>), + Table(&'a HashMap<&'a [u8], &'a [u8]>), } enum ReplToken { @@ -12,18 +12,18 @@ enum ReplToken { CaptureRef(usize), } -pub fn process_replacement_string(repl: &str, captures: &[&str]) -> Result { +pub fn process_replacement_string(repl: &[u8], captures: &[&[u8]]) -> Result> { let tokens = tokenize_replacement_string(repl); - let mut result = String::with_capacity(tokens.len()); + let mut result = Vec::with_capacity(tokens.len()); for token in tokens { match token { ReplToken::Literal(b) => { - result.push(b as char); + result.push(b); } ReplToken::CaptureRef(idx) => { if idx <= captures.len() { - result.push_str(captures[idx - 1]); + result.extend(captures[idx - 1]); } } } @@ -32,14 +32,13 @@ pub fn process_replacement_string(repl: &str, captures: &[&str]) -> Result Vec { +fn tokenize_replacement_string(repl: &[u8]) -> Vec { let mut tokens = Vec::new(); - let bytes = repl.as_bytes(); let mut i = 0; - while i < bytes.len() { - if bytes[i] == b'%' && i + 1 < bytes.len() { - let next_byte = bytes[i + 1]; + while i < repl.len() { + if repl[i] == b'%' && i + 1 < repl.len() { + let next_byte = repl[i + 1]; if (b'1'..=b'9').contains(&next_byte) { let capture_idx = (next_byte - b'0') as usize; tokens.push(ReplToken::CaptureRef(capture_idx)); @@ -52,7 +51,7 @@ fn tokenize_replacement_string(repl: &str) -> Vec { i += 1; } } else { - tokens.push(ReplToken::Literal(bytes[i])); + tokens.push(ReplToken::Literal(repl[i])); i += 1; } } diff --git a/src/lua/match.rs b/src/lua/match.rs index 74e5d54..ead294b 100644 --- a/src/lua/match.rs +++ b/src/lua/match.rs @@ -4,32 +4,25 @@ use super::{ }; /// Corresponds to Lua 5.3 `string.match` -pub fn r#match(text: &str, pattern: &str, init: Option) -> Result>> { - let text_bytes = text.as_bytes(); - let byte_len = text_bytes.len(); +pub fn r#match(text: &[u8], pattern: &[u8], init: Option) -> Result>>> { + let byte_len = text.len(); let start_byte_index = calculate_start_index(byte_len, init); let mut parser = Parser::new(pattern)?; let ast = parser.parse()?; - match find_first_match(&ast, text_bytes, start_byte_index)? { + match find_first_match(&ast, text, start_byte_index)? { Some((match_byte_range, captures_byte_ranges)) => { let captures: Vec<_> = captures_byte_ranges .into_iter() - .filter_map(|maybe_range| { - maybe_range - .map(|range| String::from_utf8_lossy(&text_bytes[range]).into_owned()) - }) + .filter_map(|maybe_range| maybe_range.map(|range| text[range].to_owned())) .collect(); if !captures.is_empty() { Ok(Some(captures)) } else { - let full_match = String::from_utf8_lossy( - &text_bytes[match_byte_range.start..match_byte_range.end], - ) - .into_owned(); + let full_match = text[match_byte_range.start..match_byte_range.end].to_owned(); Ok(Some(vec![full_match])) } } diff --git a/src/parser.rs b/src/parser.rs index 3f2b2ae..1bd082e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -35,7 +35,7 @@ pub struct Parser { } impl Parser { - pub fn new(pattern: &str) -> Result { + pub fn new(pattern: &[u8]) -> Result { let mut lexer = Lexer::new(pattern); let mut token_vec = Vec::new(); loop { diff --git a/tests/find.rs b/tests/find.rs index 3099392..6fe94ac 100644 --- a/tests/find.rs +++ b/tests/find.rs @@ -6,67 +6,94 @@ fn svec(items: &[&str]) -> Vec { #[test] fn test_negative_byte_classes() { - assert_eq!(find("a b\tc", "%S", None, false), Ok(Some((1, 1, vec![])))); - assert_eq!(find("a b\tc", "%S+", None, false), Ok(Some((1, 1, vec![])))); - assert_eq!(find(" b\tc", "%S", None, false), Ok(Some((2, 2, vec![])))); - assert_eq!(find("123abc", "%D", None, false), Ok(Some((4, 4, vec![])))); - assert_eq!(find("123abc", "%D+", None, false), Ok(Some((4, 6, vec![])))); - assert_eq!(find("abc_123", "%W", None, false), Ok(Some((4, 4, vec![])))); - assert_eq!(find("-abc-", "%W", None, false), Ok(Some((1, 1, vec![])))); - assert_eq!(find("abc123", "%A", None, false), Ok(Some((4, 4, vec![])))); - assert_eq!(find("abc123", "%A+", None, false), Ok(Some((4, 6, vec![])))); - assert_eq!(find("你a", "%A", None, false), Ok(Some((1, 1, vec![])))); - assert_eq!(find("a你b", "%W", None, false), Ok(Some((2, 2, vec![])))); + assert_eq!( + find(b"a b\tc", b"%S", None, false), + Ok(Some((1, 1, vec![]))) + ); + assert_eq!( + find(b"a b\tc", b"%S+", None, false), + Ok(Some((1, 1, vec![]))) + ); + assert_eq!(find(b" b\tc", b"%S", None, false), Ok(Some((2, 2, vec![])))); + assert_eq!( + find(b"123abc", b"%D", None, false), + Ok(Some((4, 4, vec![]))) + ); + assert_eq!( + find(b"123abc", b"%D+", None, false), + Ok(Some((4, 6, vec![]))) + ); + assert_eq!( + find(b"abc_123", b"%W", None, false), + Ok(Some((4, 4, vec![]))) + ); + assert_eq!(find(b"-abc-", b"%W", None, false), Ok(Some((1, 1, vec![])))); + assert_eq!( + find(b"abc123", b"%A", None, false), + Ok(Some((4, 4, vec![]))) + ); + assert_eq!( + find(b"abc123", b"%A+", None, false), + Ok(Some((4, 6, vec![]))) + ); + assert_eq!( + find("你a".as_bytes(), b"%A", None, false), + Ok(Some((1, 1, vec![]))) + ); + assert_eq!( + find("a你b".as_bytes(), b"%W", None, false), + Ok(Some((2, 2, vec![]))) + ); } #[test] fn test_balanced_patterns() { assert_eq!( - find("a(b(c)d)e", "%b()", None, false), + find(b"a(b(c)d)e", b"%b()", None, false), Ok(Some((2, 8, vec![]))) ); assert_eq!( - find("a{b{c}d}e", "%b{}", None, false), + find(b"a{b{c}d}e", b"%b{}", None, false), Ok(Some((2, 8, vec![]))) ); assert_eq!( - find("ad>e", "%b<>", None, false), + find(b"ad>e", b"%b<>", None, false), Ok(Some((2, 8, vec![]))) ); assert_eq!( - find("a(b(c(d)e)f)g", "%b()", None, false), + find(b"a(b(c(d)e)f)g", b"%b()", None, false), Ok(Some((2, 12, vec![]))) ); assert_eq!( - find("a(b(c)d)e", "(%b())", None, false), - Ok(Some((2, 8, svec(&["(b(c)d)"])))) + find(b"a(b(c)d)e", b"(%b())", None, false), + Ok(Some((2, 8, vec![b"(b(c)d)".to_vec()]))) ); } #[test] fn test_find_invalid_pattern() { assert!(matches!( - find("abc", "[", None, false), + find(b"abc", b"[", None, false), Err(Error::Parser(_)) )); assert!(matches!( - find("abc", "(", None, false), + find(b"abc", b"(", None, false), Err(Error::Parser(_)) )); assert!(matches!( - find("abc", "*", None, false), + find(b"abc", b"*", None, false), Err(Error::Parser(_)) )); assert!(matches!( - find("abc", "%", None, false), + find(b"abc", b"%", None, false), Err(Error::Lexer(_)) )); assert!(matches!( - find("abc", "%z", None, false), + find(b"abc", b"%z", None, false), Err(Error::Parser(_)) | Err(Error::Lexer(_)) )); } @@ -74,20 +101,20 @@ fn test_find_invalid_pattern() { #[test] fn test_plain_find() { assert_eq!( - find("hello world", "", None, true), + find(b"hello world", b"", None, true), Ok(Some((1, 0, vec![]))) ); assert_eq!( - find("hello world", "world", None, true), + find(b"hello world", b"world", None, true), Ok(Some((7, 11, vec![]))) ); assert_eq!( - find("hello world", "hello", None, true), + find(b"hello world", b"hello", None, true), Ok(Some((1, 5, vec![]))) ); - assert_eq!(find("hello world", "not found", None, true), Ok(None)); + assert_eq!(find(b"hello world", b"not found", None, true), Ok(None)); assert_eq!( - find("hello world", "", None, true), + find(b"hello world", b"", None, true), Ok(Some((1, 0, vec![]))) ); } @@ -95,41 +122,45 @@ fn test_plain_find() { #[test] fn test_find_with_init() { assert_eq!( - find("hello world", "world", Some(6), false), + find(b"hello world", b"world", Some(6), false), Ok(Some((7, 11, vec![]))) ); assert_eq!( - find("hello world", "world", Some(7), false), + find(b"hello world", b"world", Some(7), false), Ok(Some((7, 11, vec![]))) ); - assert_eq!(find("hello world", "world", Some(8), false), Ok(None)); + assert_eq!(find(b"hello world", b"world", Some(8), false), Ok(None)); assert_eq!( - find("hello world", "hello", Some(-11), false), + find(b"hello world", b"hello", Some(-11), false), Ok(Some((1, 5, vec![]))) ); - assert_eq!(find("hello world", "hello", Some(-5), false), Ok(None)); + assert_eq!(find(b"hello world", b"hello", Some(-5), false), Ok(None)); } #[test] fn test_find_pattern_with_captures() { assert_eq!( - find("hello 123 world", "(%d+)", None, false), - Ok(Some((7, 9, svec(&["123"])))) + find(b"hello 123 world", b"(%d+)", None, false), + Ok(Some((7, 9, vec![b"123".to_vec()]))) ); assert_eq!( - find("name=John age=25", "(%w+)=(%w+)", None, false), - Ok(Some((1, 9, svec(&["name", "John"])))) + find(b"name=John age=25", b"(%w+)=(%w+)", None, false), + Ok(Some((1, 9, vec![b"name".to_vec(), b"John".to_vec()]))) ); assert_eq!( - find("2023-04-15", "(%d%d%d%d)%-(%d%d)%-(%d%d)", None, false), - Ok(Some((1, 10, svec(&["2023", "04", "15"])))) + find(b"2023-04-15", b"(%d%d%d%d)%-(%d%d)%-(%d%d)", None, false), + Ok(Some(( + 1, + 10, + vec![b"2023".to_vec(), b"04".to_vec(), b"15".to_vec()] + ))) ); } #[test] fn test_find_edge_cases() { - assert_eq!(find("", "", None, false), Ok(Some((1, 0, vec![])))); - assert_eq!(find("hello", "", None, false), Ok(Some((1, 0, vec![])))); - assert_eq!(find("hello", "^", None, false), Ok(Some((1, 0, vec![])))); - assert_eq!(find("hello", "$", None, false), Ok(Some((6, 5, vec![])))); + assert_eq!(find(b"", b"", None, false), Ok(Some((1, 0, vec![])))); + assert_eq!(find(b"hello", b"", None, false), Ok(Some((1, 0, vec![])))); + assert_eq!(find(b"hello", b"^", None, false), Ok(Some((1, 0, vec![])))); + assert_eq!(find(b"hello", b"$", None, false), Ok(Some((6, 5, vec![])))); } diff --git a/tests/gmatch.rs b/tests/gmatch.rs index dc09e1e..554eb7d 100644 --- a/tests/gmatch.rs +++ b/tests/gmatch.rs @@ -1,10 +1,10 @@ use lsonar::{Result, gmatch}; -fn convert_to_string_vec(items: &[&str]) -> Vec { - items.iter().map(|&s| s.to_string()).collect() +fn convert_to_string_vec(items: &[&[u8]]) -> Vec> { + items.iter().map(|&s| s.to_vec()).collect() } -fn collect_gmatch_results(text: &str, pattern: &str) -> Result>> { +fn collect_gmatch_results(text: &[u8], pattern: &[u8]) -> Result>>> { let it = gmatch(text, pattern)?; it.collect() } @@ -12,18 +12,18 @@ fn collect_gmatch_results(text: &str, pattern: &str) -> Result>> #[test] fn test_single_match() { assert_eq!( - collect_gmatch_results("hello world", "hello"), - Ok(vec![convert_to_string_vec(&["hello"])]) + collect_gmatch_results(b"hello world", b"hello"), + Ok(vec![convert_to_string_vec(&[b"hello"])]) ); } #[test] fn test_repeated_match() { assert_eq!( - collect_gmatch_results("hello hello", "hello"), + collect_gmatch_results(b"hello hello", b"hello"), Ok(vec![ - convert_to_string_vec(&["hello"]), - convert_to_string_vec(&["hello"]) + convert_to_string_vec(&[b"hello"]), + convert_to_string_vec(&[b"hello"]) ]) ); } @@ -31,10 +31,10 @@ fn test_repeated_match() { #[test] fn test_numeric_pattern() { assert_eq!( - collect_gmatch_results("abc123def456", "%d+"), + collect_gmatch_results(b"abc123def456", b"%d+"), Ok(vec![ - convert_to_string_vec(&["123"]), - convert_to_string_vec(&["456"]) + convert_to_string_vec(&[b"123"]), + convert_to_string_vec(&[b"456"]) ]) ); } @@ -42,10 +42,10 @@ fn test_numeric_pattern() { #[test] fn test_captures() { assert_eq!( - collect_gmatch_results("name=John age=25", "(%a+)=(%w+)"), + collect_gmatch_results(b"name=John age=25", b"(%a+)=(%w+)"), Ok(vec![ - convert_to_string_vec(&["name", "John"]), - convert_to_string_vec(&["age", "25"]) + convert_to_string_vec(&[b"name", b"John"]), + convert_to_string_vec(&[b"age", b"25"]) ]) ); } @@ -53,11 +53,11 @@ fn test_captures() { #[test] fn test_single_char_captures() { assert_eq!( - collect_gmatch_results("a=1 b=2 c=3", "(%a)=(%d)"), + collect_gmatch_results(b"a=1 b=2 c=3", b"(%a)=(%d)"), Ok(vec![ - convert_to_string_vec(&["a", "1"]), - convert_to_string_vec(&["b", "2"]), - convert_to_string_vec(&["c", "3"]) + convert_to_string_vec(&[b"a", b"1"]), + convert_to_string_vec(&[b"b", b"2"]), + convert_to_string_vec(&[b"c", b"3"]) ]) ); } @@ -65,18 +65,18 @@ fn test_single_char_captures() { #[test] fn test_empty_captures() { assert_eq!( - collect_gmatch_results("abc", "()a()"), - Ok(vec![convert_to_string_vec(&["", ""])]) + collect_gmatch_results(b"abc", b"()a()"), + Ok(vec![convert_to_string_vec(&[b"", b""])]) ); } #[test] fn test_empty_pattern() { - let result = collect_gmatch_results("abc", "").unwrap(); + let result = collect_gmatch_results(b"abc", b"").unwrap(); assert_eq!(result.len(), 4); for r in &result { - assert_eq!(r, &convert_to_string_vec(&[""])); + assert_eq!(r, &convert_to_string_vec(&[b""])); } } @@ -84,12 +84,12 @@ fn test_empty_pattern() { fn test_ip_address_pattern() { assert_eq!( collect_gmatch_results( - "IPv4: 192.168.1.1 and 10.0.0.1", - "(%d+)%.(%d+)%.(%d+)%.(%d+)" + b"IPv4: 192.168.1.1 and 10.0.0.1", + b"(%d+)%.(%d+)%.(%d+)%.(%d+)" ), Ok(vec![ - convert_to_string_vec(&["192", "168", "1", "1"]), - convert_to_string_vec(&["10", "0", "0", "1"]) + convert_to_string_vec(&[b"192", b"168", b"1", b"1"]), + convert_to_string_vec(&[b"10", b"0", b"0", b"1"]) ]) ); } @@ -97,10 +97,10 @@ fn test_ip_address_pattern() { #[test] fn test_html_tag_content() { assert_eq!( - collect_gmatch_results("

First

Second

", "

([^<]+)

"), + collect_gmatch_results(b"

First

Second

", b"

([^<]+)

"), Ok(vec![ - convert_to_string_vec(&["First"]), - convert_to_string_vec(&["Second"]) + convert_to_string_vec(&[b"First"]), + convert_to_string_vec(&[b"Second"]) ]) ); } @@ -108,34 +108,34 @@ fn test_html_tag_content() { #[test] fn test_no_matches() { assert_eq!( - collect_gmatch_results("hello world", "not found"), + collect_gmatch_results(b"hello world", b"not found"), Ok(vec![]) ); } #[test] fn test_empty_string() { - assert_eq!(collect_gmatch_results("", "pattern"), Ok(vec![])); + assert_eq!(collect_gmatch_results(b"", b"pattern"), Ok(vec![])); } #[test] fn test_single_char_repeated() { assert_eq!( - collect_gmatch_results("aaa", "a"), + collect_gmatch_results(b"aaa", b"a"), Ok(vec![ - convert_to_string_vec(&["a"]), - convert_to_string_vec(&["a"]), - convert_to_string_vec(&["a"]) + convert_to_string_vec(&[b"a"]), + convert_to_string_vec(&[b"a"]), + convert_to_string_vec(&[b"a"]) ]) ); } #[test] fn test_dot_pattern() { - let result = collect_gmatch_results("hello world", ".").unwrap(); + let result = collect_gmatch_results(b"hello world", b".").unwrap(); assert_eq!(result.len(), 11); for (i, v) in result.into_iter().enumerate() { - assert_eq!(v[0], "hello world".chars().nth(i).unwrap().to_string()); + assert_eq!(v[0], vec![b"hello world"[i]]); } } diff --git a/tests/gsub.rs b/tests/gsub.rs index 392f04f..f37571d 100644 --- a/tests/gsub.rs +++ b/tests/gsub.rs @@ -1,27 +1,27 @@ -use lsonar::{Repl, gsub}; +use lsonar::{gsub, Repl}; use std::collections::HashMap; #[test] fn test_basic_replacement() { assert_eq!( - gsub("hello world", "l", Repl::String("L"), None), - Ok(("heLLo worLd".to_string(), 3)) + gsub(b"hello world", b"l", Repl::String(b"L"), None), + Ok((b"heLLo worLd".to_vec(), 3)) ); } #[test] fn test_limited_replacement_count() { assert_eq!( - gsub("hello world", "l", Repl::String("L"), Some(2)), - Ok(("heLLo world".to_string(), 2)) + gsub(b"hello world", b"l", Repl::String(b"L"), Some(2)), + Ok((b"heLLo world".to_vec(), 2)) ); } #[test] fn test_zero_replacement_count() { assert_eq!( - gsub("hello", ".", Repl::String("x"), Some(0)), - Ok(("hello".to_string(), 0)) + gsub(b"hello", b".", Repl::String(b"x"), Some(0)), + Ok((b"hello".to_vec(), 0)) ); } @@ -29,36 +29,41 @@ fn test_zero_replacement_count() { fn test_pattern_with_captures() { assert_eq!( gsub( - "name=John age=25", - "(%w+)=(%w+)", - Repl::String("%2 is %1"), + b"name=John age=25", + b"(%w+)=(%w+)", + Repl::String(b"%2 is %1"), None ), - Ok(("John is name 25 is age".to_string(), 2)) + Ok((b"John is name 25 is age".to_vec(), 2)) ); } #[test] fn test_numeric_pattern() { assert_eq!( - gsub("hello 123 world 456", "%d+", Repl::String(""), None), - Ok(("hello world ".to_string(), 2)) + gsub( + b"hello 123 world 456", + b"%d+", + Repl::String(b""), + None + ), + Ok((b"hello world ".to_vec(), 2)) ); } #[test] fn test_empty_pattern() { assert_eq!( - gsub("hello", "", Repl::String("-"), None), - Ok(("-h-e-l-l-o-".to_string(), 6)) + gsub(b"hello", b"", Repl::String(b"-"), None), + Ok((b"-h-e-l-l-o-".to_vec(), 6)) ); } #[test] fn test_escape_percent_in_replacement() { assert_eq!( - gsub("hello", "e", Repl::String("%% escaped"), None), - Ok(("h% escapedllo".to_string(), 1)) + gsub(b"hello", b"e", Repl::String(b"%% escaped"), None), + Ok((b"h% escapedllo".to_vec(), 1)) ); } @@ -66,13 +71,13 @@ fn test_escape_percent_in_replacement() { fn test_complex_pattern_with_captures() { assert_eq!( gsub( - "User: John, Age: 25, Email: john@example.com", - "(User: )(%w+)(, Age: )(%d+)", - Repl::String("%1%2%3%4 (adult)"), + b"User: John, Age: 25, Email: john@example.com", + b"(User: )(%w+)(, Age: )(%d+)", + Repl::String(b"%1%2%3%4 (adult)"), None ), Ok(( - "User: John, Age: 25 (adult), Email: john@example.com".to_string(), + b"User: John, Age: 25 (adult), Email: john@example.com".to_vec(), 1 )) ); @@ -82,12 +87,14 @@ fn test_complex_pattern_with_captures() { fn test_function_replacement() { assert_eq!( gsub( - "hello world", - "%w+", - Repl::Function(Box::new(|captures: &[&str]| { captures[0].to_uppercase() })), + b"hello world", + b"%w+", + Repl::Function(Box::new(|captures: &[&[u8]]| { + captures[0].to_ascii_uppercase() + })), None ), - Ok(("HELLO WORLD".to_string(), 2)) + Ok((b"HELLO WORLD".to_vec(), 2)) ); } @@ -95,68 +102,70 @@ fn test_function_replacement() { fn test_function_with_captures() { assert_eq!( gsub( - "a=1, b=2, c=3", - "(%w)=(%d)", - Repl::Function(Box::new(|captures: &[&str]| { + b"a=1, b=2, c=3", + b"(%w)=(%d)", + Repl::Function(Box::new(|captures: &[&[u8]]| { format!( "{}={}", - captures[1], - captures[2].parse::().unwrap() * 2 + str::from_utf8(captures[1]).unwrap(), + str::from_utf8(captures[2]).unwrap().parse::().unwrap() * 2 ) + .as_bytes() + .to_vec() })), None ), - Ok(("a=2, b=4, c=6".to_string(), 3)) + Ok((b"a=2, b=4, c=6".to_vec(), 3)) ); } #[test] fn test_table_replacement() { let mut table = HashMap::new(); - table.insert("hello".to_string(), "привет".to_string()); - table.insert("world".to_string(), "мир".to_string()); + table.insert(b"hello".as_slice(), "привет".as_bytes()); + table.insert(b"world", "мир".as_bytes()); assert_eq!( - gsub("hello world", "%w+", Repl::Table(&table), None), - Ok(("привет мир".to_string(), 2)) + gsub(b"hello world", b"%w+", Repl::Table(&table), None), + Ok(("привет мир".as_bytes().to_vec(), 2)) ); } #[test] fn test_partial_table_replacement() { let mut table = HashMap::new(); - table.insert("hello".to_string(), "привет".to_string()); + table.insert(b"hello".as_slice(), "привет".as_bytes()); assert_eq!( - gsub("hello world", "%w+", Repl::Table(&table), None), - Ok(("привет world".to_string(), 2)) + gsub(b"hello world", b"%w+", Repl::Table(&table), None), + Ok(("привет world".as_bytes().to_vec(), 2)) ); } #[test] fn test_table_with_captures() { let mut table = HashMap::new(); - table.insert("name".to_string(), "имя".to_string()); - table.insert("age".to_string(), "возраст".to_string()); + table.insert(b"name".as_slice(), "имя".as_bytes()); + table.insert(b"age", "возраст".as_bytes()); assert_eq!( - gsub("name=John age=25", "(%w+)=%w+", Repl::Table(&table), None), - Ok(("имя возраст".to_string(), 2)) + gsub(b"name=John age=25", b"(%w+)=%w+", Repl::Table(&table), None), + Ok(("имя возраст".as_bytes().to_vec(), 2)) ); } #[test] fn test_empty_string() { assert_eq!( - gsub("", "pattern", Repl::String("repl"), None), - Ok(("".to_string(), 0)) + gsub(b"", b"pattern", Repl::String(b"repl"), None), + Ok((b"".to_vec(), 0)) ); } #[test] fn test_pattern_not_found() { assert_eq!( - gsub("hello", "x", Repl::String("y"), None), - Ok(("hello".to_string(), 0)) + gsub(b"hello", b"x", Repl::String(b"y"), None), + Ok((b"hello".to_vec(), 0)) ); } diff --git a/tests/lexer.rs b/tests/lexer.rs index e9ccbd4..7386dff 100644 --- a/tests/lexer.rs +++ b/tests/lexer.rs @@ -3,7 +3,7 @@ use lsonar::{ lexer::{Lexer, token::Token}, }; -fn lex_all(input: &str) -> Result> { +fn lex_all(input: &[u8]) -> Result> { let mut lexer = Lexer::new(input); let mut tokens = Vec::new(); while let Some(token_result) = lexer.next_token()? { @@ -15,7 +15,7 @@ fn lex_all(input: &str) -> Result> { #[test] fn test_basic_tokens_lexer() -> Result<()> { assert_eq!( - lex_all("abc")?, + lex_all(b"abc")?, vec![ Token::Literal(b'a'), Token::Literal(b'b'), @@ -23,13 +23,13 @@ fn test_basic_tokens_lexer() -> Result<()> { ] ); assert_eq!( - lex_all("a.c")?, + lex_all(b"a.c")?, vec![Token::Literal(b'a'), Token::Any, Token::Literal(b'c')] ); - assert_eq!(lex_all("()")?, vec![Token::LParen, Token::RParen]); - assert_eq!(lex_all("[]")?, vec![Token::LBracket, Token::RBracket]); + assert_eq!(lex_all(b"()")?, vec![Token::LParen, Token::RParen]); + assert_eq!(lex_all(b"[]")?, vec![Token::LBracket, Token::RBracket]); assert_eq!( - lex_all("^$*+?-")?, + lex_all(b"^$*+?-")?, vec![ Token::Caret, Token::Dollar, @@ -44,28 +44,28 @@ fn test_basic_tokens_lexer() -> Result<()> { #[test] fn test_escape_tokens_lexer() -> Result<()> { - assert_eq!(lex_all("%%")?, vec![Token::EscapedLiteral(b'%')]); + assert_eq!(lex_all(b"%%")?, vec![Token::EscapedLiteral(b'%')]); assert_eq!( - lex_all("%.%a")?, + lex_all(b"%.%a")?, vec![Token::EscapedLiteral(b'.'), Token::Class(b'a')] ); - assert_eq!(lex_all("%(")?, vec![Token::EscapedLiteral(b'(')]); - assert_eq!(lex_all("%)")?, vec![Token::EscapedLiteral(b')')]); - assert_eq!(lex_all("%[")?, vec![Token::EscapedLiteral(b'[')]); - assert_eq!(lex_all("%]")?, vec![Token::EscapedLiteral(b']')]); - assert_eq!(lex_all("%*")?, vec![Token::EscapedLiteral(b'*')]); - assert_eq!(lex_all("%+")?, vec![Token::EscapedLiteral(b'+')]); - assert_eq!(lex_all("%?")?, vec![Token::EscapedLiteral(b'?')]); - assert_eq!(lex_all("%-")?, vec![Token::EscapedLiteral(b'-')]); - assert_eq!(lex_all("%^")?, vec![Token::EscapedLiteral(b'^')]); - assert_eq!(lex_all("%$")?, vec![Token::EscapedLiteral(b'$')]); + assert_eq!(lex_all(b"%(")?, vec![Token::EscapedLiteral(b'(')]); + assert_eq!(lex_all(b"%)")?, vec![Token::EscapedLiteral(b')')]); + assert_eq!(lex_all(b"%[")?, vec![Token::EscapedLiteral(b'[')]); + assert_eq!(lex_all(b"%]")?, vec![Token::EscapedLiteral(b']')]); + assert_eq!(lex_all(b"%*")?, vec![Token::EscapedLiteral(b'*')]); + assert_eq!(lex_all(b"%+")?, vec![Token::EscapedLiteral(b'+')]); + assert_eq!(lex_all(b"%?")?, vec![Token::EscapedLiteral(b'?')]); + assert_eq!(lex_all(b"%-")?, vec![Token::EscapedLiteral(b'-')]); + assert_eq!(lex_all(b"%^")?, vec![Token::EscapedLiteral(b'^')]); + assert_eq!(lex_all(b"%$")?, vec![Token::EscapedLiteral(b'$')]); Ok(()) } #[test] fn test_class_tokens_lexer() -> Result<()> { assert_eq!( - lex_all("%a%d%l%s%u%w%x%p%c%g")?, + lex_all(b"%a%d%l%s%u%w%x%p%c%g")?, vec![ Token::Class(b'a'), Token::Class(b'd'), @@ -80,7 +80,7 @@ fn test_class_tokens_lexer() -> Result<()> { ] ); assert_eq!( - lex_all("%A%D%L%S%U%W%X%P%C%G")?, + lex_all(b"%A%D%L%S%U%W%X%P%C%G")?, vec![ Token::Class(b'A'), Token::Class(b'D'), @@ -100,7 +100,7 @@ fn test_class_tokens_lexer() -> Result<()> { #[test] fn test_special_escape_tokens_lexer() -> Result<()> { assert_eq!( - lex_all("%b()%f")?, + lex_all(b"%b()%f")?, vec![Token::Balanced(b'(', b')'), Token::Frontier] ); Ok(()) @@ -109,7 +109,7 @@ fn test_special_escape_tokens_lexer() -> Result<()> { #[test] fn test_capture_ref_tokens_lexer() -> Result<()> { assert_eq!( - lex_all("%1%2%9")?, + lex_all(b"%1%2%9")?, vec![ Token::CaptureRef(1), Token::CaptureRef(2), @@ -122,7 +122,7 @@ fn test_capture_ref_tokens_lexer() -> Result<()> { #[test] fn test_mixed_tokens_lexer() -> Result<()> { assert_eq!( - lex_all("(a%d+)%1?")?, + lex_all(b"(a%d+)%1?")?, vec![ Token::LParen, Token::Literal(b'a'), @@ -138,25 +138,25 @@ fn test_mixed_tokens_lexer() -> Result<()> { #[test] fn test_lexer_throw_errors() { - assert!(matches!(lex_all("%"), Err(Error::Lexer(_)))); - assert!(matches!(lex_all("%q"), Err(Error::Lexer(_)))); - assert!(matches!(lex_all("abc%"), Err(Error::Lexer(_)))); + assert!(matches!(lex_all(b"%"), Err(Error::Lexer(_)))); + assert!(matches!(lex_all(b"%q"), Err(Error::Lexer(_)))); + assert!(matches!(lex_all(b"abc%"), Err(Error::Lexer(_)))); } #[test] fn test_quantifiers_lexer() { assert_eq!( - lex_all("%d+").unwrap(), + lex_all(b"%d+").unwrap(), vec![Token::Class(b'd'), Token::Plus] ); - assert_eq!(lex_all("%]").unwrap(), vec![Token::EscapedLiteral(b']')]); - assert_eq!(lex_all("%)").unwrap(), vec![Token::EscapedLiteral(b')')]); - assert_eq!(lex_all("%*").unwrap(), vec![Token::EscapedLiteral(b'*')]); - assert_eq!(lex_all("%+").unwrap(), vec![Token::EscapedLiteral(b'+')]); - assert_eq!(lex_all("%?").unwrap(), vec![Token::EscapedLiteral(b'?')]); - assert_eq!(lex_all("%-").unwrap(), vec![Token::EscapedLiteral(b'-')]); + assert_eq!(lex_all(b"%]").unwrap(), vec![Token::EscapedLiteral(b']')]); + assert_eq!(lex_all(b"%)").unwrap(), vec![Token::EscapedLiteral(b')')]); + assert_eq!(lex_all(b"%*").unwrap(), vec![Token::EscapedLiteral(b'*')]); + assert_eq!(lex_all(b"%+").unwrap(), vec![Token::EscapedLiteral(b'+')]); + assert_eq!(lex_all(b"%?").unwrap(), vec![Token::EscapedLiteral(b'?')]); + assert_eq!(lex_all(b"%-").unwrap(), vec![Token::EscapedLiteral(b'-')]); assert_eq!( - lex_all("[ab]+").unwrap(), + lex_all(b"[ab]+").unwrap(), vec![ Token::LBracket, Token::Literal(b'a'), @@ -166,7 +166,7 @@ fn test_quantifiers_lexer() { ] ); assert_eq!( - lex_all("[ab]*").unwrap(), + lex_all(b"[ab]*").unwrap(), vec![ Token::LBracket, Token::Literal(b'a'), @@ -176,7 +176,7 @@ fn test_quantifiers_lexer() { ] ); assert_eq!( - lex_all("[ab]?").unwrap(), + lex_all(b"[ab]?").unwrap(), vec![ Token::LBracket, Token::Literal(b'a'), @@ -186,7 +186,7 @@ fn test_quantifiers_lexer() { ] ); assert_eq!( - lex_all("[ab]-").unwrap(), + lex_all(b"[ab]-").unwrap(), vec![ Token::LBracket, Token::Literal(b'a'), @@ -196,23 +196,23 @@ fn test_quantifiers_lexer() { ] ); assert_eq!( - lex_all("a*").unwrap(), + lex_all(b"a*").unwrap(), vec![Token::Literal(b'a'), Token::Star] ); assert_eq!( - lex_all("a+").unwrap(), + lex_all(b"a+").unwrap(), vec![Token::Literal(b'a'), Token::Plus] ); assert_eq!( - lex_all("a?").unwrap(), + lex_all(b"a?").unwrap(), vec![Token::Literal(b'a'), Token::Question] ); assert_eq!( - lex_all("a-").unwrap(), + lex_all(b"a-").unwrap(), vec![Token::Literal(b'a'), Token::Minus] ); assert_eq!( - lex_all("(abc)+").unwrap(), + lex_all(b"(abc)+").unwrap(), vec![ Token::LParen, Token::Literal(b'a'), @@ -223,7 +223,7 @@ fn test_quantifiers_lexer() { ] ); assert_eq!( - lex_all("(abc)*").unwrap(), + lex_all(b"(abc)*").unwrap(), vec![ Token::LParen, Token::Literal(b'a'), @@ -234,7 +234,7 @@ fn test_quantifiers_lexer() { ] ); assert_eq!( - lex_all("(abc)?").unwrap(), + lex_all(b"(abc)?").unwrap(), vec![ Token::LParen, Token::Literal(b'a'), @@ -245,7 +245,7 @@ fn test_quantifiers_lexer() { ] ); assert_eq!( - lex_all("(abc)-").unwrap(), + lex_all(b"(abc)-").unwrap(), vec![ Token::LParen, Token::Literal(b'a'), @@ -256,7 +256,7 @@ fn test_quantifiers_lexer() { ] ); assert_eq!( - lex_all("(%d%d%d%d)-").unwrap(), + lex_all(b"(%d%d%d%d)-").unwrap(), vec![ Token::LParen, Token::Class(b'd'), @@ -268,23 +268,23 @@ fn test_quantifiers_lexer() { ] ); assert_eq!( - lex_all("[*]").unwrap(), + lex_all(b"[*]").unwrap(), vec![Token::LBracket, Token::Literal(b'*'), Token::RBracket] ); assert_eq!( - lex_all("[+]").unwrap(), + lex_all(b"[+]").unwrap(), vec![Token::LBracket, Token::Literal(b'+'), Token::RBracket] ); assert_eq!( - lex_all("[?]").unwrap(), + lex_all(b"[?]").unwrap(), vec![Token::LBracket, Token::Literal(b'?'), Token::RBracket] ); assert_eq!( - lex_all("[-]").unwrap(), + lex_all(b"[-]").unwrap(), vec![Token::LBracket, Token::Literal(b'-'), Token::RBracket] ); assert_eq!( - lex_all("[a-z]").unwrap(), + lex_all(b"[a-z]").unwrap(), vec![ Token::LBracket, Token::Literal(b'a'), @@ -294,7 +294,7 @@ fn test_quantifiers_lexer() { ] ); assert_eq!( - lex_all("[%]]").unwrap(), + lex_all(b"[%]]").unwrap(), vec![ Token::LBracket, Token::EscapedLiteral(b']'), @@ -302,7 +302,7 @@ fn test_quantifiers_lexer() { ] ); assert_eq!( - lex_all("[%-]").unwrap(), + lex_all(b"[%-]").unwrap(), vec![ Token::LBracket, Token::EscapedLiteral(b'-'), @@ -310,7 +310,7 @@ fn test_quantifiers_lexer() { ] ); assert_eq!( - lex_all("[%%]").unwrap(), + lex_all(b"[%%]").unwrap(), vec![ Token::LBracket, Token::EscapedLiteral(b'%'), diff --git a/tests/match.rs b/tests/match.rs index dd814f0..6311555 100644 --- a/tests/match.rs +++ b/tests/match.rs @@ -1,107 +1,107 @@ use lsonar::r#match; -fn convert_to_string_vec(items: &[&str]) -> Vec { - items.iter().map(|&s| s.to_string()).collect() +fn convert_to_string_vec(items: &[&[u8]]) -> Vec> { + items.iter().map(|&s| s.to_vec()).collect() } #[test] fn test_simple_match() { assert_eq!( - r#match("hello world", "hello", None), - Ok(Some(convert_to_string_vec(&["hello"]))) + r#match(b"hello world", b"hello", None), + Ok(Some(vec![b"hello".to_vec()])) ); assert_eq!( - r#match("hello world", "world", None), - Ok(Some(convert_to_string_vec(&["world"]))) + r#match(b"hello world", b"world", None), + Ok(Some(vec![b"world".to_vec()])) ); - assert_eq!(r#match("hello world", "bye", None), Ok(None)); + assert_eq!(r#match(b"hello world", b"bye", None), Ok(None)); } #[test] fn test_pattern_classes() { assert_eq!( - r#match("abc123", "%a+", None), - Ok(Some(convert_to_string_vec(&["abc"]))) + r#match(b"abc123", b"%a+", None), + Ok(Some(convert_to_string_vec(&[b"abc"]))) ); assert_eq!( - r#match("abc123", "%d+", None), - Ok(Some(convert_to_string_vec(&["123"]))) + r#match(b"abc123", b"%d+", None), + Ok(Some(convert_to_string_vec(&[b"123"]))) ); } #[test] fn test_single_capture() { assert_eq!( - r#match("hello world", "(hello)", None), - Ok(Some(convert_to_string_vec(&["hello"]))) + r#match(b"hello world", b"(hello)", None), + Ok(Some(convert_to_string_vec(&[b"hello"]))) ); } #[test] fn test_multiple_captures() { assert_eq!( - r#match("hello world", "(hello) (world)", None), - Ok(Some(convert_to_string_vec(&["hello", "world"]))) + r#match(b"hello world", b"(hello) (world)", None), + Ok(Some(convert_to_string_vec(&[b"hello", b"world"]))) ); assert_eq!( - r#match("123-456-7890", "(%d+)%-(%d+)%-(%d+)", None), - Ok(Some(convert_to_string_vec(&["123", "456", "7890"]))) + r#match(b"123-456-7890", b"(%d+)%-(%d+)%-(%d+)", None), + Ok(Some(convert_to_string_vec(&[b"123", b"456", b"7890"]))) ); } #[test] fn test_combined_pattern_captures() { assert_eq!( - r#match("abc123", "(%a+)(%d+)", None), - Ok(Some(convert_to_string_vec(&["abc", "123"]))) + r#match(b"abc123", b"(%a+)(%d+)", None), + Ok(Some(convert_to_string_vec(&[b"abc", b"123"]))) ); } #[test] fn test_empty_captures() { assert_eq!( - r#match("hello", "(h)()ello", None), - Ok(Some(convert_to_string_vec(&["h", ""]))) + r#match(b"hello", b"(h)()ello", None), + Ok(Some(convert_to_string_vec(&[b"h", b""]))) ); } #[test] fn test_init_parameter() { assert_eq!( - r#match("hello world", "world", Some(6)), - Ok(Some(convert_to_string_vec(&["world"]))) + r#match(b"hello world", b"world", Some(6)), + Ok(Some(convert_to_string_vec(&[b"world"]))) ); assert_eq!( - r#match("hello world", "hello", Some(1)), - Ok(Some(convert_to_string_vec(&["hello"]))) + r#match(b"hello world", b"hello", Some(1)), + Ok(Some(convert_to_string_vec(&[b"hello"]))) ); - assert_eq!(r#match("hello world", "hello", Some(2)), Ok(None)); + assert_eq!(r#match(b"hello world", b"hello", Some(2)), Ok(None)); } #[test] fn test_empty_string_edge_cases() { assert_eq!( - r#match("", "", None), - Ok(Some(convert_to_string_vec(&[""]))) + r#match(b"", b"", None), + Ok(Some(convert_to_string_vec(&[b""]))) ); assert_eq!( - r#match("", "^$", None), - Ok(Some(convert_to_string_vec(&[""]))) + r#match(b"", b"^$", None), + Ok(Some(convert_to_string_vec(&[b""]))) ); } #[test] fn test_anchor_patterns() { assert_eq!( - r#match("hello", "^", None), - Ok(Some(convert_to_string_vec(&[""]))) + r#match(b"hello", b"^", None), + Ok(Some(convert_to_string_vec(&[b""]))) ); assert_eq!( - r#match("hello", "$", None), - Ok(Some(convert_to_string_vec(&[""]))) + r#match(b"hello", b"$", None), + Ok(Some(convert_to_string_vec(&[b""]))) ); assert_eq!( - r#match("hello", "^hello$", None), - Ok(Some(convert_to_string_vec(&["hello"]))) + r#match(b"hello", b"^hello$", None), + Ok(Some(convert_to_string_vec(&[b"hello"]))) ); } diff --git a/tests/parser.rs b/tests/parser.rs index d01b370..0910c6d 100644 --- a/tests/parser.rs +++ b/tests/parser.rs @@ -1,14 +1,17 @@ use lsonar::charset::CharSet; use lsonar::{AstNode, Error, LUA_MAXCAPTURES, Parser, Quantifier, Result}; -fn parse_ok(pattern: &str) -> Vec { +fn parse_ok(pattern: &[u8]) -> Vec { Parser::new(pattern) .expect("Parser::new failed") .parse() - .expect(&format!("Parser failed for pattern: {}", pattern)) + .expect(&format!( + "Parser failed for pattern: {}", + str::from_utf8(pattern).unwrap() + )) } -fn parse_err(pattern: &str) -> Result> { +fn parse_err(pattern: &[u8]) -> Result> { let mut parser = Parser::new(pattern)?; parser.parse() } @@ -40,7 +43,7 @@ fn make_set(bytes: &[u8], ranges: &[(u8, u8)], classes: &[u8], negated: bool) -> #[test] fn test_simple_sequence_parser() { assert_eq!( - parse_ok("abc"), + parse_ok(b"abc"), vec![ AstNode::Literal(b'a'), AstNode::Literal(b'b'), @@ -48,11 +51,11 @@ fn test_simple_sequence_parser() { ] ); assert_eq!( - parse_ok("a.c"), + parse_ok(b"a.c"), vec![AstNode::Literal(b'a'), AstNode::Any, AstNode::Literal(b'c')] ); assert_eq!( - parse_ok("a%dc"), + parse_ok(b"a%dc"), vec![ AstNode::Literal(b'a'), AstNode::Class(b'd', false), @@ -60,7 +63,7 @@ fn test_simple_sequence_parser() { ] ); assert_eq!( - parse_ok("a%Dc"), + parse_ok(b"a%Dc"), vec![ AstNode::Literal(b'a'), AstNode::Class(b'd', true), @@ -72,7 +75,7 @@ fn test_simple_sequence_parser() { #[test] fn test_anchors_parser() { assert_eq!( - parse_ok("^abc$"), + parse_ok(b"^abc$"), vec![ AstNode::AnchorStart, AstNode::Literal(b'a'), @@ -82,7 +85,7 @@ fn test_anchors_parser() { ] ); assert_eq!( - parse_ok("abc$"), + parse_ok(b"abc$"), vec![ AstNode::Literal(b'a'), AstNode::Literal(b'b'), @@ -91,7 +94,7 @@ fn test_anchors_parser() { ] ); assert_eq!( - parse_ok("^abc"), + parse_ok(b"^abc"), vec![ AstNode::AnchorStart, AstNode::Literal(b'a'), @@ -104,23 +107,23 @@ fn test_anchors_parser() { #[test] fn test_quantifiers_parser() { assert_eq!( - parse_ok("a*"), + parse_ok(b"a*"), vec![quantified(AstNode::Literal(b'a'), Quantifier::Star)] ); assert_eq!( - parse_ok("a+"), + parse_ok(b"a+"), vec![quantified(AstNode::Literal(b'a'), Quantifier::Plus)] ); assert_eq!( - parse_ok("a?"), + parse_ok(b"a?"), vec![quantified(AstNode::Literal(b'a'), Quantifier::Question)] ); assert_eq!( - parse_ok("a-"), + parse_ok(b"a-"), vec![quantified(AstNode::Literal(b'a'), Quantifier::Minus)] ); assert_eq!( - parse_ok("a.*c+d?e-"), + parse_ok(b"a.*c+d?e-"), vec![ AstNode::Literal(b'a'), quantified(AstNode::Any, Quantifier::Star), @@ -130,11 +133,11 @@ fn test_quantifiers_parser() { ] ); assert_eq!( - parse_ok("%d+"), + parse_ok(b"%d+"), vec![quantified(AstNode::Class(b'd', false), Quantifier::Plus)] ); assert_eq!( - parse_ok(".*"), + parse_ok(b".*"), vec![quantified(AstNode::Any, Quantifier::Star)] ); } @@ -142,27 +145,27 @@ fn test_quantifiers_parser() { #[test] fn test_sets_parser() { assert_eq!( - parse_ok("[]"), + parse_ok(b"[]"), vec![AstNode::Set(make_set(&[], &[], &[], false))] ); assert_eq!( - parse_ok("[abc]"), + parse_ok(b"[abc]"), vec![AstNode::Set(make_set(&[b'a', b'b', b'c'], &[], &[], false))] ); assert_eq!( - parse_ok("[^abc]"), + parse_ok(b"[^abc]"), vec![AstNode::Set(make_set(&[b'a', b'b', b'c'], &[], &[], true))] ); assert_eq!( - parse_ok("[a-c]"), + parse_ok(b"[a-c]"), vec![AstNode::Set(make_set(&[], &[(b'a', b'c')], &[], false))] ); assert_eq!( - parse_ok("[^a-c]"), + parse_ok(b"[^a-c]"), vec![AstNode::Set(make_set(&[], &[(b'a', b'c')], &[], true))] ); assert_eq!( - parse_ok("[a.^$]"), + parse_ok(b"[a.^$]"), vec![AstNode::Set(make_set( &[b'a', b'.', b'^', b'$'], &[], @@ -171,15 +174,15 @@ fn test_sets_parser() { ))] ); assert_eq!( - parse_ok("[%a]"), + parse_ok(b"[%a]"), vec![AstNode::Set(make_set(&[], &[], &[b'a'], false))] ); assert_eq!( - parse_ok("[%%]"), + parse_ok(b"[%%]"), vec![AstNode::Set(make_set(&[b'%'], &[], &[], false))] ); assert_eq!( - parse_ok("[-abc]"), + parse_ok(b"[-abc]"), vec![AstNode::Set(make_set( &[b'-', b'a', b'b', b'c'], &[], @@ -188,7 +191,7 @@ fn test_sets_parser() { ))] ); assert_eq!( - parse_ok("[abc-]"), + parse_ok(b"[abc-]"), vec![AstNode::Set(make_set( &[b'a', b'b', b'c', b'-'], &[], @@ -201,7 +204,7 @@ fn test_sets_parser() { #[test] fn test_set_quantifier_parser() { assert_eq!( - parse_ok("[abc]*"), + parse_ok(b"[abc]*"), vec![quantified( AstNode::Set(make_set(&[b'a', b'b', b'c'], &[], &[], false)), Quantifier::Star @@ -212,21 +215,21 @@ fn test_set_quantifier_parser() { #[test] fn test_captures_parser() { assert_eq!( - parse_ok("()"), + parse_ok(b"()"), vec![AstNode::Capture { index: 1, inner: vec![] }] ); assert_eq!( - parse_ok("(a)"), + parse_ok(b"(a)"), vec![AstNode::Capture { index: 1, inner: vec![AstNode::Literal(b'a')] }] ); assert_eq!( - parse_ok("(a%d+)"), + parse_ok(b"(a%d+)"), vec![AstNode::Capture { index: 1, inner: vec![ @@ -236,7 +239,7 @@ fn test_captures_parser() { }] ); assert_eq!( - parse_ok("(a(b)c)"), + parse_ok(b"(a(b)c)"), vec![AstNode::Capture { index: 1, inner: vec![ @@ -250,7 +253,7 @@ fn test_captures_parser() { }] ); assert_eq!( - parse_ok("(a)?"), + parse_ok(b"(a)?"), vec![quantified( AstNode::Capture { index: 1, @@ -260,14 +263,14 @@ fn test_captures_parser() { )] ); assert_eq!( - parse_ok("a?b"), + parse_ok(b"a?b"), vec![ quantified(AstNode::Literal(b'a'), Quantifier::Question), AstNode::Literal(b'b') ] ); assert_eq!( - parse_ok("a-b"), + parse_ok(b"a-b"), vec![ quantified(AstNode::Literal(b'a'), Quantifier::Minus), AstNode::Literal(b'b') @@ -277,9 +280,9 @@ fn test_captures_parser() { #[test] fn test_balanced_frontier_parser() { - assert_eq!(parse_ok("%b()"), vec![AstNode::Balanced(b'(', b')')]); + assert_eq!(parse_ok(b"%b()"), vec![AstNode::Balanced(b'(', b')')]); assert_eq!( - parse_ok("%f[ac]"), + parse_ok(b"%f[ac]"), vec![AstNode::Frontier(make_set(&[b'a', b'c'], &[], &[], false))] ); } @@ -287,7 +290,7 @@ fn test_balanced_frontier_parser() { #[test] fn test_complex_parser() { assert_eq!( - parse_ok("^(%b())%d*$"), + parse_ok(b"^(%b())%d*$"), vec![ AstNode::AnchorStart, AstNode::Capture { @@ -302,73 +305,81 @@ fn test_complex_parser() { #[test] fn test_escaped_rparen_rbracket_without_panic() { - assert_eq!(parse_ok("%]"), vec![AstNode::Literal(b']')]); - assert_eq!(parse_ok("%)"), vec![AstNode::Literal(b')')]) + assert_eq!(parse_ok(b"%]"), vec![AstNode::Literal(b']')]); + assert_eq!(parse_ok(b"%)"), vec![AstNode::Literal(b')')]) } #[test] fn test_throw_parser_errors() { assert!( - matches!(parse_err("("), Err(Error::Parser(s)) if s.contains("malformed pattern (unexpected end, expected RParen)")) + matches!(parse_err(b"("), Err(Error::Parser(s)) if s.contains("malformed pattern (unexpected end, expected RParen)")) ); - assert!(matches!(parse_err(")"), Err(Error::Parser(s)) if s.contains("unexpected ')'"))); - assert!(matches!(parse_err("]"), Err(Error::Parser(s)) if s.contains("unexpected ']'"))); + assert!(matches!(parse_err(b")"), Err(Error::Parser(s)) if s.contains("unexpected ')'"))); + assert!(matches!(parse_err(b"]"), Err(Error::Parser(s)) if s.contains("unexpected ']'"))); assert!( - matches!(parse_err("["), Err(Error::Parser(s)) if s.contains("unfinished character class")) - ); - assert!(matches!(parse_err("*"), Err(Error::Parser(s)) if s.contains("must follow an item"))); - assert!(matches!(parse_err("^*"), Err(Error::Parser(s)) if s.contains("cannot be quantified"))); - assert!(matches!(parse_err("$+"), Err(Error::Parser(s)) if s.contains("cannot be quantified"))); - assert!(matches!(parse_err("%b"), Err(Error::Lexer(s)) if s.contains("needs two characters"))); - assert!(matches!(parse_err("%bx"), Err(Error::Lexer(s)) if s.contains("needs two characters"))); - assert!(matches!(parse_err("%f"), Err(Error::Parser(s)) if s.contains("missing '[' after %f"))); + matches!(parse_err(b"["), Err(Error::Parser(s)) if s.contains("unfinished character class")) + ); + assert!(matches!(parse_err(b"*"), Err(Error::Parser(s)) if s.contains("must follow an item"))); + assert!( + matches!(parse_err(b"^*"), Err(Error::Parser(s)) if s.contains("cannot be quantified")) + ); + assert!( + matches!(parse_err(b"$+"), Err(Error::Parser(s)) if s.contains("cannot be quantified")) + ); + assert!(matches!(parse_err(b"%b"), Err(Error::Lexer(s)) if s.contains("needs two characters"))); + assert!( + matches!(parse_err(b"%bx"), Err(Error::Lexer(s)) if s.contains("needs two characters")) + ); + assert!( + matches!(parse_err(b"%f"), Err(Error::Parser(s)) if s.contains("missing '[' after %f")) + ); assert!( - matches!(parse_err("%fa"), Err(Error::Parser(s)) if s.contains("missing '[' after %f")) + matches!(parse_err(b"%fa"), Err(Error::Parser(s)) if s.contains("missing '[' after %f")) ); assert!( - matches!(parse_err("%f["), Err(Error::Parser(s)) if s.contains("unfinished character class")) + matches!(parse_err(b"%f["), Err(Error::Parser(s)) if s.contains("unfinished character class")) ); assert!( - matches!(parse_err("%f[a"), Err(Error::Parser(s)) if s.contains("unfinished character class")) + matches!(parse_err(b"%f[a"), Err(Error::Parser(s)) if s.contains("unfinished character class")) ); - assert!(matches!(parse_err("%z"), Err(Error::Lexer(_)))); + assert!(matches!(parse_err(b"%z"), Err(Error::Lexer(_)))); - assert_eq!(parse_ok("%1"), vec![AstNode::CaptureRef(1)]); + assert_eq!(parse_ok(b"%1"), vec![AstNode::CaptureRef(1)]); let too_many_captures = "()".repeat(LUA_MAXCAPTURES + 1); assert!( - matches!(parse_err(&too_many_captures), Err(Error::Parser(s)) if s.contains("too many captures")) + matches!(parse_err(&too_many_captures.as_bytes()), Err(Error::Parser(s)) if s.contains("too many captures")) ); } #[test] fn test_special_byte_edge_cases_parser() { assert_eq!( - parse_ok("[%%]"), + parse_ok(b"[%%]"), vec![AstNode::Set(make_set(&[b'%'], &[], &[], false))] ); assert_eq!( - parse_ok("[%-]"), + parse_ok(b"[%-]"), vec![AstNode::Set(make_set(&[b'-'], &[], &[], false))] ); assert_eq!( - parse_ok("[%]]"), + parse_ok(b"[%]]"), vec![AstNode::Set(make_set(&[b']'], &[], &[], false))] ); assert_eq!( - parse_ok("[%[]"), + parse_ok(b"[%[]"), vec![AstNode::Set(make_set(&[b'['], &[], &[], false))] ); assert_eq!( - parse_ok("%*+%?"), + parse_ok(b"%*+%?"), vec![ quantified(AstNode::Literal(b'*'), Quantifier::Plus), AstNode::Literal(b'?') ] ); assert_eq!( - parse_ok("[%[]"), + parse_ok(b"[%[]"), vec![AstNode::Set(make_set(&[b'['], &[], &[], false))] ); } @@ -376,7 +387,7 @@ fn test_special_byte_edge_cases_parser() { #[test] fn test_nested_complex_patterns_parser() { assert_eq!( - parse_ok("((a+)?(b*))+"), + parse_ok(b"((a+)?(b*))+"), vec![quantified( AstNode::Capture { index: 1, @@ -399,7 +410,7 @@ fn test_nested_complex_patterns_parser() { ); assert_eq!( - parse_ok("(%f[%a]%w+)"), + parse_ok(b"(%f[%a]%w+)"), vec![AstNode::Capture { index: 1, inner: vec![ @@ -412,21 +423,21 @@ fn test_nested_complex_patterns_parser() { #[test] fn test_real_world_patterns_parser() { - assert!(parse_ok("https?://[%w%.%-%+]+%.%w+").len() > 0); + assert!(parse_ok(b"https?://[%w%.%-%+]+%.%w+").len() > 0); - assert!(parse_ok("^[%w%.%+%-]+@[%w%.%+%-]+%.%w+$").len() > 0); + assert!(parse_ok(b"^[%w%.%+%-]+@[%w%.%+%-]+%.%w+$").len() > 0); - assert!(parse_ok("(%d%d?)/(%d%d?)/(%d%d%d%d)").len() > 0); + assert!(parse_ok(b"(%d%d?)/(%d%d?)/(%d%d%d%d)").len() > 0); - assert!(parse_ok("(%d+)%.(%d+)%.(%d+)%.(%d+)").len() > 0); + assert!(parse_ok(b"(%d+)%.(%d+)%.(%d+)%.(%d+)").len() > 0); - assert!(parse_ok("\"([^\"]+)\":%s*\"([^\"]*)\"").len() > 0); + assert!(parse_ok(b"\"([^\"]+)\":%s*\"([^\"]*)\"").len() > 0); } #[test] fn test_special_lua_pattern_features_parser() { - assert!(parse_ok("%1").len() > 0); - assert!(parse_ok("(.)%1").len() > 0); - assert!(parse_ok("%b{}").len() > 0); - assert!(parse_ok("%f[%a]").len() > 0); + assert!(parse_ok(b"%1").len() > 0); + assert!(parse_ok(b"(.)%1").len() > 0); + assert!(parse_ok(b"%b{}").len() > 0); + assert!(parse_ok(b"%f[%a]").len() > 0); } diff --git a/tests/pattern_engine.rs b/tests/pattern_engine.rs index 36ab899..1e48319 100644 --- a/tests/pattern_engine.rs +++ b/tests/pattern_engine.rs @@ -2,17 +2,17 @@ use lsonar::{LUA_MAXCAPTURES, Parser, Result, engine::find_first_match}; use std::ops::Range; fn find( - pattern_str: &str, - text: &str, + pattern_str: &[u8], + text: &[u8], ) -> Result, Vec>>)>> { let mut parser = Parser::new(pattern_str)?; let ast = parser.parse()?; - find_first_match(&ast, text.as_bytes(), 0) // 0-based index only for tests + find_first_match(&ast, text, 0) // 0-based index only for tests } fn assert_match( - pattern: &str, - text: &str, + pattern: &[u8], + text: &[u8], expected_full: Range, expected_captures: &[Option>], ) { @@ -29,227 +29,228 @@ fn assert_match( } None => panic!( "Expected match, but found none for pattern '{}' in text '{}'", - pattern, text + str::from_utf8(pattern).unwrap(), + str::from_utf8(text).unwrap() ), } } -fn assert_no_match(pattern: &str, text: &str) { +fn assert_no_match(pattern: &[u8], text: &[u8]) { let result = find(pattern, text).expect("find failed"); assert!( result.is_none(), "Expected no match, but found one for pattern '{}' in text '{}'", - pattern, - text + str::from_utf8(pattern).unwrap(), + str::from_utf8(text).unwrap() ); } #[test] fn test_literal_match_engine() { - assert_match("abc", "abc", 0..3, &[]); - assert_match("abc", "xabc", 1..4, &[]); - assert_match("abc", "abcy", 0..3, &[]); - assert_no_match("abc", "axbyc"); - assert_no_match("abc", "ab"); - assert_no_match("abc", ""); + assert_match(b"abc", b"abc", 0..3, &[]); + assert_match(b"abc", b"xabc", 1..4, &[]); + assert_match(b"abc", b"abcy", 0..3, &[]); + assert_no_match(b"abc", b"axbyc"); + assert_no_match(b"abc", b"ab"); + assert_no_match(b"abc", b""); } #[test] fn test_any_match_engine() { - assert_match(".", "a", 0..1, &[]); - assert_match("a.c", "axc", 0..3, &[]); - assert_match("a.c", "a\nc", 0..3, &[]); - assert_no_match(".", ""); + assert_match(b".", b"a", 0..1, &[]); + assert_match(b"a.c", b"axc", 0..3, &[]); + assert_match(b"a.c", b"a\nc", 0..3, &[]); + assert_no_match(b".", b""); } #[test] fn test_class_match_engine() { - assert_match("%d", "5", 0..1, &[]); - assert_match("%a", "Z", 0..1, &[]); - assert_match("%l", "z", 0..1, &[]); - assert_match("%s", " ", 0..1, &[]); - assert_match("%x", "f", 0..1, &[]); - assert_match("a%dz", "a1z", 0..3, &[]); - assert_no_match("%d", "a"); - assert_match("%D", "a", 0..1, &[]); - assert_no_match("%D", "5"); - assert_match("%S", "a", 0..1, &[]); - assert_no_match("%S", " "); + assert_match(b"%d", b"5", 0..1, &[]); + assert_match(b"%a", b"Z", 0..1, &[]); + assert_match(b"%l", b"z", 0..1, &[]); + assert_match(b"%s", b" ", 0..1, &[]); + assert_match(b"%x", b"f", 0..1, &[]); + assert_match(b"a%dz", b"a1z", 0..3, &[]); + assert_no_match(b"%d", b"a"); + assert_match(b"%D", b"a", 0..1, &[]); + assert_no_match(b"%D", b"5"); + assert_match(b"%S", b"a", 0..1, &[]); + assert_no_match(b"%S", b" "); } #[test] fn test_set_match_engine() { - assert_match("[abc]", "a", 0..1, &[]); - assert_match("[abc]", "b", 0..1, &[]); - assert_match("[^abc]", "d", 0..1, &[]); - assert_match("[a-z]", "m", 0..1, &[]); - assert_match("[%d%s]", "5", 0..1, &[]); - assert_match("[%d%s]", " ", 0..1, &[]); - assert_no_match("[abc]", "d"); - assert_no_match("[^abc]", "a"); - assert_no_match("[a-z]", "A"); - assert_no_match("[a-z]", "5"); - assert_no_match("[%d%s]", "a"); + assert_match(b"[abc]", b"a", 0..1, &[]); + assert_match(b"[abc]", b"b", 0..1, &[]); + assert_match(b"[^abc]", b"d", 0..1, &[]); + assert_match(b"[a-z]", b"m", 0..1, &[]); + assert_match(b"[%d%s]", b"5", 0..1, &[]); + assert_match(b"[%d%s]", b" ", 0..1, &[]); + assert_no_match(b"[abc]", b"d"); + assert_no_match(b"[^abc]", b"a"); + assert_no_match(b"[a-z]", b"A"); + assert_no_match(b"[a-z]", b"5"); + assert_no_match(b"[%d%s]", b"a"); } #[test] fn test_anchor_match_engine() { - assert_match("^abc", "abc", 0..3, &[]); - assert_no_match("^abc", "xabc"); - assert_match("abc$", "abc", 0..3, &[]); - assert_no_match("abc$", "abcd"); - assert_match("^abc$", "abc", 0..3, &[]); - assert_no_match("^abc$", "xabc"); - assert_no_match("^abc$", "abcd"); - assert_match("^", "", 0..0, &[]); - assert_match("$", "", 0..0, &[]); - assert_match("^$", "", 0..0, &[]); + assert_match(b"^abc", b"abc", 0..3, &[]); + assert_no_match(b"^abc", b"xabc"); + assert_match(b"abc$", b"abc", 0..3, &[]); + assert_no_match(b"abc$", b"abcd"); + assert_match(b"^abc$", b"abc", 0..3, &[]); + assert_no_match(b"^abc$", b"xabc"); + assert_no_match(b"^abc$", b"abcd"); + assert_match(b"^", b"", 0..0, &[]); + assert_match(b"$", b"", 0..0, &[]); + assert_match(b"^$", b"", 0..0, &[]); } #[test] fn test_greedy_quantifiers_engine() { - assert_match("a*", "aaa", 0..3, &[]); - assert_match("a*", "", 0..0, &[]); - assert_match("a*b", "aaab", 0..4, &[]); - assert_match("a*b", "b", 0..1, &[]); - assert_match("x*", "y", 0..0, &[]); - assert_match("a+", "aaa", 0..3, &[]); - assert_no_match("a+", ""); - assert_match("a+b", "aaab", 0..4, &[]); - assert_no_match("a+b", "b"); - assert_match("a?", "a", 0..1, &[]); - assert_match("a?", "", 0..0, &[]); - assert_match("a?b", "ab", 0..2, &[]); - assert_match("a?b", "b", 0..1, &[]); - assert_match("a*a", "aaa", 0..3, &[]); - assert_match(".*b", "axbyb", 0..5, &[]); - assert_match("a+a", "aa", 0..2, &[]); - assert_match("a?a", "aa", 0..2, &[]); - assert_match("a?a", "a", 0..1, &[]); + assert_match(b"a*", b"aaa", 0..3, &[]); + assert_match(b"a*", b"", 0..0, &[]); + assert_match(b"a*b", b"aaab", 0..4, &[]); + assert_match(b"a*b", b"b", 0..1, &[]); + assert_match(b"x*", b"y", 0..0, &[]); + assert_match(b"a+", b"aaa", 0..3, &[]); + assert_no_match(b"a+", b""); + assert_match(b"a+b", b"aaab", 0..4, &[]); + assert_no_match(b"a+b", b"b"); + assert_match(b"a?", b"a", 0..1, &[]); + assert_match(b"a?", b"", 0..0, &[]); + assert_match(b"a?b", b"ab", 0..2, &[]); + assert_match(b"a?b", b"b", 0..1, &[]); + assert_match(b"a*a", b"aaa", 0..3, &[]); + assert_match(b".*b", b"axbyb", 0..5, &[]); + assert_match(b"a+a", b"aa", 0..2, &[]); + assert_match(b"a?a", b"aa", 0..2, &[]); + assert_match(b"a?a", b"a", 0..1, &[]); } #[test] fn test_non_greedy_quantifier_engine() { - assert_match("a-", "aaa", 0..0, &[]); - assert_match("a-", "", 0..0, &[]); - assert_match("a-b", "aaab", 0..4, &[]); - assert_match("a-b", "b", 0..1, &[]); - assert_match("x-", "y", 0..0, &[]); - assert_match(".-b", "axbyb", 0..3, &[]); - assert_match("a-a", "aaa", 0..1, &[]); + assert_match(b"a-", b"aaa", 0..0, &[]); + assert_match(b"a-", b"", 0..0, &[]); + assert_match(b"a-b", b"aaab", 0..4, &[]); + assert_match(b"a-b", b"b", 0..1, &[]); + assert_match(b"x-", b"y", 0..0, &[]); + assert_match(b".-b", b"axbyb", 0..3, &[]); + assert_match(b"a-a", b"aaa", 0..1, &[]); } #[test] fn test_captures_simple_engine() { - assert_match("(a)", "a", 0..1, &[Some(0..1)]); - assert_match("(.)", "b", 0..1, &[Some(0..1)]); - assert_match("(%d)", "3", 0..1, &[Some(0..1)]); - assert_match("a(b)c", "abc", 0..3, &[Some(1..2)]); - assert_match("a(.)c", "axc", 0..3, &[Some(1..2)]); - assert_match("(a)(b)", "ab", 0..2, &[Some(0..1), Some(1..2)]); - assert_match("()(b)", "b", 0..1, &[Some(0..0), Some(0..1)]); + assert_match(b"(a)", b"a", 0..1, &[Some(0..1)]); + assert_match(b"(.)", b"b", 0..1, &[Some(0..1)]); + assert_match(b"(%d)", b"3", 0..1, &[Some(0..1)]); + assert_match(b"a(b)c", b"abc", 0..3, &[Some(1..2)]); + assert_match(b"a(.)c", b"axc", 0..3, &[Some(1..2)]); + assert_match(b"(a)(b)", b"ab", 0..2, &[Some(0..1), Some(1..2)]); + assert_match(b"()(b)", b"b", 0..1, &[Some(0..0), Some(0..1)]); } #[test] fn test_captures_quantified_engine() { - assert_match("(a)*", "aaa", 0..3, &[Some(2..3)]); - assert_match("(a)+", "aaa", 0..3, &[Some(2..3)]); - assert_match("(a)?", "a", 0..1, &[Some(0..1)]); - assert_match("(a)?", "", 0..0, &[None]); - assert_match("a(b)*c", "abbbc", 0..5, &[Some(3..4)]); - assert_match("a(b)+c", "abbbc", 0..5, &[Some(3..4)]); - assert_match("a(b)?c", "abc", 0..3, &[Some(1..2)]); - assert_match("a(b)?c", "ac", 0..2, &[None]); - assert_match("a(b)-c", "abbbc", 0..5, &[Some(3..4)]); - assert_match("a(b)-c", "abbbc", 0..5, &[Some(3..4)]); + assert_match(b"(a)*", b"aaa", 0..3, &[Some(2..3)]); + assert_match(b"(a)+", b"aaa", 0..3, &[Some(2..3)]); + assert_match(b"(a)?", b"a", 0..1, &[Some(0..1)]); + assert_match(b"(a)?", b"", 0..0, &[None]); + assert_match(b"a(b)*c", b"abbbc", 0..5, &[Some(3..4)]); + assert_match(b"a(b)+c", b"abbbc", 0..5, &[Some(3..4)]); + assert_match(b"a(b)?c", b"abc", 0..3, &[Some(1..2)]); + assert_match(b"a(b)?c", b"ac", 0..2, &[None]); + assert_match(b"a(b)-c", b"abbbc", 0..5, &[Some(3..4)]); + assert_match(b"a(b)-c", b"abbbc", 0..5, &[Some(3..4)]); } #[test] fn test_captures_nested_engine() { - assert_match("(a(b)c)", "abc", 0..3, &[Some(0..3), Some(1..2)]); - assert_match("((.)%w*)", "a1 b2", 0..2, &[Some(0..2), Some(0..1)]); + assert_match(b"(a(b)c)", b"abc", 0..3, &[Some(0..3), Some(1..2)]); + assert_match(b"((.)%w*)", b"a1 b2", 0..2, &[Some(0..2), Some(0..1)]); } #[test] fn test_balanced_engine() { - assert_match("%b()", "(inner)", 0..7, &[]); - assert_match("%b<>", "<>", 0..5, &[]); - assert_match("a %b() c", "a (bal) c", 0..9, &[]); - assert_match("%b()", "()", 0..2, &[]); - assert_no_match("%b()", "(unbalanced"); - assert_match("%b()", "x()y", 1..3, &[]); + assert_match(b"%b()", b"(inner)", 0..7, &[]); + assert_match(b"%b<>", b"<>", 0..5, &[]); + assert_match(b"a %b() c", b"a (bal) c", 0..9, &[]); + assert_match(b"%b()", b"()", 0..2, &[]); + assert_no_match(b"%b()", b"(unbalanced"); + assert_match(b"%b()", b"x()y", 1..3, &[]); } #[test] fn test_frontier_engine() { - assert_match("%f[a]a", " a", 1..2, &[]); - assert_match("%f[a]a", "ba", 1..2, &[]); + assert_match(b"%f[a]a", b" a", 1..2, &[]); + assert_match(b"%f[a]a", b"ba", 1..2, &[]); - assert_no_match("%f[^%w]word", "_word"); - assert_no_match("%f[^%w]word", "1word"); - assert_no_match("%f[%s]a", " a"); + assert_no_match(b"%f[^%w]word", b"_word"); + assert_no_match(b"%f[^%w]word", b"1word"); + assert_no_match(b"%f[%s]a", b" a"); - assert_match("%f[a]a", "a", 0..1, &[]); - assert_match("%f[^a]b", "b", 0..1, &[]); + assert_match(b"%f[a]a", b"a", 0..1, &[]); + assert_match(b"%f[^a]b", b"b", 0..1, &[]); } #[test] fn test_backtracking_engine() { - assert_no_match("a*b", "aaac"); - assert_no_match("a+b", "aaac"); - assert_match("(ab)+a", "abab", 0..3, &[]); - assert_match("(a*)b", "aaab", 0..4, &[Some(0..3)]); - assert_match("(a+)b", "aaab", 0..4, &[Some(0..3)]); - assert_match("a[bc]+d", "abbcd", 0..5, &[]); + assert_no_match(b"a*b", b"aaac"); + assert_no_match(b"a+b", b"aaac"); + assert_match(b"(ab)+a", b"abab", 0..3, &[]); + assert_match(b"(a*)b", b"aaab", 0..4, &[Some(0..3)]); + assert_match(b"(a+)b", b"aaab", 0..4, &[Some(0..3)]); + assert_match(b"a[bc]+d", b"abbcd", 0..5, &[]); } #[test] fn test_empty_engine() { - assert_match("", "", 0..0, &[]); - assert_match("", "abc", 0..0, &[]); - assert_no_match("a", ""); - assert_match("a*", "", 0..0, &[]); - assert_no_match("a+", ""); - assert_match("a?", "", 0..0, &[]); - assert_match("()", "", 0..0, &[Some(0..0)]); + assert_match(b"", b"", 0..0, &[]); + assert_match(b"", b"abc", 0..0, &[]); + assert_no_match(b"a", b""); + assert_match(b"a*", b"", 0..0, &[]); + assert_no_match(b"a+", b""); + assert_match(b"a?", b"", 0..0, &[]); + assert_match(b"()", b"", 0..0, &[Some(0..0)]); } #[test] fn test_find_offset_engine() { - let pattern = "b"; - let text = "abc"; + let pattern = b"b"; + let text = b"abc"; let mut parser = Parser::new(pattern).unwrap(); let ast = parser.parse().unwrap(); - let result = find_first_match(&ast, text.as_bytes(), 1).unwrap(); + let result = find_first_match(&ast, text, 1).unwrap(); assert_eq!(result, Some((1..2, vec![None; LUA_MAXCAPTURES]))); - let result2 = find_first_match(&ast, text.as_bytes(), 2).unwrap(); + let result2 = find_first_match(&ast, text, 2).unwrap(); assert!(result2.is_none()); } #[test] fn test_real_world_email_validation_engine() { assert_match( - "^[%w%.%+%-]+@[%w%.%+%-]+%.%w+$", - "user@example.com", + b"^[%w%.%+%-]+@[%w%.%+%-]+%.%w+$", + b"user@example.com", 0..16, &[], ); assert_match( - "^[%w%.%+%-]+@[%w%.%+%-]+%.%w+$", - "user.name+tag-123@example-site.co.uk", + b"^[%w%.%+%-]+@[%w%.%+%-]+%.%w+$", + b"user.name+tag-123@example-site.co.uk", 0..36, &[], ); - assert_no_match("^[%w%.%+%-]+@[%w%.%+%-]+%.%w+$", "user@example"); - assert_no_match("^[%w%.%+%-]+@[%w%.%+%-]+%.%w+$", "@example.com"); - assert_no_match("^[%w%.%+%-]+@[%w%.%+%-]+%.%w+$", "user@.com"); + assert_no_match(b"^[%w%.%+%-]+@[%w%.%+%-]+%.%w+$", b"user@example"); + assert_no_match(b"^[%w%.%+%-]+@[%w%.%+%-]+%.%w+$", b"@example.com"); + assert_no_match(b"^[%w%.%+%-]+@[%w%.%+%-]+%.%w+$", b"user@.com"); } #[test] fn test_extracting_data_with_captures_engine() { - let result = find("(%d%d?)/(%d%d?)/(%d%d%d%d)", "Date: 25/12/2023") + let result = find(b"(%d%d?)/(%d%d?)/(%d%d%d%d)", b"Date: 25/12/2023") .unwrap() .unwrap(); let (full, captures) = result; @@ -259,8 +260,8 @@ fn test_extracting_data_with_captures_engine() { assert_eq!(captures[2], Some(12..16)); let result = find( - "([%w%.%+%-]+)@([%w%.%+%-]+%.%w+)", - "Contact: john.doe@example.com", + b"([%w%.%+%-]+)@([%w%.%+%-]+%.%w+)", + b"Contact: john.doe@example.com", ) .unwrap() .unwrap(); @@ -272,24 +273,24 @@ fn test_extracting_data_with_captures_engine() { #[test] fn test_balanced_delimiters_engine() { - assert_match("%b<>", "

text

", 0..5, &[]); - assert_match("%b()", "(a + (b * c))", 0..13, &[]); - assert_match("'%b\"\"'", "'\"nested\"'", 0..10, &[]); - assert_match("before %b() after", "before (balanced) after", 0..23, &[]); + assert_match(b"%b<>", b"

text

", 0..5, &[]); + assert_match(b"%b()", b"(a + (b * c))", 0..13, &[]); + assert_match(b"'%b\"\"'", b"'\"nested\"'", 0..10, &[]); + assert_match(b"before %b() after", b"before (balanced) after", 0..23, &[]); } #[test] fn test_frontier_patterns_engine() { - assert_match("%f[%a]t%w+", "start the test", 6..9, &[]); - assert_match("%w+t%f[^%w]", "start the test", 0..5, &[]); - assert_match("%f[%w]word%f[^%w]", "a word here", 2..6, &[]); - assert_no_match("%f[%w]word%f[^%w]", "aword here"); + assert_match(b"%f[%a]t%w+", b"start the test", 6..9, &[]); + assert_match(b"%w+t%f[^%w]", b"start the test", 0..5, &[]); + assert_match(b"%f[%w]word%f[^%w]", b"a word here", 2..6, &[]); + assert_no_match(b"%f[%w]word%f[^%w]", b"aword here"); } #[test] fn test_complex_pattern_combinations_engine() { - let pattern = "]*>([^<]*)
"; - let text = "

Visit Example Site for more info.

"; + let pattern = b"]*>([^<]*)"; + let text = b"

Visit Example Site for more info.

"; let result = find(pattern, text).unwrap().unwrap(); let (full, captures) = result; @@ -297,9 +298,14 @@ fn test_complex_pattern_combinations_engine() { assert_eq!(captures[0], Some(18..37)); assert_eq!(captures[1], Some(52..64)); - assert_match("%f[%w][%u][%l]+%f[^%w]", "This is a Test string", 0..4, &[]); + assert_match( + b"%f[%w][%u][%l]+%f[^%w]", + b"This is a Test string", + 0..4, + &[], + ); - let result = find("([^,]+),([^,]+),([^,]+)", "apple,orange,banana") + let result = find(b"([^,]+),([^,]+),([^,]+)", b"apple,orange,banana") .unwrap() .unwrap(); let (_, captures) = result; @@ -310,71 +316,55 @@ fn test_complex_pattern_combinations_engine() { #[test] fn test_optimization_cases_engine() { - let mut parser = Parser::new("^abc").unwrap(); + let mut parser = Parser::new(b"^abc").unwrap(); let ast = parser.parse().unwrap(); - assert!( - find_first_match(&ast, "abcdef".as_bytes(), 0) - .unwrap() - .is_some() - ); - assert!( - find_first_match(&ast, "abcdef".as_bytes(), 1) - .unwrap() - .is_none() - ); + assert!(find_first_match(&ast, b"abcdef", 0).unwrap().is_some()); + assert!(find_first_match(&ast, b"abcdef", 1).unwrap().is_none()); - let mut parser = Parser::new("abc$").unwrap(); + let mut parser = Parser::new(b"abc$").unwrap(); let ast = parser.parse().unwrap(); - assert!( - find_first_match(&ast, "xyzabc".as_bytes(), 0) - .unwrap() - .is_some() - ); - assert!( - find_first_match(&ast, "abcxyz".as_bytes(), 0) - .unwrap() - .is_none() - ); + assert!(find_first_match(&ast, b"xyzabc", 0).unwrap().is_some()); + assert!(find_first_match(&ast, b"abcxyz", 0).unwrap().is_none()); } #[test] fn test_pattern_with_utf8_content_engine() { - assert_match(".", "привет", 0..1, &[]); - assert_match("..", "привет", 0..2, &[]); + assert_match(b".", "привет".as_bytes(), 0..1, &[]); + assert_match(b"..", "привет".as_bytes(), 0..2, &[]); - assert_match("[%w]+", "привет123", 12..15, &[]); + assert_match(b"[%w]+", "привет123".as_bytes(), 12..15, &[]); - assert_match("%a+", "hello привет", 0..5, &[]); + assert_match(b"%a+", "hello привет".as_bytes(), 0..5, &[]); } #[test] fn test_quantifiers_with_capturing_groups_engine() { - assert_match("(a)+", "aaa", 0..3, &[Some(2..3)]); - assert_match("(ab)+", "ababab", 0..6, &[Some(4..6)]); - assert_match("(a)*", "aaa", 0..3, &[Some(2..3)]); - assert_match("(a)*", "", 0..0, &[None]); - assert_match("(a)?", "a", 0..1, &[Some(0..1)]); - assert_match("(a)?", "", 0..0, &[None]); - assert_match("(a)-", "aaa", 0..0, &[None]); + assert_match(b"(a)+", b"aaa", 0..3, &[Some(2..3)]); + assert_match(b"(ab)+", b"ababab", 0..6, &[Some(4..6)]); + assert_match(b"(a)*", b"aaa", 0..3, &[Some(2..3)]); + assert_match(b"(a)*", b"", 0..0, &[None]); + assert_match(b"(a)?", b"a", 0..1, &[Some(0..1)]); + assert_match(b"(a)?", b"", 0..0, &[None]); + assert_match(b"(a)-", b"aaa", 0..0, &[None]); } #[test] fn test_edge_cases_and_backtracking_engine() { - assert_match("(a+)+", "aaa", 0..3, &[Some(0..3)]); - assert_match("[ab][cd]", "ac", 0..2, &[]); - assert_match("[ab][cd]", "bd", 0..2, &[]); - assert_no_match("[ab][cd]", "ab"); - assert_match("a.-b", "axxxbyyybzzz", 0..5, &[]); - assert_match("a.*b", "axxxbyyybzzz", 0..9, &[]); - assert_match("(a*)(b?)b+", "aaabbb", 0..6, &[Some(0..3), Some(3..4)]); + assert_match(b"(a+)+", b"aaa", 0..3, &[Some(0..3)]); + assert_match(b"[ab][cd]", b"ac", 0..2, &[]); + assert_match(b"[ab][cd]", b"bd", 0..2, &[]); + assert_no_match(b"[ab][cd]", b"ab"); + assert_match(b"a.-b", b"axxxbyyybzzz", 0..5, &[]); + assert_match(b"a.*b", b"axxxbyyybzzz", 0..9, &[]); + assert_match(b"(a*)(b?)b+", b"aaabbb", 0..6, &[Some(0..3), Some(3..4)]); } #[test] fn test_real_world_patterns_advanced_engine() { - let html = "
Product: Laptop
$999
"; - let pattern = "
([^<]*[^<]*)?([^<]*)
"; + let html = b"
Product: Laptop
$999
"; + let pattern = b"
([^<]*[^<]*)?([^<]*)
"; let result = find(pattern, html).unwrap().unwrap(); let (full, captures) = result; @@ -383,8 +373,8 @@ fn test_real_world_patterns_advanced_engine() { assert_eq!(captures[1], Some(18..40)); assert_eq!(captures[2], Some(40..46)); - let log_line = "2023-04-15 14:23:45 ERROR [app.service] Failed to connect: timeout"; - let pattern = "(%d+)%-(%d+)%-(%d+) (%d+):(%d+):(%d+) (%u+)"; + let log_line = b"2023-04-15 14:23:45 ERROR [app.service] Failed to connect: timeout"; + let pattern = b"(%d+)%-(%d+)%-(%d+) (%d+):(%d+):(%d+) (%u+)"; let result = find(pattern, log_line).unwrap().unwrap(); let (full, captures) = result; @@ -401,15 +391,15 @@ fn test_real_world_patterns_advanced_engine() { #[test] fn test_subsequent_captures_engine() { assert_match( - "(%d%d%d%d)%-(%d%d)%-(%d%d)", - "2023-04-15", + b"(%d%d%d%d)%-(%d%d)%-(%d%d)", + b"2023-04-15", 0..10, &[Some(0..4), Some(5..7), Some(8..10)], ); assert_match( - "(%d+)_(%w+)_(%d+)", - "123_test_456", + b"(%d+)_(%w+)_(%d+)", + b"123_test_456", 0..12, &[Some(0..3), Some(4..8), Some(9..12)], );