Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ pub struct Lexer<'a> {
}

impl<'a> Lexer<'a> {
pub fn new(input: &'a str) -> Self {
pub fn new(input: &'a [u8]) -> Self {
Lexer {
input: input.as_bytes(),
input,
pos: 0,
capture_depth: 0,
set_depth: 0,
Expand Down
16 changes: 6 additions & 10 deletions src/lua/find.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,11 @@ use super::{
/// Corresponds to Lua 5.3 [`string.find`].
/// Returns 1-based or 0-based (see features [`1-based`] and [`0-based`]) indices (start, end) and captured strings. The [`init`] argument can be either 0-based or 1-based.
pub fn find(
text: &str,
pattern: &str,
text_bytes: &[u8],
pattern: &[u8],
init: Option<isize>,
plain: bool,
) -> Result<Option<(usize, usize, Vec<String>)>> {
let text_bytes = text.as_bytes();
) -> Result<Option<(usize, usize, Vec<Vec<u8>>)>> {
let byte_len = text_bytes.len();

let start_byte_index = calculate_start_index(byte_len, init);
Expand All @@ -35,7 +34,7 @@ pub fn find(

if let Some(rel_byte_pos) = text_bytes[start_byte_index..]
.windows(pattern.len())
.position(|window| window == pattern.as_bytes())
.position(|window| window == pattern)
{
let zero_based_start_pos = start_byte_index + rel_byte_pos;
let zero_based_end_pos = zero_based_start_pos + pattern.len();
Expand Down Expand Up @@ -65,12 +64,9 @@ pub fn find(
};
let end_pos = match_byte_range.end;

let captured_strings: Vec<String> = captures_byte_ranges
let captured_strings: Vec<Vec<u8>> = captures_byte_ranges
.into_iter()
.filter_map(|maybe_range| {
maybe_range
.map(|range| String::from_utf8_lossy(&text_bytes[range]).into_owned())
})
.filter_map(|maybe_range| maybe_range.map(|range| text_bytes[range].to_owned()))
.collect();

Ok(Some((start_pos, end_pos, captured_strings)))
Expand Down
7 changes: 2 additions & 5 deletions src/lua/gmatch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@ mod iter;
pub use iter::GMatchIterator;

/// Corresponds to Lua 5.3 `string.gmatch`
pub fn gmatch(
text: &str,
pattern: &str,
) -> Result<GMatchIterator> {
pub fn gmatch(text: &[u8], pattern: &[u8]) -> Result<GMatchIterator> {
let is_empty_pattern = pattern.is_empty();

let pattern_ast = if is_empty_pattern {
Expand All @@ -19,7 +16,7 @@ pub fn gmatch(
};

Ok(GMatchIterator {
bytes: text.as_bytes().to_vec(),
bytes: text.to_vec(),
pattern_ast,
current_pos: 0,
is_empty_pattern,
Expand Down
15 changes: 5 additions & 10 deletions src/lua/gmatch/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ pub struct GMatchIterator {
}

impl Iterator for GMatchIterator {
type Item = Result<Vec<String>>;
type Item = Result<Vec<Vec<u8>>>;

fn next(&mut self) -> Option<Self::Item> {
if self.current_pos > self.bytes.len() {
return None;
}

if self.is_empty_pattern {
let result = Some(Ok(vec![String::new()]));
let result = Some(Ok(vec![vec![]]));

self.current_pos += 1;

Expand All @@ -34,20 +34,15 @@ impl Iterator for GMatchIterator {
self.current_pos = match_range.end;
}

let result: Vec<String> = if captures.iter().any(|c| c.is_some()) {
let result: Vec<Vec<u8>> = if captures.iter().any(|c| c.is_some()) {
captures
.into_iter()
.filter_map(|maybe_range| {
maybe_range.map(|range| {
String::from_utf8_lossy(&self.bytes[range]).into_owned()
})
maybe_range.map(|range| self.bytes[range].to_owned())
})
.collect()
} else {
vec![
String::from_utf8_lossy(&self.bytes[match_range.start..match_range.end])
.into_owned(),
]
vec![self.bytes[match_range.start..match_range.end].to_owned()]
};

Some(Ok(result))
Expand Down
29 changes: 14 additions & 15 deletions src/lua/gsub.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,28 @@ pub use repl::Repl;

/// Corresponds to Lua 5.3 `string.gsub`
pub fn gsub<'a>(
text: &'a str,
pattern: &str,
text: &'a [u8],
pattern: &[u8],
repl: Repl<'a>,
n: Option<usize>,
) -> Result<(String, usize)> {
let text_bytes = text.as_bytes();
let byte_len = text_bytes.len();
) -> Result<(Vec<u8>, usize)> {
let byte_len = text.len();

let mut parser = Parser::new(pattern)?;
let pattern_ast = parser.parse()?;

let mut result = String::new();
let mut result = Vec::new();
let mut last_pos = 0;
let mut replacements = 0;
let max_replacements = n.unwrap_or(usize::MAX);

while replacements < max_replacements {
match find_first_match(&pattern_ast, text_bytes, last_pos)? {
match find_first_match(&pattern_ast, text, last_pos)? {
Some((match_range, captures)) => {
result.push_str(&text[last_pos..match_range.start]);
result.extend(&text[last_pos..match_range.start]);

let full_match = &text[match_range.start..match_range.end];
let captures_str: Vec<&str> = captures
let captures_str: Vec<&[u8]> = captures
.iter()
.filter_map(|maybe_range| {
maybe_range
Expand All @@ -41,14 +40,14 @@ pub fn gsub<'a>(
match &repl {
Repl::String(repl_str) => {
let replacement = process_replacement_string(repl_str, &captures_str)?;
result.push_str(&replacement);
result.extend(&replacement);
}
Repl::Function(f) => {
let mut args = Vec::with_capacity(captures_str.len() + 1);
args.push(full_match);
args.extend(captures_str.iter());
let replacement = f(&args);
result.push_str(&replacement);
result.extend(&replacement);
}
Repl::Table(table) => {
let key = if !captures_str.is_empty() {
Expand All @@ -58,9 +57,9 @@ pub fn gsub<'a>(
};

if let Some(replacement) = table.get(key) {
result.push_str(replacement);
result.extend(*replacement);
} else {
result.push_str(full_match);
result.extend(full_match);
}
}
}
Expand All @@ -72,7 +71,7 @@ pub fn gsub<'a>(
if last_pos >= byte_len {
break;
}
result.push_str(&text[last_pos..last_pos + 1]);
result.extend(&text[last_pos..last_pos + 1]);
last_pos += 1;
}
}
Expand All @@ -81,7 +80,7 @@ pub fn gsub<'a>(
}

if last_pos < byte_len {
result.push_str(&text[last_pos..]);
result.extend(&text[last_pos..]);
}

Ok((result, replacements))
Expand Down
25 changes: 12 additions & 13 deletions src/lua/gsub/repl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,28 @@ use crate::Result;
use std::collections::HashMap;

pub enum Repl<'a> {
String(&'a str),
Function(Box<dyn Fn(&[&str]) -> String + 'a>),
Table(&'a HashMap<String, String>),
String(&'a [u8]),
Function(Box<dyn Fn(&[&[u8]]) -> Vec<u8> + 'a>),
Table(&'a HashMap<&'a [u8], &'a [u8]>),
}

/// One unit of a tokenized `gsub` replacement string.
///
/// Tokens are produced by `tokenize_replacement_string` and expanded into the
/// final replacement bytes by `process_replacement_string`.
enum ReplToken {
/// A single byte that is appended to the output as-is.
Literal(u8),
/// A `%1`..`%9` capture reference. The value is the 1-based capture number:
/// the expander reads `captures[idx - 1]` and emits nothing when the number
/// exceeds the number of available captures.
CaptureRef(usize),
}

pub fn process_replacement_string(repl: &str, captures: &[&str]) -> Result<String> {
pub fn process_replacement_string(repl: &[u8], captures: &[&[u8]]) -> Result<Vec<u8>> {
let tokens = tokenize_replacement_string(repl);
let mut result = String::with_capacity(tokens.len());
let mut result = Vec::with_capacity(tokens.len());

for token in tokens {
match token {
ReplToken::Literal(b) => {
result.push(b as char);
result.push(b);
}
ReplToken::CaptureRef(idx) => {
if idx <= captures.len() {
result.push_str(captures[idx - 1]);
result.extend(captures[idx - 1]);
}
}
}
Expand All @@ -32,14 +32,13 @@ pub fn process_replacement_string(repl: &str, captures: &[&str]) -> Result<Strin
Ok(result)
}

fn tokenize_replacement_string(repl: &str) -> Vec<ReplToken> {
fn tokenize_replacement_string(repl: &[u8]) -> Vec<ReplToken> {
let mut tokens = Vec::new();
let bytes = repl.as_bytes();
let mut i = 0;

while i < bytes.len() {
if bytes[i] == b'%' && i + 1 < bytes.len() {
let next_byte = bytes[i + 1];
while i < repl.len() {
if repl[i] == b'%' && i + 1 < repl.len() {
let next_byte = repl[i + 1];
if (b'1'..=b'9').contains(&next_byte) {
let capture_idx = (next_byte - b'0') as usize;
tokens.push(ReplToken::CaptureRef(capture_idx));
Expand All @@ -52,7 +51,7 @@ fn tokenize_replacement_string(repl: &str) -> Vec<ReplToken> {
i += 1;
}
} else {
tokens.push(ReplToken::Literal(bytes[i]));
tokens.push(ReplToken::Literal(repl[i]));
i += 1;
}
}
Expand Down
17 changes: 5 additions & 12 deletions src/lua/match.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,25 @@ use super::{
};

/// Corresponds to Lua 5.3 `string.match`
pub fn r#match(text: &str, pattern: &str, init: Option<isize>) -> Result<Option<Vec<String>>> {
let text_bytes = text.as_bytes();
let byte_len = text_bytes.len();
pub fn r#match(text: &[u8], pattern: &[u8], init: Option<isize>) -> Result<Option<Vec<Vec<u8>>>> {
let byte_len = text.len();

let start_byte_index = calculate_start_index(byte_len, init);

let mut parser = Parser::new(pattern)?;
let ast = parser.parse()?;

match find_first_match(&ast, text_bytes, start_byte_index)? {
match find_first_match(&ast, text, start_byte_index)? {
Some((match_byte_range, captures_byte_ranges)) => {
let captures: Vec<_> = captures_byte_ranges
.into_iter()
.filter_map(|maybe_range| {
maybe_range
.map(|range| String::from_utf8_lossy(&text_bytes[range]).into_owned())
})
.filter_map(|maybe_range| maybe_range.map(|range| text[range].to_owned()))
.collect();

if !captures.is_empty() {
Ok(Some(captures))
} else {
let full_match = String::from_utf8_lossy(
&text_bytes[match_byte_range.start..match_byte_range.end],
)
.into_owned();
let full_match = text[match_byte_range.start..match_byte_range.end].to_owned();
Ok(Some(vec![full_match]))
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pub struct Parser {
}

impl Parser {
pub fn new(pattern: &str) -> Result<Self> {
pub fn new(pattern: &[u8]) -> Result<Self> {
let mut lexer = Lexer::new(pattern);
let mut token_vec = Vec::new();
loop {
Expand Down
Loading