Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add std regex builtins #119

Merged
merged 2 commits into from
Dec 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
299 changes: 212 additions & 87 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ clap_complete = "4.4"
lsp-server = "0.7.4"
lsp-types = "0.94.1"

regex = "1.8.4"
lru = "0.10.0"

#[profile.test]
#opt-level = 1

Expand Down
2 changes: 2 additions & 0 deletions cmds/jrsonnet/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ exp-destruct = ["jrsonnet-evaluator/exp-destruct"]
exp-object-iteration = ["jrsonnet-evaluator/exp-object-iteration"]
# Bigint type
exp-bigint = ["jrsonnet-evaluator/exp-bigint", "jrsonnet-cli/exp-bigint"]
# std.regex* builtins (std.regexFullMatch, std.regexPartialMatch, ...)
exp-regex = ["jrsonnet-cli/exp-regex"]
# obj?.field, obj?.['field']
exp-null-coaelse = [
"jrsonnet-evaluator/exp-null-coaelse",
Expand Down
3 changes: 3 additions & 0 deletions crates/jrsonnet-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ exp-null-coaelse = [
"jrsonnet-evaluator/exp-null-coaelse",
"jrsonnet-stdlib/exp-null-coaelse",
]
exp-regex = [
"jrsonnet-stdlib/exp-regex",
]
legacy-this-file = ["jrsonnet-stdlib/legacy-this-file"]

[dependencies]
Expand Down
18 changes: 17 additions & 1 deletion crates/jrsonnet-evaluator/src/typed/conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
bail,
function::{native::NativeDesc, FuncDesc, FuncVal},
typed::CheckType,
val::{IndexableVal, ThunkMapper},
val::{IndexableVal, StrValue, ThunkMapper},
ObjValue, ObjValueBuilder, Result, Thunk, Val,
};

Expand Down Expand Up @@ -304,6 +304,22 @@
}
}

/// Lets builtins accept and return `StrValue` directly, mapping it to and
/// from `Val::Str`.
impl Typed for StrValue {
    const TYPE: &'static ComplexValType = &ComplexValType::Simple(ValType::Str);

    /// Wraps the string in `Val::Str`; this direction always succeeds.
    fn into_untyped(value: Self) -> Result<Val> {
        Ok(Val::Str(value))
    }

    /// Extracts the inner string from a `Val::Str`.
    ///
    /// # Errors
    /// Propagates the error produced by the `TYPE` check when `value` does
    /// not satisfy it.
    fn from_untyped(value: Val) -> Result<Self> {
        <Self as Typed>::TYPE.check(&value)?;
        // The check above is expected to have rejected every non-string value.
        if let Val::Str(inner) = value {
            Ok(inner)
        } else {
            unreachable!()
        }
    }
}

impl Typed for char {
const TYPE: &'static ComplexValType = &ComplexValType::Char;

Expand Down Expand Up @@ -433,25 +449,25 @@
}

fn from_untyped(value: Val) -> Result<Self> {
match &value {
Val::Arr(a) => {
if let Some(bytes) = a.as_any().downcast_ref::<BytesArray>() {
return Ok(bytes.0.as_slice().into());
};
<Self as Typed>::TYPE.check(&value)?;
// Any::downcast_ref::<ByteArray>(&a);
let mut out = Vec::with_capacity(a.len());
for e in a.iter() {
let r = e?;
out.push(u8::from_untyped(r)?);
}
Ok(out.as_slice().into())
}
_ => {
<Self as Typed>::TYPE.check(&value)?;
unreachable!()
}
}

Check warning on line 470 in crates/jrsonnet-evaluator/src/typed/conversions.rs

View workflow job for this annotation

GitHub Actions / clippy

you seem to be trying to use `match` for destructuring a single pattern. Consider using `if let`

warning: you seem to be trying to use `match` for destructuring a single pattern. Consider using `if let` --> crates/jrsonnet-evaluator/src/typed/conversions.rs:452:3 | 452 | / match &value { 453 | | Val::Arr(a) => { 454 | | if let Some(bytes) = a.as_any().downcast_ref::<BytesArray>() { 455 | | return Ok(bytes.0.as_slice().into()); ... | 469 | | } 470 | | } | |_________^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#single_match_else = note: `#[warn(clippy::single_match_else)]` implied by `#[warn(clippy::pedantic)]` help: try | 452 ~ if let Val::Arr(a) = &value { 453 ~ if let Some(bytes) = a.as_any().downcast_ref::<BytesArray>() { 454 ~ return Ok(bytes.0.as_slice().into()); 455 ~ }; 456 ~ <Self as Typed>::TYPE.check(&value)?; 457 ~ // Any::downcast_ref::<ByteArray>(&a); 458 ~ let mut out = Vec::with_capacity(a.len()); 459 ~ for e in a.iter() { 460 ~ let r = e?; 461 ~ out.push(u8::from_untyped(r)?); 462 ~ } 463 ~ Ok(out.as_slice().into()) 464 ~ } else { 465 ~ <Self as Typed>::TYPE.check(&value)?; 466 ~ unreachable!() 467 ~ } |
}
}

Expand Down
7 changes: 7 additions & 0 deletions crates/jrsonnet-stdlib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ exp-preserve-order = ["jrsonnet-evaluator/exp-preserve-order"]
exp-bigint = ["num-bigint", "jrsonnet-evaluator/exp-bigint"]

exp-null-coaelse = ["jrsonnet-parser/exp-null-coaelse", "jrsonnet-evaluator/exp-null-coaelse"]
# std.regexFullMatch, std.regexPartialMatch and other regex helpers
exp-regex = ["regex", "lru", "rustc-hash"]

[dependencies]
jrsonnet-evaluator.workspace = true
Expand Down Expand Up @@ -49,6 +51,11 @@ serde_yaml_with_quirks.workspace = true

num-bigint = { workspace = true, optional = true }

# regex
regex = { workspace = true, optional = true }
lru = { workspace = true, optional = true }
rustc-hash = { workspace = true, optional = true }

[build-dependencies]
jrsonnet-parser.workspace = true
structdump = { workspace = true, features = ["derive"] }
38 changes: 37 additions & 1 deletion crates/jrsonnet-stdlib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ mod sets;
pub use sets::*;
mod compat;
pub use compat::*;
#[cfg(feature = "exp-regex")]
mod regex;
#[cfg(feature = "exp-regex")]
pub use crate::regex::*;

pub fn stdlib_uncached(settings: Rc<RefCell<Settings>>) -> ObjValue {
let mut builder = ObjValueBuilder::new();
Expand Down Expand Up @@ -185,6 +189,9 @@ pub fn stdlib_uncached(settings: Rc<RefCell<Settings>>) -> ObjValue {
("setInter", builtin_set_inter::INST),
("setDiff", builtin_set_diff::INST),
("setUnion", builtin_set_union::INST),
// Regex
#[cfg(feature = "exp-regex")]
("regexQuoteMeta", builtin_regex_quote_meta::INST),
// Compat
("__compare", builtin___compare::INST),
]
Expand All @@ -207,9 +214,38 @@ pub fn stdlib_uncached(settings: Rc<RefCell<Settings>>) -> ObjValue {
},
);
builder.method("trace", builtin_trace { settings });

builder.method("id", FuncVal::Id);

#[cfg(feature = "exp-regex")]
{
// Regex
let regex_cache = RegexCache::default();
builder.method(
"regexFullMatch",
builtin_regex_full_match {
cache: regex_cache.clone(),
},
);
builder.method(
"regexPartialMatch",
builtin_regex_partial_match {
cache: regex_cache.clone(),
},
);
builder.method(
"regexReplace",
builtin_regex_replace {
cache: regex_cache.clone(),
},
);
builder.method(
"regexGlobalReplace",
builtin_regex_global_replace {
cache: regex_cache.clone(),
},
);
};

builder.build()
}

Expand Down
133 changes: 133 additions & 0 deletions crates/jrsonnet-stdlib/src/regex.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
use std::{cell::RefCell, hash::BuildHasherDefault, num::NonZeroUsize, rc::Rc};

use ::regex::Regex;
use jrsonnet_evaluator::{
error::{ErrorKind::*, Result},
val::StrValue,
IStr, ObjValueBuilder, Val,
};
use jrsonnet_macros::builtin;
use lru::LruCache;
use rustc_hash::FxHasher;

/// Cache of compiled regular expressions, keyed by the pattern source text.
///
/// The map lives behind a `RefCell` so lookups and insertions can happen
/// through a shared reference.
pub struct RegexCacheInner {
// LRU map from pattern text to its compiled, reference-counted `Regex`.
cache: RefCell<LruCache<IStr, Rc<Regex>, BuildHasherDefault<FxHasher>>>,
}
impl Default for RegexCacheInner {
fn default() -> Self {
Self {
cache: RefCell::new(LruCache::with_hasher(
NonZeroUsize::new(20).unwrap(),
BuildHasherDefault::default(),
)),
}
}
}
/// Reference-counted handle to a [`RegexCacheInner`]; cloning the handle
/// yields another pointer to the same cache.
pub type RegexCache = Rc<RegexCacheInner>;
impl RegexCacheInner {
    /// Returns the compiled regex for `pattern`, compiling and caching it
    /// when it is not already present.
    ///
    /// # Errors
    /// Returns a `RuntimeError` containing the regex parser's message when
    /// `pattern` cannot be compiled. Failed patterns are not cached.
    fn parse(&self, pattern: IStr) -> Result<Rc<Regex>> {
        let mut entries = self.cache.borrow_mut();
        if let Some(hit) = entries.get(&pattern).map(Rc::clone) {
            return Ok(hit);
        }

        let compiled = Regex::new(&pattern)
            .map(Rc::new)
            .map_err(|e| RuntimeError(format!("regex parse failed: {e}").into()))?;
        entries.push(pattern, Rc::clone(&compiled));
        Ok(compiled)
    }
}

pub fn regex_match_inner(regex: &Regex, str: String) -> Result<Val> {
let mut out = ObjValueBuilder::with_capacity(3);

let mut captures = Vec::with_capacity(regex.captures_len());
let mut named_captures = ObjValueBuilder::with_capacity(regex.capture_names().len());

let Some(captured) = regex.captures(&str) else {
return Ok(Val::Null);
};

for ele in captured.iter().skip(1) {
if let Some(ele) = ele {
captures.push(Val::Str(StrValue::Flat(ele.as_str().into())))
} else {
captures.push(Val::Str(StrValue::Flat(IStr::empty())))
}
}
for (i, name) in regex
.capture_names()
.skip(1)
.enumerate()
.flat_map(|(i, v)| Some((i, v?)))
{
let capture = captures[i].clone();
named_captures.field(name).try_value(capture)?;
}

out.field("string")
.value(Val::Str(captured.get(0).unwrap().as_str().into()));
out.field("captures").value(Val::Arr(captures.into()));
out.field("namedCaptures")
.value(Val::Obj(named_captures.build()));

Ok(Val::Obj(out.build()))
}

/// Searches `str` for the first place where `pattern` matches.
///
/// The compiled pattern comes from the shared cache held in `this`. The
/// result is whatever `regex_match_inner` produces for that regex.
///
/// # Errors
/// Fails when `pattern` cannot be compiled, or when building the result fails.
#[builtin(fields(
    cache: RegexCache,
))]
pub fn builtin_regex_partial_match(
    this: &builtin_regex_partial_match,
    pattern: IStr,
    str: String,
) -> Result<Val> {
    this.cache
        .parse(pattern)
        .and_then(|compiled| regex_match_inner(&compiled, str))
}

/// Matches `pattern` against the whole of `str`.
///
/// The pattern is anchored at both ends before compilation. The compiled
/// regex comes from the shared cache held in `this`, and the result is
/// whatever `regex_match_inner` produces for it.
///
/// # Errors
/// Fails when the anchored pattern cannot be compiled, or when building the
/// result fails.
#[builtin(fields(
    cache: RegexCache,
))]
pub fn builtin_regex_full_match(
    this: &builtin_regex_full_match,
    pattern: StrValue,
    str: String,
) -> Result<Val> {
    // Wrap the user pattern in a non-capturing group before anchoring:
    // otherwise a top-level alternation such as `a|b` would become `^a|b$`,
    // i.e. "starts with a OR ends with b". The group also scopes inline flags
    // like `(?m)` to the user pattern so they cannot alter the anchors, and
    // it does not shift capture-group numbering.
    let pattern = format!("^(?:{pattern})$").into();
    let regex = this.cache.parse(pattern)?;
    regex_match_inner(&regex, str)
}

/// Escapes `pattern` with `regex::escape` and returns the escaped text.
#[builtin]
pub fn builtin_regex_quote_meta(pattern: String) -> String {
    let literal = pattern.as_str();
    regex::escape(literal)
}

/// Replaces the first match of `pattern` in `str` with `to`.
///
/// `to` is handed to `Regex::replace` as the replacement, so the regex crate's
/// replacement syntax applies to it. When nothing matches, the input text is
/// returned unchanged.
///
/// # Errors
/// Fails when `pattern` cannot be compiled.
#[builtin(fields(
    cache: RegexCache,
))]
pub fn builtin_regex_replace(
    this: &builtin_regex_replace,
    str: String,
    pattern: IStr,
    to: String,
) -> Result<String> {
    let regex = this.cache.parse(pattern)?;
    // `into_owned` reuses the buffer when a replacement already allocated,
    // instead of copying it a second time.
    Ok(regex.replace(&str, to).into_owned())
}

/// Replaces every non-overlapping match of `pattern` in `str` with `to`.
///
/// `to` is handed to `Regex::replace_all` as the replacement, so the regex
/// crate's replacement syntax applies to it. When nothing matches, the input
/// text is returned unchanged.
///
/// # Errors
/// Fails when `pattern` cannot be compiled.
#[builtin(fields(
    cache: RegexCache,
))]
pub fn builtin_regex_global_replace(
    this: &builtin_regex_global_replace,
    str: String,
    pattern: IStr,
    to: String,
) -> Result<String> {
    let regex = this.cache.parse(pattern)?;
    // `into_owned` reuses the buffer when a replacement already allocated,
    // instead of copying it a second time.
    Ok(regex.replace_all(&str, to).into_owned())
}
Loading