feat(experimental:template-language): start an own template language
The whole reason for this experiment is to get closer to the http plugin template syntax linked in #5.
There, the VS Code plugin [has this concept of system variables](https://marketplace.visualstudio.com/items?itemName=humao.rest-client#system-variables) that follows a syntax mixing expression and function call, e.g.:

- `{{$randomInt min max}}`: Returns a random integer between min (included) and max (excluded)
- `{{$dotenv [%]variableName}}`: Returns the environment value stored in the .env file which exists in the same directory of your .http file.

Both examples show that `$<ident>` acts like a function name, followed by a variable argument list that is passed without any parentheses like `()`.

This experiment focuses on the ability to register functions for exactly this system variable syntax at compile time, in an extensible fashion.
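
To make this more concrete, here is a rough sketch of what such a registration could look like. It is not part of this commit; `SystemFunctions`, `register` and `call` are made-up names purely for illustration:

```rust
// Hypothetical sketch only: none of these names exist in the codebase (yet).
// The idea: `{{ $processEnv HOME }}` is looked up by name ("processEnv") and
// receives its space-separated arguments as a plain list (["HOME"]).
use std::collections::HashMap;

type SysFn = Box<dyn Fn(&[String]) -> Result<String, String>>;

#[derive(Default)]
struct SystemFunctions {
    functions: HashMap<&'static str, SysFn>,
}

impl SystemFunctions {
    fn register(&mut self, name: &'static str, f: SysFn) {
        self.functions.insert(name, f);
    }

    fn call(&self, name: &str, args: &[String]) -> Result<String, String> {
        let f = self
            .functions
            .get(name)
            .ok_or_else(|| format!("unknown system variable `${name}`"))?;
        f(args)
    }
}

fn main() {
    let mut registry = SystemFunctions::default();
    registry.register(
        "processEnv",
        Box::new(|args: &[String]| {
            let var = args.first().ok_or("missing variable name")?;
            std::env::var(var).map_err(|e| e.to_string())
        }),
    );

    // would be driven by the runtime when it hits `{{ $processEnv HOME }}`
    println!("{:?}", registry.call("processEnv", &["HOME".into()]));
}
```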

Done so far:
- lexing and parsing of very basic templates with an expression `{{ var }}`
- basic runtime to interpret the AST
- some tests added
- design of a Visitor pattern for the AST

Still open:
- [ ] tests for the unhappy path are too few
- [ ] the runtime is still very incomplete; `SysVar`-hooked functions are missing, e.g. `hello {{ $processEnv HOME }}`
sassman committed Feb 28, 2023
1 parent 22af929 commit 4f1ca62
Showing 12 changed files with 883 additions and 0 deletions.
curlz/src/curlz/templ-lang/ast.rs (91 additions, 0 deletions)
@@ -0,0 +1,91 @@
use crate::language::tokens::Span;

// using toml::Value here only because of laziness
pub use toml::Value;

#[derive(Debug)]
pub struct Spanned<T> {
    pub node: Box<T>,
    pub span: Span,
}

impl<T> Spanned<T> {
    pub fn new(node: T, span: Span) -> Self {
        Self {
            node: Box::new(node),
            span,
        }
    }
}

#[derive(Debug)]
pub struct Template<'a> {
    pub children: Vec<Stmt<'a>>,
}

#[derive(Debug)]
pub struct Var<'a> {
    pub id: &'a str,
}

#[derive(Debug)]
pub struct SysVar<'a> {
    pub id: &'a str,
}

#[derive(Debug)]
pub struct EmitRaw<'a> {
    pub raw: &'a str,
}

#[derive(Debug)]
pub struct EmitExpr<'a> {
    pub expr: Expr<'a>,
}

#[derive(Debug)]
pub struct Const {
    pub value: Value,
}

#[derive(Debug)]
pub struct Call<'a> {
    pub expr: Expr<'a>,
    pub args: Vec<Expr<'a>>,
}

#[derive(Debug)]
pub enum Expr<'a> {
    SysVar(Spanned<SysVar<'a>>),
    Var(Spanned<Var<'a>>),
    Const(Spanned<Const>),
    Call(Spanned<Call<'a>>),
}

#[derive(Debug)]
pub enum Stmt<'a> {
    Template(Spanned<Template<'a>>),
    EmitRaw(Spanned<EmitRaw<'a>>),
    EmitExpr(Spanned<EmitExpr<'a>>),
}

#[cfg(test)]
pub trait IntoSpanned {
    fn spanned(self) -> Spanned<Self>
    where
        Self: Sized,
    {
        Spanned::new(
            self,
            Span {
                start_line: 1,
                start_col: 0,
                end_line: 1,
                end_col: 1,
            },
        )
    }
}

#[cfg(test)]
impl<T> IntoSpanned for T {}
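
For orientation (not part of the diff): if a test module like the one below sat in the same file as the AST definitions above, it would show how a template such as `hello {{ world }}` maps onto these node types, using the `IntoSpanned` test helper for dummy spans. The module and test names are made up:

```rust
// Sketch: the expected AST shape for `hello {{ world }}`.
#[cfg(test)]
mod ast_shape_example {
    use super::*;

    #[test]
    fn hello_world_template_shape() {
        let ast = Template {
            children: vec![
                // the literal text in front of the expression
                Stmt::EmitRaw(EmitRaw { raw: "hello " }.spanned()),
                // the `{{ world }}` part: an emit statement wrapping a plain variable
                Stmt::EmitExpr(
                    EmitExpr {
                        expr: Expr::Var(Var { id: "world" }.spanned()),
                    }
                    .spanned(),
                ),
            ],
        };

        assert_eq!(ast.children.len(), 2);
    }
}
```
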
curlz/src/curlz/templ-lang/ast_visitor.rs (33 additions, 0 deletions)
@@ -0,0 +1,33 @@
/*!
this module contains AST related tooling such as the visitor trait and
the double dispatch (impl of [`AstVisitAcceptor`]) for all AST nodes.
*/
use crate::language::ast;

pub trait AstVisitAcceptor<'ast> {
    fn accept<V: AstVisit<'ast>>(&self, visitor: &mut V);
}

pub trait AstVisit<'ast> {
    fn visit_stmt(&mut self, _stmt: &ast::Stmt<'ast>) {}
    fn visit_expr(&mut self, _expr: &ast::Expr<'ast>) {}
    fn visit_emit_raw(&mut self, _raw: &ast::EmitRaw<'ast>) {}
}

impl<'ast> AstVisitAcceptor<'ast> for ast::Stmt<'ast> {
    fn accept<V: AstVisit<'ast>>(&self, visitor: &mut V) {
        visitor.visit_stmt(self);
    }
}

impl<'ast> AstVisitAcceptor<'ast> for ast::Expr<'ast> {
    fn accept<V: AstVisit<'ast>>(&self, visitor: &mut V) {
        visitor.visit_expr(self);
    }
}

impl<'ast> AstVisitAcceptor<'ast> for ast::EmitRaw<'ast> {
    fn accept<V: AstVisit<'ast>>(&self, visitor: &mut V) {
        visitor.visit_emit_raw(self);
    }
}
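
As a usage sketch (not part of this commit): a concrete visitor only needs to override the hooks it cares about. The struct and function names below are made up, and the module paths in the imports are assumptions:

```rust
// Sketch: a visitor that collects all raw text chunks of a template.
use crate::language::ast;
use crate::language::ast_visitor::{AstVisit, AstVisitAcceptor};

#[derive(Default)]
struct RawTextCollector<'ast> {
    chunks: Vec<&'ast str>,
}

impl<'ast> AstVisit<'ast> for RawTextCollector<'ast> {
    fn visit_stmt(&mut self, stmt: &ast::Stmt<'ast>) {
        // double dispatch: only `EmitRaw` statements are of interest here
        if let ast::Stmt::EmitRaw(raw) = stmt {
            raw.node.accept(self);
        }
    }

    fn visit_emit_raw(&mut self, raw: &ast::EmitRaw<'ast>) {
        self.chunks.push(raw.raw);
    }
}

/// Usage sketch: feed every statement of an already parsed template to the visitor.
fn collect_raw<'ast>(template: &ast::Template<'ast>) -> Vec<&'ast str> {
    let mut collector = RawTextCollector::default();
    for stmt in &template.children {
        stmt.accept(&mut collector);
    }
    collector.chunks
}
```
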
curlz/src/curlz/templ-lang/lexer.rs (254 additions, 0 deletions)
@@ -0,0 +1,254 @@
use anyhow::{anyhow, Error};
use std::ops::Not;

use crate::language::tokens::{Span, Token};

enum LexerState {
    Template,
    InVariable,
}

struct TokenizerState<'s> {
    stack: Vec<LexerState>,
    rest: &'s str,
    failed: bool,
    current_line: usize,
    current_col: usize,
}

impl<'s> TokenizerState<'s> {}

impl<'s> TokenizerState<'s> {
    /// advances by `n_bytes` and keeps track of the position in the stream
    fn advance(&mut self, n_bytes: usize) -> &'s str {
        let (skipped, new_rest) = self.rest.split_at(n_bytes);
        self.rest = new_rest;

        skipped.chars().for_each(|c| match c {
            '\n' => {
                self.current_line += 1;
                self.current_col = 0;
            }
            _ => self.current_col += 1,
        });

        skipped
    }

    /// advances forward for as long as whitespace appears
    fn skip_whitespaces(&mut self) {
        let skip = self
            .rest
            .chars()
            .map_while(|c| c.is_whitespace().then(|| c.len_utf8()))
            .sum::<usize>();
        if skip > 0 {
            self.advance(skip);
        }
    }

    #[inline(always)]
    fn loc(&self) -> (usize, usize) {
        (self.current_line, self.current_col)
    }

    fn span(&self, (start_line, start_col): (usize, usize)) -> Span {
        Span {
            start_line,
            start_col,
            end_line: self.current_line,
            end_col: self.current_col,
        }
    }

    fn eat_identifier(&mut self) -> Result<(Token<'s>, Span), Error> {
        let ident_len = lex_identifier(self.rest);
        if ident_len > 0 {
            let old_loc = self.loc();
            let ident = self.advance(ident_len);
            let token = if let Some(b'$') = ident.as_bytes().first() {
                Token::SysVarIdent(&ident[1..])
            } else {
                Token::VarIdent(ident)
            };

            Ok((token, self.span(old_loc)))
        } else {
            Err(self.syntax_error("unexpected character"))
        }
    }

    fn syntax_error(&mut self, msg: &'static str) -> Error {
        self.failed = true;
        anyhow!(msg)
        // Error::new(ErrorKind::SyntaxError, msg)
    }
}

fn lex_identifier(s: &str) -> usize {
    s.chars()
        .enumerate()
        .map_while(|(idx, c)| {
            let cont = if c == '_' || c == '$' || c == '-' {
                true
            } else if idx == 0 {
                unicode_ident::is_xid_start(c)
            } else {
                unicode_ident::is_xid_continue(c)
            };
            cont.then(|| c.len_utf8())
        })
        .sum::<usize>()
}

fn memchr(haystack: &[u8], needle: u8) -> Option<usize> {
    haystack.iter().position(|&x| x == needle)
}

#[inline(always)]
fn find_marker(a: &str) -> Option<usize> {
    let bytes = a.as_bytes();
    let mut offset = 0;
    loop {
        if let Some(idx) = memchr(&bytes[offset..], b'{') {
            if let Some(b'{') = bytes.get(offset + idx + 1).copied() {
                // this prevents the `${{` situation
                if let Some(b'$') = bytes.get(offset + idx - 1) {
                    break None;
                } else {
                    break Some(offset + idx);
                }
            }
            offset += idx + 1;
        } else {
            break None;
        }
    }
}

pub fn tokenize(input: &str) -> impl Iterator<Item = Result<(Token<'_>, Span), Error>> {
    let mut state = TokenizerState {
        rest: input,
        stack: vec![LexerState::Template],
        failed: false,
        current_line: 1,
        current_col: 0,
    };

    std::iter::from_fn(move || loop {
        if state.rest.is_empty() || state.failed {
            return None;
        }

        let prev_loc = state.loc();
        match state.stack.last() {
            Some(LexerState::Template) => {
                if let Some("{{") = state.rest.get(..2) {
                    // entering the `InVariable` state
                    state.advance(2);
                    state.stack.push(LexerState::InVariable);
                    return Some(Ok((Token::VariableStart, state.span(prev_loc))));
                }

                let (lead, span) = match find_marker(state.rest) {
                    Some(start) => (state.advance(start), state.span(prev_loc)),
                    None => (state.advance(state.rest.len()), state.span(prev_loc)),
                };

                if lead.is_empty().not() {
                    return Some(Ok((Token::TemplateData(lead), span)));
                }
            }
            Some(LexerState::InVariable) => {
                state.skip_whitespaces();

                if let Some("}}") = state.rest.get(..2) {
                    state.stack.pop();
                    state.advance(2);
                    return Some(Ok((Token::VariableEnd, state.span(prev_loc))));
                }

                return Some(state.eat_identifier());
            }
            None => todo!("lexer state is empty!?"),
        }
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_template_data() {
        match tokenize("hello {{ world }}").next() {
            Some(Ok((Token::TemplateData(data), _))) if data == "hello " => {}
            s => panic!("did not get a matching token result: {:?}", s),
        }
    }

    #[test]
    fn test_template_data_with_dollar_2braces() {
        match tokenize("hello ${{ world }}").next() {
            Some(Ok((Token::TemplateData(data), _))) if data == "hello ${{ world }}" => {}
            s => panic!("did not get a matching token result: {:?}", s),
        }
    }

    #[test]
    fn test_tokenize_var_ident() {
        let mut tokens = tokenize("hello {{ world }}").skip(1);

        assert_eq!(tokens.next().unwrap().unwrap().0, Token::VariableStart);

        match tokens.next() {
            Some(Ok((Token::VarIdent(id), _))) if id == "world" => {}
            s => panic!("did not get a matching token result: {:?}", s),
        }

        assert_eq!(tokens.next().unwrap().unwrap().0, Token::VariableEnd);
        assert!(tokens.next().is_none())
    }

    #[test]
    fn test_tokenize_var_ident_containing_dash() {
        let mut tokens = tokenize("hello {{ new-world }}").skip(2);

        match tokens.next() {
            Some(Ok((Token::VarIdent(id), _))) if id == "new-world" => {}
            s => panic!("did not get a matching token result: {:?}", s),
        }

        assert_eq!(tokens.next().unwrap().unwrap().0, Token::VariableEnd);
        assert!(tokens.next().is_none())
    }

    #[test]
    fn test_tokenize_sys_var_ident() {
        let mut tokens = tokenize("hello {{ $world }}").skip(2);

        match tokens.next() {
            Some(Ok((Token::SysVarIdent(id), _))) if id == "world" => {}
            s => panic!("did not get a matching token result: {:?}", s),
        }

        assert_eq!(tokens.next().unwrap().unwrap().0, Token::VariableEnd);
    }

    #[test]
    fn test_tokenize_sys_var_ident_with_a_argument() {
        let mut tokens = tokenize("hello {{ $processEnv envVarName }}").skip(2);

        match tokens.next() {
            Some(Ok((Token::SysVarIdent(id), _))) if id == "processEnv" => {}
            s => panic!("did not get a matching token result: {:?}", s),
        }

        match tokens.next() {
            Some(Ok((Token::VarIdent(id), _))) if id == "envVarName" => {}
            s => panic!("did not get a matching token result: {:?}", s),
        }

        assert_eq!(tokens.next().unwrap().unwrap().0, Token::VariableEnd);
    }
}
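
For reference (not part of the diff), the token stream for the example from the commit message can be inspected like this; `dump_tokens` is a made-up helper and the import path is an assumption:

```rust
// Sketch: printing the token stream of a template.
// For `hello {{ $processEnv HOME }}` the lexer above yields
// TemplateData("hello "), VariableStart, SysVarIdent("processEnv"),
// VarIdent("HOME"), VariableEnd.
use crate::language::lexer::tokenize; // module path assumed

fn dump_tokens(input: &str) {
    for item in tokenize(input) {
        match item {
            Ok((token, span)) => println!("{token:?} at {span:?}"),
            Err(err) => eprintln!("lexing failed: {err}"),
        }
    }
}
```
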
curlz/src/curlz/templ-lang/lib.rs (14 additions, 0 deletions)
@@ -0,0 +1,14 @@
pub fn add(left: usize, right: usize) -> usize {
    left + right
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn it_works() {
        let result = add(2, 2);
        assert_eq!(result, 4);
    }
}
