Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
6cb316d
Make token value hb_string
timkaechele Oct 17, 2025
74ceb0e
Make token_init take hb_string value
timkaechele Oct 17, 2025
2de7ee3
Allocate lexer errors using the arena
timkaechele Oct 17, 2025
ef6120c
Make lexer_match_and_advance take string
timkaechele Oct 17, 2025
86c995f
Make lexer_advance_with take string
timkaechele Oct 17, 2025
fbe3116
Fix lexer_eof token_init
timkaechele Oct 17, 2025
d9815ad
Remove allocations from lexer_advance_with_next
timkaechele Oct 17, 2025
43ed7c2
Make utf_8_sequence_length use hb_string
timkaechele Oct 17, 2025
f31ffec
Make lexer_advance_utf8 use hb_string
timkaechele Oct 17, 2025
34c1f99
Make lexer_parse_whitespace use hb_string
timkaechele Oct 17, 2025
88bba1f
Make lexer_parse_erb_content use hb_string
timkaechele Oct 17, 2025
412b91d
Use hb_buffer_append_string in parser instead of hb_buffer_append
timkaechele Oct 17, 2025
6a6a8b2
Fix analyze.c herb_analyze_ruby value usage
timkaechele Oct 27, 2025
493000a
WIP: Fix src/ast_node.c
timkaechele Oct 18, 2025
5b998f9
WIP: simple compile script
timkaechele Oct 18, 2025
4fb057a
WIP: Fix token.c value usages
timkaechele Oct 28, 2025
a3d45e7
Fix extract.c token value usages
timkaechele Oct 18, 2025
8ab24e2
Fix pretty print token value usages
timkaechele Oct 27, 2025
5645897
Use correct error message length in lexer_error method
timkaechele Oct 19, 2025
ce6508a
Make parser_helper compatible with hb_string token value
timkaechele Oct 27, 2025
ebaf1da
WIP: Fix token value usages in parser.c
timkaechele Oct 27, 2025
3bf30ad
Fix hb_string_from_c_string usages in lexer.c
timkaechele Oct 27, 2025
70c32cd
Fix hb_string_from_c_string usages in extract.c
timkaechele Oct 27, 2025
ca1fa0b
Fix hb_string_from_c_string usages in parser.c
timkaechele Oct 27, 2025
78d0f78
WIP: Use hb_string in token_type_to_string
timkaechele Oct 28, 2025
6c4814d
Use hb_string_T in errors template
timkaechele Oct 28, 2025
a255eeb
Adapt errors call sites to use hb_string_T
timkaechele Oct 28, 2025
29d5704
Stop freeing strings
timkaechele Oct 28, 2025
efe4425
Revert to malloc in lexer error message
timkaechele Oct 30, 2025
e62ab74
Fix test_token.c
timkaechele Oct 30, 2025
a569e88
Fix issues in generated code
timkaechele Oct 30, 2025
93d10ab
Return uint32_t in utf8_char_byte_length
timkaechele Oct 30, 2025
1039634
Use truncate method in lexer
timkaechele Oct 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions compile.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
for file in src/*.c
do
clang -Isrc -Ivendor/prism/include -c "$file"
done
8 changes: 5 additions & 3 deletions src/analyze.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,12 @@ static bool analyze_erb_content(const AST_NODE_T* node, void* data) {
if (node->type == AST_ERB_CONTENT_NODE) {
AST_ERB_CONTENT_NODE_T* erb_content_node = (AST_ERB_CONTENT_NODE_T*) node;

const char* opening = erb_content_node->tag_opening->value;
hb_string_T opening = erb_content_node->tag_opening->value;

if (strcmp(opening, "<%%") != 0 && strcmp(opening, "<%%=") != 0 && strcmp(opening, "<%#") != 0) {
analyzed_ruby_T* analyzed = herb_analyze_ruby(hb_string(erb_content_node->content->value));
if (!hb_string_equals(opening, hb_string("<%%"))
&& !hb_string_equals(opening, hb_string("<%%="))
&& !hb_string_equals(opening, hb_string("<%#"))) {
analyzed_ruby_T* analyzed = herb_analyze_ruby(erb_content_node->content->value);

erb_content_node->parsed = true;
erb_content_node->valid = analyzed->valid;
Expand Down
2 changes: 1 addition & 1 deletion src/ast_node.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ AST_LITERAL_NODE_T* ast_literal_node_init_from_token(const token_T* token) {

ast_node_init(&literal->base, AST_LITERAL_NODE, token->location.start, token->location.end, NULL);

literal->content = herb_strdup(token->value);
literal->content = token->value;

return literal;
}
Expand Down
20 changes: 12 additions & 8 deletions src/extract.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#include "include/herb.h"
#include "include/io.h"
#include "include/lexer.h"
#include "include/util/hb_array.h"
#include "include/util/hb_buffer.h"
#include "include/util/hb_string.h"

#include <stdlib.h>
#include <string.h>
Expand All @@ -16,12 +16,14 @@ void herb_extract_ruby_to_buffer_with_semicolons(const char* source, hb_buffer_T

switch (token->type) {
case TOKEN_NEWLINE: {
hb_buffer_append(output, token->value);
hb_buffer_append_string(output, token->value);
break;
}

case TOKEN_ERB_START: {
if (strcmp(token->value, "<%#") == 0 || strcmp(token->value, "<%%") == 0 || strcmp(token->value, "<%%=") == 0) {
if (hb_string_equals(token->value, hb_string("<%#"))
|| hb_string_equals(token->value, hb_string("<%%"))
|| hb_string_equals(token->value, hb_string("<%%="))) {
skip_erb_content = true;
}

Expand All @@ -31,7 +33,7 @@ void herb_extract_ruby_to_buffer_with_semicolons(const char* source, hb_buffer_T

case TOKEN_ERB_CONTENT: {
if (skip_erb_content == false) {
hb_buffer_append(output, token->value);
hb_buffer_append_string(output, token->value);
} else {
hb_buffer_append_whitespace(output, range_length(token->range));
}
Expand Down Expand Up @@ -66,12 +68,14 @@ void herb_extract_ruby_to_buffer(const char* source, hb_buffer_T* output) {

switch (token->type) {
case TOKEN_NEWLINE: {
hb_buffer_append(output, token->value);
hb_buffer_append_string(output, token->value);
break;
}

case TOKEN_ERB_START: {
if (strcmp(token->value, "<%#") == 0 || strcmp(token->value, "<%%") == 0 || strcmp(token->value, "<%%=") == 0) {
if (hb_string_equals(token->value, hb_string("<%#"))
|| hb_string_equals(token->value, hb_string("<%%"))
|| hb_string_equals(token->value, hb_string("<%%="))) {
skip_erb_content = true;
}

Expand All @@ -81,7 +85,7 @@ void herb_extract_ruby_to_buffer(const char* source, hb_buffer_T* output) {

case TOKEN_ERB_CONTENT: {
if (skip_erb_content == false) {
hb_buffer_append(output, token->value);
hb_buffer_append_string(output, token->value);
} else {
hb_buffer_append_whitespace(output, range_length(token->range));
}
Expand Down Expand Up @@ -115,7 +119,7 @@ void herb_extract_html_to_buffer(const char* source, hb_buffer_T* output) {
case TOKEN_ERB_START:
case TOKEN_ERB_CONTENT:
case TOKEN_ERB_END: hb_buffer_append_whitespace(output, range_length(token->range)); break;
default: hb_buffer_append(output, token->value);
default: hb_buffer_append_string(output, token->value);
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/include/parser_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ token_T* parser_pop_open_tag(const parser_T* parser);

void parser_append_unexpected_error(
parser_T* parser,
const char* description,
const char* expected,
hb_string_T description,
hb_string_T expected,
hb_array_T* errors
);
void parser_append_unexpected_token_error(parser_T* parser, token_type_T expected_type, hb_array_T* errors);
Expand Down
7 changes: 4 additions & 3 deletions src/include/token.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
#include "lexer_struct.h"
#include "position.h"
#include "token_struct.h"
#include "util/hb_string.h"

token_T* token_init(const char* value, token_type_T type, lexer_T* lexer);
token_T* token_init(hb_string_T value, token_type_T type, lexer_T* lexer);
char* token_to_string(const token_T* token);
const char* token_type_to_string(token_type_T type);
hb_string_T token_type_to_string(token_type_T type);

char* token_value(const token_T* token);
hb_string_T token_value(const token_T* token);
int token_type(const token_T* token);

size_t token_sizeof(void);
Expand Down
3 changes: 2 additions & 1 deletion src/include/token_struct.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "location.h"
#include "range.h"
#include "util/hb_string.h"

typedef enum {
TOKEN_WHITESPACE, // ' '
Expand Down Expand Up @@ -49,7 +50,7 @@ typedef enum {
} token_type_T;

typedef struct TOKEN_STRUCT {
char* value;
hb_string_T value;
range_T range;
location_T location;
token_type_T type;
Expand Down
5 changes: 3 additions & 2 deletions src/include/utf8.h
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
#ifndef HERB_UTF8_H
#define HERB_UTF8_H

#include "util/hb_string.h"
#include <stdbool.h>
#include <stdlib.h>

int utf8_char_byte_length(unsigned char first_byte);
int utf8_sequence_length(const char* str, size_t position, size_t max_length);
uint32_t utf8_char_byte_length(unsigned char first_byte);
uint32_t utf8_sequence_length(hb_string_T value);
bool utf8_is_valid_continuation_byte(unsigned char byte);

#endif
Loading
Loading