From 239547fc58dc8eea8896a5949f1ad2da2671bb21 Mon Sep 17 00:00:00 2001 From: Marco Roth Date: Thu, 23 Oct 2025 13:41:46 +0200 Subject: [PATCH] C: Use Arena for Lexing and Parsing --- ext/herb/extension.c | 75 ++++++- javascript/packages/node/extension/herb.cpp | 105 +++++++++- src/analyze.c | 211 +++++++++++++------- src/extract.c | 65 +++++- src/herb.c | 68 +++++-- src/include/analyze.h | 1 + src/include/herb.h | 13 +- src/include/lexer.h | 3 +- src/include/lexer_struct.h | 3 + src/include/macros.h | 2 +- src/include/parser.h | 2 + src/include/parser_helpers.h | 1 + src/include/token.h | 3 +- src/include/token_struct.h | 3 + src/include/util/hb_arena.h | 2 +- src/lexer.c | 16 +- src/lexer_peek_helpers.c | 5 +- src/main.c | 43 +++- src/parser.c | 115 +++++++---- src/parser_helpers.c | 32 ++- src/prism_helpers.c | 3 +- src/token.c | 47 ++++- src/util/hb_arena.c | 2 +- templates/src/ast_nodes.c.erb | 106 +++++++--- templates/src/errors.c.erb | 69 ++++++- templates/src/include/ast_nodes.h.erb | 6 +- templates/src/include/errors.h.erb | 8 +- wasm/herb-wasm.cpp | 44 +++- 28 files changed, 827 insertions(+), 226 deletions(-) diff --git a/ext/herb/extension.c b/ext/herb/extension.c index 155908ac3..4825350bc 100644 --- a/ext/herb/extension.c +++ b/ext/herb/extension.c @@ -6,6 +6,8 @@ #include "nodes.h" #include "../../src/include/analyze.h" +#include "../../src/include/macros.h" +#include "../../src/include/util/hb_arena.h" VALUE mHerb; VALUE cPosition; @@ -19,23 +21,52 @@ VALUE cParseResult; static VALUE Herb_lex(VALUE self, VALUE source) { char* string = (char*) check_string(source); - hb_array_T* tokens = herb_lex(string); + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + if (!arena) { return Qnil; } - VALUE result = create_lex_result(tokens, source); + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return Qnil; + } + + herb_lex_result_T* lex_result = herb_lex(string, arena); + + if (!lex_result) { + hb_arena_free(arena); + free(arena); + return Qnil; + } + + VALUE result = create_lex_result(lex_result->tokens, source); - herb_free_tokens(&tokens); + herb_free_lex_result(&lex_result); return result; } static VALUE Herb_lex_file(VALUE self, VALUE path) { char* file_path = (char*) check_string(path); - hb_array_T* tokens = herb_lex_file(file_path); + + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + if (!arena) { return Qnil; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return Qnil; + } + + herb_lex_result_T* lex_result = herb_lex_file(file_path, arena); + + if (!lex_result) { + hb_arena_free(arena); + free(arena); + return Qnil; + } VALUE source_value = read_file_to_ruby_string(file_path); - VALUE result = create_lex_result(tokens, source_value); + VALUE result = create_lex_result(lex_result->tokens, source_value); - herb_free_tokens(&tokens); + herb_free_lex_result(&lex_result); return result; } @@ -59,7 +90,21 @@ static VALUE Herb_parse(int argc, VALUE* argv, VALUE self) { } } - AST_DOCUMENT_NODE_T* root = herb_parse(string, parser_options); + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + if (!arena) { return Qnil; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return Qnil; + } + + AST_DOCUMENT_NODE_T* root = herb_parse(string, parser_options, arena); + + if (!root) { + hb_arena_free(arena); + free(arena); + return Qnil; + } herb_analyze_parse_tree(root, string); @@ -76,7 +121,21 @@ static VALUE Herb_parse_file(VALUE self, VALUE path) { VALUE source_value = read_file_to_ruby_string(file_path); char* string = (char*) check_string(source_value); - AST_DOCUMENT_NODE_T* root = herb_parse(string, NULL); + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + if (!arena) { return Qnil; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return Qnil; + } + + AST_DOCUMENT_NODE_T* root = herb_parse(string, NULL, arena); + + if (!root) { + hb_arena_free(arena); + free(arena); + return Qnil; + } VALUE result = create_parse_result(root, source_value); diff --git a/javascript/packages/node/extension/herb.cpp b/javascript/packages/node/extension/herb.cpp index ec29d11c0..8e6369fda 100644 --- a/javascript/packages/node/extension/herb.cpp +++ b/javascript/packages/node/extension/herb.cpp @@ -3,8 +3,10 @@ extern "C" { #include "../extension/libherb/include/ast_nodes.h" #include "../extension/libherb/include/herb.h" #include "../extension/libherb/include/location.h" +#include "../extension/libherb/include/macros.h" #include "../extension/libherb/include/range.h" #include "../extension/libherb/include/token.h" +#include "../extension/libherb/include/util/hb_arena.h" #include "../extension/libherb/include/util/hb_array.h" #include "../extension/libherb/include/util/hb_buffer.h" } @@ -31,10 +33,31 @@ napi_value Herb_lex(napi_env env, napi_callback_info info) { char* string = CheckString(env, args[0]); if (!string) { return nullptr; } - hb_array_T* tokens = herb_lex(string); - napi_value result = CreateLexResult(env, tokens, args[0]); + hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); - herb_free_tokens(&tokens); + if (!arena) { + free(string); + return nullptr; + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + free(string); + return nullptr; + } + + herb_lex_result_T* lex_result = herb_lex(string, arena); + + if (!lex_result) { + hb_arena_free(arena); + free(arena); + free(string); + return nullptr; + } + + napi_value result = CreateLexResult(env, lex_result->tokens, args[0]); + + herb_free_lex_result(&lex_result); free(string); return result; @@ -53,11 +76,32 @@ napi_value Herb_lex_file(napi_env env, napi_callback_info info) { char* file_path = CheckString(env, args[0]); if (!file_path) { return nullptr; } - hb_array_T* tokens = herb_lex_file(file_path); + hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + + if (!arena) { + free(file_path); + return nullptr; + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + free(file_path); + return nullptr; + } + + herb_lex_result_T* lex_result = herb_lex_file(file_path, arena); + + if (!lex_result) { + hb_arena_free(arena); + free(arena); + free(file_path); + return nullptr; + } + napi_value source_value = ReadFileToString(env, file_path); - napi_value result = CreateLexResult(env, tokens, source_value); + napi_value result = CreateLexResult(env, lex_result->tokens, source_value); - herb_free_tokens(&tokens); + herb_free_lex_result(&lex_result); free(file_path); return result; @@ -101,7 +145,28 @@ napi_value Herb_parse(napi_env env, napi_callback_info info) { } } - AST_DOCUMENT_NODE_T* root = herb_parse(string, parser_options); + hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + + if (!arena) { + free(string); + return nullptr; + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + free(string); + return nullptr; + } + + AST_DOCUMENT_NODE_T* root = herb_parse(string, parser_options, arena); + + if (!root) { + hb_arena_free(arena); + free(arena); + free(string); + return nullptr; + } + herb_analyze_parse_tree(root, string); napi_value result = CreateParseResult(env, root, args[0]); @@ -132,7 +197,31 @@ napi_value Herb_parse_file(napi_env env, napi_callback_info info) { return nullptr; } - AST_DOCUMENT_NODE_T* root = herb_parse(string, nullptr); + hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + + if (!arena) { + free(file_path); + free(string); + return nullptr; + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + free(file_path); + free(string); + return nullptr; + } + + AST_DOCUMENT_NODE_T* root = herb_parse(string, nullptr, arena); + + if (!root) { + hb_arena_free(arena); + free(arena); + free(file_path); + free(string); + return nullptr; + } + napi_value result = CreateParseResult(env, root, source_value); ast_node_free((AST_NODE_T *) root); diff --git a/src/analyze.c b/src/analyze.c index 6e83f117e..a49012baa 100644 --- a/src/analyze.c +++ b/src/analyze.c @@ -11,6 +11,7 @@ #include "include/prism_helpers.h" #include "include/token_struct.h" #include "include/util.h" +#include "include/util/hb_arena.h" #include "include/util/hb_array.h" #include "include/util/hb_string.h" #include "include/visitor.h" @@ -152,11 +153,17 @@ static AST_NODE_T* create_control_node( hb_array_T* children, AST_NODE_T* subsequent, AST_ERB_END_NODE_T* end_node, - control_type_t control_type + control_type_t control_type, + hb_arena_T* arena ) { hb_array_T* errors = erb_node->base.errors; erb_node->base.errors = NULL; + if (erb_node->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_node->analyzed_ruby); + erb_node->analyzed_ruby = NULL; + } + position_T start_position = erb_node->tag_opening->location.start; position_T end_position = erb_node->tag_closing->location.end; @@ -185,14 +192,22 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } case CONTROL_TYPE_ELSE: { - return ( - AST_NODE_T* - ) ast_erb_else_node_init(tag_opening, content, tag_closing, children, start_position, end_position, errors); + return (AST_NODE_T*) ast_erb_else_node_init( + tag_opening, + content, + tag_closing, + children, + start_position, + end_position, + errors, + arena + ); } case CONTROL_TYPE_CASE: @@ -231,7 +246,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } else { hb_array_free(&in_conditions); @@ -246,21 +262,29 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } } case CONTROL_TYPE_WHEN: { - return ( - AST_NODE_T* - ) ast_erb_when_node_init(tag_opening, content, tag_closing, children, start_position, end_position, errors); + return (AST_NODE_T*) ast_erb_when_node_init( + tag_opening, + content, + tag_closing, + children, + start_position, + end_position, + errors, + arena + ); } case CONTROL_TYPE_IN: { return ( AST_NODE_T* - ) ast_erb_in_node_init(tag_opening, content, tag_closing, children, start_position, end_position, errors); + ) ast_erb_in_node_init(tag_opening, content, tag_closing, children, start_position, end_position, errors, arena); } case CONTROL_TYPE_BEGIN: { @@ -289,7 +313,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } @@ -306,14 +331,22 @@ static AST_NODE_T* create_control_node( rescue_node, start_position, end_position, - errors + errors, + arena ); } case CONTROL_TYPE_ENSURE: { - return ( - AST_NODE_T* - ) ast_erb_ensure_node_init(tag_opening, content, tag_closing, children, start_position, end_position, errors); + return (AST_NODE_T*) ast_erb_ensure_node_init( + tag_opening, + content, + tag_closing, + children, + start_position, + end_position, + errors, + arena + ); } case CONTROL_TYPE_UNLESS: { @@ -330,7 +363,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } @@ -343,7 +377,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } @@ -356,7 +391,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } @@ -369,7 +405,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } @@ -382,14 +419,15 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } case CONTROL_TYPE_YIELD: { return ( AST_NODE_T* - ) ast_erb_yield_node_init(tag_opening, content, tag_closing, start_position, end_position, errors); + ) ast_erb_yield_node_init(tag_opening, content, tag_closing, start_position, end_position, errors, arena); } default: return NULL; @@ -453,6 +491,11 @@ static size_t process_control_structure( hb_array_T* when_errors = erb_content->base.errors; erb_content->base.errors = NULL; + if (erb_content->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_content->analyzed_ruby); + erb_content->analyzed_ruby = NULL; + } + AST_ERB_WHEN_NODE_T* when_node = ast_erb_when_node_init( erb_content->tag_opening, erb_content->content, @@ -460,11 +503,10 @@ static size_t process_control_structure( when_statements, erb_content->tag_opening->location.start, erb_content->tag_closing->location.end, - when_errors + when_errors, + context->arena ); - ast_node_free((AST_NODE_T*) erb_content); - hb_array_append(when_conditions, (AST_NODE_T*) when_node); continue; @@ -477,6 +519,11 @@ static size_t process_control_structure( hb_array_T* in_errors = erb_content->base.errors; erb_content->base.errors = NULL; + if (erb_content->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_content->analyzed_ruby); + erb_content->analyzed_ruby = NULL; + } + AST_ERB_IN_NODE_T* in_node = ast_erb_in_node_init( erb_content->tag_opening, erb_content->content, @@ -484,11 +531,10 @@ static size_t process_control_structure( in_statements, erb_content->tag_opening->location.start, erb_content->tag_closing->location.end, - in_errors + in_errors, + context->arena ); - ast_node_free((AST_NODE_T*) erb_content); - hb_array_append(in_conditions, (AST_NODE_T*) in_node); continue; @@ -540,10 +586,9 @@ static size_t process_control_structure( else_children, next_erb->tag_opening->location.start, next_erb->tag_closing->location.end, - else_errors + else_errors, + context->arena ); - - ast_node_free((AST_NODE_T*) next_erb); } } } @@ -566,11 +611,10 @@ static size_t process_control_structure( end_erb->tag_closing, end_erb->tag_opening->location.start, end_erb->tag_closing->location.end, - end_errors + end_errors, + context->arena ); - ast_node_free((AST_NODE_T*) end_erb); - index++; } } @@ -595,6 +639,11 @@ static size_t process_control_structure( hb_array_T* case_match_errors = erb_node->base.errors; erb_node->base.errors = NULL; + if (erb_node->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_node->analyzed_ruby); + erb_node->analyzed_ruby = NULL; + } + AST_ERB_CASE_MATCH_NODE_T* case_match_node = ast_erb_case_match_node_init( erb_node->tag_opening, erb_node->content, @@ -605,11 +654,10 @@ static size_t process_control_structure( end_node, start_position, end_position, - case_match_errors + case_match_errors, + context->arena ); - ast_node_free((AST_NODE_T*) erb_node); - hb_array_append(output_array, (AST_NODE_T*) case_match_node); hb_array_free(&when_conditions); hb_array_free(&children); @@ -620,6 +668,11 @@ static size_t process_control_structure( hb_array_T* case_errors = erb_node->base.errors; erb_node->base.errors = NULL; + if (erb_node->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_node->analyzed_ruby); + erb_node->analyzed_ruby = NULL; + } + AST_ERB_CASE_NODE_T* case_node = ast_erb_case_node_init( erb_node->tag_opening, erb_node->content, @@ -630,11 +683,10 @@ static size_t process_control_structure( end_node, start_position, end_position, - case_errors + case_errors, + context->arena ); - ast_node_free((AST_NODE_T*) erb_node); - hb_array_append(output_array, (AST_NODE_T*) case_node); hb_array_free(&in_conditions); hb_array_free(&children); @@ -702,10 +754,9 @@ static size_t process_control_structure( else_children, next_erb->tag_opening->location.start, next_erb->tag_closing->location.end, - else_errors + else_errors, + context->arena ); - - ast_node_free((AST_NODE_T*) next_erb); } } } @@ -748,10 +799,9 @@ static size_t process_control_structure( ensure_children, next_erb->tag_opening->location.start, next_erb->tag_closing->location.end, - ensure_errors + ensure_errors, + context->arena ); - - ast_node_free((AST_NODE_T*) next_erb); } } } @@ -774,11 +824,10 @@ static size_t process_control_structure( end_erb->tag_closing, end_erb->tag_opening->location.start, end_erb->tag_closing->location.end, - end_errors + end_errors, + context->arena ); - ast_node_free((AST_NODE_T*) end_erb); - index++; } } @@ -800,6 +849,11 @@ static size_t process_control_structure( hb_array_T* begin_errors = erb_node->base.errors; erb_node->base.errors = NULL; + if (erb_node->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_node->analyzed_ruby); + erb_node->analyzed_ruby = NULL; + } + AST_ERB_BEGIN_NODE_T* begin_node = ast_erb_begin_node_init( erb_node->tag_opening, erb_node->content, @@ -811,12 +865,12 @@ static size_t process_control_structure( end_node, start_position, end_position, - begin_errors + begin_errors, + context->arena ); - ast_node_free((AST_NODE_T*) erb_node); - hb_array_append(output_array, (AST_NODE_T*) begin_node); + return index; } @@ -842,11 +896,10 @@ static size_t process_control_structure( close_erb->tag_closing, close_erb->tag_opening->location.start, close_erb->tag_closing->location.end, - end_errors + end_errors, + context->arena ); - ast_node_free((AST_NODE_T*) close_erb); - index++; } } @@ -865,6 +918,11 @@ static size_t process_control_structure( hb_array_T* block_errors = erb_node->base.errors; erb_node->base.errors = NULL; + if (erb_node->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_node->analyzed_ruby); + erb_node->analyzed_ruby = NULL; + } + AST_ERB_BLOCK_NODE_T* block_node = ast_erb_block_node_init( erb_node->tag_opening, erb_node->content, @@ -873,12 +931,12 @@ static size_t process_control_structure( end_node, start_position, end_position, - block_errors + block_errors, + context->arena ); - ast_node_free((AST_NODE_T*) erb_node); - hb_array_append(output_array, (AST_NODE_T*) block_node); + return index; } @@ -916,20 +974,19 @@ static size_t process_control_structure( end_erb->tag_closing, end_erb->tag_opening->location.start, end_erb->tag_closing->location.end, - end_errors + end_errors, + context->arena ); - ast_node_free((AST_NODE_T*) end_erb); - index++; } } } - AST_NODE_T* control_node = create_control_node(erb_node, children, subsequent, end_node, initial_type); + AST_NODE_T* control_node = + create_control_node(erb_node, children, subsequent, end_node, initial_type, context->arena); if (control_node) { - ast_node_free((AST_NODE_T*) erb_node); hb_array_append(output_array, control_node); } else { hb_array_free(&children); @@ -954,10 +1011,10 @@ static size_t process_subsequent_block( index = process_block_children(node, array, index, children, context, parent_type); - AST_NODE_T* subsequent_node = create_control_node(erb_node, children, NULL, NULL, type); + AST_NODE_T* subsequent_node = create_control_node(erb_node, children, NULL, NULL, type, context->arena); if (subsequent_node) { - ast_node_free((AST_NODE_T*) erb_node); + // no-op } else { hb_array_free(&children); } @@ -1089,10 +1146,9 @@ static hb_array_T* rewrite_node_array(AST_NODE_T* node, hb_array_T* array, analy continue; case CONTROL_TYPE_YIELD: { - AST_NODE_T* yield_node = create_control_node(erb_node, NULL, NULL, NULL, type); + AST_NODE_T* yield_node = create_control_node(erb_node, NULL, NULL, NULL, type, context->arena); if (yield_node) { - ast_node_free((AST_NODE_T*) erb_node); hb_array_append(new_array, yield_node); } else { hb_array_append(new_array, item); @@ -1112,6 +1168,21 @@ static hb_array_T* rewrite_node_array(AST_NODE_T* node, hb_array_T* array, analy return new_array; } +static void free_analyzed_ruby_from_array(hb_array_T* array) { + if (!array) { return; } + + for (size_t i = 0; i < hb_array_size(array); i++) { + AST_NODE_T* node = hb_array_get(array, i); + if (node && node->type == AST_ERB_CONTENT_NODE) { + AST_ERB_CONTENT_NODE_T* erb_content = (AST_ERB_CONTENT_NODE_T*) node; + if (erb_content->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_content->analyzed_ruby); + erb_content->analyzed_ruby = NULL; + } + } + } +} + static bool transform_erb_nodes(const AST_NODE_T* node, void* data) { analyze_ruby_context_T* context = (analyze_ruby_context_T*) data; context->parent = (AST_NODE_T*) node; @@ -1120,6 +1191,7 @@ static bool transform_erb_nodes(const AST_NODE_T* node, void* data) { AST_DOCUMENT_NODE_T* document_node = (AST_DOCUMENT_NODE_T*) node; hb_array_T* old_array = document_node->children; document_node->children = rewrite_node_array((AST_NODE_T*) node, document_node->children, context); + free_analyzed_ruby_from_array(old_array); hb_array_free(&old_array); } @@ -1127,6 +1199,7 @@ static bool transform_erb_nodes(const AST_NODE_T* node, void* data) { AST_HTML_ELEMENT_NODE_T* element_node = (AST_HTML_ELEMENT_NODE_T*) node; hb_array_T* old_array = element_node->body; element_node->body = rewrite_node_array((AST_NODE_T*) node, element_node->body, context); + free_analyzed_ruby_from_array(old_array); hb_array_free(&old_array); } @@ -1134,6 +1207,7 @@ static bool transform_erb_nodes(const AST_NODE_T* node, void* data) { AST_HTML_OPEN_TAG_NODE_T* open_tag = (AST_HTML_OPEN_TAG_NODE_T*) node; hb_array_T* old_array = open_tag->children; open_tag->children = rewrite_node_array((AST_NODE_T*) node, open_tag->children, context); + free_analyzed_ruby_from_array(old_array); hb_array_free(&old_array); } @@ -1141,6 +1215,7 @@ static bool transform_erb_nodes(const AST_NODE_T* node, void* data) { AST_HTML_ATTRIBUTE_VALUE_NODE_T* value_node = (AST_HTML_ATTRIBUTE_VALUE_NODE_T*) node; hb_array_T* old_array = value_node->children; value_node->children = rewrite_node_array((AST_NODE_T*) node, value_node->children, context); + free_analyzed_ruby_from_array(old_array); hb_array_free(&old_array); } @@ -1156,12 +1231,14 @@ void herb_analyze_parse_tree(AST_DOCUMENT_NODE_T* document, const char* source) context->document = document; context->parent = NULL; context->ruby_context_stack = hb_array_init(8); + context->arena = document->arena; herb_visit_node((AST_NODE_T*) document, transform_erb_nodes, context); herb_analyze_parse_errors(document, source); hb_array_free(&context->ruby_context_stack); + free(context); } diff --git a/src/extract.c b/src/extract.c index 16a6740ad..7e185c3e7 100644 --- a/src/extract.c +++ b/src/extract.c @@ -1,6 +1,8 @@ #include "include/herb.h" #include "include/io.h" #include "include/lexer.h" +#include "include/macros.h" +#include "include/util/hb_arena.h" #include "include/util/hb_array.h" #include "include/util/hb_buffer.h" @@ -8,7 +10,24 @@ #include void herb_extract_ruby_to_buffer_with_semicolons(const char* source, hb_buffer_T* output) { - hb_array_T* tokens = herb_lex(source); + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + + if (!arena) { return; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return; + } + + herb_lex_result_T* result = herb_lex(source, arena); + + if (!result) { + hb_arena_free(arena); + free(arena); + return; + } + + hb_array_T* tokens = result->tokens; bool skip_erb_content = false; for (size_t i = 0; i < hb_array_size(tokens); i++) { @@ -54,11 +73,28 @@ void herb_extract_ruby_to_buffer_with_semicolons(const char* source, hb_buffer_T } } - herb_free_tokens(&tokens); + herb_free_lex_result(&result); } void herb_extract_ruby_to_buffer(const char* source, hb_buffer_T* output) { - hb_array_T* tokens = herb_lex(source); + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + + if (!arena) { return; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return; + } + + herb_lex_result_T* result = herb_lex(source, arena); + + if (!result) { + hb_arena_free(arena); + free(arena); + return; + } + + hb_array_T* tokens = result->tokens; bool skip_erb_content = false; for (size_t i = 0; i < hb_array_size(tokens); i++) { @@ -102,11 +138,28 @@ void herb_extract_ruby_to_buffer(const char* source, hb_buffer_T* output) { } } - herb_free_tokens(&tokens); + herb_free_lex_result(&result); } void herb_extract_html_to_buffer(const char* source, hb_buffer_T* output) { - hb_array_T* tokens = herb_lex(source); + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + + if (!arena) { return; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return; + } + + herb_lex_result_T* result = herb_lex(source, arena); + + if (!result) { + hb_arena_free(arena); + free(arena); + return; + } + + hb_array_T* tokens = result->tokens; for (size_t i = 0; i < hb_array_size(tokens); i++) { const token_T* token = hb_array_get(tokens, i); @@ -119,7 +172,7 @@ void herb_extract_html_to_buffer(const char* source, hb_buffer_T* output) { } } - herb_free_tokens(&tokens); + herb_free_lex_result(&result); } char* herb_extract_ruby_with_semicolons(const char* source) { diff --git a/src/herb.c b/src/herb.c index a48a56f3a..45cd83c52 100644 --- a/src/herb.c +++ b/src/herb.c @@ -1,8 +1,10 @@ #include "include/herb.h" #include "include/io.h" #include "include/lexer.h" +#include "include/macros.h" #include "include/parser.h" #include "include/token.h" +#include "include/util/hb_arena.h" #include "include/util/hb_array.h" #include "include/util/hb_buffer.h" #include "include/version.h" @@ -10,9 +12,11 @@ #include #include -hb_array_T* herb_lex(const char* source) { +herb_lex_result_T* herb_lex(const char* source, hb_arena_T* arena) { + if (!arena) { return NULL; } + lexer_T lexer = { 0 }; - lexer_init(&lexer, source); + lexer_init(&lexer, source, arena); token_T* token = NULL; hb_array_T* tokens = hb_array_init(128); @@ -23,14 +27,24 @@ hb_array_T* herb_lex(const char* source) { hb_array_append(tokens, token); - return tokens; + herb_lex_result_T* result = malloc(sizeof(herb_lex_result_T)); + if (!result) { + hb_array_free(&tokens); + return NULL; + } + + result->tokens = tokens; + result->arena = arena; + + return result; } -AST_DOCUMENT_NODE_T* herb_parse(const char* source, parser_options_T* options) { +AST_DOCUMENT_NODE_T* herb_parse(const char* source, parser_options_T* options, hb_arena_T* arena) { if (!source) { source = ""; } + if (!arena) { return NULL; } lexer_T lexer = { 0 }; - lexer_init(&lexer, source); + lexer_init(&lexer, source, arena); parser_T parser = { 0 }; parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS; @@ -46,20 +60,34 @@ AST_DOCUMENT_NODE_T* herb_parse(const char* source, parser_options_T* options) { return document; } -hb_array_T* herb_lex_file(const char* path) { +herb_lex_result_T* herb_lex_file(const char* path, hb_arena_T* arena) { char* source = herb_read_file(path); - hb_array_T* tokens = herb_lex(source); + herb_lex_result_T* result = herb_lex(source, arena); free(source); - return tokens; + return result; } void herb_lex_to_buffer(const char* source, hb_buffer_T* output) { - hb_array_T* tokens = herb_lex(source); + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + if (!arena) { return; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return; + } + + herb_lex_result_T* result = herb_lex(source, arena); + + if (!result) { + hb_arena_free(arena); + free(arena); + return; + } - for (size_t i = 0; i < hb_array_size(tokens); i++) { - token_T* token = hb_array_get(tokens, i); + for (size_t i = 0; i < hb_array_size(result->tokens); i++) { + token_T* token = hb_array_get(result->tokens, i); char* type = token_to_string(token); hb_buffer_append(output, type); @@ -68,7 +96,23 @@ void herb_lex_to_buffer(const char* source, hb_buffer_T* output) { hb_buffer_append(output, "\n"); } - herb_free_tokens(&tokens); + herb_free_lex_result(&result); +} + +void herb_free_lex_result(herb_lex_result_T** result) { + if (!result || !*result) { return; } + + herb_lex_result_T* r = *result; + + if (r->tokens) { hb_array_free(&r->tokens); } + + if (r->arena) { + hb_arena_free(r->arena); + free(r->arena); + } + + free(r); + *result = NULL; } void herb_free_tokens(hb_array_T** tokens) { diff --git a/src/include/analyze.h b/src/include/analyze.h index d6f5a1049..63f421158 100644 --- a/src/include/analyze.h +++ b/src/include/analyze.h @@ -9,6 +9,7 @@ typedef struct ANALYZE_RUBY_CONTEXT_STRUCT { AST_DOCUMENT_NODE_T* document; AST_NODE_T* parent; hb_array_T* ruby_context_stack; + hb_arena_T* arena; } analyze_ruby_context_T; typedef enum { diff --git a/src/include/herb.h b/src/include/herb.h index 207053e45..a977eb15b 100644 --- a/src/include/herb.h +++ b/src/include/herb.h @@ -4,6 +4,7 @@ #include "ast_node.h" #include "extract.h" #include "parser.h" +#include "util/hb_arena.h" #include "util/hb_array.h" #include "util/hb_buffer.h" @@ -13,16 +14,22 @@ extern "C" { #endif +typedef struct { + hb_array_T* tokens; + hb_arena_T* arena; +} herb_lex_result_T; + void herb_lex_to_buffer(const char* source, hb_buffer_T* output); -hb_array_T* herb_lex(const char* source); -hb_array_T* herb_lex_file(const char* path); +herb_lex_result_T* herb_lex(const char* source, hb_arena_T* arena); +herb_lex_result_T* herb_lex_file(const char* path, hb_arena_T* arena); -AST_DOCUMENT_NODE_T* herb_parse(const char* source, parser_options_T* options); +AST_DOCUMENT_NODE_T* herb_parse(const char* source, parser_options_T* options, hb_arena_T* arena); const char* herb_version(void); const char* herb_prism_version(void); +void herb_free_lex_result(herb_lex_result_T** result); void herb_free_tokens(hb_array_T** tokens); #ifdef __cplusplus diff --git a/src/include/lexer.h b/src/include/lexer.h index 142f3fb1c..adb13935e 100644 --- a/src/include/lexer.h +++ b/src/include/lexer.h @@ -3,8 +3,9 @@ #include "lexer_struct.h" #include "token_struct.h" +#include "util/hb_arena.h" -void lexer_init(lexer_T* lexer, const char* source); +void lexer_init(lexer_T* lexer, const char* source, hb_arena_T* arena); token_T* lexer_next_token(lexer_T* lexer); token_T* lexer_error(lexer_T* lexer, const char* message); diff --git a/src/include/lexer_struct.h b/src/include/lexer_struct.h index 94b132559..46e6a492a 100644 --- a/src/include/lexer_struct.h +++ b/src/include/lexer_struct.h @@ -1,6 +1,7 @@ #ifndef HERB_LEXER_STRUCT_H #define HERB_LEXER_STRUCT_H +#include "util/hb_arena.h" #include "util/hb_string.h" #include @@ -29,6 +30,8 @@ typedef struct LEXER_STRUCT { uint32_t stall_counter; uint32_t last_position; bool stalled; + + hb_arena_T* arena; } lexer_T; #endif diff --git a/src/include/macros.h b/src/include/macros.h index 983aaab78..05fb61138 100644 --- a/src/include/macros.h +++ b/src/include/macros.h @@ -5,7 +5,7 @@ #define MIN(a, b) ((a) < (b) ? (a) : (b)) -#define KB(kb) (1024 * kb) +#define KB(kb) (1024 * (kb)) #define MB(mb) (1024 * KB(mb)) diff --git a/src/include/parser.h b/src/include/parser.h index 4eb92f3fb..9778809d8 100644 --- a/src/include/parser.h +++ b/src/include/parser.h @@ -3,6 +3,7 @@ #include "ast_node.h" #include "lexer.h" +#include "util/hb_arena.h" #include "util/hb_array.h" typedef enum { @@ -28,6 +29,7 @@ typedef struct PARSER_STRUCT { parser_state_T state; foreign_content_type_T foreign_content_type; parser_options_T options; + hb_arena_T* arena; } parser_T; void herb_parser_init(parser_T* parser, lexer_T* lexer, parser_options_T options); diff --git a/src/include/parser_helpers.h b/src/include/parser_helpers.h index b3ab98300..01719af47 100644 --- a/src/include/parser_helpers.h +++ b/src/include/parser_helpers.h @@ -44,6 +44,7 @@ token_T* parser_consume_if_present(parser_T* parser, token_type_T type); token_T* parser_consume_expected(parser_T* parser, token_type_T type, hb_array_T* array); AST_HTML_ELEMENT_NODE_T* parser_handle_missing_close_tag( + const parser_T* parser, AST_HTML_OPEN_TAG_NODE_T* open_tag, hb_array_T* body, hb_array_T* errors diff --git a/src/include/token.h b/src/include/token.h index 5628e2f0d..c2f97b82a 100644 --- a/src/include/token.h +++ b/src/include/token.h @@ -4,6 +4,7 @@ #include "lexer_struct.h" #include "position.h" #include "token_struct.h" +#include "util/hb_arena.h" token_T* token_init(const char* value, token_type_T type, lexer_T* lexer); char* token_to_string(const token_T* token); @@ -14,7 +15,7 @@ int token_type(const token_T* token); size_t token_sizeof(void); -token_T* token_copy(token_T* token); +token_T* token_copy(token_T* token, hb_arena_T* arena); void token_free(token_T* token); diff --git a/src/include/token_struct.h b/src/include/token_struct.h index 2727d2a4c..e217aace3 100644 --- a/src/include/token_struct.h +++ b/src/include/token_struct.h @@ -1,6 +1,8 @@ #ifndef HERB_TOKEN_STRUCT_H #define HERB_TOKEN_STRUCT_H +#include + #include "location.h" #include "range.h" @@ -53,6 +55,7 @@ typedef struct TOKEN_STRUCT { range_T range; location_T location; token_type_T type; + bool arena_allocated; } token_T; #endif diff --git a/src/include/util/hb_arena.h b/src/include/util/hb_arena.h index 907aede22..8c89be6d5 100644 --- a/src/include/util/hb_arena.h +++ b/src/include/util/hb_arena.h @@ -25,7 +25,7 @@ void* hb_arena_alloc(hb_arena_T* allocator, size_t size); size_t hb_arena_position(hb_arena_T* allocator); size_t hb_arena_capacity(hb_arena_T* allocator); void hb_arena_reset(hb_arena_T* allocator); -void hb_arena_reset_to(hb_arena_T* allocator, size_t new_position); +void hb_arena_reset_to(hb_arena_T* allocator, size_t target_position); void hb_arena_free(hb_arena_T* allocator); #endif diff --git a/src/lexer.c b/src/lexer.c index 45995751c..0c00e3134 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -31,7 +31,7 @@ static bool lexer_stalled(lexer_T* lexer) { return lexer->stalled; } -void lexer_init(lexer_T* lexer, const char* source) { +void lexer_init(lexer_T* lexer, const char* source, hb_arena_T* arena) { if (source != NULL) { lexer->source = hb_string(source); } else { @@ -52,6 +52,8 @@ void lexer_init(lexer_T* lexer, const char* source) { lexer->stall_counter = 0; lexer->last_position = 0; lexer->stalled = false; + + lexer->arena = arena; } token_T* lexer_error(lexer_T* lexer, const char* message) { @@ -108,7 +110,7 @@ static token_T* lexer_advance_with(lexer_T* lexer, const char* value, const toke } static token_T* lexer_advance_with_next(lexer_T* lexer, size_t count, token_type_T type) { - char* collected = malloc(count + 1); + char* collected = hb_arena_alloc(lexer->arena, count + 1); if (!collected) { return NULL; } for (size_t i = 0; i < count; i++) { @@ -119,7 +121,6 @@ static token_T* lexer_advance_with_next(lexer_T* lexer, size_t count, token_type collected[count] = '\0'; token_T* token = token_init(collected, type, lexer); - free(collected); return token; } @@ -133,15 +134,12 @@ static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T if (char_byte_length <= 1) { return lexer_advance_current(lexer, type); } - char* utf8_char = malloc(char_byte_length + 1); + char* utf8_char = hb_arena_alloc(lexer->arena, char_byte_length + 1); if (!utf8_char) { return lexer_advance_current(lexer, type); } for (int i = 0; i < char_byte_length; i++) { - if (lexer->current_position + i >= lexer->source.length) { - free(utf8_char); - return lexer_advance_current(lexer, type); - } + if (lexer->current_position + i >= lexer->source.length) { return lexer_advance_current(lexer, type); } utf8_char[i] = lexer->source.data[lexer->current_position + i]; } @@ -152,8 +150,6 @@ static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T token_T* token = token_init(utf8_char, type, lexer); - free(utf8_char); - return token; } diff --git a/src/lexer_peek_helpers.c b/src/lexer_peek_helpers.c index 86cbfe72a..1fae69054 100644 --- a/src/lexer_peek_helpers.c +++ b/src/lexer_peek_helpers.c @@ -84,14 +84,11 @@ bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T tok token_T* token = lexer_next_token(lexer); while (token && (token->type == TOKEN_WHITESPACE || token->type == TOKEN_NEWLINE)) { - token_free(token); token = lexer_next_token(lexer); } bool result = (token && token->type == token_type); - if (token) { token_free(token); } - lexer->current_position = saved_position; lexer->current_line = saved_line; lexer->current_column = saved_column; @@ -104,7 +101,7 @@ bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T tok bool lexer_peek_for_close_tag_start(const lexer_T* lexer, uint32_t offset) { if (lexer_peek(lexer, offset) != '<' || lexer_peek(lexer, offset + 1) != '/') { return false; } - int pos = offset + 2; + uint32_t pos = offset + 2; while (lexer_peek(lexer, pos) == ' ' || lexer_peek(lexer, pos) == '\t' || lexer_peek(lexer, pos) == '\n' || lexer_peek(lexer, pos) == '\r') { diff --git a/src/main.c b/src/main.c index 52b1f370c..a62902191 100644 --- a/src/main.c +++ b/src/main.c @@ -7,7 +7,10 @@ #include "include/extract.h" #include "include/herb.h" #include "include/io.h" +#include "include/macros.h" #include "include/ruby_parser.h" +#include "include/util/hb_arena.h" +#include "include/util/hb_arena_debug.h" #include "include/util/hb_buffer.h" #include @@ -32,6 +35,23 @@ void print_time_diff(const struct timespec start, const struct timespec end, con printf(" %8.6f s\n\n", s); } +static hb_arena_T* allocate_arena(void) { + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + + if (!arena) { + fprintf(stderr, "Failed to allocate arena\n"); + return NULL; + } + + if (!hb_arena_init(arena, KB(16))) { + fprintf(stderr, "Failed to initialize arena\n"); + free(arena); + return NULL; + } + + return arena; +} + int main(const int argc, char* argv[]) { if (argc < 2) { printf("./herb [command] [options]\n\n"); @@ -62,7 +82,13 @@ int main(const int argc, char* argv[]) { clock_gettime(CLOCK_MONOTONIC, &start); if (strcmp(argv[1], "visit") == 0) { - AST_DOCUMENT_NODE_T* root = herb_parse(source, NULL); + hb_arena_T* arena = allocate_arena(); + if (!arena) { + free(source); + return 1; + } + + AST_DOCUMENT_NODE_T* root = herb_parse(source, NULL, arena); clock_gettime(CLOCK_MONOTONIC, &end); herb_analyze_parse_tree(root, source); @@ -93,7 +119,13 @@ int main(const int argc, char* argv[]) { } if (strcmp(argv[1], "parse") == 0) { - AST_DOCUMENT_NODE_T* root = herb_parse(source, NULL); + hb_arena_T* arena = allocate_arena(); + if (!arena) { + free(source); + return 1; + } + + AST_DOCUMENT_NODE_T* root = herb_parse(source, NULL, arena); herb_analyze_parse_tree(root, source); @@ -107,6 +139,9 @@ int main(const int argc, char* argv[]) { printf("%s\n", output.value); print_time_diff(start, end, "parsing"); + + printf("\n"); + hb_arena_print_stats(arena); } ast_node_free((AST_NODE_T*) root); @@ -150,6 +185,10 @@ int main(const int argc, char* argv[]) { herb_parse_ruby_to_stdout(ruby_source); + free(ruby_source); + free(output.value); + free(source); + return 0; } diff --git a/src/parser.c b/src/parser.c index 3cc25791a..46dc8a612 100644 --- a/src/parser.c +++ b/src/parser.c @@ -41,6 +41,7 @@ void herb_parser_init(parser_T* parser, lexer_T* lexer, parser_options_T options parser->state = PARSER_STATE_DATA; parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN; parser->options = options; + parser->arena = lexer->arena; } static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) { @@ -75,7 +76,8 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) { tag_closing, tag_opening->location.start, tag_closing->location.end, - errors + errors, + parser->arena ); free(content.value); @@ -121,7 +123,8 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) { comment_end, comment_start->location.start, comment_end->location.end, - errors + errors, + parser->arena ); free(comment.value); @@ -166,7 +169,8 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) { tag_closing, tag_opening->location.start, tag_closing->location.end, - errors + errors, + parser->arena ); token_free(tag_opening); @@ -213,7 +217,8 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser tag_closing, tag_opening->location.start, tag_closing->location.end, - errors + errors, + parser->arena ); token_free(tag_opening); @@ -248,7 +253,8 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra token->value, token->location.start, token->location.end, - document_errors + document_errors, + parser->arena ); token_free(token); @@ -266,10 +272,15 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra AST_HTML_TEXT_NODE_T* text_node = NULL; if (hb_buffer_length(&content) > 0) { - text_node = - ast_html_text_node_init(hb_buffer_value(&content), start, parser->current_token->location.start, errors); + text_node = ast_html_text_node_init( + hb_buffer_value(&content), + start, + parser->current_token->location.start, + errors, + parser->arena + ); } else { - text_node = ast_html_text_node_init("", start, parser->current_token->location.start, errors); + text_node = ast_html_text_node_init("", start, parser->current_token->location.start, errors, parser->arena); } free(content.value); @@ -325,7 +336,7 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T } AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = - ast_html_attribute_name_node_init(children, node_start, node_end, errors); + ast_html_attribute_name_node_init(children, node_start, node_end, errors, parser->arena); free(buffer.value); @@ -399,7 +410,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value opening_quote->value, potential_closing->location.start, potential_closing->location.end, - errors + errors, + parser->arena ); lexer_restore_state(parser->lexer, saved_state); @@ -450,7 +462,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value closing_quote, closing_quote->location.start, closing_quote->location.end, - errors + errors, + parser->arena ); } @@ -461,7 +474,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value true, opening_quote->location.start, closing_quote->location.end, - errors + errors, + parser->arena ); token_free(opening_quote); @@ -486,7 +500,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser false, erb_node->base.location.start, erb_node->base.location.end, - errors + errors, + parser->arena ); } @@ -505,7 +520,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser false, literal->base.location.start, literal->base.location.end, - errors + errors, + parser->arena ); } @@ -523,11 +539,12 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser "backtick (`)", start, end, - errors + errors, + parser->arena ); AST_HTML_ATTRIBUTE_VALUE_NODE_T* value = - ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors); + ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors, parser->arena); token_free(token); @@ -540,7 +557,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser token_type_to_string(parser->current_token->type), parser->current_token->location.start, parser->current_token->location.end, - errors + errors, + parser->arena ); AST_HTML_ATTRIBUTE_VALUE_NODE_T* value = ast_html_attribute_value_node_init( @@ -550,7 +568,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser false, parser->current_token->location.start, parser->current_token->location.end, - errors + errors, + parser->arena ); return value; @@ -606,11 +625,18 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) token_free(whitespace); } - token_T* equals_with_whitespace = calloc(1, sizeof(token_T)); + token_T* equals_with_whitespace = hb_arena_alloc(parser->arena, sizeof(token_T)); equals_with_whitespace->type = TOKEN_EQUALS; - equals_with_whitespace->value = herb_strdup(equals_buffer.value); + + size_t value_length = strlen(equals_buffer.value); + char* arena_value = hb_arena_alloc(parser->arena, value_length + 1); + memcpy(arena_value, equals_buffer.value, value_length); + arena_value[value_length] = '\0'; + + equals_with_whitespace->value = arena_value; equals_with_whitespace->location = (location_T) { .start = equals_start, .end = equals_end }; equals_with_whitespace->range = (range_T) { .from = range_start, .to = range_end }; + equals_with_whitespace->arena_allocated = true; free(equals_buffer.value); @@ -622,7 +648,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) attribute_value, attribute_name->base.location.start, attribute_value->base.location.end, - NULL + NULL, + parser->arena ); } else { return ast_html_attribute_node_init( @@ -631,7 +658,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) NULL, attribute_name->base.location.start, attribute_name->base.location.end, - NULL + NULL, + parser->arena ); } } else { @@ -651,7 +679,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) attribute_value, attribute_name->base.location.start, attribute_value->base.location.end, - NULL + NULL, + parser->arena ); token_free(equals); @@ -665,7 +694,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) NULL, attribute_name->base.location.start, attribute_name->base.location.end, - NULL + NULL, + parser->arena ); } @@ -834,7 +864,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) { is_self_closing, tag_start->location.start, tag_end->location.end, - errors + errors, + parser->arena ); token_free(tag_start); @@ -868,7 +899,8 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser) got.data, tag_opening->location.start, tag_closing->location.end, - errors + errors, + parser->arena ); free(expected.data); @@ -882,7 +914,8 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser) tag_closing, tag_opening->location.start, tag_closing->location.end, - errors + errors, + parser->arena ); token_free(tag_opening); @@ -906,7 +939,8 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_self_closing_element( ELEMENT_SOURCE_HTML, open_tag->base.location.start, open_tag->base.location.end, - NULL + NULL, + parser->arena ); } @@ -927,7 +961,9 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element( parser_parse_in_data_state(parser, body, errors); } - if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) { return parser_handle_missing_close_tag(open_tag, body, errors); } + if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) { + return parser_handle_missing_close_tag(parser, open_tag, body, errors); + } AST_HTML_CLOSE_TAG_NODE_T* close_tag = parser_parse_html_close_tag(parser); @@ -955,7 +991,8 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element( ELEMENT_SOURCE_HTML, open_tag->base.location.start, close_tag->base.location.end, - errors + errors, + parser->arena ); } @@ -986,7 +1023,8 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_element(parser_T* parser) { ELEMENT_SOURCE_HTML, open_tag->base.location.start, open_tag->base.location.end, - errors + errors, + parser->arena ); } @@ -1006,7 +1044,8 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) { false, opening_tag->location.start, closing_tag->location.end, - errors + errors, + parser->arena ); token_free(opening_tag); @@ -1151,7 +1190,8 @@ static void parser_parse_unclosed_html_tags(const parser_T* parser, hb_array_T* unclosed_tag, parser->current_token->location.start, parser->current_token->location.end, - errors + errors, + parser->arena ); token_free(unclosed_tag); @@ -1176,7 +1216,8 @@ static void parser_parse_stray_closing_tags(parser_T* parser, hb_array_T* childr close_tag->tag_name, close_tag->base.location.start, close_tag->base.location.end, - close_tag->base.errors + close_tag->base.errors, + parser->arena ); } @@ -1197,7 +1238,8 @@ static AST_DOCUMENT_NODE_T* parser_parse_document(parser_T* parser) { token_T* eof = parser_consume_expected(parser, TOKEN_EOF, errors); - AST_DOCUMENT_NODE_T* document_node = ast_document_node_init(children, start, eof->location.end, errors); + AST_DOCUMENT_NODE_T* document_node = + ast_document_node_init(children, start, eof->location.end, errors, parser->arena); token_free(eof); @@ -1215,7 +1257,8 @@ static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token whitespace_token, whitespace_token->location.start, whitespace_token->location.end, - errors + errors, + parser->arena ); hb_array_append(children, whitespace_node); } diff --git a/src/parser_helpers.c b/src/parser_helpers.c index f34d97864..374b4cf89 100644 --- a/src/parser_helpers.c +++ b/src/parser_helpers.c @@ -14,7 +14,7 @@ #include void parser_push_open_tag(const parser_T* parser, token_T* tag_name) { - token_T* copy = token_copy(tag_name); + token_T* copy = token_copy(tag_name, parser->arena); hb_array_push(parser->open_tags_stack, copy); } @@ -107,7 +107,8 @@ void parser_append_unexpected_error( token_type_to_string(token->type), token->location.start, token->location.end, - errors + errors, + parser->arena ); token_free(token); @@ -119,7 +120,8 @@ void parser_append_unexpected_token_error(parser_T* parser, token_type_T expecte parser->current_token, parser->current_token->location.start, parser->current_token->location.end, - errors + errors, + parser->arena ); } @@ -132,7 +134,7 @@ void parser_append_literal_node_from_buffer( if (hb_buffer_length(buffer) == 0) { return; } AST_LITERAL_NODE_T* literal = - ast_literal_node_init(hb_buffer_value(buffer), start, parser->current_token->location.start, NULL); + ast_literal_node_init(hb_buffer_value(buffer), start, parser->current_token->location.start, NULL, parser->arena); if (children != NULL) { hb_array_append(children, literal); } hb_buffer_clear(buffer); @@ -155,13 +157,21 @@ token_T* parser_consume_expected(parser_T* parser, const token_type_T expected_t if (token == NULL) { token = parser_advance(parser); - append_unexpected_token_error(expected_type, token, token->location.start, token->location.end, array); + append_unexpected_token_error( + expected_type, + token, + token->location.start, + token->location.end, + array, + parser->arena + ); } return token; } AST_HTML_ELEMENT_NODE_T* parser_handle_missing_close_tag( + const parser_T* parser, AST_HTML_OPEN_TAG_NODE_T* open_tag, hb_array_T* body, hb_array_T* errors @@ -170,7 +180,8 @@ AST_HTML_ELEMENT_NODE_T* parser_handle_missing_close_tag( open_tag->tag_name, open_tag->tag_name->location.start, open_tag->tag_name->location.end, - errors + errors, + parser->arena ); return ast_html_element_node_init( @@ -182,7 +193,8 @@ AST_HTML_ELEMENT_NODE_T* parser_handle_missing_close_tag( ELEMENT_SOURCE_HTML, open_tag->base.location.start, open_tag->base.location.end, - errors + errors, + parser->arena ); } @@ -200,14 +212,16 @@ void parser_handle_mismatched_tags( actual_tag, actual_tag->location.start, actual_tag->location.end, - errors + errors, + parser->arena ); } else { append_missing_opening_tag_error( close_tag->tag_name, close_tag->tag_name->location.start, close_tag->tag_name->location.end, - errors + errors, + parser->arena ); } } diff --git a/src/prism_helpers.c b/src/prism_helpers.c index 06ac155b5..510b1b3e9 100644 --- a/src/prism_helpers.c +++ b/src/prism_helpers.c @@ -48,6 +48,7 @@ RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error( pm_diagnostic_id_human(error->diag_id), pm_error_level_to_string(error->level), start, - end + end, + NULL ); } diff --git a/src/token.c b/src/token.c index b84362754..2af2e0729 100644 --- a/src/token.c +++ b/src/token.c @@ -14,7 +14,7 @@ size_t token_sizeof(void) { } token_T* token_init(const char* value, const token_type_T type, lexer_T* lexer) { - token_T* token = calloc(1, token_sizeof()); + token_T* token = hb_arena_alloc(lexer->arena, token_sizeof()); if (type == TOKEN_NEWLINE) { lexer->current_line++; @@ -22,12 +22,21 @@ token_T* token_init(const char* value, const token_type_T type, lexer_T* lexer) } if (value) { - token->value = herb_strdup(value); + size_t value_length = strlen(value); + char* arena_value = hb_arena_alloc(lexer->arena, value_length + 1); + if (arena_value) { + memcpy(arena_value, value, value_length); + arena_value[value_length] = '\0'; + token->value = arena_value; + } else { + token->value = NULL; + } } else { token->value = NULL; } token->type = type; + token->arena_allocated = true; token->range = (range_T) { .from = lexer->previous_position, .to = lexer->current_position }; location_from( @@ -126,19 +135,32 @@ int token_type(const token_T* token) { return token->type; } -token_T* token_copy(token_T* token) { +token_T* token_copy(token_T* token, hb_arena_T* arena) { if (!token) { return NULL; } - token_T* new_token = calloc(1, token_sizeof()); + token_T* new_token = arena ? hb_arena_alloc(arena, token_sizeof()) : calloc(1, token_sizeof()); if (!new_token) { return NULL; } if (token->value) { - new_token->value = herb_strdup(token->value); - - if (!new_token->value) { - free(new_token); - return NULL; + if (arena) { + size_t value_length = strlen(token->value); + char* arena_value = hb_arena_alloc(arena, value_length + 1); + + if (arena_value) { + memcpy(arena_value, token->value, value_length); + arena_value[value_length] = '\0'; + new_token->value = arena_value; + } else { + new_token->value = NULL; + } + } else { + new_token->value = herb_strdup(token->value); + + if (!new_token->value) { + free(new_token); + return NULL; + } } } else { new_token->value = NULL; @@ -147,6 +169,7 @@ token_T* token_copy(token_T* token) { new_token->type = token->type; new_token->range = token->range; new_token->location = token->location; + new_token->arena_allocated = arena != NULL; return new_token; } @@ -154,7 +177,9 @@ token_T* token_copy(token_T* token) { void token_free(token_T* token) { if (!token) { return; } - if (token->value != NULL) { free(token->value); } + if (!token->arena_allocated) { + if (token->value != NULL) { free(token->value); } - free(token); + free(token); + } } diff --git a/src/util/hb_arena.c b/src/util/hb_arena.c index a17b67005..4e9793ddd 100644 --- a/src/util/hb_arena.c +++ b/src/util/hb_arena.c @@ -11,7 +11,7 @@ #include #include -#define hb_arena_for_each_page(allocator, page) \ +#define hb_arena_for_each_page(allocator, _page) \ for (hb_arena_page_T* page = (allocator)->head; page != NULL; page = page->next) static inline size_t hb_arena_align_size(size_t size, size_t alignment) { diff --git a/templates/src/ast_nodes.c.erb b/templates/src/ast_nodes.c.erb index 658bf5061..92487990c 100644 --- a/templates/src/ast_nodes.c.erb +++ b/templates/src/ast_nodes.c.erb @@ -1,5 +1,6 @@ #include #include +#include #include @@ -9,21 +10,22 @@ #include "include/errors.h" #include "include/token.h" #include "include/util.h" +#include "include/util/hb_arena.h" #include "include/util/hb_array.h" <%- nodes.each do |node| -%> <%- node_arguments = node.fields.any? ? node.fields.map { |field| [field.c_type, " ", field.name].join } : [] -%> -<%- arguments = node_arguments + ["position_T start_position", "position_T end_position", "hb_array_T* errors"] -%> +<%- arguments = node_arguments + ["position_T start_position", "position_T end_position", "hb_array_T* errors", "hb_arena_T* arena"] -%> <%= node.struct_type %>* ast_<%= node.human %>_init(<%= arguments.join(", ") %>) { - <%= node.struct_type %>* <%= node.human %> = malloc(sizeof(<%= node.struct_type %>)); + <%= node.struct_type %>* <%= node.human %> = arena ? hb_arena_alloc(arena, sizeof(<%= node.struct_type %>)) : malloc(sizeof(<%= node.struct_type %>)); ast_node_init(&<%= node.human %>->base, <%= node.type %>, start_position, end_position, errors); <%- node.fields.each do |field| -%> <%- case field -%> <%- when Herb::Template::TokenField -%> - <%= node.human %>-><%= field.name %> = token_copy(<%= field.name %>); + <%= node.human %>-><%= field.name %> = token_copy(<%= field.name %>, arena); <%- when Herb::Template::NodeField -%> <%= node.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::ArrayField -%> @@ -35,7 +37,13 @@ <%- when Herb::Template::PrismNodeField -%> <%= node.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::StringField -%> - <%= node.human %>-><%= field.name %> = herb_strdup(<%= field.name %>); + if (arena) { + char* temp = (char*) hb_arena_alloc(arena, strlen(<%= field.name %>) + 1); + strcpy(temp, <%= field.name %>); + <%= node.human %>-><%= field.name %> = temp; + } else { + <%= node.human %>-><%= field.name %> = herb_strdup(<%= field.name %>); + } <%- when Herb::Template::AnalyzedRubyField -%> <%= node.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::VoidPointerField -%> @@ -45,6 +53,10 @@ <%- end -%> <%- end -%> + <%- if node.human == "document_node" -%> + <%= node.human %>->arena = arena; + <%- end -%> + return <%= node.human %>; } <%- end -%> @@ -70,20 +82,60 @@ const char* ast_node_human_type(AST_NODE_T* node) { } void ast_free_base_node(AST_NODE_T* node) { - if (node == NULL) { return; } + // Base node cleanup is intentionally empty in the arena-based implementation. + // The node itself is arena-allocated and freed when the arena is destroyed. + // The errors array is freed by ast_free_arrays_recursive() before arena cleanup. + // This function is kept as an extension point for any future non-arena base node cleanup. +} + +<%- nodes.each do |node| -%> +static void ast_free_arrays_<%= node.human %>(<%= node.struct_type %>* <%= node.human %>); +<%- end -%> + +static void ast_free_arrays_recursive(AST_NODE_T* node) { + if (!node) { return; } + + switch (node->type) { + <%- nodes.each do |node| -%> + case <%= node.type %>: ast_free_arrays_<%= node.human %>((<%= node.struct_type %>*) node); break; + <%- end -%> + } +} + +<%- nodes.each do |node| -%> + +static void ast_free_arrays_<%= node.human %>(<%= node.struct_type %>* <%= node.human %>) { + if (!<%= node.human %>) { return; } - if (node->errors) { - for (size_t i = 0; i < hb_array_size(node->errors); i++) { - ERROR_T* child = hb_array_get(node->errors, i); - if (child != NULL) { error_free(child); } + <%- node.fields.each do |field| -%> + <%- case field -%> + <%- when Herb::Template::NodeField -%> + if (<%= node.human %>-><%= field.name %> != NULL) { + ast_free_arrays_recursive((AST_NODE_T*) <%= node.human %>-><%= field.name %>); + } + <%- when Herb::Template::ArrayField -%> + if (<%= node.human %>-><%= field.name %> != NULL) { + for (size_t i = 0; i < hb_array_size(<%= node.human %>-><%= field.name %>); i++) { + AST_NODE_T* child = (AST_NODE_T*) hb_array_get(<%= node.human %>-><%= field.name %>, i); + ast_free_arrays_recursive(child); } - hb_array_free(&node->errors); + hb_array_free(&<%= node.human %>-><%= field.name %>); } + <%- when Herb::Template::AnalyzedRubyField -%> + if (<%= node.human %>-><%= field.name %> != NULL) { + free_analyzed_ruby(<%= node.human %>-><%= field.name %>); + } + <%- end -%> + <%- end -%> - free(node); + if (<%= node.human %>->base.errors != NULL) { + hb_array_free(&<%= node.human %>->base.errors); + } } +<%- end -%> + <%- nodes.each do |node| -%> <%- arguments = node.fields.any? ? node.fields.map { |field| [field.c_type, " ", field.name].join }.join(", ") : "void" -%> @@ -94,20 +146,13 @@ static void ast_free_<%= node.human %>(<%= node.struct_type %>* <%= node.human % <%- node.fields.each do |field| -%> <%- case field -%> <%- when Herb::Template::TokenField -%> - if (<%= node.human %>-><%= field.name %> != NULL) { token_free(<%= node.human %>-><%= field.name %>); } + // Token is arena-allocated, will be freed with arena <%- when Herb::Template::NodeField -%> - ast_node_free((AST_NODE_T*) <%= node.human %>-><%= field.name %>); + // Node is arena-allocated, will be freed with arena <%- when Herb::Template::ArrayField -%> - if (<%= node.human %>-><%= field.name %> != NULL) { - for (size_t i = 0; i < hb_array_size(<%= node.human %>-><%= field.name %>); i++) { - AST_NODE_T* child = hb_array_get(<%= node.human %>-><%= field.name %>, i); - if (child) { ast_node_free(child); } - } - - hb_array_free(&<%= node.human %>-><%= field.name %>); - } + // Array freed by ast_free_arrays_recursive() before arena cleanup <%- when Herb::Template::StringField -%> - if (<%= node.human %>-><%= field.name %> != NULL) { free((char*) <%= node.human %>-><%= field.name %>); } + // String is arena-allocated, will be freed with arena <%- when Herb::Template::PrismNodeField -%> if (<%= node.human %>-><%= field.name %> != NULL) { // The first argument to `pm_node_destroy` is a `pm_parser_t`, but it's currently unused: @@ -116,9 +161,7 @@ static void ast_free_<%= node.human %>(<%= node.struct_type %>* <%= node.human % pm_node_destroy(NULL, <%= node.human %>-><%= field.name %>); } <%- when Herb::Template::AnalyzedRubyField -%> - if (<%= node.human %>-><%= field.name %> != NULL) { - free_analyzed_ruby(<%= node.human %>-><%= field.name %>); - } + // AnalyzedRuby freed by ast_free_arrays_recursive() before arena cleanup <%- when Herb::Template::VoidPointerField -%> free(<%= node.human %>-><%= field.name %>); <%- when Herb::Template::BooleanField -%> @@ -128,7 +171,20 @@ static void ast_free_<%= node.human %>(<%= node.struct_type %>* <%= node.human % <%- end -%> <%- end -%> + <%- if node.human == "document_node" -%> + ast_free_arrays_recursive((AST_NODE_T*)<%= node.human %>); + + hb_arena_T* arena = <%= node.human %>->arena; + + ast_free_base_node(&<%= node.human %>->base); + + if (arena != NULL) { + hb_arena_free(arena); + free(arena); + } + <%- else -%> ast_free_base_node(&<%= node.human %>->base); + <%- end -%> } <%- end -%> diff --git a/templates/src/errors.c.erb b/templates/src/errors.c.erb index 1c8e1408c..5134d57a5 100644 --- a/templates/src/errors.c.erb +++ b/templates/src/errors.c.erb @@ -5,6 +5,7 @@ #include "include/token.h" #include "include/util.h" #include "include/util/hb_array.h" +#include "include/util/hb_arena.h" #include #include @@ -26,10 +27,10 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit } <%- errors.each do |error| -%> <%- error_arguments = error.fields.any? ? error.fields.map { |field| [field.c_type, " ", field.name].join } : [] -%> -<%- arguments = error_arguments + ["position_T start", "position_T end"] -%> +<%- arguments = error_arguments + ["position_T start", "position_T end", "hb_arena_T* arena"] -%> <%= error.struct_type %>* <%= error.human %>_init(<%= arguments.join(", ") %>) { - <%= error.struct_type %>* <%= error.human %> = malloc(sizeof(<%= error.struct_type %>)); + <%= error.struct_type %>* <%= error.human %> = arena ? hb_arena_alloc(arena, sizeof(<%= error.struct_type %>)) : malloc(sizeof(<%= error.struct_type %>)); error_init(&<%= error.human %>->base, <%= error.type %>, start, end); @@ -37,7 +38,7 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit const char* message_template = "<%= error.message_template %>"; size_t message_size = <%= Herb::Template::PrintfMessageTemplate.estimate_buffer_size(error.message_template) %>; - char* message = (char*) malloc(message_size); + char* message = arena ? (char*) hb_arena_alloc(arena, message_size) : (char*) malloc(message_size); if (message) { <%- error.message_arguments.each_with_index do |argument, i| -%> @@ -61,13 +62,48 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit <%- end -%> ); - <%= error.human %>->base.message = herb_strdup(message); - free(message); + if (arena) { + size_t length = strlen(message); + char* string = hb_arena_alloc(arena, length + 1); + + if (string) { + memcpy(string, message, length); + string[length] = '\0'; + <%= error.human %>->base.message = string; + } + } else { + <%= error.human %>->base.message = herb_strdup(message); + free(message); + } } else { - <%= error.human %>->base.message = herb_strdup("<%= error.message_template %>"); + if (arena) { + const char* template_string = "<%= error.message_template %>"; + size_t length = strlen(template_string); + char* string = hb_arena_alloc(arena, length + 1); + + if (string) { + memcpy(string, template_string, length); + string[length] = '\0'; + <%= error.human %>->base.message = string; + } + } else { + <%= error.human %>->base.message = herb_strdup("<%= error.message_template %>"); + } } <%- else -%> - <%= error.human %>->base.message = herb_strdup("<%= error.message_template %>"); + if (arena) { + const char* template_string = "<%= error.message_template %>"; + size_t length = strlen(template_string); + char* string = hb_arena_alloc(arena, length + 1); + + if (string) { + memcpy(string, template_string, length); + string[length] = '\0'; + <%= error.human %>->base.message = string; + } + } else { + <%= error.human %>->base.message = herb_strdup("<%= error.message_template %>"); + } <%- end -%> <%- error.fields.each do |field| -%> @@ -75,13 +111,24 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit <%- when Herb::Template::PositionField -%> <%= error.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::TokenField -%> - <%= error.human %>-><%= field.name %> = token_copy(<%= field.name %>); + <%= error.human %>-><%= field.name %> = token_copy(<%= field.name %>, arena); <%- when Herb::Template::TokenTypeField -%> <%= error.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::SizeTField -%> <%= error.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::StringField -%> - <%= error.human %>-><%= field.name %> = herb_strdup(<%= field.name %>); + if (arena) { + size_t length = strlen(<%= field.name %>); + char* string = hb_arena_alloc(arena, length + 1); + + if (string) { + memcpy(string, <%= field.name %>, length); + string[length] = '\0'; + <%= error.human %>-><%= field.name %> = string; + } + } else { + <%= error.human %>-><%= field.name %> = herb_strdup(<%= field.name %>); + } <%- else -%> <%= field.inspect %> <%- end -%> @@ -89,8 +136,8 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit return <%= error.human %>; } -void append_<%= error.human %>(<%= (arguments + ["hb_array_T* errors"]).join(", ") %>) { - hb_array_append(errors, <%= error.human %>_init(<%= arguments.map { |argument| argument.split(" ").last.strip }.join(", ") %>)); +void append_<%= error.human %>(<%= (error_arguments + ["position_T start", "position_T end", "hb_array_T* errors", "hb_arena_T* arena"]).join(", ") %>) { + hb_array_append(errors, <%= error.human %>_init(<%= (error_arguments.map { |arg| arg.split(" ").last.strip } + ["start", "end", "arena"]).join(", ") %>)); } <%- end -%> diff --git a/templates/src/include/ast_nodes.h.erb b/templates/src/include/ast_nodes.h.erb index 9aad4ad94..8fb52ba09 100644 --- a/templates/src/include/ast_nodes.h.erb +++ b/templates/src/include/ast_nodes.h.erb @@ -9,6 +9,7 @@ #include "location.h" #include "position.h" #include "token_struct.h" +#include "util/hb_arena.h" #include "util/hb_array.h" #include "util/hb_buffer.h" @@ -31,12 +32,15 @@ typedef struct AST_NODE_STRUCT { typedef struct <%= node.struct_name %> { AST_NODE_T base; <%= arguments %> + <%- if node.human == "document_node" -%> + hb_arena_T* arena; + <%- end -%> } <%= node.struct_type %>; <%- end -%> <%- nodes.each do |node| -%> <%- node_arguments = node.fields.any? ? node.fields.map { |field| [field.c_type, " ", field.name].join } : [] -%> -<%- arguments = node_arguments + ["position_T start_position", "position_T end_position", "hb_array_T* errors"] -%> +<%- arguments = node_arguments + ["position_T start_position", "position_T end_position", "hb_array_T* errors", "hb_arena_T* arena"] -%> <%= node.struct_type %>* ast_<%= node.human %>_init(<%= arguments.join(", ") %>); <%- end -%> diff --git a/templates/src/include/errors.h.erb b/templates/src/include/errors.h.erb index 4676c72d3..9f7549f64 100644 --- a/templates/src/include/errors.h.erb +++ b/templates/src/include/errors.h.erb @@ -6,6 +6,7 @@ #include "position.h" #include "token.h" #include "util/hb_array.h" +#include "util/hb_arena.h" #include "util/hb_buffer.h" typedef enum { @@ -31,9 +32,10 @@ typedef struct { <%- errors.each do |error| -%> <%- error_arguments = error.fields.any? ? error.fields.map { |field| [field.c_type, " ", field.name].join } : [] -%> -<%- arguments = error_arguments + ["position_T start", "position_T end"] -%> -<%= error.struct_type %>* <%= error.human %>_init(<%= arguments.join(", ") %>); -void append_<%= error.human %>(<%= (arguments << "hb_array_T* errors").join(", ") %>); +<%- init_arguments = error_arguments + ["position_T start", "position_T end", "hb_arena_T* arena"] -%> +<%- append_arguments = error_arguments + ["position_T start", "position_T end", "hb_array_T* errors", "hb_arena_T* arena"] -%> +<%= error.struct_type %>* <%= error.human %>_init(<%= init_arguments.join(", ") %>); +void append_<%= error.human %>(<%= append_arguments.join(", ") %>); <%- end -%> void error_init(ERROR_T* error, error_type_T type, position_T start, position_T end); diff --git a/wasm/herb-wasm.cpp b/wasm/herb-wasm.cpp index e1e5ce06e..97826d820 100644 --- a/wasm/herb-wasm.cpp +++ b/wasm/herb-wasm.cpp @@ -8,6 +8,8 @@ extern "C" { #include "../src/include/analyze.h" +#include "../src/include/macros.h" +#include "../src/include/util/hb_arena.h" #include "../src/include/util/hb_array.h" #include "../src/include/ast_node.h" #include "../src/include/ast_nodes.h" @@ -25,11 +27,28 @@ extern "C" { using namespace emscripten; val Herb_lex(const std::string& source) { - hb_array_T* tokens = herb_lex(source.c_str()); + hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); - val result = CreateLexResult(tokens, source); + if (!arena) { + return val::null(); + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return val::null(); + } + + herb_lex_result_T* lex_result = herb_lex(source.c_str(), arena); + + if (!lex_result) { + hb_arena_free(arena); + free(arena); + return val::null(); + } + + val result = CreateLexResult(lex_result->tokens, source); - herb_free_tokens(&tokens); + herb_free_lex_result(&lex_result); return result; } @@ -48,7 +67,24 @@ val Herb_parse(const std::string& source, val options) { } } - AST_DOCUMENT_NODE_T* root = herb_parse(source.c_str(), parser_options); + hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + + if (!arena) { + return val::null(); + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return val::null(); + } + + AST_DOCUMENT_NODE_T* root = herb_parse(source.c_str(), parser_options, arena); + + if (!root) { + hb_arena_free(arena); + free(arena); + return val::null(); + } herb_analyze_parse_tree(root, source.c_str());