diff --git a/compile.sh b/compile.sh new file mode 100644 index 000000000..037397849 --- /dev/null +++ b/compile.sh @@ -0,0 +1,4 @@ +for file in src/*.c +do + clang -Isrc -Ivendor/prism/include -c "$file" +done diff --git a/src/analyze.c b/src/analyze.c index 6e83f117e..ef8fe5252 100644 --- a/src/analyze.c +++ b/src/analyze.c @@ -51,10 +51,12 @@ static bool analyze_erb_content(const AST_NODE_T* node, void* data) { if (node->type == AST_ERB_CONTENT_NODE) { AST_ERB_CONTENT_NODE_T* erb_content_node = (AST_ERB_CONTENT_NODE_T*) node; - const char* opening = erb_content_node->tag_opening->value; + hb_string_T opening = erb_content_node->tag_opening->value; - if (strcmp(opening, "<%%") != 0 && strcmp(opening, "<%%=") != 0 && strcmp(opening, "<%#") != 0) { - analyzed_ruby_T* analyzed = herb_analyze_ruby(hb_string(erb_content_node->content->value)); + if (!hb_string_equals(opening, hb_string("<%%")) + && !hb_string_equals(opening, hb_string("<%%=")) + && !hb_string_equals(opening, hb_string("<%#"))) { + analyzed_ruby_T* analyzed = herb_analyze_ruby(erb_content_node->content->value); erb_content_node->parsed = true; erb_content_node->valid = analyzed->valid; diff --git a/src/ast_node.c b/src/ast_node.c index e66414249..4058c06e2 100644 --- a/src/ast_node.c +++ b/src/ast_node.c @@ -31,7 +31,7 @@ AST_LITERAL_NODE_T* ast_literal_node_init_from_token(const token_T* token) { ast_node_init(&literal->base, AST_LITERAL_NODE, token->location.start, token->location.end, NULL); - literal->content = herb_strdup(token->value); + literal->content = token->value; return literal; } diff --git a/src/extract.c b/src/extract.c index 16a6740ad..f0afde2f3 100644 --- a/src/extract.c +++ b/src/extract.c @@ -1,8 +1,8 @@ #include "include/herb.h" #include "include/io.h" -#include "include/lexer.h" #include "include/util/hb_array.h" #include "include/util/hb_buffer.h" +#include "include/util/hb_string.h" #include #include @@ -16,12 +16,14 @@ void herb_extract_ruby_to_buffer_with_semicolons(const 
char* source, hb_buffer_T switch (token->type) { case TOKEN_NEWLINE: { - hb_buffer_append(output, token->value); + hb_buffer_append_string(output, token->value); break; } case TOKEN_ERB_START: { - if (strcmp(token->value, "<%#") == 0 || strcmp(token->value, "<%%") == 0 || strcmp(token->value, "<%%=") == 0) { + if (hb_string_equals(token->value, hb_string("<%#")) + || hb_string_equals(token->value, hb_string("<%%")) + || hb_string_equals(token->value, hb_string("<%%="))) { skip_erb_content = true; } @@ -31,7 +33,7 @@ void herb_extract_ruby_to_buffer_with_semicolons(const char* source, hb_buffer_T case TOKEN_ERB_CONTENT: { if (skip_erb_content == false) { - hb_buffer_append(output, token->value); + hb_buffer_append_string(output, token->value); } else { hb_buffer_append_whitespace(output, range_length(token->range)); } @@ -66,12 +68,14 @@ void herb_extract_ruby_to_buffer(const char* source, hb_buffer_T* output) { switch (token->type) { case TOKEN_NEWLINE: { - hb_buffer_append(output, token->value); + hb_buffer_append_string(output, token->value); break; } case TOKEN_ERB_START: { - if (strcmp(token->value, "<%#") == 0 || strcmp(token->value, "<%%") == 0 || strcmp(token->value, "<%%=") == 0) { + if (hb_string_equals(token->value, hb_string("<%#")) + || hb_string_equals(token->value, hb_string("<%%")) + || hb_string_equals(token->value, hb_string("<%%="))) { skip_erb_content = true; } @@ -81,7 +85,7 @@ void herb_extract_ruby_to_buffer(const char* source, hb_buffer_T* output) { case TOKEN_ERB_CONTENT: { if (skip_erb_content == false) { - hb_buffer_append(output, token->value); + hb_buffer_append_string(output, token->value); } else { hb_buffer_append_whitespace(output, range_length(token->range)); } @@ -115,7 +119,7 @@ void herb_extract_html_to_buffer(const char* source, hb_buffer_T* output) { case TOKEN_ERB_START: case TOKEN_ERB_CONTENT: case TOKEN_ERB_END: hb_buffer_append_whitespace(output, range_length(token->range)); break; - default: hb_buffer_append(output, 
token->value); + default: hb_buffer_append_string(output, token->value); } } diff --git a/src/include/parser_helpers.h b/src/include/parser_helpers.h index b3ab98300..ecdd6208a 100644 --- a/src/include/parser_helpers.h +++ b/src/include/parser_helpers.h @@ -15,8 +15,8 @@ token_T* parser_pop_open_tag(const parser_T* parser); void parser_append_unexpected_error( parser_T* parser, - const char* description, - const char* expected, + hb_string_T description, + hb_string_T expected, hb_array_T* errors ); void parser_append_unexpected_token_error(parser_T* parser, token_type_T expected_type, hb_array_T* errors); diff --git a/src/include/token.h b/src/include/token.h index 5628e2f0d..2f4c0d29d 100644 --- a/src/include/token.h +++ b/src/include/token.h @@ -4,12 +4,13 @@ #include "lexer_struct.h" #include "position.h" #include "token_struct.h" +#include "util/hb_string.h" -token_T* token_init(const char* value, token_type_T type, lexer_T* lexer); +token_T* token_init(hb_string_T value, token_type_T type, lexer_T* lexer); char* token_to_string(const token_T* token); -const char* token_type_to_string(token_type_T type); +hb_string_T token_type_to_string(token_type_T type); -char* token_value(const token_T* token); +hb_string_T token_value(const token_T* token); int token_type(const token_T* token); size_t token_sizeof(void); diff --git a/src/include/token_struct.h b/src/include/token_struct.h index 2727d2a4c..595bf27d1 100644 --- a/src/include/token_struct.h +++ b/src/include/token_struct.h @@ -3,6 +3,7 @@ #include "location.h" #include "range.h" +#include "util/hb_string.h" typedef enum { TOKEN_WHITESPACE, // ' ' @@ -49,7 +50,7 @@ typedef enum { } token_type_T; typedef struct TOKEN_STRUCT { - char* value; + hb_string_T value; range_T range; location_T location; token_type_T type; diff --git a/src/include/utf8.h b/src/include/utf8.h index 1b3dbdd78..2ba0b44e0 100644 --- a/src/include/utf8.h +++ b/src/include/utf8.h @@ -1,11 +1,12 @@ #ifndef HERB_UTF8_H #define HERB_UTF8_H 
+#include "util/hb_string.h" #include #include -int utf8_char_byte_length(unsigned char first_byte); -int utf8_sequence_length(const char* str, size_t position, size_t max_length); +uint32_t utf8_char_byte_length(unsigned char first_byte); +uint32_t utf8_sequence_length(hb_string_T value); bool utf8_is_valid_continuation_byte(unsigned char byte); #endif diff --git a/src/lexer.c b/src/lexer.c index 45995751c..21234dc8c 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1,4 +1,5 @@ #include "include/lexer_peek_helpers.h" +#include "include/macros.h" #include "include/token.h" #include "include/utf8.h" #include "include/util.h" @@ -6,6 +7,7 @@ #include "include/util/hb_string.h" #include +#include #include #define LEXER_STALL_LIMIT 5 @@ -55,11 +57,12 @@ void lexer_init(lexer_T* lexer, const char* source) { } token_T* lexer_error(lexer_T* lexer, const char* message) { - char error_message[128]; + size_t error_message_length = 128; + char* error_message = malloc(sizeof(char) * error_message_length); // hb_arena_alloc(lexer->allocator, sizeof(char) * error_message_length); snprintf( error_message, - sizeof(error_message), + error_message_length, "[Lexer] Error: %s (character '%c', line %u, col %u)\n", message, lexer->current_character, @@ -67,7 +70,7 @@ token_T* lexer_error(lexer_T* lexer, const char* message) { lexer->current_column ); - return token_init(error_message, TOKEN_ERROR, lexer); + return token_init(hb_string(error_message), TOKEN_ERROR, lexer); } static void lexer_advance(lexer_T* lexer) { @@ -79,7 +82,7 @@ static void lexer_advance(lexer_T* lexer) { } } -static void lexer_advance_utf8_bytes(lexer_T* lexer, int byte_count) { +static void lexer_advance_utf8_bytes(lexer_T* lexer, uint32_t byte_count) { if (byte_count <= 0) { return; } if (lexer_has_more_characters(lexer) && !lexer_eof(lexer)) { @@ -102,65 +105,53 @@ static void lexer_advance_by(lexer_T* lexer, const size_t count) { } } -static token_T* lexer_advance_with(lexer_T* lexer, const char* value, const 
token_type_T type) { - lexer_advance_by(lexer, strlen(value)); +static token_T* lexer_advance_with(lexer_T* lexer, hb_string_T value, const token_type_T type) { + lexer_advance_by(lexer, value.length); return token_init(value, type, lexer); } static token_T* lexer_advance_with_next(lexer_T* lexer, size_t count, token_type_T type) { - char* collected = malloc(count + 1); - if (!collected) { return NULL; } + uint32_t start_position = lexer->current_position; for (size_t i = 0; i < count; i++) { - collected[i] = lexer->current_character; lexer_advance(lexer); } - collected[count] = '\0'; + uint32_t end_position = lexer->current_position; - token_T* token = token_init(collected, type, lexer); - free(collected); + hb_string_T value = hb_string_slice(lexer->source, start_position); + value = hb_string_truncate(value, end_position - start_position); + + token_T* token = token_init( + value, + type, + lexer + ); return token; } static token_T* lexer_advance_current(lexer_T* lexer, const token_type_T type) { - return lexer_advance_with(lexer, (char[]) { lexer->current_character, '\0' }, type); + hb_string_T value = { .data = lexer->source.data + lexer->current_position, .length = 1 }; + return lexer_advance_with(lexer, value, type); } static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T type) { - int char_byte_length = utf8_sequence_length(lexer->source.data, lexer->current_position, lexer->source.length); - + uint32_t char_byte_length = utf8_sequence_length(hb_string_slice(lexer->source, lexer->current_position)); if (char_byte_length <= 1) { return lexer_advance_current(lexer, type); } - char* utf8_char = malloc(char_byte_length + 1); - - if (!utf8_char) { return lexer_advance_current(lexer, type); } - - for (int i = 0; i < char_byte_length; i++) { - if (lexer->current_position + i >= lexer->source.length) { - free(utf8_char); - return lexer_advance_current(lexer, type); - } - - utf8_char[i] = lexer->source.data[lexer->current_position + i]; - } 
- - utf8_char[char_byte_length] = '\0'; - + size_t start_position = lexer->current_position; lexer_advance_utf8_bytes(lexer, char_byte_length); - token_T* token = token_init(utf8_char, type, lexer); - - free(utf8_char); + hb_string_T utf8_char = hb_string_slice(lexer->source, start_position); + utf8_char = hb_string_truncate(utf8_char, char_byte_length); - return token; + return token_init(utf8_char, type, lexer); } -static token_T* lexer_match_and_advance(lexer_T* lexer, const char* value, const token_type_T type) { - if (strncmp(lexer->source.data + lexer->current_position, value, strlen(value)) == 0) { - return lexer_advance_with(lexer, value, type); - } +static token_T* lexer_match_and_advance(lexer_T* lexer, hb_string_T value, const token_type_T type) { + hb_string_T remaining_source = hb_string_slice(lexer->source, lexer->current_position); + if (hb_string_starts_with(remaining_source, value)) { return lexer_advance_with(lexer, value, type); } return NULL; } @@ -168,37 +159,36 @@ static token_T* lexer_match_and_advance(lexer_T* lexer, const char* value, const // ===== Specialized Parsers static token_T* lexer_parse_whitespace(lexer_T* lexer) { - hb_buffer_T buffer; - hb_buffer_init(&buffer, 128); - + uint32_t start_position = lexer->current_position; while (isspace(lexer->current_character) && lexer->current_character != '\n' && lexer->current_character != '\r' && !lexer_eof(lexer)) { - hb_buffer_append_char(&buffer, lexer->current_character); lexer_advance(lexer); } + uint32_t end_position = lexer->current_position; - token_T* token = token_init(buffer.value, TOKEN_WHITESPACE, lexer); + hb_string_T value = hb_string_slice(lexer->source, start_position); + value = hb_string_truncate(value, end_position - start_position); - free(buffer.value); + token_T* token = token_init(value, TOKEN_WHITESPACE, lexer); return token; } static token_T* lexer_parse_identifier(lexer_T* lexer) { - hb_buffer_T buffer; - hb_buffer_init(&buffer, 128); + uint32_t start_position = 
lexer->current_position; while ((isalnum(lexer->current_character) || lexer->current_character == '-' || lexer->current_character == '_' || lexer->current_character == ':') && !lexer_peek_for_html_comment_end(lexer, 0) && !lexer_eof(lexer)) { - - hb_buffer_append_char(&buffer, lexer->current_character); lexer_advance(lexer); } + uint32_t end_position = lexer->current_position; - token_T* token = token_init(buffer.value, TOKEN_IDENTIFIER, lexer); + hb_string_T value = hb_string_slice(lexer->source, start_position); + value = hb_string_truncate(value, end_position - start_position); + value.length = end_position - start_position; - free(buffer.value); + token_T* token = token_init(value, TOKEN_IDENTIFIER, lexer); return token; } @@ -206,11 +196,13 @@ static token_T* lexer_parse_identifier(lexer_T* lexer) { // ===== ERB Parsing static token_T* lexer_parse_erb_open(lexer_T* lexer) { - const char* erb_patterns[] = { "<%==", "<%%=", "<%=", "<%#", "<%-", "<%%", "<%" }; + hb_string_T erb_patterns[7] = { hb_string("<%=="), hb_string("<%%="), + hb_string("<%="), hb_string("<%#"), + hb_string("<%-"), hb_string("<%%"), + hb_string("<%") }; lexer->state = STATE_ERB_CONTENT; - - for (size_t i = 0; i < sizeof(erb_patterns) / sizeof(erb_patterns[0]); i++) { + for (size_t i = 0; i < 7; i++) { token_T* match = lexer_match_and_advance(lexer, erb_patterns[i], TOKEN_ERB_START); if (match) { return match; } } @@ -219,20 +211,17 @@ static token_T* lexer_parse_erb_open(lexer_T* lexer) { } static token_T* lexer_parse_erb_content(lexer_T* lexer) { - hb_buffer_T buffer; - hb_buffer_init(&buffer, 1024); - + size_t start_position = lexer->current_position; while (!lexer_peek_erb_end(lexer, 0)) { if (lexer_eof(lexer)) { - token_T* token = token_init(buffer.value, TOKEN_ERROR, lexer); // Handle unexpected EOF + uint32_t end_position = lexer->current_position; - free(buffer.value); + hb_string_T value = hb_string_slice(lexer->source, start_position); + value.length = end_position - 
start_position; - return token; + return token_init(value, TOKEN_ERROR, lexer); // Handle unexpected EOF } - hb_buffer_append_char(&buffer, lexer->current_character); - if (is_newline(lexer->current_character)) { lexer->current_line++; lexer->current_column = 0; @@ -246,27 +235,33 @@ lexer->state = STATE_ERB_CLOSE; - token_T* token = token_init(buffer.value, TOKEN_ERB_CONTENT, lexer); + uint32_t end_position = lexer->current_position; + hb_string_T value = hb_string_slice(lexer->source, start_position); + value = hb_string_truncate(value, end_position - start_position); - free(buffer.value); - - return token; + return token_init(value, TOKEN_ERB_CONTENT, lexer); } static token_T* lexer_parse_erb_close(lexer_T* lexer) { lexer->state = STATE_DATA; - if (lexer_peek_erb_percent_close_tag(lexer, 0)) { return lexer_advance_with(lexer, "%%>", TOKEN_ERB_END); } - if (lexer_peek_erb_equals_close_tag(lexer, 0)) { return lexer_advance_with(lexer, "=%>", TOKEN_ERB_END); } - if (lexer_peek_erb_dash_close_tag(lexer, 0)) { return lexer_advance_with(lexer, "-%>", TOKEN_ERB_END); } + if (lexer_peek_erb_percent_close_tag(lexer, 0)) { + return lexer_advance_with(lexer, hb_string("%%>"), TOKEN_ERB_END); + } + if (lexer_peek_erb_equals_close_tag(lexer, 0)) { + return lexer_advance_with(lexer, hb_string("=%>"), TOKEN_ERB_END); + } + if (lexer_peek_erb_dash_close_tag(lexer, 0)) { + return lexer_advance_with(lexer, hb_string("-%>"), TOKEN_ERB_END); + } - return lexer_advance_with(lexer, "%>", TOKEN_ERB_END); + return lexer_advance_with(lexer, hb_string("%>"), TOKEN_ERB_END); } // ===== Tokenizing Function token_T* lexer_next_token(lexer_T* lexer) { - if (lexer_eof(lexer)) { return token_init("", TOKEN_EOF, lexer); } + if (lexer_eof(lexer)) { return token_init(hb_string(""), TOKEN_EOF, lexer); } if (lexer_stalled(lexer)) { return lexer_error(lexer, "Lexer stalled after 5 iterations"); } if (lexer->state == STATE_ERB_CONTENT) { return 
lexer_parse_erb_content(lexer); } @@ -303,33 +298,33 @@ token_T* lexer_next_token(lexer_T* lexer) { if (isalnum(lexer_peek(lexer, 1))) { return lexer_advance_current(lexer, TOKEN_HTML_TAG_START); } if (lexer_peek_for_html_comment_start(lexer, 0)) { - return lexer_advance_with(lexer, "", TOKEN_HTML_COMMENT_END); + token_T* token = lexer_match_and_advance(lexer, hb_string("-->"), TOKEN_HTML_COMMENT_END); return token ? token : lexer_advance_current(lexer, TOKEN_DASH); } case ']': { - token_T* token = lexer_match_and_advance(lexer, "]]>", TOKEN_CDATA_END); + token_T* token = lexer_match_and_advance(lexer, hb_string("]]>"), TOKEN_CDATA_END); return token ? token : lexer_advance_current(lexer, TOKEN_CHARACTER); } diff --git a/src/parser.c b/src/parser.c index 3cc25791a..94f9a48af 100644 --- a/src/parser.c +++ b/src/parser.c @@ -62,7 +62,7 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) { } token_T* token = parser_advance(parser); - hb_buffer_append(&content, token->value); + hb_buffer_append_string(&content, token->value); token_free(token); } @@ -107,7 +107,7 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) { } token_T* token = parser_advance(parser); - hb_buffer_append(&comment, token->value); + hb_buffer_append_string(&comment, token->value); token_free(token); } @@ -152,7 +152,7 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) { } token_T* token = parser_consume_expected(parser, parser->current_token->type, errors); - hb_buffer_append(&content, token->value); + hb_buffer_append_string(&content, token->value); token_free(token); } @@ -199,7 +199,7 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser } token_T* token = parser_advance(parser); - hb_buffer_append(&content, token->value); + hb_buffer_append_string(&content, token->value); token_free(token); } @@ -243,8 +243,8 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra 
token_T* token = parser_consume_expected(parser, TOKEN_ERROR, document_errors); append_unexpected_error( - "Token Error", - "not TOKEN_ERROR", + hb_string("Token Error"), + hb_string("not TOKEN_ERROR"), token->value, token->location.start, token->location.end, @@ -257,7 +257,7 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra } token_T* token = parser_advance(parser); - hb_buffer_append(&content, token->value); + hb_buffer_append_string(&content, token->value); token_free(token); } @@ -267,9 +267,9 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra if (hb_buffer_length(&content) > 0) { text_node = - ast_html_text_node_init(hb_buffer_value(&content), start, parser->current_token->location.start, errors); + ast_html_text_node_init(hb_string(content.value), start, parser->current_token->location.start, errors); } else { - text_node = ast_html_text_node_init("", start, parser->current_token->location.start, errors); + text_node = ast_html_text_node_init(hb_string(""), start, parser->current_token->location.start, errors); } free(content.value); @@ -304,7 +304,7 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T } token_T* token = parser_advance(parser); - hb_buffer_append(&buffer, token->value); + hb_buffer_append_string(&buffer, token->value); token_free(token); } @@ -345,7 +345,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value while (!token_is(parser, TOKEN_EOF) && !( token_is(parser, TOKEN_QUOTE) && opening_quote != NULL - && strcmp(parser->current_token->value, opening_quote->value) == 0 + && hb_string_equals(parser->current_token->value, opening_quote->value) )) { if (token_is(parser, TOKEN_ERB_START)) { parser_append_literal_node_from_buffer(parser, &buffer, children, start); @@ -363,9 +363,9 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value token_T* next_token = lexer_next_token(parser->lexer); 
if (next_token && next_token->type == TOKEN_QUOTE && opening_quote != NULL - && strcmp(next_token->value, opening_quote->value) == 0) { - hb_buffer_append(&buffer, parser->current_token->value); - hb_buffer_append(&buffer, next_token->value); + && hb_string_equals(next_token->value, opening_quote->value)) { + hb_buffer_append_string(&buffer, parser->current_token->value); + hb_buffer_append_string(&buffer, next_token->value); token_free(parser->current_token); token_free(next_token); @@ -379,14 +379,14 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value } } - hb_buffer_append(&buffer, parser->current_token->value); + hb_buffer_append_string(&buffer, parser->current_token->value); token_free(parser->current_token); parser->current_token = lexer_next_token(parser->lexer); } if (token_is(parser, TOKEN_QUOTE) && opening_quote != NULL - && strcmp(parser->current_token->value, opening_quote->value) == 0) { + && hb_string_equals(parser->current_token->value, opening_quote->value)) { lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer); token_T* potential_closing = parser->current_token; @@ -394,8 +394,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value if (token_is(parser, TOKEN_IDENTIFIER) || token_is(parser, TOKEN_CHARACTER)) { append_unexpected_error( - "Unescaped quote character in attribute value", - "escaped quote (\\') or different quote style (\")", + hb_string("Unescaped quote character in attribute value"), + hb_string("escaped quote (\\') or different quote style (\")"), opening_quote->value, potential_closing->location.start, potential_closing->location.end, @@ -407,14 +407,14 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value token_free(parser->current_token); parser->current_token = potential_closing; - hb_buffer_append(&buffer, parser->current_token->value); + hb_buffer_append_string(&buffer, parser->current_token->value); 
token_free(parser->current_token); parser->current_token = lexer_next_token(parser->lexer); while (!token_is(parser, TOKEN_EOF) && !( token_is(parser, TOKEN_QUOTE) && opening_quote != NULL - && strcmp(parser->current_token->value, opening_quote->value) == 0 + && hb_string_equals(parser->current_token->value, opening_quote->value) )) { if (token_is(parser, TOKEN_ERB_START)) { parser_append_literal_node_from_buffer(parser, &buffer, children, start); @@ -426,7 +426,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value continue; } - hb_buffer_append(&buffer, parser->current_token->value); + hb_buffer_append_string(&buffer, parser->current_token->value); token_free(parser->current_token); parser->current_token = lexer_next_token(parser->lexer); @@ -444,7 +444,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value token_T* closing_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors); - if (opening_quote != NULL && closing_quote != NULL && strcmp(opening_quote->value, closing_quote->value) != 0) { + if (opening_quote != NULL && closing_quote != NULL && !hb_string_equals(opening_quote->value, closing_quote->value)) { append_quotes_mismatch_error( opening_quote, closing_quote, @@ -518,9 +518,9 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser position_T end = token->location.end; append_unexpected_error( - "Invalid quote character for HTML attribute", - "single quote (') or double quote (\")", - "backtick (`)", + hb_string("Invalid quote character for HTML attribute"), + hb_string("single quote (') or double quote (\")"), + hb_string("backtick (`)"), start, end, errors @@ -535,8 +535,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser } append_unexpected_error( - "Unexpected Token", - "TOKEN_IDENTIFIER, TOKEN_QUOTE, TOKEN_ERB_START", + hb_string("Unexpected Token"), + hb_string("TOKEN_IDENTIFIER, TOKEN_QUOTE, TOKEN_ERB_START"), token_type_to_string(parser->current_token->type), 
parser->current_token->location.start, parser->current_token->location.end, @@ -581,7 +581,7 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) range_start = whitespace->range.from; } - hb_buffer_append(&equals_buffer, whitespace->value); + hb_buffer_append_string(&equals_buffer, whitespace->value); token_free(whitespace); } @@ -593,14 +593,14 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) range_start = equals->range.from; } - hb_buffer_append(&equals_buffer, equals->value); + hb_buffer_append_string(&equals_buffer, equals->value); equals_end = equals->location.end; range_end = equals->range.to; token_free(equals); while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) { token_T* whitespace = parser_advance(parser); - hb_buffer_append(&equals_buffer, whitespace->value); + hb_buffer_append_string(&equals_buffer, whitespace->value); equals_end = whitespace->location.end; range_end = whitespace->range.to; token_free(whitespace); @@ -608,12 +608,11 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) token_T* equals_with_whitespace = calloc(1, sizeof(token_T)); equals_with_whitespace->type = TOKEN_EQUALS; - equals_with_whitespace->value = herb_strdup(equals_buffer.value); + // TODO(Tim): This is a leak + equals_with_whitespace->value = hb_string(equals_buffer.value); equals_with_whitespace->location = (location_T) { .start = equals_start, .end = equals_end }; equals_with_whitespace->range = (range_T) { .from = range_start, .to = range_end }; - free(equals_buffer.value); - AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser); return ast_html_attribute_node_init( @@ -719,8 +718,8 @@ static bool parser_lookahead_erb_is_attribute(lexer_T* lexer) { } static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children) { - bool is_output_tag = parser->current_token->value && strlen(parser->current_token->value) >= 3 
- && strncmp(parser->current_token->value, "<%=", 3) == 0; + bool is_output_tag = !hb_string_is_empty(parser->current_token->value) + && hb_string_starts_with(parser->current_token->value, hb_string("<%=")); if (!is_output_tag) { hb_array_append(children, parser_parse_erb_tag(parser)); @@ -800,8 +799,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) { parser_append_unexpected_error( parser, - "Unexpected Token", - "TOKEN_IDENTIFIER, TOKEN_AT, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE", + hb_string("Unexpected Token"), + hb_string("TOKEN_IDENTIFIER, TOKEN_AT, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE"), errors ); } @@ -858,14 +857,14 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser) token_T* tag_closing = parser_consume_expected(parser, TOKEN_HTML_TAG_END, errors); - if (tag_name != NULL && is_void_element(hb_string(tag_name->value)) && parser_in_svg_context(parser) == false) { - hb_string_T expected = html_self_closing_tag_string(hb_string(tag_name->value)); - hb_string_T got = html_closing_tag_string(hb_string(tag_name->value)); + if (tag_name != NULL && is_void_element(tag_name->value) && parser_in_svg_context(parser) == false) { + hb_string_T expected = html_self_closing_tag_string(tag_name->value); + hb_string_T got = html_closing_tag_string(tag_name->value); append_void_element_closing_tag_error( tag_name, - expected.data, - got.data, + expected, + got, tag_opening->location.start, tag_closing->location.end, errors @@ -919,8 +918,8 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element( parser_push_open_tag(parser, open_tag->tag_name); - if (open_tag->tag_name->value && parser_is_foreign_content_tag(hb_string(open_tag->tag_name->value))) { - foreign_content_type_T content_type = parser_get_foreign_content_type(hb_string(open_tag->tag_name->value)); + if (!hb_string_is_empty(open_tag->tag_name->value) && parser_is_foreign_content_tag(open_tag->tag_name->value)) { + 
foreign_content_type_T content_type = parser_get_foreign_content_type(open_tag->tag_name->value); parser_enter_foreign_content(parser, content_type); parser_parse_foreign_content(parser, body, errors); } else { @@ -931,13 +930,13 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element( AST_HTML_CLOSE_TAG_NODE_T* close_tag = parser_parse_html_close_tag(parser); - if (parser_in_svg_context(parser) == false && is_void_element(hb_string(close_tag->tag_name->value))) { + if (parser_in_svg_context(parser) == false && is_void_element(close_tag->tag_name->value)) { hb_array_push(body, close_tag); parser_parse_in_data_state(parser, body, errors); close_tag = parser_parse_html_close_tag(parser); } - bool matches_stack = parser_check_matching_tag(parser, hb_string(close_tag->tag_name->value)); + bool matches_stack = parser_check_matching_tag(parser, close_tag->tag_name->value); if (matches_stack) { token_T* popped_token = parser_pop_open_tag(parser); @@ -966,7 +965,7 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_element(parser_T* parser) { if (open_tag->is_void) { return parser_parse_html_self_closing_element(parser, open_tag); } // , in void element list, and not in inside an element - if (!open_tag->is_void && is_void_element(hb_string(open_tag->tag_name->value)) && !parser_in_svg_context(parser)) { + if (!open_tag->is_void && is_void_element(open_tag->tag_name->value) && !parser_in_svg_context(parser)) { return parser_parse_html_self_closing_element(parser, open_tag); } @@ -975,7 +974,7 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_element(parser_T* parser) { hb_array_T* errors = hb_array_init(8); - parser_append_unexpected_error(parser, "Unknown HTML open tag type", "HTMLOpenTag or HTMLSelfCloseTag", errors); + parser_append_unexpected_error(parser, hb_string("Unknown HTML open tag type"), hb_string("HTMLOpenTag or HTMLSelfCloseTag"), errors); return ast_html_element_node_init( open_tag, @@ -1047,9 +1046,9 @@ static void 
parser_parse_foreign_content(parser_T* parser, hb_array_T* children, token_T* next_token = lexer_next_token(parser->lexer); bool is_potential_match = false; - if (next_token && next_token->type == TOKEN_IDENTIFIER && next_token->value) { + if (next_token && next_token->type == TOKEN_IDENTIFIER && !hb_string_is_empty(next_token->value)) { is_potential_match = - parser_is_expected_closing_tag_name(hb_string(next_token->value), parser->foreign_content_type); + parser_is_expected_closing_tag_name(next_token->value, parser->foreign_content_type); } lexer_restore_state(parser->lexer, saved_state); @@ -1067,7 +1066,7 @@ static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children, } token_T* token = parser_advance(parser); - hb_buffer_append(&content, token->value); + hb_buffer_append_string(&content, token->value); token_free(token); } @@ -1135,9 +1134,9 @@ static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, h parser_append_unexpected_error( parser, - "Unexpected token", - "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, " - "TOKEN_NBSP, TOKEN_AT, TOKEN_BACKSLASH, or TOKEN_NEWLINE", + hb_string("Unexpected token"), + hb_string("TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, " + "TOKEN_NBSP, TOKEN_AT, TOKEN_BACKSLASH, or TOKEN_NEWLINE"), errors ); } @@ -1171,7 +1170,7 @@ static void parser_parse_stray_closing_tags(parser_T* parser, hb_array_T* childr AST_HTML_CLOSE_TAG_NODE_T* close_tag = parser_parse_html_close_tag(parser); - if (!is_void_element(hb_string(close_tag->tag_name->value))) { + if (!is_void_element(close_tag->tag_name->value)) { append_missing_opening_tag_error( close_tag->tag_name, close_tag->base.location.start, diff --git a/src/parser_helpers.c b/src/parser_helpers.c index f34d97864..3da3bc3b5 100644 --- a/src/parser_helpers.c +++ b/src/parser_helpers.c @@ -22,9 +22,9 @@ bool parser_check_matching_tag(const 
parser_T* parser, hb_string_T tag_name) { if (hb_array_size(parser->open_tags_stack) == 0) { return false; } token_T* top_token = hb_array_last(parser->open_tags_stack); - if (top_token == NULL || top_token->value == NULL) { return false; }; + if (top_token == NULL || hb_string_is_empty(top_token->value)) { return false; }; - return hb_string_equals(hb_string(top_token->value), tag_name); + return hb_string_equals(top_token->value, tag_name); } token_T* parser_pop_open_tag(const parser_T* parser) { @@ -47,9 +47,8 @@ bool parser_in_svg_context(const parser_T* parser) { for (size_t i = 0; i < stack_size; i++) { token_T* tag = (token_T*) hb_array_get(parser->open_tags_stack, i); - if (tag && tag->value) { - hb_string_T tag_value_string = hb_string(tag->value); - if (hb_string_equals(tag_value_string, hb_string("svg"))) { return true; } + if (tag && !hb_string_is_empty(tag->value)) { + if (hb_string_equals_case_insensitive(tag->value, hb_string("svg"))) { return true; } } } @@ -95,8 +94,8 @@ void parser_exit_foreign_content(parser_T* parser) { void parser_append_unexpected_error( parser_T* parser, - const char* description, - const char* expected, + hb_string_T description, + hb_string_T expected, hb_array_T* errors ) { token_T* token = parser_advance(parser); @@ -129,10 +128,9 @@ void parser_append_literal_node_from_buffer( hb_array_T* children, position_T start ) { - if (hb_buffer_length(buffer) == 0) { return; } - + if (buffer->length == 0) { return; } AST_LITERAL_NODE_T* literal = - ast_literal_node_init(hb_buffer_value(buffer), start, parser->current_token->location.start, NULL); + ast_literal_node_init(hb_string(buffer->value), start, parser->current_token->location.start, NULL); if (children != NULL) { hb_array_append(children, literal); } hb_buffer_clear(buffer); diff --git a/src/pretty_print.c b/src/pretty_print.c index be9670ebd..98e908caf 100644 --- a/src/pretty_print.c +++ b/src/pretty_print.c @@ -212,8 +212,8 @@ void pretty_print_token_property( ) { 
pretty_print_label(name, indent, relative_indent, last_property, buffer); - if (token != NULL && token->value != NULL) { - hb_string_T quoted = quoted_string(hb_string(token->value)); + if (token != NULL && !hb_string_is_empty(token->value)) { + hb_string_T quoted = quoted_string(token->value); hb_buffer_append_string(buffer, quoted); free(quoted.data); diff --git a/src/prism_helpers.c b/src/prism_helpers.c index 06ac155b5..22cef65c6 100644 --- a/src/prism_helpers.c +++ b/src/prism_helpers.c @@ -44,9 +44,9 @@ RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error( position_T end = position_from_source_with_offset(source, end_offset); return ruby_parse_error_init( - error->message, - pm_diagnostic_id_human(error->diag_id), - pm_error_level_to_string(error->level), + hb_string(error->message), + hb_string(pm_diagnostic_id_human(error->diag_id)), + hb_string(pm_error_level_to_string(error->level)), start, end ); diff --git a/src/token.c b/src/token.c index b6ef42fe8..50ffeec91 100644 --- a/src/token.c +++ b/src/token.c @@ -4,6 +4,8 @@ #include "include/range.h" #include "include/token_struct.h" #include "include/util.h" +#include "include/util/hb_buffer.h" +#include "include/util/hb_string.h" #include #include @@ -13,7 +15,7 @@ size_t token_sizeof(void) { return sizeof(struct TOKEN_STRUCT); } -token_T* token_init(const char* value, const token_type_T type, lexer_T* lexer) { +token_T* token_init(hb_string_T value, const token_type_T type, lexer_T* lexer) { token_T* token = calloc(1, token_sizeof()); if (type == TOKEN_NEWLINE) { @@ -21,11 +23,7 @@ token_T* token_init(const char* value, const token_type_T type, lexer_T* lexer) lexer->current_column = 0; } - if (value) { - token->value = herb_strdup(value); - } else { - token->value = NULL; - } + token->value = value; token->type = type; token->range = (range_T) { .from = lexer->previous_position, .to = lexer->current_position }; @@ -45,65 +43,66 @@ token_T* token_init(const char* value, const token_type_T type, lexer_T* 
lexer) return token; } -const char* token_type_to_string(const token_type_T type) { +hb_string_T token_type_to_string(const token_type_T type) { switch (type) { - case TOKEN_WHITESPACE: return "TOKEN_WHITESPACE"; - case TOKEN_NBSP: return "TOKEN_NBSP"; - case TOKEN_NEWLINE: return "TOKEN_NEWLINE"; - case TOKEN_IDENTIFIER: return "TOKEN_IDENTIFIER"; - case TOKEN_HTML_DOCTYPE: return "TOKEN_HTML_DOCTYPE"; - case TOKEN_XML_DECLARATION: return "TOKEN_XML_DECLARATION"; - case TOKEN_XML_DECLARATION_END: return "TOKEN_XML_DECLARATION_END"; - case TOKEN_CDATA_START: return "TOKEN_CDATA_START"; - case TOKEN_CDATA_END: return "TOKEN_CDATA_END"; - case TOKEN_HTML_TAG_START: return "TOKEN_HTML_TAG_START"; - case TOKEN_HTML_TAG_END: return "TOKEN_HTML_TAG_END"; - case TOKEN_HTML_TAG_START_CLOSE: return "TOKEN_HTML_TAG_START_CLOSE"; - case TOKEN_HTML_TAG_SELF_CLOSE: return "TOKEN_HTML_TAG_SELF_CLOSE"; - case TOKEN_HTML_COMMENT_START: return "TOKEN_HTML_COMMENT_START"; - case TOKEN_HTML_COMMENT_END: return "TOKEN_HTML_COMMENT_END"; - case TOKEN_EQUALS: return "TOKEN_EQUALS"; - case TOKEN_QUOTE: return "TOKEN_QUOTE"; - case TOKEN_BACKTICK: return "TOKEN_BACKTICK"; - case TOKEN_BACKSLASH: return "TOKEN_BACKSLASH"; - case TOKEN_DASH: return "TOKEN_DASH"; - case TOKEN_UNDERSCORE: return "TOKEN_UNDERSCORE"; - case TOKEN_EXCLAMATION: return "TOKEN_EXCLAMATION"; - case TOKEN_SLASH: return "TOKEN_SLASH"; - case TOKEN_SEMICOLON: return "TOKEN_SEMICOLON"; - case TOKEN_COLON: return "TOKEN_COLON"; - case TOKEN_AT: return "TOKEN_AT"; - case TOKEN_LT: return "TOKEN_LT"; - case TOKEN_PERCENT: return "TOKEN_PERCENT"; - case TOKEN_AMPERSAND: return "TOKEN_AMPERSAND"; - case TOKEN_ERB_START: return "TOKEN_ERB_START"; - case TOKEN_ERB_CONTENT: return "TOKEN_ERB_CONTENT"; - case TOKEN_ERB_END: return "TOKEN_ERB_END"; - case TOKEN_CHARACTER: return "TOKEN_CHARACTER"; - case TOKEN_ERROR: return "TOKEN_ERROR"; - case TOKEN_EOF: return "TOKEN_EOF"; + case TOKEN_WHITESPACE: return 
hb_string("TOKEN_WHITESPACE"); + case TOKEN_NBSP: return hb_string("TOKEN_NBSP"); + case TOKEN_NEWLINE: return hb_string("TOKEN_NEWLINE"); + case TOKEN_IDENTIFIER: return hb_string("TOKEN_IDENTIFIER"); + case TOKEN_HTML_DOCTYPE: return hb_string("TOKEN_HTML_DOCTYPE"); + case TOKEN_XML_DECLARATION: return hb_string("TOKEN_XML_DECLARATION"); + case TOKEN_XML_DECLARATION_END: return hb_string("TOKEN_XML_DECLARATION_END"); + case TOKEN_CDATA_START: return hb_string("TOKEN_CDATA_START"); + case TOKEN_CDATA_END: return hb_string("TOKEN_CDATA_END"); + case TOKEN_HTML_TAG_START: return hb_string("TOKEN_HTML_TAG_START"); + case TOKEN_HTML_TAG_END: return hb_string("TOKEN_HTML_TAG_END"); + case TOKEN_HTML_TAG_START_CLOSE: return hb_string("TOKEN_HTML_TAG_START_CLOSE"); + case TOKEN_HTML_TAG_SELF_CLOSE: return hb_string("TOKEN_HTML_TAG_SELF_CLOSE"); + case TOKEN_HTML_COMMENT_START: return hb_string("TOKEN_HTML_COMMENT_START"); + case TOKEN_HTML_COMMENT_END: return hb_string("TOKEN_HTML_COMMENT_END"); + case TOKEN_EQUALS: return hb_string("TOKEN_EQUALS"); + case TOKEN_QUOTE: return hb_string("TOKEN_QUOTE"); + case TOKEN_BACKTICK: return hb_string("TOKEN_BACKTICK"); + case TOKEN_BACKSLASH: return hb_string("TOKEN_BACKSLASH"); + case TOKEN_DASH: return hb_string("TOKEN_DASH"); + case TOKEN_UNDERSCORE: return hb_string("TOKEN_UNDERSCORE"); + case TOKEN_EXCLAMATION: return hb_string("TOKEN_EXCLAMATION"); + case TOKEN_SLASH: return hb_string("TOKEN_SLASH"); + case TOKEN_SEMICOLON: return hb_string("TOKEN_SEMICOLON"); + case TOKEN_COLON: return hb_string("TOKEN_COLON"); + case TOKEN_AT: return hb_string("TOKEN_AT"); + case TOKEN_LT: return hb_string("TOKEN_LT"); + case TOKEN_PERCENT: return hb_string("TOKEN_PERCENT"); + case TOKEN_AMPERSAND: return hb_string("TOKEN_AMPERSAND"); + case TOKEN_ERB_START: return hb_string("TOKEN_ERB_START"); + case TOKEN_ERB_CONTENT: return hb_string("TOKEN_ERB_CONTENT"); + case TOKEN_ERB_END: return hb_string("TOKEN_ERB_END"); + case TOKEN_CHARACTER: 
return hb_string("TOKEN_CHARACTER"); + case TOKEN_ERROR: return hb_string("TOKEN_ERROR"); + case TOKEN_EOF: return hb_string("TOKEN_EOF"); } - return "Unknown token_type_T"; + return hb_string("Unknown token_type_T"); } char* token_to_string(const token_T* token) { - const char* type_string = token_type_to_string(token->type); - const char* template = "#"; + hb_string_T type_string = token_type_to_string(token->type); + hb_string_T template = hb_string("#"); - char* string = calloc(strlen(type_string) + strlen(template) + strlen(token->value) + 16, sizeof(char)); + char* string = calloc(template.length + type_string.length + token->value.length + 16, sizeof(char)); hb_string_T escaped; if (token->type == TOKEN_EOF) { escaped = hb_string(herb_strdup("")); } else { - escaped = escape_newlines(hb_string(token->value)); + escaped = escape_newlines(token_value(token)); } sprintf( string, - template, - type_string, + template.data, + type_string.length, + type_string.data, escaped.length, escaped.data, token->range.from, @@ -119,7 +118,7 @@ char* token_to_string(const token_T* token) { return string; } -char* token_value(const token_T* token) { +hb_string_T token_value(const token_T* token) { return token->value; } @@ -134,16 +133,7 @@ token_T* token_copy(token_T* token) { if (!new_token) { return NULL; } - if (token->value) { - new_token->value = herb_strdup(token->value); - - if (!new_token->value) { - free(new_token); - return NULL; - } - } else { - new_token->value = NULL; - } + new_token->value = token->value; new_token->type = token->type; new_token->range = token->range; @@ -152,10 +142,9 @@ token_T* token_copy(token_T* token) { return new_token; } +// TODO: Remove method void token_free(token_T* token) { if (!token) { return; } - if (token->value != NULL) { free(token->value); } - free(token); } diff --git a/src/utf8.c b/src/utf8.c index 9804dbbad..79a8aa5e0 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -1,11 +1,13 @@ #include "include/utf8.h" +#include 
"include/util/hb_string.h" +#include // UTF-8 byte patterns: // 0xxxxxxx = 1 byte (ASCII) // 110xxxxx = 2 bytes // 1110xxxx = 3 bytes // 11110xxx = 4 bytes -int utf8_char_byte_length(unsigned char first_byte) { +uint32_t utf8_char_byte_length(unsigned char first_byte) { if ((first_byte & 0x80) == 0) { return 1; } else if ((first_byte & 0xE0) == 0xC0) { @@ -24,19 +26,17 @@ bool utf8_is_valid_continuation_byte(unsigned char byte) { return (byte & 0xC0) == 0x80; } -int utf8_sequence_length(const char* str, size_t position, size_t max_length) { - if (position >= max_length) { return 0; } +uint32_t utf8_sequence_length(hb_string_T value) { + if (hb_string_is_empty(value)) { return 0; } - unsigned char first_byte = (unsigned char) str[position]; - int expected_length = utf8_char_byte_length(first_byte); - - if (position + expected_length > max_length) { + uint32_t expected_length = utf8_char_byte_length(value.data[0]); + if (value.length - expected_length < expected_length) { return 1; // Not enough bytes, treat as single byte } if (expected_length > 1) { - for (int i = 1; i < expected_length; i++) { - if (!utf8_is_valid_continuation_byte((unsigned char) str[position + i])) { + for (uint32_t i = 1; i < expected_length; i++) { + if (!utf8_is_valid_continuation_byte((unsigned char) value.data[i])) { return 1; // Invalid continuation byte, treat first byte as single byte } } diff --git a/templates/src/ast_nodes.c.erb b/templates/src/ast_nodes.c.erb index 19cef5ef8..c00442570 100644 --- a/templates/src/ast_nodes.c.erb +++ b/templates/src/ast_nodes.c.erb @@ -35,7 +35,7 @@ <%- when Herb::Template::PrismNodeField -%> <%= node.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::StringField -%> - <%= node.human %>-><%= field.name %> = herb_strdup(<%= field.name %>); + <%= node.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::AnalyzedRubyField -%> <%= node.human %>-><%= field.name %> = <%= field.name %>; <%- when 
Herb::Template::VoidPointerField -%> @@ -107,7 +107,6 @@ static void ast_free_<%= node.human %>(<%= node.struct_type %>* <%= node.human % hb_array_free(&<%= node.human %>-><%= field.name %>); } <%- when Herb::Template::StringField -%> - if (<%= node.human %>-><%= field.name %> != NULL) { free((char*) <%= node.human %>-><%= field.name %>); } <%- when Herb::Template::PrismNodeField -%> if (<%= node.human %>-><%= field.name %> != NULL) { // The first argument to `pm_node_destroy` is a `pm_parser_t`, but it's currently unused: diff --git a/templates/src/ast_pretty_print.c.erb b/templates/src/ast_pretty_print.c.erb index f245e0224..b4be01ff5 100644 --- a/templates/src/ast_pretty_print.c.erb +++ b/templates/src/ast_pretty_print.c.erb @@ -41,7 +41,7 @@ void ast_pretty_print_node(AST_NODE_T* node, const size_t indent, const size_t r <%- when Herb::Template::ElementSourceField -%> pretty_print_string_property(element_source_to_string(<%= node.human %>-><%= field.name %>), hb_string("<%= field.name %>"), indent, relative_indent, <%= last %>, buffer); <%- when Herb::Template::StringField -%> - pretty_print_string_property(hb_string(<%= node.human %>-><%= field.name %>), hb_string("<%= field.name %>"), indent, relative_indent, <%= last %>, buffer); + pretty_print_string_property(<%= node.human %>-><%= field.name %>, hb_string("<%= field.name %>"), indent, relative_indent, <%= last %>, buffer); <%- when Herb::Template::PrismNodeField -%> pretty_print_string_property(hb_string("<%= field.name %>"), hb_string("<%= field.name %>"), indent, relative_indent, <%= last %>, buffer); <%- when Herb::Template::NodeField -%> diff --git a/templates/src/errors.c.erb b/templates/src/errors.c.erb index b61edee00..68ccdd840 100644 --- a/templates/src/errors.c.erb +++ b/templates/src/errors.c.erb @@ -42,10 +42,7 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit if (message) { <%- error.message_arguments.each_with_index do |argument, i| -%> <%- if 
error.message_template.scan(/%[sdulfz]/)[i] == "%s" -%> - char truncated_argument_<%= i %>[ERROR_MESSAGES_TRUNCATED_LENGTH + 1]; - strncpy(truncated_argument_<%= i %>, <%= argument %>, ERROR_MESSAGES_TRUNCATED_LENGTH); - truncated_argument_<%= i %>[ERROR_MESSAGES_TRUNCATED_LENGTH] = '\0'; - + hb_string_T truncated_argument_<%= i %> = hb_string_truncate(<%= argument %>, ERROR_MESSAGES_TRUNCATED_LENGTH); <%- end -%> <%- end -%> snprintf( @@ -81,7 +78,7 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit <%- when Herb::Template::SizeTField -%> <%= error.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::StringField -%> - <%= error.human %>-><%= field.name %> = herb_strdup(<%= field.name %>); + <%= error.human %>-><%= field.name %> = <%= field.name %>; <%- else -%> <%= field.inspect %> <%- end -%> @@ -137,7 +134,6 @@ static void error_free_<%= error.human %>(<%= error.struct_type %>* <%= error.hu <%- when Herb::Template::SizeTField -%> // size_t is part of struct <%- when Herb::Template::StringField -%> - if (<%= error.human %>-><%= field.name %> != NULL) { free((char*) <%= error.human %>-><%= field.name %>); } <%- else -%> <%= field.inspect %> <%- end -%> @@ -220,11 +216,11 @@ static void error_pretty_print_<%= error.human %>(<%= error.struct_type %>* erro <%- when Herb::Template::TokenField -%> pretty_print_token_property(error-><%= field.name %>, hb_string("<%= field.name %>"), indent, relative_indent, <%= error.fields.length - 1 == index %>, buffer); <%- when Herb::Template::TokenTypeField -%> - pretty_print_property(hb_string(token_type_to_string(error-><%= field.name %>)), hb_string("<%= field.name %>"), indent, relative_indent, <%= error.fields.length - 1 == index %>, buffer); + pretty_print_property(token_type_to_string(error-><%= field.name %>), hb_string("<%= field.name %>"), indent, relative_indent, <%= error.fields.length - 1 == index %>, buffer); <%- when Herb::Template::SizeTField -%> 
pretty_print_size_t_property(hb_string(error-><%= field.name %>), hb_string("<%= field.name %>"), indent, relative_indent, <%= error.fields.length - 1 == index %>, buffer); <%- when Herb::Template::StringField -%> - pretty_print_quoted_property(hb_string("<%= field.name %>"), hb_string(error-><%= field.name %>), indent, relative_indent, <%= error.fields.length - 1 == index %>, buffer); + pretty_print_quoted_property(hb_string("<%= field.name %>"), error-><%= field.name %>, indent, relative_indent, <%= error.fields.length - 1 == index %>, buffer); <%- else -%> <%= field.inspect %> <%- end -%> diff --git a/templates/template.rb b/templates/template.rb index 749c970db..a651afc22 100755 --- a/templates/template.rb +++ b/templates/template.rb @@ -103,7 +103,7 @@ def ruby_type end def c_type - "const char*" + "hb_string_T" end end diff --git a/test/c/test_token.c b/test/c/test_token.c index 69335ad9c..529ddfcb2 100644 --- a/test/c/test_token.c +++ b/test/c/test_token.c @@ -4,7 +4,7 @@ #include "../../src/include/token.h" TEST(test_token) - ck_assert_str_eq(token_type_to_string(TOKEN_IDENTIFIER), "TOKEN_IDENTIFIER"); + ck_assert(hb_string_equals(token_type_to_string(TOKEN_IDENTIFIER), hb_string("TOKEN_IDENTIFIER"))); END TEST(test_token_to_string)