marcoroth · timkaechele · Oct 17, 2025 · Oct 17, 2025 · Oct 17, 2025 · Oct 17, 2025
diff --git a/compile.sh b/compile.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+# Compile every C file matched by src/*.c to an object file (clang -c),
+# stopping with a non-zero exit status at the first file that fails.
+
+for file in src/*.c
+do
+  clang -Isrc -Ivendor/prism/include -c "$file" || exit 1
+done
diff --git a/src/analyze.c b/src/analyze.c
@@ -51,10 +51,12 @@ static bool analyze_erb_content(const AST_NODE_T* node, void* data) {
   if (node->type == AST_ERB_CONTENT_NODE) {
     AST_ERB_CONTENT_NODE_T* erb_content_node = (AST_ERB_CONTENT_NODE_T*) node;
 
-    const char* opening = erb_content_node->tag_opening->value;
+    hb_string_T opening = erb_content_node->tag_opening->value;
 
-    if (strcmp(opening, "<%%") != 0 && strcmp(opening, "<%%=") != 0 && strcmp(opening, "<%#") != 0) {
-      analyzed_ruby_T* analyzed = herb_analyze_ruby(hb_string(erb_content_node->content->value));
+    if (!hb_string_equals(opening, hb_string("<%%"))
+        && !hb_string_equals(opening, hb_string("<%%="))
+        && !hb_string_equals(opening, hb_string("<%#"))) {
+      analyzed_ruby_T* analyzed = herb_analyze_ruby(erb_content_node->content->value);
 
       erb_content_node->parsed = true;
       erb_content_node->valid = analyzed->valid;

diff --git a/src/ast_node.c b/src/ast_node.c
@@ -31,7 +31,7 @@ AST_LITERAL_NODE_T* ast_literal_node_init_from_token(const token_T* token) {
 
   ast_node_init(&literal->base, AST_LITERAL_NODE, token->location.start, token->location.end, NULL);
 
-  literal->content = herb_strdup(token->value);
+  literal->content = token->value;
 
   return literal;
 }

diff --git a/src/extract.c b/src/extract.c
@@ -1,8 +1,8 @@
 #include "include/herb.h"
 #include "include/io.h"
-#include "include/lexer.h"
 #include "include/util/hb_array.h"
 #include "include/util/hb_buffer.h"
+#include "include/util/hb_string.h"
 
 #include <stdlib.h>
 #include <string.h>
@@ -16,12 +16,14 @@ void herb_extract_ruby_to_buffer_with_semicolons(const char* source, hb_buffer_T
 
     switch (token->type) {
       case TOKEN_NEWLINE: {
-        hb_buffer_append(output, token->value);
+        hb_buffer_append_string(output, token->value);
         break;
       }
 
       case TOKEN_ERB_START: {
-        if (strcmp(token->value, "<%#") == 0 || strcmp(token->value, "<%%") == 0 || strcmp(token->value, "<%%=") == 0) {
+        if (hb_string_equals(token->value, hb_string("<%#"))
+            || hb_string_equals(token->value, hb_string("<%%"))
+            || hb_string_equals(token->value, hb_string("<%%="))) {
           skip_erb_content = true;
         }
 
@@ -31,7 +33,7 @@ void herb_extract_ruby_to_buffer_with_semicolons(const char* source, hb_buffer_T
 
       case TOKEN_ERB_CONTENT: {
         if (skip_erb_content == false) {
-          hb_buffer_append(output, token->value);
+          hb_buffer_append_string(output, token->value);
         } else {
           hb_buffer_append_whitespace(output, range_length(token->range));
         }
@@ -66,12 +68,14 @@ void herb_extract_ruby_to_buffer(const char* source, hb_buffer_T* output) {
 
     switch (token->type) {
       case TOKEN_NEWLINE: {
-        hb_buffer_append(output, token->value);
+        hb_buffer_append_string(output, token->value);
         break;
       }
 
       case TOKEN_ERB_START: {
-        if (strcmp(token->value, "<%#") == 0 || strcmp(token->value, "<%%") == 0 || strcmp(token->value, "<%%=") == 0) {
+        if (hb_string_equals(token->value, hb_string("<%#"))
+            || hb_string_equals(token->value, hb_string("<%%"))
+            || hb_string_equals(token->value, hb_string("<%%="))) {
           skip_erb_content = true;
         }
 
@@ -81,7 +85,7 @@ void herb_extract_ruby_to_buffer(const char* source, hb_buffer_T* output) {
 
       case TOKEN_ERB_CONTENT: {
         if (skip_erb_content == false) {
-          hb_buffer_append(output, token->value);
+          hb_buffer_append_string(output, token->value);
         } else {
           hb_buffer_append_whitespace(output, range_length(token->range));
         }
@@ -115,7 +119,7 @@ void herb_extract_html_to_buffer(const char* source, hb_buffer_T* output) {
       case TOKEN_ERB_START:
       case TOKEN_ERB_CONTENT:
       case TOKEN_ERB_END: hb_buffer_append_whitespace(output, range_length(token->range)); break;
-      default: hb_buffer_append(output, token->value);
+      default: hb_buffer_append_string(output, token->value);
     }
   }
 

diff --git a/src/include/parser_helpers.h b/src/include/parser_helpers.h
@@ -15,8 +15,8 @@ token_T* parser_pop_open_tag(const parser_T* parser);
 
 void parser_append_unexpected_error(
   parser_T* parser,
-  const char* description,
-  const char* expected,
+  hb_string_T description,
+  hb_string_T expected,
   hb_array_T* errors
 );
 void parser_append_unexpected_token_error(parser_T* parser, token_type_T expected_type, hb_array_T* errors);

diff --git a/src/include/token.h b/src/include/token.h
@@ -4,12 +4,13 @@
 #include "lexer_struct.h"
 #include "position.h"
 #include "token_struct.h"
+#include "util/hb_string.h"
 
-token_T* token_init(const char* value, token_type_T type, lexer_T* lexer);
+token_T* token_init(hb_string_T value, token_type_T type, lexer_T* lexer);
 char* token_to_string(const token_T* token);
-const char* token_type_to_string(token_type_T type);
+hb_string_T token_type_to_string(token_type_T type);
 
-char* token_value(const token_T* token);
+hb_string_T token_value(const token_T* token);
 int token_type(const token_T* token);
 
 size_t token_sizeof(void);

diff --git a/src/include/token_struct.h b/src/include/token_struct.h
@@ -3,6 +3,7 @@
 
 #include "location.h"
 #include "range.h"
+#include "util/hb_string.h"
 
 typedef enum {
   TOKEN_WHITESPACE, // ' '
@@ -49,7 +50,7 @@ typedef enum {
 } token_type_T;
 
 typedef struct TOKEN_STRUCT {
-  char* value;
+  hb_string_T value;
   range_T range;
   location_T location;
   token_type_T type;

diff --git a/src/include/utf8.h b/src/include/utf8.h
@@ -1,11 +1,13 @@
 #ifndef HERB_UTF8_H
 #define HERB_UTF8_H
 
+#include "util/hb_string.h"
 #include <stdbool.h>
+#include <stdint.h> // uint32_t, used by the prototypes below
 #include <stdlib.h>
 
-int utf8_char_byte_length(unsigned char first_byte);
-int utf8_sequence_length(const char* str, size_t position, size_t max_length);
+uint32_t utf8_char_byte_length(unsigned char first_byte);
+uint32_t utf8_sequence_length(hb_string_T value);
 bool utf8_is_valid_continuation_byte(unsigned char byte);
 
 #endif