From 6cb316da30cb460d54c4eec6fe15e034c89a7b36 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Fri, 17 Oct 2025 17:00:49 +0200
Subject: [PATCH 01/33] Make token value hb_string

---
 src/include/token_struct.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/include/token_struct.h b/src/include/token_struct.h
index 2727d2a4c..595bf27d1 100644
--- a/src/include/token_struct.h
+++ b/src/include/token_struct.h
@@ -3,6 +3,7 @@
 
 #include "location.h"
 #include "range.h"
+#include "util/hb_string.h"
 
 typedef enum {
   TOKEN_WHITESPACE, // ' '
@@ -49,7 +50,7 @@ typedef enum {
 } token_type_T;
 
 typedef struct TOKEN_STRUCT {
-  char* value;
+  hb_string_T value;
   range_T range;
   location_T location;
   token_type_T type;

From 74ceb0e824b0c0cd5afa50bba1e7c9b4b46489b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Fri, 17 Oct 2025 17:02:12 +0200
Subject: [PATCH 02/33] Make token_init take hb_string value

---
 src/include/token.h | 3 ++-
 src/token.c         | 9 +++------
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/include/token.h b/src/include/token.h
index 5628e2f0d..7c8dd8778 100644
--- a/src/include/token.h
+++ b/src/include/token.h
@@ -4,8 +4,9 @@
 #include "lexer_struct.h"
 #include "position.h"
 #include "token_struct.h"
+#include "util/hb_string.h"
 
-token_T* token_init(const char* value, token_type_T type, lexer_T* lexer);
+token_T* token_init(hb_string_T value, token_type_T type, lexer_T* lexer);
 char* token_to_string(const token_T* token);
 const char* token_type_to_string(token_type_T type);
 
diff --git a/src/token.c b/src/token.c
index b6ef42fe8..e0fd81ff8 100644
--- a/src/token.c
+++ b/src/token.c
@@ -4,6 +4,7 @@
 #include "include/range.h"
 #include "include/token_struct.h"
 #include "include/util.h"
+#include "include/util/hb_string.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -13,7 +14,7 @@ size_t token_sizeof(void) {
   return sizeof(struct TOKEN_STRUCT);
 }
 
-token_T* token_init(const char* value, const token_type_T type, lexer_T* lexer) {
+token_T* token_init(hb_string_T value, const token_type_T type, lexer_T* lexer) {
   token_T* token = calloc(1, token_sizeof());
 
   if (type == TOKEN_NEWLINE) {
@@ -21,11 +22,7 @@ token_T* token_init(const char* value, const token_type_T type, lexer_T* lexer)
     lexer->current_column = 0;
   }
 
-  if (value) {
-    token->value = herb_strdup(value);
-  } else {
-    token->value = NULL;
-  }
+  token->value = value;
 
   token->type = type;
   token->range = (range_T) { .from = lexer->previous_position, .to = lexer->current_position };

From 2de7ee36fe9d1f0f0fec8761d21f4f138edcfcf6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Fri, 17 Oct 2025 17:04:48 +0200
Subject: [PATCH 03/33] Allocate lexer errors using the arena

---
 src/lexer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/lexer.c b/src/lexer.c
index 45995751c..7de89dc64 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -55,7 +55,7 @@ void lexer_init(lexer_T* lexer, const char* source) {
 }
 
 token_T* lexer_error(lexer_T* lexer, const char* message) {
-  char error_message[128];
+  char *error_message = hb_arena_alloc(lexer->allocator, sizeof(char) * 128);
 
   snprintf(
     error_message,
@@ -67,7 +67,7 @@ token_T* lexer_error(lexer_T* lexer, const char* message) {
     lexer->current_column
   );
 
-  return token_init(error_message, TOKEN_ERROR, lexer);
+  return token_init(hb_string_from_c_string(error_message), TOKEN_ERROR, lexer);
 }
 
 static void lexer_advance(lexer_T* lexer) {

From ef6120c29c266032321bd578ff17c359d4bc5f42 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Fri, 17 Oct 2025 17:14:21 +0200
Subject: [PATCH 04/33] Make lexer_match_and_advance take string

---
 src/lexer.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/src/lexer.c b/src/lexer.c
index 7de89dc64..85d106ca3 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -55,7 +55,7 @@ void lexer_init(lexer_T* lexer, const char* source) {
 }
 
 token_T* lexer_error(lexer_T* lexer, const char* message) {
-  char *error_message = hb_arena_alloc(lexer->allocator, sizeof(char) * 128);
+  char* error_message = hb_arena_alloc(lexer->allocator, sizeof(char) * 128);
 
   snprintf(
     error_message,
@@ -157,9 +157,11 @@ static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T
   return token;
 }
 
-static token_T* lexer_match_and_advance(lexer_T* lexer, const char* value, const token_type_T type) {
-  if (strncmp(lexer->source.data + lexer->current_position, value, strlen(value)) == 0) {
-    return lexer_advance_with(lexer, value, type);
+static token_T* lexer_match_and_advance(lexer_T* lexer, hb_string_T value, const token_type_T type) {
+  hb_string_T remaining_source = hb_string_slice(lexer->source, lexer->current_position);
+  if (hb_string_starts_with(remaining_source, value)) {
+    // TODO(Tim): Fix string
+    return lexer_advance_with(lexer, value.data, type);
   }
 
   return NULL;
@@ -206,11 +208,13 @@ static token_T* lexer_parse_identifier(lexer_T* lexer) {
 // ===== ERB Parsing
 
 static token_T* lexer_parse_erb_open(lexer_T* lexer) {
-  const char* erb_patterns[] = { "<%==", "<%%=", "<%=", "<%#", "<%-", "<%%", "<%" };
+  hb_string_T erb_patterns[7] = { hb_string_from_c_string("<%=="), hb_string_from_c_string("<%%="),
+                                  hb_string_from_c_string("<%="),  hb_string_from_c_string("<%#"),
+                                  hb_string_from_c_string("<%-"),  hb_string_from_c_string("<%%"),
+                                  hb_string_from_c_string("<%") };
 
   lexer->state = STATE_ERB_CONTENT;
-
-  for (size_t i = 0; i < sizeof(erb_patterns) / sizeof(erb_patterns[0]); i++) {
+  for (size_t i = 0; i < 7; i++) {
     token_T* match = lexer_match_and_advance(lexer, erb_patterns[i], TOKEN_ERB_START);
     if (match) { return match; }
   }
@@ -314,22 +318,22 @@ token_T* lexer_next_token(lexer_T* lexer) {
     }
 
     case '/': {
-      token_T* token = lexer_match_and_advance(lexer, "/>", TOKEN_HTML_TAG_SELF_CLOSE);
+      token_T* token = lexer_match_and_advance(lexer, hb_string_from_c_string("/>"), TOKEN_HTML_TAG_SELF_CLOSE);
       return token ? token : lexer_advance_current(lexer, TOKEN_SLASH);
     }
 
     case '?': {
-      token_T* token = lexer_match_and_advance(lexer, "?>", TOKEN_XML_DECLARATION_END);
+      token_T* token = lexer_match_and_advance(lexer, hb_string_from_c_string("?>"), TOKEN_XML_DECLARATION_END);
       return token ? token : lexer_advance_current(lexer, TOKEN_CHARACTER);
     }
 
     case '-': {
-      token_T* token = lexer_match_and_advance(lexer, "-->", TOKEN_HTML_COMMENT_END);
+      token_T* token = lexer_match_and_advance(lexer, hb_string_from_c_string("-->"), TOKEN_HTML_COMMENT_END);
       return token ? token : lexer_advance_current(lexer, TOKEN_DASH);
     }
 
     case ']': {
-      token_T* token = lexer_match_and_advance(lexer, "]]>", TOKEN_CDATA_END);
+      token_T* token = lexer_match_and_advance(lexer, hb_string_from_c_string("]]>"), TOKEN_CDATA_END);
       return token ? token : lexer_advance_current(lexer, TOKEN_CHARACTER);
     }
 

From 86c995fd43131b21e5e70ee55b52c4937701c04b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Fri, 17 Oct 2025 17:40:13 +0200
Subject: [PATCH 05/33] Make lexer_advance_with take string

---
 src/lexer.c | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/src/lexer.c b/src/lexer.c
index 85d106ca3..3127966e7 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -102,8 +102,8 @@ static void lexer_advance_by(lexer_T* lexer, const size_t count) {
   }
 }
 
-static token_T* lexer_advance_with(lexer_T* lexer, const char* value, const token_type_T type) {
-  lexer_advance_by(lexer, strlen(value));
+static token_T* lexer_advance_with(lexer_T* lexer, hb_string_T value, const token_type_T type) {
+  lexer_advance_by(lexer, value.length);
   return token_init(value, type, lexer);
 }
 
@@ -125,7 +125,8 @@ static token_T* lexer_advance_with_next(lexer_T* lexer, size_t count, token_type
 }
 
 static token_T* lexer_advance_current(lexer_T* lexer, const token_type_T type) {
-  return lexer_advance_with(lexer, (char[]) { lexer->current_character, '\0' }, type);
+  hb_string_T value = { .data = lexer->source.data + lexer->current_position, .length = 1 };
+  return lexer_advance_with(lexer, value, type);
 }
 
 static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T type) {
@@ -159,10 +160,7 @@ static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T
 
 static token_T* lexer_match_and_advance(lexer_T* lexer, hb_string_T value, const token_type_T type) {
   hb_string_T remaining_source = hb_string_slice(lexer->source, lexer->current_position);
-  if (hb_string_starts_with(remaining_source, value)) {
-    // TODO(Tim): Fix string
-    return lexer_advance_with(lexer, value.data, type);
-  }
+  if (hb_string_starts_with(remaining_source, value)) { return lexer_advance_with(lexer, value, type); }
 
   return NULL;
 }
@@ -260,11 +258,17 @@ static token_T* lexer_parse_erb_content(lexer_T* lexer) {
 static token_T* lexer_parse_erb_close(lexer_T* lexer) {
   lexer->state = STATE_DATA;
 
-  if (lexer_peek_erb_percent_close_tag(lexer, 0)) { return lexer_advance_with(lexer, "%%>", TOKEN_ERB_END); }
-  if (lexer_peek_erb_equals_close_tag(lexer, 0)) { return lexer_advance_with(lexer, "=%>", TOKEN_ERB_END); }
-  if (lexer_peek_erb_dash_close_tag(lexer, 0)) { return lexer_advance_with(lexer, "-%>", TOKEN_ERB_END); }
+  if (lexer_peek_erb_percent_close_tag(lexer, 0)) {
+    return lexer_advance_with(lexer, hb_string_from_c_string("%%>"), TOKEN_ERB_END);
+  }
+  if (lexer_peek_erb_equals_close_tag(lexer, 0)) {
+    return lexer_advance_with(lexer, hb_string_from_c_string("=%>"), TOKEN_ERB_END);
+  }
+  if (lexer_peek_erb_dash_close_tag(lexer, 0)) {
+    return lexer_advance_with(lexer, hb_string_from_c_string("-%>"), TOKEN_ERB_END);
+  }
 
-  return lexer_advance_with(lexer, "%>", TOKEN_ERB_END);
+  return lexer_advance_with(lexer, hb_string_from_c_string("%>"), TOKEN_ERB_END);
 }
 
 // ===== Tokenizing Function
@@ -307,11 +311,11 @@ token_T* lexer_next_token(lexer_T* lexer) {
       if (isalnum(lexer_peek(lexer, 1))) { return lexer_advance_current(lexer, TOKEN_HTML_TAG_START); }
 
       if (lexer_peek_for_html_comment_start(lexer, 0)) {
-        return lexer_advance_with(lexer, "<!--", TOKEN_HTML_COMMENT_START);
+        return lexer_advance_with(lexer, hb_string_from_c_string("<!--"), TOKEN_HTML_COMMENT_START);
       }
 
       if (lexer_peek_for_close_tag_start(lexer, 0)) {
-        return lexer_advance_with(lexer, "</", TOKEN_HTML_TAG_START_CLOSE);
+        return lexer_advance_with(lexer, hb_string_from_c_string("</"), TOKEN_HTML_TAG_START_CLOSE);
       }
 
       return lexer_advance_current(lexer, TOKEN_LT);

From fbe3116ba9149e047c44fbcfa511fe2c9fed58be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Fri, 17 Oct 2025 17:34:10 +0200
Subject: [PATCH 06/33] Fix lexer_eof token_init

---
 src/lexer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lexer.c b/src/lexer.c
index 3127966e7..97af68652 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -274,7 +274,7 @@ static token_T* lexer_parse_erb_close(lexer_T* lexer) {
 // ===== Tokenizing Function
 
 token_T* lexer_next_token(lexer_T* lexer) {
-  if (lexer_eof(lexer)) { return token_init("", TOKEN_EOF, lexer); }
+  if (lexer_eof(lexer)) { return token_init(hb_string_from_c_string(""), TOKEN_EOF, lexer); }
   if (lexer_stalled(lexer)) { return lexer_error(lexer, "Lexer stalled after 5 iterations"); }
 
   if (lexer->state == STATE_ERB_CONTENT) { return lexer_parse_erb_content(lexer); }

From d9815ad77868c6c19b03c44156747dffefe90b5d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Fri, 17 Oct 2025 17:39:00 +0200
Subject: [PATCH 07/33] Remove allocations from lexer_advance_with_next

---
 src/lexer.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/lexer.c b/src/lexer.c
index 97af68652..e11c77c7c 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -6,6 +6,7 @@
 #include "include/util/hb_string.h"
 
 #include <ctype.h>
+#include <stdint.h>
 #include <string.h>
 
 #define LEXER_STALL_LIMIT 5
@@ -108,18 +109,19 @@ static token_T* lexer_advance_with(lexer_T* lexer, hb_string_T value, const toke
 }
 
 static token_T* lexer_advance_with_next(lexer_T* lexer, size_t count, token_type_T type) {
-  char* collected = malloc(count + 1);
-  if (!collected) { return NULL; }
+  uint32_t start_position = lexer->current_position;
 
   for (size_t i = 0; i < count; i++) {
-    collected[i] = lexer->current_character;
     lexer_advance(lexer);
   }
 
-  collected[count] = '\0';
+  uint32_t end_position = lexer->current_position;
 
-  token_T* token = token_init(collected, type, lexer);
-  free(collected);
+  token_T* token = token_init(
+    (hb_string_T) { .data = lexer->source.data + start_position, .length = end_position - start_position },
+    type,
+    lexer
+  );
 
   return token;
 }

From 43ed7c2589fa1f44783479d113f3e8f4be55b1cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Fri, 17 Oct 2025 19:04:21 +0200
Subject: [PATCH 08/33] Make utf_8_sequence_length use hb_string

---
 src/include/utf8.h |  3 ++-
 src/utf8.c         | 13 ++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/include/utf8.h b/src/include/utf8.h
index 1b3dbdd78..4c2d6c5da 100644
--- a/src/include/utf8.h
+++ b/src/include/utf8.h
@@ -1,11 +1,12 @@
 #ifndef HERB_UTF8_H
 #define HERB_UTF8_H
 
+#include "util/hb_string.h"
 #include <stdbool.h>
 #include <stdlib.h>
 
 int utf8_char_byte_length(unsigned char first_byte);
-int utf8_sequence_length(const char* str, size_t position, size_t max_length);
+uint32_t utf8_sequence_length(hb_string_T value);
 bool utf8_is_valid_continuation_byte(unsigned char byte);
 
 #endif
diff --git a/src/utf8.c b/src/utf8.c
index 9804dbbad..f24e7015d 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -1,4 +1,5 @@
 #include "include/utf8.h"
+#include "include/util/hb_string.h"
 
 // UTF-8 byte patterns:
 //   0xxxxxxx = 1 byte (ASCII)
@@ -24,19 +25,17 @@ bool utf8_is_valid_continuation_byte(unsigned char byte) {
   return (byte & 0xC0) == 0x80;
 }
 
-int utf8_sequence_length(const char* str, size_t position, size_t max_length) {
-  if (position >= max_length) { return 0; }
+uint32_t utf8_sequence_length(hb_string_T value) {
+  if (hb_string_is_empty(value)) { return 0; }
 
-  unsigned char first_byte = (unsigned char) str[position];
-  int expected_length = utf8_char_byte_length(first_byte);
-
-  if (position + expected_length > max_length) {
+  int expected_length = utf8_char_byte_length((unsigned char) value.data[0]);
+  if (value.length < (uint32_t) expected_length) {
     return 1; // Not enough bytes, treat as single byte
   }
 
   if (expected_length > 1) {
     for (int i = 1; i < expected_length; i++) {
-      if (!utf8_is_valid_continuation_byte((unsigned char) str[position + i])) {
+      if (!utf8_is_valid_continuation_byte((unsigned char) value.data[i])) {
         return 1; // Invalid continuation byte, treat first byte as single byte
       }
     }

From f31ffec55031ee2721cbe7f59800f4282f6ca046 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Fri, 17 Oct 2025 19:30:52 +0200
Subject: [PATCH 09/33] Make lexer_advance_utf8 use hb_string

---
 src/lexer.c | 29 +++++++----------------------
 1 file changed, 7 insertions(+), 22 deletions(-)

diff --git a/src/lexer.c b/src/lexer.c
index e11c77c7c..1af1acc9b 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -1,4 +1,5 @@
 #include "include/lexer_peek_helpers.h"
+#include "include/macros.h"
 #include "include/token.h"
 #include "include/utf8.h"
 #include "include/util.h"
@@ -80,7 +81,7 @@ static void lexer_advance(lexer_T* lexer) {
   }
 }
 
-static void lexer_advance_utf8_bytes(lexer_T* lexer, int byte_count) {
+static void lexer_advance_utf8_bytes(lexer_T* lexer, uint32_t byte_count) {
   if (byte_count <= 0) { return; }
 
   if (lexer_has_more_characters(lexer) && !lexer_eof(lexer)) {
@@ -132,32 +133,16 @@ static token_T* lexer_advance_current(lexer_T* lexer, const token_type_T type) {
 }
 
 static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T type) {
-  int char_byte_length = utf8_sequence_length(lexer->source.data, lexer->current_position, lexer->source.length);
-
+  uint32_t char_byte_length = utf8_sequence_length(hb_string_slice(lexer->source, lexer->current_position));
   if (char_byte_length <= 1) { return lexer_advance_current(lexer, type); }
 
-  char* utf8_char = malloc(char_byte_length + 1);
-
-  if (!utf8_char) { return lexer_advance_current(lexer, type); }
-
-  for (int i = 0; i < char_byte_length; i++) {
-    if (lexer->current_position + i >= lexer->source.length) {
-      free(utf8_char);
-      return lexer_advance_current(lexer, type);
-    }
-
-    utf8_char[i] = lexer->source.data[lexer->current_position + i];
-  }
-
-  utf8_char[char_byte_length] = '\0';
-
+  size_t start_position = lexer->current_position;
   lexer_advance_utf8_bytes(lexer, char_byte_length);
 
-  token_T* token = token_init(utf8_char, type, lexer);
+  hb_string_T utf8_char = hb_string_slice(lexer->source, start_position);
+  utf8_char.length = MIN(char_byte_length, utf8_char.length);
 
-  free(utf8_char);
-
-  return token;
+  return token_init(utf8_char, type, lexer);
 }
 
 static token_T* lexer_match_and_advance(lexer_T* lexer, hb_string_T value, const token_type_T type) {

From 34c1f99988911dae6478ae8704a75a9c7abfc019 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Fri, 17 Oct 2025 19:33:19 +0200
Subject: [PATCH 10/33] Make lexer_parse_whitespace use hb_string

---
 src/lexer.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/lexer.c b/src/lexer.c
index 1af1acc9b..b7b5d38f2 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -155,18 +155,17 @@ static token_T* lexer_match_and_advance(lexer_T* lexer, hb_string_T value, const
 // ===== Specialized Parsers
 
 static token_T* lexer_parse_whitespace(lexer_T* lexer) {
-  hb_buffer_T buffer;
-  hb_buffer_init(&buffer, 128);
-
+  uint32_t start_position = lexer->current_position;
   while (isspace(lexer->current_character) && lexer->current_character != '\n' && lexer->current_character != '\r'
          && !lexer_eof(lexer)) {
-    hb_buffer_append_char(&buffer, lexer->current_character);
     lexer_advance(lexer);
   }
+  uint32_t end_position = lexer->current_position;
 
-  token_T* token = token_init(buffer.value, TOKEN_WHITESPACE, lexer);
+  hb_string_T value = hb_string_slice(lexer->source, start_position);
+  value.length = end_position - start_position;
 
-  free(buffer.value);
+  token_T* token = token_init(value, TOKEN_WHITESPACE, lexer);
 
   return token;
 }

From 88bba1f740cbfd4dc856b2f00f921c12a5306600 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Fri, 17 Oct 2025 19:34:01 +0200
Subject: [PATCH 11/33] Make lexer_parse_erb_content use hb_string

---
 src/lexer.c | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/src/lexer.c b/src/lexer.c
index b7b5d38f2..d3d4d8026 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -171,20 +171,19 @@ static token_T* lexer_parse_whitespace(lexer_T* lexer) {
 }
 
 static token_T* lexer_parse_identifier(lexer_T* lexer) {
-  hb_buffer_T buffer;
-  hb_buffer_init(&buffer, 128);
+  uint32_t start_position = lexer->current_position;
 
   while ((isalnum(lexer->current_character) || lexer->current_character == '-' || lexer->current_character == '_'
           || lexer->current_character == ':')
          && !lexer_peek_for_html_comment_end(lexer, 0) && !lexer_eof(lexer)) {
-
-    hb_buffer_append_char(&buffer, lexer->current_character);
     lexer_advance(lexer);
   }
+  uint32_t end_position = lexer->current_position;
 
-  token_T* token = token_init(buffer.value, TOKEN_IDENTIFIER, lexer);
+  hb_string_T value = hb_string_slice(lexer->source, start_position);
+  value.length = end_position - start_position;
 
-  free(buffer.value);
+  token_T* token = token_init(value, TOKEN_IDENTIFIER, lexer);
 
   return token;
 }
@@ -207,20 +206,17 @@ static token_T* lexer_parse_erb_open(lexer_T* lexer) {
 }
 
 static token_T* lexer_parse_erb_content(lexer_T* lexer) {
-  hb_buffer_T buffer;
-  hb_buffer_init(&buffer, 1024);
+  uint32_t start_position = lexer->current_position;
 
   while (!lexer_peek_erb_end(lexer, 0)) {
     if (lexer_eof(lexer)) {
-      token_T* token = token_init(buffer.value, TOKEN_ERROR, lexer); // Handle unexpected EOF
+      hb_string_T value = hb_string_slice(lexer->source, start_position);
 
-      free(buffer.value);
+      value.length = lexer->current_position - start_position;
 
-      return token;
+      return token_init(value, TOKEN_ERROR, lexer); // Handle unexpected EOF
     }
 
-    hb_buffer_append_char(&buffer, lexer->current_character);
-
     if (is_newline(lexer->current_character)) {
       lexer->current_line++;
       lexer->current_column = 0;
@@ -234,11 +230,11 @@ static token_T* lexer_parse_erb_content(lexer_T* lexer) {
 
   lexer->state = STATE_ERB_CLOSE;
 
-  token_T* token = token_init(buffer.value, TOKEN_ERB_CONTENT, lexer);
-
-  free(buffer.value);
+  uint32_t end_position = lexer->current_position;
+  hb_string_T value = hb_string_slice(lexer->source, start_position);
+  value.length = end_position - start_position;
 
-  return token;
+  return token_init(value, TOKEN_ERB_CONTENT, lexer);
 }
 
 static token_T* lexer_parse_erb_close(lexer_T* lexer) {

From 412b91db6590d74d68bcb7ed6aa5720712d6ed68 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Fri, 17 Oct 2025 23:06:46 +0200
Subject: [PATCH 12/33] Use hb_buffer_append_string in parser instead of
 hb_buffer_append

---
 src/parser.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/parser.c b/src/parser.c
index 3cc25791a..a518c9f99 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -62,7 +62,7 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
     }
 
     token_T* token = parser_advance(parser);
-    hb_buffer_append(&content, token->value);
+    hb_buffer_append_string(&content, token->value);
     token_free(token);
   }
 
@@ -107,7 +107,7 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
     }
 
     token_T* token = parser_advance(parser);
-    hb_buffer_append(&comment, token->value);
+    hb_buffer_append_string(&comment, token->value);
     token_free(token);
   }
 
@@ -152,7 +152,7 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
     }
 
     token_T* token = parser_consume_expected(parser, parser->current_token->type, errors);
-    hb_buffer_append(&content, token->value);
+    hb_buffer_append_string(&content, token->value);
     token_free(token);
   }
 
@@ -199,7 +199,7 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser
     }
 
     token_T* token = parser_advance(parser);
-    hb_buffer_append(&content, token->value);
+    hb_buffer_append_string(&content, token->value);
     token_free(token);
   }
 
@@ -257,7 +257,7 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra
     }
 
     token_T* token = parser_advance(parser);
-    hb_buffer_append(&content, token->value);
+    hb_buffer_append_string(&content, token->value);
     token_free(token);
   }
 
@@ -304,7 +304,7 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
     }
 
     token_T* token = parser_advance(parser);
-    hb_buffer_append(&buffer, token->value);
+    hb_buffer_append_string(&buffer, token->value);
     token_free(token);
   }
 
@@ -364,8 +364,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
 
       if (next_token && next_token->type == TOKEN_QUOTE && opening_quote != NULL
           && strcmp(next_token->value, opening_quote->value) == 0) {
-        hb_buffer_append(&buffer, parser->current_token->value);
-        hb_buffer_append(&buffer, next_token->value);
+        hb_buffer_append_string(&buffer, parser->current_token->value);
+        hb_buffer_append_string(&buffer, next_token->value);
 
         token_free(parser->current_token);
         token_free(next_token);
@@ -379,7 +379,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
       }
     }
 
-    hb_buffer_append(&buffer, parser->current_token->value);
+    hb_buffer_append_string(&buffer, parser->current_token->value);
     token_free(parser->current_token);
 
     parser->current_token = lexer_next_token(parser->lexer);
@@ -407,7 +407,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
       token_free(parser->current_token);
       parser->current_token = potential_closing;
 
-      hb_buffer_append(&buffer, parser->current_token->value);
+      hb_buffer_append_string(&buffer, parser->current_token->value);
       token_free(parser->current_token);
       parser->current_token = lexer_next_token(parser->lexer);
 
@@ -426,7 +426,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
           continue;
         }
 
-        hb_buffer_append(&buffer, parser->current_token->value);
+        hb_buffer_append_string(&buffer, parser->current_token->value);
         token_free(parser->current_token);
 
         parser->current_token = lexer_next_token(parser->lexer);
@@ -581,7 +581,7 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
           range_start = whitespace->range.from;
         }
 
-        hb_buffer_append(&equals_buffer, whitespace->value);
+        hb_buffer_append_string(&equals_buffer, whitespace->value);
         token_free(whitespace);
       }
 
@@ -593,14 +593,14 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
         range_start = equals->range.from;
       }
 
-      hb_buffer_append(&equals_buffer, equals->value);
+      hb_buffer_append_string(&equals_buffer, equals->value);
       equals_end = equals->location.end;
       range_end = equals->range.to;
       token_free(equals);
 
       while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
         token_T* whitespace = parser_advance(parser);
-        hb_buffer_append(&equals_buffer, whitespace->value);
+        hb_buffer_append_string(&equals_buffer, whitespace->value);
         equals_end = whitespace->location.end;
         range_end = whitespace->range.to;
         token_free(whitespace);
@@ -1067,7 +1067,7 @@ static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children,
     }
 
     token_T* token = parser_advance(parser);
-    hb_buffer_append(&content, token->value);
+    hb_buffer_append_string(&content, token->value);
     token_free(token);
   }
 

From 6a6a8b2f4cabdc29eb00b514ba405969e2160712 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Mon, 27 Oct 2025 18:48:22 +0100
Subject: [PATCH 13/33] Fix analyze.c herb_analyze_ruby value usage

---
 src/analyze.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/analyze.c b/src/analyze.c
index 6e83f117e..ef8fe5252 100644
--- a/src/analyze.c
+++ b/src/analyze.c
@@ -51,10 +51,12 @@ static bool analyze_erb_content(const AST_NODE_T* node, void* data) {
   if (node->type == AST_ERB_CONTENT_NODE) {
     AST_ERB_CONTENT_NODE_T* erb_content_node = (AST_ERB_CONTENT_NODE_T*) node;
 
-    const char* opening = erb_content_node->tag_opening->value;
+    hb_string_T opening = erb_content_node->tag_opening->value;
 
-    if (strcmp(opening, "<%%") != 0 && strcmp(opening, "<%%=") != 0 && strcmp(opening, "<%#") != 0) {
-      analyzed_ruby_T* analyzed = herb_analyze_ruby(hb_string(erb_content_node->content->value));
+    if (!hb_string_equals(opening, hb_string("<%%"))
+        && !hb_string_equals(opening, hb_string("<%%="))
+        && !hb_string_equals(opening, hb_string("<%#"))) {
+      analyzed_ruby_T* analyzed = herb_analyze_ruby(erb_content_node->content->value);
 
       erb_content_node->parsed = true;
       erb_content_node->valid = analyzed->valid;

From 493000aa7c88aad2698cc518c3627b8b1cb68770 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Sat, 18 Oct 2025 18:07:30 +0200
Subject: [PATCH 14/33] WIP: Fix src/ast_node.c

---
 src/ast_node.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ast_node.c b/src/ast_node.c
index e66414249..976abfec8 100644
--- a/src/ast_node.c
+++ b/src/ast_node.c
@@ -31,7 +31,7 @@ AST_LITERAL_NODE_T* ast_literal_node_init_from_token(const token_T* token) {
 
   ast_node_init(&literal->base, AST_LITERAL_NODE, token->location.start, token->location.end, NULL);
 
-  literal->content = herb_strdup(token->value);
+  literal->content = NULL; // herb_strdup(token->value);
 
   return literal;
 }

From 5b998f99bb270ef7913c3d1f8a11c5244d129351 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Sat, 18 Oct 2025 18:11:44 +0200
Subject: [PATCH 15/33] WIP: simple compile script

---
 compile.sh | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 compile.sh

diff --git a/compile.sh b/compile.sh
new file mode 100644
index 000000000..037397849
--- /dev/null
+++ b/compile.sh
@@ -0,0 +1,4 @@
+for file in src/*.c
+do
+  clang -Isrc -Ivendor/prism/include -c "$file"
+done

From 4fb057a6b437d42f8290a8fb2d4f3dd7600d7bff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Tue, 28 Oct 2025 07:25:32 +0100
Subject: [PATCH 16/33] WIP: Fix token.c value usages

---
 src/token.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/src/token.c b/src/token.c
index e0fd81ff8..1a3d5aa50 100644
--- a/src/token.c
+++ b/src/token.c
@@ -4,6 +4,7 @@
 #include "include/range.h"
 #include "include/token_struct.h"
 #include "include/util.h"
+#include "include/util/hb_buffer.h"
 #include "include/util/hb_string.h"
 
 #include <stdio.h>
@@ -88,13 +89,13 @@ char* token_to_string(const token_T* token) {
   const char* type_string = token_type_to_string(token->type);
   const char* template = "#<Herb::Token type=\"%s\" value=\"%.*s\" range=[%u, %u] start=(%u:%u) end=(%u:%u)>";
 
-  char* string = calloc(strlen(type_string) + strlen(template) + strlen(token->value) + 16, sizeof(char));
+  char* string = calloc(strlen(type_string) + strlen(template) + token->value.length + 16, sizeof(char));
   hb_string_T escaped;
 
   if (token->type == TOKEN_EOF) {
     escaped = hb_string(herb_strdup("<EOF>"));
   } else {
-    escaped = escape_newlines(hb_string(token->value));
+    escaped = escape_newlines(token->value);
   }
 
   sprintf(
@@ -117,7 +118,11 @@ char* token_to_string(const token_T* token) {
 }
 
 char* token_value(const token_T* token) {
-  return token->value;
+  hb_buffer_T buffer;
+  hb_buffer_init(&buffer, token->value.length);
+  hb_buffer_append_string(&buffer, token->value);
+
+  return buffer.value;
 }
 
 int token_type(const token_T* token) {
@@ -131,16 +136,7 @@ token_T* token_copy(token_T* token) {
 
   if (!new_token) { return NULL; }
 
-  if (token->value) {
-    new_token->value = herb_strdup(token->value);
-
-    if (!new_token->value) {
-      free(new_token);
-      return NULL;
-    }
-  } else {
-    new_token->value = NULL;
-  }
+  new_token->value = token->value;
 
   new_token->type = token->type;
   new_token->range = token->range;
@@ -149,10 +145,11 @@ token_T* token_copy(token_T* token) {
   return new_token;
 }
 
+// TODO: Remove method
 void token_free(token_T* token) {
   if (!token) { return; }
 
-  if (token->value != NULL) { free(token->value); }
+  // if (token->value != NULL) { free(token->value); }
 
   free(token);
 }

From a3d45e78e3057671430066265b497c4e911fb3b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Sat, 18 Oct 2025 18:36:47 +0200
Subject: [PATCH 17/33] Fix extract.c token value usages

---
 src/extract.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/extract.c b/src/extract.c
index 16a6740ad..7ff547da9 100644
--- a/src/extract.c
+++ b/src/extract.c
@@ -1,8 +1,8 @@
 #include "include/herb.h"
 #include "include/io.h"
-#include "include/lexer.h"
 #include "include/util/hb_array.h"
 #include "include/util/hb_buffer.h"
+#include "include/util/hb_string.h"
 
 #include <stdlib.h>
 #include <string.h>
@@ -16,12 +16,14 @@ void herb_extract_ruby_to_buffer_with_semicolons(const char* source, hb_buffer_T
 
     switch (token->type) {
       case TOKEN_NEWLINE: {
-        hb_buffer_append(output, token->value);
+        hb_buffer_append_string(output, token->value);
         break;
       }
 
       case TOKEN_ERB_START: {
-        if (strcmp(token->value, "<%#") == 0 || strcmp(token->value, "<%%") == 0 || strcmp(token->value, "<%%=") == 0) {
+        if (hb_string_equals(token->value, hb_string_from_c_string("<%#"))
+            || hb_string_equals(token->value, hb_string_from_c_string("<%%"))
+            || hb_string_equals(token->value, hb_string_from_c_string("<%%="))) {
           skip_erb_content = true;
         }
 
@@ -31,7 +33,7 @@ void herb_extract_ruby_to_buffer_with_semicolons(const char* source, hb_buffer_T
 
       case TOKEN_ERB_CONTENT: {
         if (skip_erb_content == false) {
-          hb_buffer_append(output, token->value);
+          hb_buffer_append_string(output, token->value);
         } else {
           hb_buffer_append_whitespace(output, range_length(token->range));
         }
@@ -66,12 +68,14 @@ void herb_extract_ruby_to_buffer(const char* source, hb_buffer_T* output) {
 
     switch (token->type) {
       case TOKEN_NEWLINE: {
-        hb_buffer_append(output, token->value);
+        hb_buffer_append_string(output, token->value);
         break;
       }
 
       case TOKEN_ERB_START: {
-        if (strcmp(token->value, "<%#") == 0 || strcmp(token->value, "<%%") == 0 || strcmp(token->value, "<%%=") == 0) {
+        if (hb_string_equals(token->value, hb_string_from_c_string("<%#"))
+            || hb_string_equals(token->value, hb_string_from_c_string("<%%"))
+            || hb_string_equals(token->value, hb_string_from_c_string("<%%="))) {
           skip_erb_content = true;
         }
 
@@ -81,7 +85,7 @@ void herb_extract_ruby_to_buffer(const char* source, hb_buffer_T* output) {
 
       case TOKEN_ERB_CONTENT: {
         if (skip_erb_content == false) {
-          hb_buffer_append(output, token->value);
+          hb_buffer_append_string(output, token->value);
         } else {
           hb_buffer_append_whitespace(output, range_length(token->range));
         }
@@ -115,7 +119,7 @@ void herb_extract_html_to_buffer(const char* source, hb_buffer_T* output) {
       case TOKEN_ERB_START:
       case TOKEN_ERB_CONTENT:
       case TOKEN_ERB_END: hb_buffer_append_whitespace(output, range_length(token->range)); break;
-      default: hb_buffer_append(output, token->value);
+      default: hb_buffer_append_string(output, token->value);
     }
   }
 

From 8ab24e21868f6809fcbec8e8c3f52746b76f3734 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Mon, 27 Oct 2025 18:51:10 +0100
Subject: [PATCH 18/33] Fix pretty print token value usages

---
 src/pretty_print.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pretty_print.c b/src/pretty_print.c
index be9670ebd..98e908caf 100644
--- a/src/pretty_print.c
+++ b/src/pretty_print.c
@@ -212,8 +212,8 @@ void pretty_print_token_property(
 ) {
   pretty_print_label(name, indent, relative_indent, last_property, buffer);
 
-  if (token != NULL && token->value != NULL) {
-    hb_string_T quoted = quoted_string(hb_string(token->value));
+  if (token != NULL && !hb_string_is_empty(token->value)) {
+    hb_string_T quoted = quoted_string(token->value);
     hb_buffer_append_string(buffer, quoted);
     free(quoted.data);
 

From 5645897824d9466593682351cc368b1127c82d5f Mon Sep 17 00:00:00 2001
From: Tim Kaechele <mail@timkaechele.me>
Date: Sun, 19 Oct 2025 21:57:09 +0200
Subject: [PATCH 19/33] Use correct error message length in lexer_error method

---
 src/lexer.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/lexer.c b/src/lexer.c
index d3d4d8026..fdbb254fb 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -57,11 +57,12 @@ void lexer_init(lexer_T* lexer, const char* source) {
 }
 
 token_T* lexer_error(lexer_T* lexer, const char* message) {
-  char* error_message = hb_arena_alloc(lexer->allocator, sizeof(char) * 128);
+  size_t error_message_length = 128;
+  char* error_message = hb_arena_alloc(lexer->allocator, sizeof(char) * error_message_length);
 
   snprintf(
     error_message,
-    sizeof(error_message),
+    error_message_length,
     "[Lexer] Error: %s (character '%c', line %u, col %u)\n",
     message,
     lexer->current_character,

From ce6508a82ff45ac2d12bfe99c3a48a891f08166f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Mon, 27 Oct 2025 18:52:23 +0100
Subject: [PATCH 20/33] Make parser_helper compatible with hb_string token
 value

---
 src/parser_helpers.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/parser_helpers.c b/src/parser_helpers.c
index f34d97864..a889a678e 100644
--- a/src/parser_helpers.c
+++ b/src/parser_helpers.c
@@ -22,9 +22,9 @@ bool parser_check_matching_tag(const parser_T* parser, hb_string_T tag_name) {
   if (hb_array_size(parser->open_tags_stack) == 0) { return false; }
 
   token_T* top_token = hb_array_last(parser->open_tags_stack);
-  if (top_token == NULL || top_token->value == NULL) { return false; };
+  if (top_token == NULL || hb_string_is_empty(top_token->value)) { return false; };
 
-  return hb_string_equals(hb_string(top_token->value), tag_name);
+  return hb_string_equals(top_token->value, tag_name);
 }
 
 token_T* parser_pop_open_tag(const parser_T* parser) {
@@ -47,9 +47,8 @@ bool parser_in_svg_context(const parser_T* parser) {
   for (size_t i = 0; i < stack_size; i++) {
     token_T* tag = (token_T*) hb_array_get(parser->open_tags_stack, i);
 
-    if (tag && tag->value) {
-      hb_string_T tag_value_string = hb_string(tag->value);
-      if (hb_string_equals(tag_value_string, hb_string("svg"))) { return true; }
+    if (tag && !hb_string_is_empty(tag->value)) {
+      if (hb_string_equals_case_insensitive(tag->value, hb_string("svg"))) { return true; }
     }
   }
 

From ebaf1da5ceeddd61dc4bbdceec3aaefb3c1236e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Mon, 27 Oct 2025 18:55:50 +0100
Subject: [PATCH 21/33] WIP: Fix token value usages in parser.c

---
 src/parser.c | 41 ++++++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/src/parser.c b/src/parser.c
index a518c9f99..de58cc58d 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -345,7 +345,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
   while (!token_is(parser, TOKEN_EOF)
          && !(
            token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
-           && strcmp(parser->current_token->value, opening_quote->value) == 0
+           && hb_string_equals(parser->current_token->value, opening_quote->value)
          )) {
     if (token_is(parser, TOKEN_ERB_START)) {
       parser_append_literal_node_from_buffer(parser, &buffer, children, start);
@@ -363,7 +363,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
       token_T* next_token = lexer_next_token(parser->lexer);
 
       if (next_token && next_token->type == TOKEN_QUOTE && opening_quote != NULL
-          && strcmp(next_token->value, opening_quote->value) == 0) {
+          && hb_string_equals(next_token->value, opening_quote->value)) {
         hb_buffer_append_string(&buffer, parser->current_token->value);
         hb_buffer_append_string(&buffer, next_token->value);
 
@@ -386,7 +386,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
   }
 
   if (token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
-      && strcmp(parser->current_token->value, opening_quote->value) == 0) {
+      && hb_string_equals(parser->current_token->value, opening_quote->value)) {
     lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer);
 
     token_T* potential_closing = parser->current_token;
@@ -414,7 +414,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
       while (!token_is(parser, TOKEN_EOF)
              && !(
                token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
-               && strcmp(parser->current_token->value, opening_quote->value) == 0
+               && hb_string_equals(parser->current_token->value, opening_quote->value)
              )) {
         if (token_is(parser, TOKEN_ERB_START)) {
           parser_append_literal_node_from_buffer(parser, &buffer, children, start);
@@ -444,7 +444,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
 
   token_T* closing_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors);
 
-  if (opening_quote != NULL && closing_quote != NULL && strcmp(opening_quote->value, closing_quote->value) != 0) {
+  if (opening_quote != NULL && closing_quote != NULL && !hb_string_equals(opening_quote->value, closing_quote->value)) {
     append_quotes_mismatch_error(
       opening_quote,
       closing_quote,
@@ -608,12 +608,11 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
 
       token_T* equals_with_whitespace = calloc(1, sizeof(token_T));
       equals_with_whitespace->type = TOKEN_EQUALS;
-      equals_with_whitespace->value = herb_strdup(equals_buffer.value);
+      // TODO(Tim): This is a leak
+      equals_with_whitespace->value = hb_string_from_c_string(equals_buffer.value);
       equals_with_whitespace->location = (location_T) { .start = equals_start, .end = equals_end };
       equals_with_whitespace->range = (range_T) { .from = range_start, .to = range_end };
 
-      free(equals_buffer.value);
-
       AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
 
       return ast_html_attribute_node_init(
@@ -719,8 +718,8 @@ static bool parser_lookahead_erb_is_attribute(lexer_T* lexer) {
 }
 
 static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children) {
-  bool is_output_tag = parser->current_token->value && strlen(parser->current_token->value) >= 3
-                    && strncmp(parser->current_token->value, "<%=", 3) == 0;
+  bool is_output_tag = !hb_string_is_empty(parser->current_token->value)
+    && hb_string_starts_with(parser->current_token->value, hb_string_from_c_string("<%="));
 
   if (!is_output_tag) {
     hb_array_append(children, parser_parse_erb_tag(parser));
@@ -858,9 +857,9 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
 
   token_T* tag_closing = parser_consume_expected(parser, TOKEN_HTML_TAG_END, errors);
 
-  if (tag_name != NULL && is_void_element(hb_string(tag_name->value)) && parser_in_svg_context(parser) == false) {
-    hb_string_T expected = html_self_closing_tag_string(hb_string(tag_name->value));
-    hb_string_T got = html_closing_tag_string(hb_string(tag_name->value));
+  if (tag_name != NULL && is_void_element(tag_name->value) && parser_in_svg_context(parser) == false) {
+    hb_string_T expected = html_self_closing_tag_string(tag_name->value);
+    hb_string_T got = html_closing_tag_string(tag_name->value);
 
     append_void_element_closing_tag_error(
       tag_name,
@@ -919,8 +918,8 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
 
   parser_push_open_tag(parser, open_tag->tag_name);
 
-  if (open_tag->tag_name->value && parser_is_foreign_content_tag(hb_string(open_tag->tag_name->value))) {
-    foreign_content_type_T content_type = parser_get_foreign_content_type(hb_string(open_tag->tag_name->value));
+  if (!hb_string_is_empty(open_tag->tag_name->value) && parser_is_foreign_content_tag(open_tag->tag_name->value)) {
+    foreign_content_type_T content_type = parser_get_foreign_content_type(open_tag->tag_name->value);
     parser_enter_foreign_content(parser, content_type);
     parser_parse_foreign_content(parser, body, errors);
   } else {
@@ -931,13 +930,13 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
 
   AST_HTML_CLOSE_TAG_NODE_T* close_tag = parser_parse_html_close_tag(parser);
 
-  if (parser_in_svg_context(parser) == false && is_void_element(hb_string(close_tag->tag_name->value))) {
+  if (parser_in_svg_context(parser) == false && is_void_element(close_tag->tag_name->value)) {
     hb_array_push(body, close_tag);
     parser_parse_in_data_state(parser, body, errors);
     close_tag = parser_parse_html_close_tag(parser);
   }
 
-  bool matches_stack = parser_check_matching_tag(parser, hb_string(close_tag->tag_name->value));
+  bool matches_stack = parser_check_matching_tag(parser, close_tag->tag_name->value);
 
   if (matches_stack) {
     token_T* popped_token = parser_pop_open_tag(parser);
@@ -966,7 +965,7 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_element(parser_T* parser) {
   if (open_tag->is_void) { return parser_parse_html_self_closing_element(parser, open_tag); }
 
   // <tag>, in void element list, and not in inside an <svg> element
-  if (!open_tag->is_void && is_void_element(hb_string(open_tag->tag_name->value)) && !parser_in_svg_context(parser)) {
+  if (!open_tag->is_void && is_void_element(open_tag->tag_name->value) && !parser_in_svg_context(parser)) {
     return parser_parse_html_self_closing_element(parser, open_tag);
   }
 
@@ -1047,9 +1046,9 @@ static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children,
       token_T* next_token = lexer_next_token(parser->lexer);
       bool is_potential_match = false;
 
-      if (next_token && next_token->type == TOKEN_IDENTIFIER && next_token->value) {
+      if (next_token && next_token->type == TOKEN_IDENTIFIER && !hb_string_is_empty(next_token->value)) {
         is_potential_match =
-          parser_is_expected_closing_tag_name(hb_string(next_token->value), parser->foreign_content_type);
+          parser_is_expected_closing_tag_name(next_token->value, parser->foreign_content_type);
       }
 
       lexer_restore_state(parser->lexer, saved_state);
@@ -1171,7 +1170,7 @@ static void parser_parse_stray_closing_tags(parser_T* parser, hb_array_T* childr
 
     AST_HTML_CLOSE_TAG_NODE_T* close_tag = parser_parse_html_close_tag(parser);
 
-    if (!is_void_element(hb_string(close_tag->tag_name->value))) {
+    if (!is_void_element(close_tag->tag_name->value)) {
       append_missing_opening_tag_error(
         close_tag->tag_name,
         close_tag->base.location.start,

From 3bf30ad497dc30b65eb7f25114555ea1af8c9718 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Mon, 27 Oct 2025 18:58:17 +0100
Subject: [PATCH 22/33] Fix hb_string_from_c_string usages in lexer.c

---
 src/lexer.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/lexer.c b/src/lexer.c
index fdbb254fb..89c6b40f3 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -70,7 +70,7 @@ token_T* lexer_error(lexer_T* lexer, const char* message) {
     lexer->current_column
   );
 
-  return token_init(hb_string_from_c_string(error_message), TOKEN_ERROR, lexer);
+  return token_init(hb_string(error_message), TOKEN_ERROR, lexer);
 }
 
 static void lexer_advance(lexer_T* lexer) {
@@ -192,10 +192,10 @@ static token_T* lexer_parse_identifier(lexer_T* lexer) {
 // ===== ERB Parsing
 
 static token_T* lexer_parse_erb_open(lexer_T* lexer) {
-  hb_string_T erb_patterns[7] = { hb_string_from_c_string("<%=="), hb_string_from_c_string("<%%="),
-                                  hb_string_from_c_string("<%="),  hb_string_from_c_string("<%#"),
-                                  hb_string_from_c_string("<%-"),  hb_string_from_c_string("<%%"),
-                                  hb_string_from_c_string("<%") };
+  hb_string_T erb_patterns[7] = { hb_string("<%=="), hb_string("<%%="),
+                                  hb_string("<%="),  hb_string("<%#"),
+                                  hb_string("<%-"),  hb_string("<%%"),
+                                  hb_string("<%") };
 
   lexer->state = STATE_ERB_CONTENT;
   for (size_t i = 0; i < 7; i++) {
@@ -207,7 +207,7 @@ static token_T* lexer_parse_erb_open(lexer_T* lexer) {
 }
 
 static token_T* lexer_parse_erb_content(lexer_T* lexer) {
-
+  size_t start_position = lexer->current_position;
   while (!lexer_peek_erb_end(lexer, 0)) {
     if (lexer_eof(lexer)) {
       uint32_t end_position = lexer->current_position;
@@ -242,22 +242,22 @@ static token_T* lexer_parse_erb_close(lexer_T* lexer) {
   lexer->state = STATE_DATA;
 
   if (lexer_peek_erb_percent_close_tag(lexer, 0)) {
-    return lexer_advance_with(lexer, hb_string_from_c_string("%%>"), TOKEN_ERB_END);
+    return lexer_advance_with(lexer, hb_string("%%>"), TOKEN_ERB_END);
   }
   if (lexer_peek_erb_equals_close_tag(lexer, 0)) {
-    return lexer_advance_with(lexer, hb_string_from_c_string("=%>"), TOKEN_ERB_END);
+    return lexer_advance_with(lexer, hb_string("=%>"), TOKEN_ERB_END);
   }
   if (lexer_peek_erb_dash_close_tag(lexer, 0)) {
-    return lexer_advance_with(lexer, hb_string_from_c_string("-%>"), TOKEN_ERB_END);
+    return lexer_advance_with(lexer, hb_string("-%>"), TOKEN_ERB_END);
   }
 
-  return lexer_advance_with(lexer, hb_string_from_c_string("%>"), TOKEN_ERB_END);
+  return lexer_advance_with(lexer, hb_string("%>"), TOKEN_ERB_END);
 }
 
 // ===== Tokenizing Function
 
 token_T* lexer_next_token(lexer_T* lexer) {
-  if (lexer_eof(lexer)) { token_init(hb_string_from_c_string(""), TOKEN_EOF, lexer); }
+  if (lexer_eof(lexer)) { return token_init(hb_string(""), TOKEN_EOF, lexer); }
   if (lexer_stalled(lexer)) { return lexer_error(lexer, "Lexer stalled after 5 iterations"); }
 
   if (lexer->state == STATE_ERB_CONTENT) { return lexer_parse_erb_content(lexer); }
@@ -294,33 +294,33 @@ token_T* lexer_next_token(lexer_T* lexer) {
       if (isalnum(lexer_peek(lexer, 1))) { return lexer_advance_current(lexer, TOKEN_HTML_TAG_START); }
 
       if (lexer_peek_for_html_comment_start(lexer, 0)) {
-        return lexer_advance_with(lexer, hb_string_from_c_string("<!--"), TOKEN_HTML_COMMENT_START);
+        return lexer_advance_with(lexer, hb_string("<!--"), TOKEN_HTML_COMMENT_START);
       }
 
       if (lexer_peek_for_close_tag_start(lexer, 0)) {
-        return lexer_advance_with(lexer, hb_string_from_c_string("</"), TOKEN_HTML_TAG_START_CLOSE);
+        return lexer_advance_with(lexer, hb_string("</"), TOKEN_HTML_TAG_START_CLOSE);
       }
 
       return lexer_advance_current(lexer, TOKEN_LT);
     }
 
     case '/': {
-      token_T* token = lexer_match_and_advance(lexer, hb_string_from_c_string("/>"), TOKEN_HTML_TAG_SELF_CLOSE);
+      token_T* token = lexer_match_and_advance(lexer, hb_string("/>"), TOKEN_HTML_TAG_SELF_CLOSE);
       return token ? token : lexer_advance_current(lexer, TOKEN_SLASH);
     }
 
     case '?': {
-      token_T* token = lexer_match_and_advance(lexer, hb_string_from_c_string("?>"), TOKEN_XML_DECLARATION_END);
+      token_T* token = lexer_match_and_advance(lexer, hb_string("?>"), TOKEN_XML_DECLARATION_END);
       return token ? token : lexer_advance_current(lexer, TOKEN_CHARACTER);
     }
 
     case '-': {
-      token_T* token = lexer_match_and_advance(lexer, hb_string_from_c_string("-->"), TOKEN_HTML_COMMENT_END);
+      token_T* token = lexer_match_and_advance(lexer, hb_string("-->"), TOKEN_HTML_COMMENT_END);
       return token ? token : lexer_advance_current(lexer, TOKEN_DASH);
     }
 
     case ']': {
-      token_T* token = lexer_match_and_advance(lexer, hb_string_from_c_string("]]>"), TOKEN_CDATA_END);
+      token_T* token = lexer_match_and_advance(lexer, hb_string("]]>"), TOKEN_CDATA_END);
       return token ? token : lexer_advance_current(lexer, TOKEN_CHARACTER);
     }
 

From 70c32cd7d6fba2e1d6b7763fae6fe5f9698f6c9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Mon, 27 Oct 2025 18:59:13 +0100
Subject: [PATCH 23/33] Fix hb_string_from_c_string usages in extract.c

---
 src/extract.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/extract.c b/src/extract.c
index 7ff547da9..f0afde2f3 100644
--- a/src/extract.c
+++ b/src/extract.c
@@ -21,9 +21,9 @@ void herb_extract_ruby_to_buffer_with_semicolons(const char* source, hb_buffer_T
       }
 
       case TOKEN_ERB_START: {
-        if (hb_string_equals(token->value, hb_string_from_c_string("<%#"))
-            || hb_string_equals(token->value, hb_string_from_c_string("<%%"))
-            || hb_string_equals(token->value, hb_string_from_c_string("<%%="))) {
+        if (hb_string_equals(token->value, hb_string("<%#"))
+            || hb_string_equals(token->value, hb_string("<%%"))
+            || hb_string_equals(token->value, hb_string("<%%="))) {
           skip_erb_content = true;
         }
 
@@ -73,9 +73,9 @@ void herb_extract_ruby_to_buffer(const char* source, hb_buffer_T* output) {
       }
 
       case TOKEN_ERB_START: {
-        if (hb_string_equals(token->value, hb_string_from_c_string("<%#"))
-            || hb_string_equals(token->value, hb_string_from_c_string("<%%"))
-            || hb_string_equals(token->value, hb_string_from_c_string("<%%="))) {
+        if (hb_string_equals(token->value, hb_string("<%#"))
+            || hb_string_equals(token->value, hb_string("<%%"))
+            || hb_string_equals(token->value, hb_string("<%%="))) {
           skip_erb_content = true;
         }
 

From ca1fa0bb616989883b8c4f4fbcc234c8ed030d1a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Mon, 27 Oct 2025 18:59:54 +0100
Subject: [PATCH 24/33] Fix hb_string_from_c_string usages in parser.c

---
 src/parser.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/parser.c b/src/parser.c
index de58cc58d..c76002c79 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -609,7 +609,7 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
       token_T* equals_with_whitespace = calloc(1, sizeof(token_T));
       equals_with_whitespace->type = TOKEN_EQUALS;
       // TODO(Tim): This is a leak
-      equals_with_whitespace->value = hb_string_from_c_string(equals_buffer.value);
+      equals_with_whitespace->value = hb_string(equals_buffer.value);
       equals_with_whitespace->location = (location_T) { .start = equals_start, .end = equals_end };
       equals_with_whitespace->range = (range_T) { .from = range_start, .to = range_end };
 
@@ -719,7 +719,7 @@ static bool parser_lookahead_erb_is_attribute(lexer_T* lexer) {
 
 static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children) {
   bool is_output_tag = !hb_string_is_empty(parser->current_token->value)
-    && hb_string_starts_with(parser->current_token->value, hb_string_from_c_string("<%="));
+    && hb_string_starts_with(parser->current_token->value, hb_string("<%="));
 
   if (!is_output_tag) {
     hb_array_append(children, parser_parse_erb_tag(parser));

From 78d0f7899d71983ebd0174840670cf08f0065c26 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Tue, 28 Oct 2025 07:28:46 +0100
Subject: [PATCH 25/33] WIP: Use hb_string in token_type_to_string

---
 src/include/token.h |  4 +-
 src/token.c         | 95 +++++++++++++++++++++------------------------
 2 files changed, 47 insertions(+), 52 deletions(-)

diff --git a/src/include/token.h b/src/include/token.h
index 7c8dd8778..2f4c0d29d 100644
--- a/src/include/token.h
+++ b/src/include/token.h
@@ -8,9 +8,9 @@
 
 token_T* token_init(hb_string_T value, token_type_T type, lexer_T* lexer);
 char* token_to_string(const token_T* token);
-const char* token_type_to_string(token_type_T type);
+hb_string_T token_type_to_string(token_type_T type);
 
-char* token_value(const token_T* token);
+hb_string_T token_value(const token_T* token);
 int token_type(const token_T* token);
 
 size_t token_sizeof(void);
diff --git a/src/token.c b/src/token.c
index 1a3d5aa50..50ffeec91 100644
--- a/src/token.c
+++ b/src/token.c
@@ -43,53 +43,53 @@ token_T* token_init(hb_string_T value, const token_type_T type, lexer_T* lexer)
   return token;
 }
 
-const char* token_type_to_string(const token_type_T type) {
+hb_string_T token_type_to_string(const token_type_T type) {
   switch (type) {
-    case TOKEN_WHITESPACE: return "TOKEN_WHITESPACE";
-    case TOKEN_NBSP: return "TOKEN_NBSP";
-    case TOKEN_NEWLINE: return "TOKEN_NEWLINE";
-    case TOKEN_IDENTIFIER: return "TOKEN_IDENTIFIER";
-    case TOKEN_HTML_DOCTYPE: return "TOKEN_HTML_DOCTYPE";
-    case TOKEN_XML_DECLARATION: return "TOKEN_XML_DECLARATION";
-    case TOKEN_XML_DECLARATION_END: return "TOKEN_XML_DECLARATION_END";
-    case TOKEN_CDATA_START: return "TOKEN_CDATA_START";
-    case TOKEN_CDATA_END: return "TOKEN_CDATA_END";
-    case TOKEN_HTML_TAG_START: return "TOKEN_HTML_TAG_START";
-    case TOKEN_HTML_TAG_END: return "TOKEN_HTML_TAG_END";
-    case TOKEN_HTML_TAG_START_CLOSE: return "TOKEN_HTML_TAG_START_CLOSE";
-    case TOKEN_HTML_TAG_SELF_CLOSE: return "TOKEN_HTML_TAG_SELF_CLOSE";
-    case TOKEN_HTML_COMMENT_START: return "TOKEN_HTML_COMMENT_START";
-    case TOKEN_HTML_COMMENT_END: return "TOKEN_HTML_COMMENT_END";
-    case TOKEN_EQUALS: return "TOKEN_EQUALS";
-    case TOKEN_QUOTE: return "TOKEN_QUOTE";
-    case TOKEN_BACKTICK: return "TOKEN_BACKTICK";
-    case TOKEN_BACKSLASH: return "TOKEN_BACKSLASH";
-    case TOKEN_DASH: return "TOKEN_DASH";
-    case TOKEN_UNDERSCORE: return "TOKEN_UNDERSCORE";
-    case TOKEN_EXCLAMATION: return "TOKEN_EXCLAMATION";
-    case TOKEN_SLASH: return "TOKEN_SLASH";
-    case TOKEN_SEMICOLON: return "TOKEN_SEMICOLON";
-    case TOKEN_COLON: return "TOKEN_COLON";
-    case TOKEN_AT: return "TOKEN_AT";
-    case TOKEN_LT: return "TOKEN_LT";
-    case TOKEN_PERCENT: return "TOKEN_PERCENT";
-    case TOKEN_AMPERSAND: return "TOKEN_AMPERSAND";
-    case TOKEN_ERB_START: return "TOKEN_ERB_START";
-    case TOKEN_ERB_CONTENT: return "TOKEN_ERB_CONTENT";
-    case TOKEN_ERB_END: return "TOKEN_ERB_END";
-    case TOKEN_CHARACTER: return "TOKEN_CHARACTER";
-    case TOKEN_ERROR: return "TOKEN_ERROR";
-    case TOKEN_EOF: return "TOKEN_EOF";
+    case TOKEN_WHITESPACE: return hb_string("TOKEN_WHITESPACE");
+    case TOKEN_NBSP: return hb_string("TOKEN_NBSP");
+    case TOKEN_NEWLINE: return hb_string("TOKEN_NEWLINE");
+    case TOKEN_IDENTIFIER: return hb_string("TOKEN_IDENTIFIER");
+    case TOKEN_HTML_DOCTYPE: return hb_string("TOKEN_HTML_DOCTYPE");
+    case TOKEN_XML_DECLARATION: return hb_string("TOKEN_XML_DECLARATION");
+    case TOKEN_XML_DECLARATION_END: return hb_string("TOKEN_XML_DECLARATION_END");
+    case TOKEN_CDATA_START: return hb_string("TOKEN_CDATA_START");
+    case TOKEN_CDATA_END: return hb_string("TOKEN_CDATA_END");
+    case TOKEN_HTML_TAG_START: return hb_string("TOKEN_HTML_TAG_START");
+    case TOKEN_HTML_TAG_END: return hb_string("TOKEN_HTML_TAG_END");
+    case TOKEN_HTML_TAG_START_CLOSE: return hb_string("TOKEN_HTML_TAG_START_CLOSE");
+    case TOKEN_HTML_TAG_SELF_CLOSE: return hb_string("TOKEN_HTML_TAG_SELF_CLOSE");
+    case TOKEN_HTML_COMMENT_START: return hb_string("TOKEN_HTML_COMMENT_START");
+    case TOKEN_HTML_COMMENT_END: return hb_string("TOKEN_HTML_COMMENT_END");
+    case TOKEN_EQUALS: return hb_string("TOKEN_EQUALS");
+    case TOKEN_QUOTE: return hb_string("TOKEN_QUOTE");
+    case TOKEN_BACKTICK: return hb_string("TOKEN_BACKTICK");
+    case TOKEN_BACKSLASH: return hb_string("TOKEN_BACKSLASH");
+    case TOKEN_DASH: return hb_string("TOKEN_DASH");
+    case TOKEN_UNDERSCORE: return hb_string("TOKEN_UNDERSCORE");
+    case TOKEN_EXCLAMATION: return hb_string("TOKEN_EXCLAMATION");
+    case TOKEN_SLASH: return hb_string("TOKEN_SLASH");
+    case TOKEN_SEMICOLON: return hb_string("TOKEN_SEMICOLON");
+    case TOKEN_COLON: return hb_string("TOKEN_COLON");
+    case TOKEN_AT: return hb_string("TOKEN_AT");
+    case TOKEN_LT: return hb_string("TOKEN_LT");
+    case TOKEN_PERCENT: return hb_string("TOKEN_PERCENT");
+    case TOKEN_AMPERSAND: return hb_string("TOKEN_AMPERSAND");
+    case TOKEN_ERB_START: return hb_string("TOKEN_ERB_START");
+    case TOKEN_ERB_CONTENT: return hb_string("TOKEN_ERB_CONTENT");
+    case TOKEN_ERB_END: return hb_string("TOKEN_ERB_END");
+    case TOKEN_CHARACTER: return hb_string("TOKEN_CHARACTER");
+    case TOKEN_ERROR: return hb_string("TOKEN_ERROR");
+    case TOKEN_EOF: return hb_string("TOKEN_EOF");
   }
 
-  return "Unknown token_type_T";
+  return hb_string("Unknown token_type_T");
 }
 
 char* token_to_string(const token_T* token) {
-  const char* type_string = token_type_to_string(token->type);
-  const char* template = "#<Herb::Token type=\"%s\" value=\"%.*s\" range=[%u, %u] start=(%u:%u) end=(%u:%u)>";
+  hb_string_T type_string = token_type_to_string(token->type);
+  hb_string_T template = hb_string("#<Herb::Token type=\"%.*s\" value=\"%.*s\" range=[%u, %u] start=(%u:%u) end=(%u:%u)>");
 
-  char* string = calloc(strlen(type_string) + strlen(template) + token->value.length + 16, sizeof(char));
+  char* string = calloc(template.length + type_string.length  + token->value.length + 16, sizeof(char));
   hb_string_T escaped;
 
   if (token->type == TOKEN_EOF) {
@@ -100,8 +100,9 @@ char* token_to_string(const token_T* token) {
 
   sprintf(
     string,
-    template,
-    type_string,
+    template.data,
+    type_string.length,
+    type_string.data,
     escaped.length,
     escaped.data,
     token->range.from,
@@ -117,12 +118,8 @@ char* token_to_string(const token_T* token) {
   return string;
 }
 
-char* token_value(const token_T* token) {
-  hb_buffer_T buffer;
-  hb_buffer_init(&buffer, token->value.length);
-  hb_buffer_append_string(&buffer, token->value);
-
-  return buffer.value;
+hb_string_T token_value(const token_T* token) {
+  return token->value;
 }
 
 int token_type(const token_T* token) {
@@ -149,7 +146,5 @@ token_T* token_copy(token_T* token) {
 void token_free(token_T* token) {
   if (!token) { return; }
 
-  // if (token->value != NULL) { free(token->value); }
-
   free(token);
 }

From 6c4814d0b257415ad27ba4d83f7500b1a7c415e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Tue, 28 Oct 2025 07:39:18 +0100
Subject: [PATCH 26/33] Use hb_string_T in errors template

---
 templates/src/errors.c.erb | 5 +----
 templates/template.rb      | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/templates/src/errors.c.erb b/templates/src/errors.c.erb
index b61edee00..6595914e4 100644
--- a/templates/src/errors.c.erb
+++ b/templates/src/errors.c.erb
@@ -42,10 +42,7 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit
   if (message) {
     <%- error.message_arguments.each_with_index do |argument, i| -%>
     <%- if error.message_template.scan(/%[sdulfz]/)[i] == "%s" -%>
-    char truncated_argument_<%= i %>[ERROR_MESSAGES_TRUNCATED_LENGTH + 1];
-    strncpy(truncated_argument_<%= i %>, <%= argument %>, ERROR_MESSAGES_TRUNCATED_LENGTH);
-    truncated_argument_<%= i %>[ERROR_MESSAGES_TRUNCATED_LENGTH] = '\0';
-
+    hb_string_T truncated_argument_<%= i %> = hb_string_truncate(<%= argument %>, ERROR_MESSAGES_TRUNCATED_LENGTH);
     <%- end -%>
     <%- end -%>
     snprintf(
diff --git a/templates/template.rb b/templates/template.rb
index 749c970db..a651afc22 100755
--- a/templates/template.rb
+++ b/templates/template.rb
@@ -103,7 +103,7 @@ def ruby_type
       end
 
       def c_type
-        "const char*"
+        "hb_string_T"
       end
     end
 

From a255eeba0877094d221b5a353694b0a24cc648dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Tue, 28 Oct 2025 07:50:54 +0100
Subject: [PATCH 27/33] Adapt errors call sites to use hb_string_T

---
 src/include/parser_helpers.h |  4 ++--
 src/parser.c                 | 38 ++++++++++++++++++------------------
 src/parser_helpers.c         |  9 ++++-----
 src/prism_helpers.c          |  6 +++---
 4 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/src/include/parser_helpers.h b/src/include/parser_helpers.h
index b3ab98300..ecdd6208a 100644
--- a/src/include/parser_helpers.h
+++ b/src/include/parser_helpers.h
@@ -15,8 +15,8 @@ token_T* parser_pop_open_tag(const parser_T* parser);
 
 void parser_append_unexpected_error(
   parser_T* parser,
-  const char* description,
-  const char* expected,
+  hb_string_T description,
+  hb_string_T expected,
   hb_array_T* errors
 );
 void parser_append_unexpected_token_error(parser_T* parser, token_type_T expected_type, hb_array_T* errors);
diff --git a/src/parser.c b/src/parser.c
index c76002c79..94f9a48af 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -243,8 +243,8 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra
 
       token_T* token = parser_consume_expected(parser, TOKEN_ERROR, document_errors);
       append_unexpected_error(
-        "Token Error",
-        "not TOKEN_ERROR",
+        hb_string("Token Error"),
+        hb_string("not TOKEN_ERROR"),
         token->value,
         token->location.start,
         token->location.end,
@@ -267,9 +267,9 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra
 
   if (hb_buffer_length(&content) > 0) {
     text_node =
-      ast_html_text_node_init(hb_buffer_value(&content), start, parser->current_token->location.start, errors);
+      ast_html_text_node_init(hb_string(content.value), start, parser->current_token->location.start, errors);
   } else {
-    text_node = ast_html_text_node_init("", start, parser->current_token->location.start, errors);
+    text_node = ast_html_text_node_init(hb_string(""), start, parser->current_token->location.start, errors);
   }
 
   free(content.value);
@@ -394,8 +394,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
 
     if (token_is(parser, TOKEN_IDENTIFIER) || token_is(parser, TOKEN_CHARACTER)) {
       append_unexpected_error(
-        "Unescaped quote character in attribute value",
-        "escaped quote (\\') or different quote style (\")",
+        hb_string("Unescaped quote character in attribute value"),
+        hb_string("escaped quote (\\') or different quote style (\")"),
         opening_quote->value,
         potential_closing->location.start,
         potential_closing->location.end,
@@ -518,9 +518,9 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
     position_T end = token->location.end;
 
     append_unexpected_error(
-      "Invalid quote character for HTML attribute",
-      "single quote (') or double quote (\")",
-      "backtick (`)",
+      hb_string("Invalid quote character for HTML attribute"),
+      hb_string("single quote (') or double quote (\")"),
+      hb_string("backtick (`)"),
       start,
       end,
       errors
@@ -535,8 +535,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
   }
 
   append_unexpected_error(
-    "Unexpected Token",
-    "TOKEN_IDENTIFIER, TOKEN_QUOTE, TOKEN_ERB_START",
+    hb_string("Unexpected Token"),
+    hb_string("TOKEN_IDENTIFIER, TOKEN_QUOTE, TOKEN_ERB_START"),
     token_type_to_string(parser->current_token->type),
     parser->current_token->location.start,
     parser->current_token->location.end,
@@ -799,8 +799,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
 
     parser_append_unexpected_error(
       parser,
-      "Unexpected Token",
-      "TOKEN_IDENTIFIER, TOKEN_AT, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
+      hb_string("Unexpected Token"),
+      hb_string("TOKEN_IDENTIFIER, TOKEN_AT, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE"),
       errors
     );
   }
@@ -863,8 +863,8 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
 
     append_void_element_closing_tag_error(
       tag_name,
-      expected.data,
-      got.data,
+      expected,
+      got,
       tag_opening->location.start,
       tag_closing->location.end,
       errors
@@ -974,7 +974,7 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_element(parser_T* parser) {
 
   hb_array_T* errors = hb_array_init(8);
 
-  parser_append_unexpected_error(parser, "Unknown HTML open tag type", "HTMLOpenTag or HTMLSelfCloseTag", errors);
+  parser_append_unexpected_error(parser, hb_string("Unknown HTML open tag type"), hb_string("HTMLOpenTag or HTMLSelfCloseTag"), errors);
 
   return ast_html_element_node_init(
     open_tag,
@@ -1134,9 +1134,9 @@ static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, h
 
     parser_append_unexpected_error(
       parser,
-      "Unexpected token",
-      "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, "
-      "TOKEN_NBSP, TOKEN_AT, TOKEN_BACKSLASH, or TOKEN_NEWLINE",
+      hb_string("Unexpected token"),
+      hb_string("TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, "
+      "TOKEN_NBSP, TOKEN_AT, TOKEN_BACKSLASH, or TOKEN_NEWLINE"),
       errors
     );
   }
diff --git a/src/parser_helpers.c b/src/parser_helpers.c
index a889a678e..3da3bc3b5 100644
--- a/src/parser_helpers.c
+++ b/src/parser_helpers.c
@@ -94,8 +94,8 @@ void parser_exit_foreign_content(parser_T* parser) {
 
 void parser_append_unexpected_error(
   parser_T* parser,
-  const char* description,
-  const char* expected,
+  hb_string_T description,
+  hb_string_T expected,
   hb_array_T* errors
 ) {
   token_T* token = parser_advance(parser);
@@ -128,10 +128,9 @@ void parser_append_literal_node_from_buffer(
   hb_array_T* children,
   position_T start
 ) {
-  if (hb_buffer_length(buffer) == 0) { return; }
-
+  if (buffer->length == 0) { return; }
   AST_LITERAL_NODE_T* literal =
-    ast_literal_node_init(hb_buffer_value(buffer), start, parser->current_token->location.start, NULL);
+    ast_literal_node_init(hb_string(buffer->value), start, parser->current_token->location.start, NULL);
 
   if (children != NULL) { hb_array_append(children, literal); }
   hb_buffer_clear(buffer);
diff --git a/src/prism_helpers.c b/src/prism_helpers.c
index 06ac155b5..22cef65c6 100644
--- a/src/prism_helpers.c
+++ b/src/prism_helpers.c
@@ -44,9 +44,9 @@ RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error(
   position_T end = position_from_source_with_offset(source, end_offset);
 
   return ruby_parse_error_init(
-    error->message,
-    pm_diagnostic_id_human(error->diag_id),
-    pm_error_level_to_string(error->level),
+    hb_string(error->message),
+    hb_string(pm_diagnostic_id_human(error->diag_id)),
+    hb_string(pm_error_level_to_string(error->level)),
     start,
     end
   );

From 29d570409ff0f1f493c2ba73bc53260d5059d2ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Tue, 28 Oct 2025 08:08:35 +0100
Subject: [PATCH 28/33] Stop freeing strings

---
 templates/src/errors.c.erb | 1 -
 1 file changed, 1 deletion(-)

diff --git a/templates/src/errors.c.erb b/templates/src/errors.c.erb
index 6595914e4..107a9ac86 100644
--- a/templates/src/errors.c.erb
+++ b/templates/src/errors.c.erb
@@ -134,7 +134,6 @@ static void error_free_<%= error.human %>(<%= error.struct_type %>* <%= error.hu
   <%- when Herb::Template::SizeTField -%>
   // size_t is part of struct
   <%- when Herb::Template::StringField -%>
-  if (<%= error.human %>-><%= field.name %> != NULL) { free((char*) <%= error.human %>-><%= field.name %>); }
   <%- else -%>
   <%= field.inspect %>
   <%- end -%>

From efe44253d821c9a5caae8f15de07e5cd6b7dbfbf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Thu, 30 Oct 2025 20:57:00 +0100
Subject: [PATCH 29/33] Revert to malloc in lexer error message

---
 src/lexer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lexer.c b/src/lexer.c
index 89c6b40f3..aa825257b 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -58,7 +58,7 @@ void lexer_init(lexer_T* lexer, const char* source) {
 
 token_T* lexer_error(lexer_T* lexer, const char* message) {
   size_t error_message_length = 128;
-  char* error_message = hb_arena_alloc(lexer->allocator, sizeof(char) * error_message_length);
+  char* error_message = malloc(sizeof(char) * error_message_length); // hb_arena_alloc(lexer->allocator, sizeof(char) * error_message_length);
 
   snprintf(
     error_message,

From e62ab747353d5bee27fd59f5c65b08987cb2b474 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Thu, 30 Oct 2025 20:57:08 +0100
Subject: [PATCH 30/33] Fix test_token.c

---
 test/c/test_token.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/c/test_token.c b/test/c/test_token.c
index 69335ad9c..529ddfcb2 100644
--- a/test/c/test_token.c
+++ b/test/c/test_token.c
@@ -4,7 +4,7 @@
 #include "../../src/include/token.h"
 
 TEST(test_token)
-  ck_assert_str_eq(token_type_to_string(TOKEN_IDENTIFIER), "TOKEN_IDENTIFIER");
+  ck_assert(hb_string_equals(token_type_to_string(TOKEN_IDENTIFIER), hb_string("TOKEN_IDENTIFIER")));
 END
 
 TEST(test_token_to_string)

From a569e8838ad00fa62b5b37e648a7e30b43f8a7fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Thu, 30 Oct 2025 20:57:19 +0100
Subject: [PATCH 31/33] Fix issues in generated code

---
 src/ast_node.c                       | 2 +-
 templates/src/ast_nodes.c.erb        | 3 +--
 templates/src/ast_pretty_print.c.erb | 2 +-
 templates/src/errors.c.erb           | 6 +++---
 4 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/ast_node.c b/src/ast_node.c
index 976abfec8..4058c06e2 100644
--- a/src/ast_node.c
+++ b/src/ast_node.c
@@ -31,7 +31,7 @@ AST_LITERAL_NODE_T* ast_literal_node_init_from_token(const token_T* token) {
 
   ast_node_init(&literal->base, AST_LITERAL_NODE, token->location.start, token->location.end, NULL);
 
-  literal->content = NULL; // herb_strdup(token->value);
+  literal->content = token->value;
 
   return literal;
 }
diff --git a/templates/src/ast_nodes.c.erb b/templates/src/ast_nodes.c.erb
index 19cef5ef8..c00442570 100644
--- a/templates/src/ast_nodes.c.erb
+++ b/templates/src/ast_nodes.c.erb
@@ -35,7 +35,7 @@
   <%- when Herb::Template::PrismNodeField -%>
   <%= node.human %>-><%= field.name %> = <%= field.name %>;
   <%- when Herb::Template::StringField -%>
-  <%= node.human %>-><%= field.name %> = herb_strdup(<%= field.name %>);
+  <%= node.human %>-><%= field.name %> = <%= field.name %>;
   <%- when Herb::Template::AnalyzedRubyField -%>
   <%= node.human %>-><%= field.name %> = <%= field.name %>;
   <%- when Herb::Template::VoidPointerField -%>
@@ -107,7 +107,6 @@ static void ast_free_<%= node.human %>(<%= node.struct_type %>* <%= node.human %
     hb_array_free(&<%= node.human %>-><%= field.name %>);
   }
   <%- when Herb::Template::StringField -%>
-  if (<%= node.human %>-><%= field.name %> != NULL) { free((char*) <%= node.human %>-><%= field.name %>); }
   <%- when Herb::Template::PrismNodeField -%>
   if (<%= node.human %>-><%= field.name %> != NULL) {
     // The first argument to `pm_node_destroy` is a `pm_parser_t`, but it's currently unused:
diff --git a/templates/src/ast_pretty_print.c.erb b/templates/src/ast_pretty_print.c.erb
index f245e0224..b4be01ff5 100644
--- a/templates/src/ast_pretty_print.c.erb
+++ b/templates/src/ast_pretty_print.c.erb
@@ -41,7 +41,7 @@ void ast_pretty_print_node(AST_NODE_T* node, const size_t indent, const size_t r
       <%- when Herb::Template::ElementSourceField -%>
       pretty_print_string_property(element_source_to_string(<%= node.human %>-><%= field.name %>), hb_string("<%= field.name %>"), indent, relative_indent, <%= last %>, buffer);
       <%- when Herb::Template::StringField -%>
-      pretty_print_string_property(hb_string(<%= node.human %>-><%= field.name %>), hb_string("<%= field.name %>"), indent, relative_indent, <%= last %>, buffer);
+      pretty_print_string_property(<%= node.human %>-><%= field.name %>, hb_string("<%= field.name %>"), indent, relative_indent, <%= last %>, buffer);
       <%- when Herb::Template::PrismNodeField -%>
       pretty_print_string_property(hb_string("<%= field.name %>"), hb_string("<%= field.name %>"), indent, relative_indent, <%= last %>, buffer);
       <%- when Herb::Template::NodeField -%>
diff --git a/templates/src/errors.c.erb b/templates/src/errors.c.erb
index 107a9ac86..68ccdd840 100644
--- a/templates/src/errors.c.erb
+++ b/templates/src/errors.c.erb
@@ -78,7 +78,7 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit
   <%- when Herb::Template::SizeTField -%>
   <%= error.human %>-><%= field.name %> = <%= field.name %>;
   <%- when Herb::Template::StringField -%>
-  <%= error.human %>-><%= field.name %> = herb_strdup(<%= field.name %>);
+  <%= error.human %>-><%= field.name %> = <%= field.name %>;
   <%- else -%>
   <%= field.inspect %>
   <%- end -%>
@@ -216,11 +216,11 @@ static void error_pretty_print_<%= error.human %>(<%= error.struct_type %>* erro
   <%- when Herb::Template::TokenField -%>
   pretty_print_token_property(error-><%= field.name %>, hb_string("<%= field.name %>"), indent, relative_indent, <%= error.fields.length - 1 == index %>, buffer);
   <%- when Herb::Template::TokenTypeField -%>
-  pretty_print_property(hb_string(token_type_to_string(error-><%= field.name %>)), hb_string("<%= field.name %>"), indent, relative_indent, <%= error.fields.length - 1 == index %>, buffer);
+  pretty_print_property(token_type_to_string(error-><%= field.name %>), hb_string("<%= field.name %>"), indent, relative_indent, <%= error.fields.length - 1 == index %>, buffer);
   <%- when Herb::Template::SizeTField -%>
   pretty_print_size_t_property(hb_string(error-><%= field.name %>), hb_string("<%= field.name %>"), indent, relative_indent, <%= error.fields.length - 1 == index %>, buffer);
   <%- when Herb::Template::StringField -%>
-  pretty_print_quoted_property(hb_string("<%= field.name %>"), hb_string(error-><%= field.name %>), indent, relative_indent, <%= error.fields.length - 1 == index %>, buffer);
+  pretty_print_quoted_property(hb_string("<%= field.name %>"), error-><%= field.name %>, indent, relative_indent, <%= error.fields.length - 1 == index %>, buffer);
   <%- else -%>
   <%= field.inspect %>
   <%- end -%>

From 93d10abf9a48032dad32270950663c562b70cc7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Thu, 30 Oct 2025 21:14:04 +0100
Subject: [PATCH 32/33] Return uint32_t in utf8_char_byte_length

---
 src/include/utf8.h |  2 +-
 src/utf8.c         | 10 ++++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/include/utf8.h b/src/include/utf8.h
index 4c2d6c5da..2ba0b44e0 100644
--- a/src/include/utf8.h
+++ b/src/include/utf8.h
@@ -5,7 +5,7 @@
 #include <stdbool.h>
 #include <stdlib.h>
 
-int utf8_char_byte_length(unsigned char first_byte);
+uint32_t utf8_char_byte_length(unsigned char first_byte);
 uint32_t utf8_sequence_length(hb_string_T value);
 bool utf8_is_valid_continuation_byte(unsigned char byte);
 
diff --git a/src/utf8.c b/src/utf8.c
index f24e7015d..79a8aa5e0 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -1,12 +1,13 @@
 #include "include/utf8.h"
 #include "include/util/hb_string.h"
+#include <stdint.h>
 
 // UTF-8 byte patterns:
 //   0xxxxxxx = 1 byte (ASCII)
 //   110xxxxx = 2 bytes
 //   1110xxxx = 3 bytes
 //   11110xxx = 4 bytes
-int utf8_char_byte_length(unsigned char first_byte) {
+uint32_t utf8_char_byte_length(unsigned char first_byte) {
   if ((first_byte & 0x80) == 0) {
     return 1;
   } else if ((first_byte & 0xE0) == 0xC0) {
@@ -28,13 +29,14 @@ bool utf8_is_valid_continuation_byte(unsigned char byte) {
 uint32_t utf8_sequence_length(hb_string_T value) {
   if (hb_string_is_empty(value)) { return 0; }
 
-  int expected_length = utf8_char_byte_length(value.data[0]);
-  if (value.length - expected_length < expected_length) {
+  uint32_t expected_length = utf8_char_byte_length(value.data[0]);
+  // Compare directly: with an unsigned expected_length, subtracting first would wrap on short input.
+  if (value.length < expected_length) {
     return 1; // Not enough bytes, treat as single byte
   }
 
   if (expected_length > 1) {
-    for (int i = 1; i < expected_length; i++) {
+    for (uint32_t i = 1; i < expected_length; i++) {
       if (!utf8_is_valid_continuation_byte((unsigned char) value.data[i])) {
         return 1; // Invalid continuation byte, treat first byte as single byte
       }

From 1039634b10070a0d6c9d9375a459d28bee4fd340 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20K=C3=A4chele?= <mail@timkaechele.me>
Date: Thu, 30 Oct 2025 21:23:34 +0100
Subject: [PATCH 33/33] Use truncate method in lexer

---
 src/lexer.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/lexer.c b/src/lexer.c
index aa825257b..21234dc8c 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -119,8 +119,11 @@ static token_T* lexer_advance_with_next(lexer_T* lexer, size_t count, token_type
 
   uint32_t end_position = lexer->current_position;
 
+  hb_string_T value = hb_string_slice(lexer->source, start_position);
+  value = hb_string_truncate(value, end_position - start_position);
+
   token_T* token = token_init(
-    (hb_string_T) { .data = lexer->source.data + start_position, .length = end_position - start_position },
+    value,
     type,
     lexer
   );
@@ -140,8 +143,8 @@ static token_T* lexer_advance_utf8_character(lexer_T* lexer, const token_type_T
   size_t start_position = lexer->current_position;
   lexer_advance_utf8_bytes(lexer, char_byte_length);
 
-  hb_string_T utf8_char = hb_string_slice(lexer->source, lexer->current_position);
-  utf8_char.length = MIN(char_byte_length, utf8_char.length);
+  hb_string_T utf8_char = hb_string_slice(lexer->source, start_position);
+  utf8_char = hb_string_truncate(utf8_char, char_byte_length);
 
   return token_init(utf8_char, type, lexer);
 }
@@ -164,7 +167,7 @@ static token_T* lexer_parse_whitespace(lexer_T* lexer) {
   uint32_t end_position = lexer->current_position;
 
   hb_string_T value = hb_string_slice(lexer->source, start_position);
-  value.length = end_position - start_position;
+  value = hb_string_truncate(value, end_position - start_position);
 
   token_T* token = token_init(value, TOKEN_WHITESPACE, lexer);
 
@@ -182,6 +185,6 @@ static token_T* lexer_parse_identifier(lexer_T* lexer) {
   uint32_t end_position = lexer->current_position;
 
   hb_string_T value = hb_string_slice(lexer->source, start_position);
-  value.length = end_position - start_position;
+  value = hb_string_truncate(value, end_position - start_position);
 
   token_T* token = token_init(value, TOKEN_IDENTIFIER, lexer);
@@ -233,7 +236,7 @@ static token_T* lexer_parse_erb_content(lexer_T* lexer) {
 
   uint32_t end_position = lexer->current_position;
   hb_string_T value = hb_string_slice(lexer->source, start_position);
-  value.length = end_position - start_position;
+  value = hb_string_truncate(value, end_position - start_position);
 
   return token_init(value, TOKEN_ERB_CONTENT, lexer);
 }