diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..17d6464 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use_nix \ No newline at end of file diff --git a/build.zig b/build.zig index 00a2907..1b9b0ce 100644 --- a/build.zig +++ b/build.zig @@ -1,31 +1,240 @@ const std = @import("std"); pub fn build(b: *std.build.Builder) void { + // build options: + + const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{}); - _ = b.addModule("parser-toolkit", .{ - .source_file = .{ .path = "src/main.zig" }, + const test_step = b.step("test", "Run library tests"); + const examples_step = b.step("examples", "Builds and installs examples"); + const run_calc_step = b.step("run-calculator", "Runs calculator example"); + + const all_step = b.step("all", "Builds everything, tests everything"); + all_step.dependOn(b.getInstallStep()); + all_step.dependOn(test_step); + all_step.dependOn(examples_step); + + // dependencies + + const args_dep = b.dependency("args", .{}); + + // external modules + + const args_mod = args_dep.module("args"); + + // internal modules + + const ptk_mod = b.addModule("parser-toolkit", .{ + .source_file = .{ .path = "src/toolkit/main.zig" }, .dependencies = &.{}, }); - var main_tests = b.addTest(.{ - .root_source_file = .{ .path = "src/main.zig" }, - .optimize = optimize, - }); + // Applications + const ptkdef_exe = blk: { + const ptkdef = b.addExecutable(.{ + .name = "ptkgen", + .root_source_file = .{ .path = "src/ptkgen/main.zig" }, + .optimize = optimize, + .target = target, + }); - const test_step = b.step("test", "Run library tests"); - test_step.dependOn(&b.addRunArtifact(main_tests).step); + ptkdef.addModule("parser-toolkit", ptk_mod); + ptkdef.addModule("args", args_mod); - const calculator_example = b.addExecutable(.{ - .root_source_file = .{ .path = "examples/calculator.zig" }, - .name = "calculator", - .optimize = optimize, - }); + b.installArtifact(ptkdef); - b.installArtifact(calculator_example); - calculator_example.addAnonymousModule("parser-toolkit", .{ - .source_file = .{ .path = "src/main.zig" }, - }); + break :blk ptkdef; + }; + + // test suite + { + // unit tests for ptk: + var ptk_tests = b.addTest(.{ + .root_source_file = ptk_mod.source_file, + .optimize = optimize, + }); + for (ptk_mod.dependencies.keys()) |dep_name| { + ptk_tests.addModule(dep_name, ptk_mod.dependencies.get(dep_name).?); + } + test_step.dependOn(&b.addRunArtifact(ptk_tests).step); + + // unit tests for ptkgen: + var ptkgen_tests = b.addTest(.{ + .root_source_file = .{ .path = "src/ptkgen/main.zig" }, + .optimize = optimize, + }); + ptkgen_tests.addModule("parser-toolkit", ptk_mod); + test_step.dependOn(&b.addRunArtifact(ptkgen_tests).step); + + // Integration tests for ptkgen: + for (parser_accept_files ++ parser_reject_files) |file| { + const run = b.addRunArtifact(ptkdef_exe); + run.addArg("--test_mode=parse_only"); + run.addFileArg(.{ .path = file }); + test_step.dependOn(&run.step); + } + + // Integration tests for ptkgen: + for (analyis_accept_files ++ analyis_reject_files) |file| { + const run = b.addRunArtifact(ptkdef_exe); + run.addArg("--test_mode=no_codegen"); + run.addFileArg(.{ .path = file }); + test_step.dependOn(&run.step); + } + } + + // examples + { + const calculator_example = b.addExecutable(.{ + .root_source_file = .{ .path = "examples/calculator.zig" }, + .name = "calculator", + .optimize = optimize, + }); + calculator_example.addModule("parser-toolkit", ptk_mod); + examples_step.dependOn(&b.addInstallArtifact(calculator_example, 
.{}).step); - b.step("run", "Runs the calculator example").dependOn(&b.addRunArtifact(calculator_example).step); + run_calc_step.dependOn(&b.addRunArtifact(calculator_example).step); + } } + +const example_files = [_][]const u8{ + "examples/ptkgen/grammar.ptk", + "examples/ptkgen/ast-with-unions.ptk", +}; + +const analyis_accept_files = [_][]const u8{ + "test/analysis/accept/match-literal-rule.ptk", + "test/analysis/accept/match-literal-sequence.ptk", + "test/analysis/accept/match-literal-variants.ptk", + "test/analysis/accept/match-literal-sequence-variant.ptk", + "test/analysis/accept/match-group-one-item.ptk", + "test/analysis/accept/match-group-one-sequence.ptk", + "test/analysis/accept/match-group-many-item.ptk", + "test/analysis/accept/match-group-many-sequence.ptk", + "test/analysis/accept/match-group-nested.ptk", + "test/analysis/accept/match-optional-one-item.ptk", + "test/analysis/accept/match-optional-one-sequence.ptk", + "test/analysis/accept/match-optional-many-item.ptk", + "test/analysis/accept/match-optional-many-sequence.ptk", + "test/analysis/accept/match-optional-nested.ptk", + "test/analysis/accept/match-rep_zero-one-item.ptk", + "test/analysis/accept/match-rep_zero-one-sequence.ptk", + "test/analysis/accept/match-rep_zero-many-item.ptk", + "test/analysis/accept/match-rep_zero-many-sequence.ptk", + "test/analysis/accept/match-rep_zero-nested.ptk", + "test/analysis/accept/match-rep_one-one-item.ptk", + "test/analysis/accept/match-rep_one-one-sequence.ptk", + "test/analysis/accept/match-rep_one-many-item.ptk", + "test/analysis/accept/match-rep_one-many-sequence.ptk", + "test/analysis/accept/match-rep_one-nested.ptk", + + "test/analysis/accept/start-decl.ptk", + + "test/analysis/accept/pattern-custom.ptk", + "test/analysis/accept/pattern-literal.ptk", + "test/analysis/accept/pattern-regex.ptk", + "test/analysis/accept/pattern-word.ptk", + + "test/analysis/accept/pattern-word-skip.ptk", + "test/analysis/accept/pattern-regex-skip.ptk", + "test/analysis/accept/pattern-literal-skip.ptk", + "test/analysis/accept/pattern-custom-skip.ptk", +} ++ example_files; + +const analyis_reject_files = [_][]const u8{ + "test/analysis/reject/duplicate-node.ptk", + "test/analysis/reject/duplicate-pattern.ptk", + "test/analysis/reject/duplicate-rule.ptk", + + "test/analysis/accept/expect-warn-missing-start.ptk", + + "test/analysis/reject/undeclared-start.ptk", + "test/analysis/reject/duplicate-undeclared-start.ptk", + "test/analysis/reject/duplicate-start.ptk", + + "test/analysis/reject/duplicate-field-record.ptk", + "test/analysis/reject/duplicate-field-variant.ptk", + + "test/analysis/reject/production-undeclared-pattern-ref.ptk", + "test/analysis/reject/production-undeclared-rule-ref.ptk", +}; + +const parser_accept_files = [_][]const u8{ + "test/parser/accept/empty.ptk", + "test/parser/accept/empty-with-comment-linefeed.ptk", + "test/parser/accept/empty-with-comment.ptk", + "test/parser/accept/identifiers.ptk", + + "test/parser/accept/optional-nospace.ptk", + "test/parser/accept/optional-space.ptk", + "test/parser/accept/rep_one-nospace.ptk", + "test/parser/accept/rep_one-space.ptk", + "test/parser/accept/rep_zero-nospace.ptk", + "test/parser/accept/rep_zero-space.ptk", + + "test/parser/accept/basic-rule-ref.ptk", + "test/parser/accept/basic-token-ref.ptk", + "test/parser/accept/rule-primitive-sequence.ptk", + + "test/parser/accept/document-start.ptk", + + "test/parser/accept/mapping-value-ref.ptk", + "test/parser/accept/mapping-code-literal.ptk", +
"test/parser/accept/mapping-user-value.ptk", + + "test/parser/accept/mapping-builtin-function-a0.ptk", + "test/parser/accept/mapping-builtin-function-a1.ptk", + "test/parser/accept/mapping-builtin-function-a5.ptk", + "test/parser/accept/mapping-builtin-function-nest.ptk", + + "test/parser/accept/mapping-user-function-a0.ptk", + "test/parser/accept/mapping-user-function-a1.ptk", + "test/parser/accept/mapping-user-function-a5.ptk", + "test/parser/accept/mapping-user-function-nest.ptk", + + "test/parser/accept/mapping-array-a0.ptk", + "test/parser/accept/mapping-array-a1.ptk", + "test/parser/accept/mapping-array-a5.ptk", + "test/parser/accept/mapping-array-nested.ptk", + + "test/parser/accept/mapping-variant-init.ptk", + + "test/parser/accept/mapping-record-init-f1.ptk", + "test/parser/accept/mapping-record-init-f3.ptk", + + "test/parser/accept/rule-typespec-custom.ptk", + "test/parser/accept/rule-typespec-ref.ptk", + "test/parser/accept/rule-typespec-literal.ptk", + + "test/parser/accept/node-alias.ptk", + "test/parser/accept/node-custom.ptk", + "test/parser/accept/node-literal.ptk", + + "test/parser/accept/node-record-f1.ptk", + "test/parser/accept/node-record-f4.ptk", + + "test/parser/accept/node-variant-f4.ptk", + "test/parser/accept/node-variant-f1.ptk", +} ++ analyis_accept_files; + +const parser_reject_files = [_][]const u8{ + "test/parser/reject/empty-rule.ptk", + "test/parser/reject/empty-group.ptk", + "test/parser/reject/empty-optional.ptk", + "test/parser/reject/empty-rep_one.ptk", + "test/parser/reject/empty-rep_zero.ptk", + + "test/parser/reject/unexpected-token-string.ptk", + + "test/parser/reject/empty-mapping.ptk", + "test/parser/reject/bad-mapping-invalid-token.ptk", + "test/parser/reject/bad-mapping-too-long.ptk", + + "test/parser/reject/node-no-type.ptk", + "test/parser/reject/rule-no-type.ptk", + "test/parser/reject/rule-no-type-no-prod.ptk", + "test/parser/reject/rule-bad-prod.ptk", + + "test/parser/reject/pattern-unexpected-token.ptk", +}; diff --git a/build.zig.zon b/build.zig.zon new file mode 100644 index 0000000..5cbec5c --- /dev/null +++ b/build.zig.zon @@ -0,0 +1,10 @@ +.{ + .name = "parser-toolkit", + .version = "0.2.0", + .dependencies = .{ + .args = .{ + .url = "https://github.com/MasterQ32/zig-args/archive/7989929d055ef7618e60de84cc54644046516fdb.tar.gz", + .hash = "12207752d975a7f5d7cc65662ed1c6b117da8dec6d1bd7af9a39e1b65d90bf86e833", + }, + }, +} diff --git a/design/ptkdefv/design.md b/design/ptkdefv/design.md new file mode 100644 index 0000000..e017c98 --- /dev/null +++ b/design/ptkdefv/design.md @@ -0,0 +1,4 @@ +# Parser Generator Language + +Create basic recursive descent parsers with "well-known" patterns that output a Zig AST data structure. + diff --git a/design/ptkdefv/grammar.ptk b/design/ptkdefv/grammar.ptk new file mode 100644 index 0000000..e50f519 --- /dev/null +++ b/design/ptkdefv/grammar.ptk @@ -0,0 +1,43 @@ + + +root ; # <...> is a "rule reference" + +token identifier = regex "[A-Za-z_][A-Za-z0-9_]*"; # defines token "identifier" to match this regex + +token line-comment = regex "//[^\n]*" skip; # ignores this token when parsing, but tokenizer recognizes it +token whitespace = regex "[ \t\r\n]" skip; + +rule document = + # [ ... ] is a loop construct, can appear several times + [ ] [ ]* +; + +rule toplevel-decl = + # | is a "either/or" scenario, with precedence from left to right (first come, first serve) + | | +; + +rule interface-decl = + "interface" $identifier "(" ... ")" ";"; +; + +rule module-decl = + "module" $identifier "(" ... 
")" "{" ... "}" ";"; +; + +rule using = + # "bla" is a literal token + # $bla is an explicitly defined token reference + # ...? is an optional part of a parse + "using" ";" ( "as" $identifier )? +; + +rule namespace-decl = + "namespace" ";" +; + +rule compound-identifier = + $identifier [ "." $identifier ]* +; + + diff --git a/design/ptkdefv/mapping-concept-01.ptk b/design/ptkdefv/mapping-concept-01.ptk new file mode 100644 index 0000000..9e4ccf9 --- /dev/null +++ b/design/ptkdefv/mapping-concept-01.ptk @@ -0,0 +1,37 @@ + +# "!id" is a type reference +# "$id" is a token reference +# "" is a rule reference + +# maps type "array" to a slice/arraylist of whatever "int" is +node array = sequence !int; + +# "int" is the Zig type "i32" +node int = literal "i32"; + +# the initial rule is "list", also determines the root type of the ast +start ; + +# "decimal" token is a decimal number sequence token +token decimal = regex "\d+"; + +# "list" is a sequence of decimals with comma separated, potential trailing comma, +# enclosed by square brackets +rule list = "[" [ $decimal "," ] $decimal? "]"; +# $0 $1______________ $2_______ $3 + +# the rule "list" is mapped to the type "array" +# as a sequence of the second element (unwrapped into items) and +# the third item appended. square brackets in a map are the "construct array operator". +# if the array is not sequence of optionals, optional items are skipped in construction +map !array = [ $1..., $2 ]; + +# the "decimal" token is mapped to i32 by invoking a Zig function called +# "parse" that takes the token as input and returns "i32": +map $decimal !int = @parse($0); + + + + + + diff --git a/docs/grammar.md b/docs/grammar.md new file mode 100644 index 0000000..0d3d2b5 --- /dev/null +++ b/docs/grammar.md @@ -0,0 +1,41 @@ +# Parser Toolkit Grammar + +## Syntax + +```rb + +@Identifier # references Identifier from the user context. can be used for types, functions, values + # references another rule named Rule +!Node # references another ast node called Node + + +``` + +## Types + +```rb +literal `text` # pastes text into the code +optional ... # makes ... an optional type + +struct # constructs a structure type, having two fields: + field: !type, + field: !type + +union # constructs a type for alternatives, here with two variants: + Foo: !type, # alternative called Foo + Bar: !type # alternative called Bar + +``` + +## Strings + +- `\x00 ... \xFF` => Hexadecimal escape +- `\000 ... \377` => Octal escape +- `\n` => LF (0x0A) +- `\r` => CR (0x0D) +- `\'` => single quote (0x27) +- `\"` => double quote (0x22) +- `\\` => back slash (0x5C) +- `\u{????}` => UTF-8 encoded codepoint + + diff --git a/docs/semantics.md b/docs/semantics.md new file mode 100644 index 0000000..7d23443 --- /dev/null +++ b/docs/semantics.md @@ -0,0 +1,23 @@ +# PtkGen Semantics + +## Context References + +tl;dr: `$n` can access the elements of the top-level productions of a rule. + +```rb +rule r = "hello" "world" => $0; # access "hello" +rule r = "hello" "world" => $1; # access "world" +``` + +### Index Resolution + +1. Flatten hierarchy +2. Use index in flattened list + +```rb +rule r = a b c d e f g h; # [ a b c d e f g h ] => flat sequence +rule r = a b ( c d e )? f g h; # [ a b c? d? e? 
f g h ] => `c`, `d`, `e` get promoted to optional) +rule r = a b ( c d e )* f g h; # [ a b [[c d e]] f g h ] => `c d e` get promoted to list of lists ([[c d e], [c d e], ...]) +rule r = a b ( c d e )+ f g h; # [ a b [[c d e]] f g h ] => `c d e` get promoted to list of lists ([[c d e], [c d e], ...]) +rule r = a b ( c d e ) f g h; # [ a b c d e f g h ] => `c d e` gets flattened into the master list +``` diff --git a/examples/ptkgen/ast-with-unions.ptk b/examples/ptkgen/ast-with-unions.ptk new file mode 100644 index 0000000..369c9c9 --- /dev/null +++ b/examples/ptkgen/ast-with-unions.ptk @@ -0,0 +1,70 @@ +# parse a construct like this into a single type: +# var name = value; +# const name = value; +# var name: type = value; +# const name: type = value; + +node declaration = record + is_const: `bool`, + name: !identifier, + # type: optional !type, + value: !value +; + +node identifier = `[]const u8`; +node type = @TypeId; # enum { int, float, string } +node value = @Value; + +start ; + +rule decl : !declaration = + ( ":" )? "=" => { + is_const = $0, + name = $1, + type = $2, + value = $4 + } +# $0_________ $1__ $2_____________ $3 $4_____ +; + +rule decl-type : `bool` = + "var" => `false` + | "const" => `true` +; + +rule id : !identifier = "name" => tostring($0); + +rule type : !type = + "int" => `.int` + | "float" => `.float` + | "string" => `.string` +; + +rule value : !value = + "10" => @parseInt($0) + | "3.14" => @parseFloat($0) + | "\"nice\"" => @parseStringLiteral($0) +; + + + +# Unions have can only have a single option active at a time +node TLDeclaration = variant + ns : !namespace, + interface : !interface, + module : !module +; + +node namespace = @Namespace; +node interface = @Interface; +node module = @Module; + +rule toplevel-decl : !TLDeclaration = + => ns: $0 # this is syntax for a union field selector as unions are not compounds + | => interface: $0 + | => module: $0 +; + +rule namespace-group = "to be done"; +rule interface-decl = "to be done"; +rule module-decl = "to be done"; diff --git a/examples/ptkgen/grammar.ptk b/examples/ptkgen/grammar.ptk new file mode 100644 index 0000000..6a6d95a --- /dev/null +++ b/examples/ptkgen/grammar.ptk @@ -0,0 +1,85 @@ + +start ; + +rule document = ( )* ; + +rule top_level = + + | + | + | +; + +rule start_decl = "start" $rule_ref ";" ; + +rule pattern_decl = "pattern" $identifier "=" ( "skip" )? ";" ; + +rule pattern_spec = + "literal" $string_literal + | "word" $string_literal + | "regex" $string_literal + | $userval +; + +rule node_decl = "node" $identifier "=" ";" ; + +rule rule_decl = "rule" $identifier ( ":" )? "=" ";" ; + +rule mapped_productions = ( "|" )* ; + +rule mapped_production = ( "=>" )? ; + +rule production_sequence = ( )+; + +rule production = + $string_literal + | $rule_ref + | $token_ref + | "(" ")" "?" + | "(" ")" "*" + | "(" ")" "+" + | "(" ")" +; + +rule mapping = + $identifier ":" # variant init + | # regular init +; + +rule mapped_value = + # { field = , field = , ... } + | # { , , ... } + | $code_literal # `code` + | $value_ref # $0 + | $identifier "(" ")" # builtin(...) + | $userval "(" ")" # @func(...) + | $userval # @value +; + +rule record_ctor = + "{" ( "," )* "}" +; + +rule assign_field = + $identifier "=" $mapped_value +; + +rule list_ctor = "{" ( )? 
"}"; + +rule value_list = + ( "," )* +; + +# TODO: + +rule type = "empty"; + +pattern rule_ref = literal ""; +pattern identifier = regex "[A-Za-z_][A-Za-z0-9_]*"; +pattern string_literal = literal ""; +pattern userval = literal ""; +pattern token_ref = literal ""; +pattern code_literal = literal ""; +pattern value_ref = literal ""; +pattern mapped_value = literal ""; + diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000..664d354 --- /dev/null +++ b/shell.nix @@ -0,0 +1,11 @@ +{ pkgs ? import { } }: +pkgs.mkShell { + nativeBuildInputs = [ + # zig + pkgs.zig_0_11 + ]; + buildInputs = [ ]; + shellHook = '' + # put your shell hook here + ''; +} diff --git a/src/ptkgen/Diagnostics.zig b/src/ptkgen/Diagnostics.zig new file mode 100644 index 0000000..6559663 --- /dev/null +++ b/src/ptkgen/Diagnostics.zig @@ -0,0 +1,468 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); + +const intl = @import("intl.zig"); +const parser = @import("parser.zig"); + +const Diagnostics = @This(); + +pub const Code = enum(u16) { + pub const first_error = 1000; + pub const first_warning = 4000; + pub const first_note = 8000; + pub const last_item = 9999; + + // generic failures (1000-1099): + out_of_memory = 1000, + file_limit_exceeded = 1001, + io_error = 1002, + + // non-recoverable syntax errors (1100-1199): + invalid_source_encoding = 1100, + unexpected_token_eof = 1101, + unexpected_token = 1102, + unexpected_character = 1103, + unexpected_eof = 1104, + bad_string_escape = 1105, + invalid_string_escape = 1106, + excess_tokens = 1107, + unexpected_toplevel_token = 1108, + unexpected_token_no_context = 1109, + unexpected_token_type_spec = 1110, + unexpected_token_mapping = 1111, + unexpected_token_production_list = 1112, + unexpected_token_production = 1113, + unexpected_token_pattern = 1114, + + // recoverable syntax errors (1200-1299): + illegal_empty_group = 1200, + empty_mapping = 1201, + integer_overflow = 1202, + empty_typespec = 1203, + + // semantic errors (1300-1399): + + duplicate_identifier_rule = 1300, + duplicate_identifier_node = 1301, + duplicate_identifier_pattern = 1302, + + reference_to_undeclared_rule = 1303, + reference_to_undeclared_node = 1304, + reference_to_undeclared_pattern = 1305, + + multiple_start_symbols = 1306, + + duplicate_compound_field = 1307, + + context_reference_out_of_bounds = 1308, + + variant_does_not_exist = 1309, + + record_field_does_not_exist = 1310, + record_field_already_initialized = 1311, + record_field_not_initialized = 1312, + + mapping_requires_typed_rule = 1313, + + invalid_builtin_function = 1314, + + // semantic warnings (4000-4099): + + missing_start_symbol = 4000, + + comptime { + std.debug.assert(first_error < first_warning); + std.debug.assert(first_warning < first_note); + std.debug.assert(first_note < last_item); + } + + const max_item_len = blk: { + var len = 0; + for (@typeInfo(Code).Enum.fields) |fld| { + len = @max(len, fld.name); + } + break :blk len; + }; + + const code_strings = blk: { + @setEvalBranchQuota(10_000); + var map = std.EnumArray(Code, []const u8).initUndefined(); + + for (std.enums.values(Code)) |code| { + const tag = @tagName(code); + + // perform kebab conversion: + var buf: [tag.len]u8 = tag[0..tag.len].*; + for (&buf) |*c| { + if (c.* == '_') + c.* = '-'; + } + + map.set(code, &buf); + } + + break :blk map; + }; + + pub fn isError(code: Code) bool { + const int = @intFromEnum(code); + return @intFromEnum(code) >= first_error and int < first_warning; + } + + pub fn isWarning(code: Code) bool { + 
const int = @intFromEnum(code); + return int >= first_warning and int < first_note; + } + + pub fn isNote(code: Code) bool { + const int = @intFromEnum(code); + return int >= first_note and int < last_item; + } + + pub fn parse(string: []const u8) error{ + /// Format is not recognized + InvalidFormat, + /// Numeric error code is out of range. + OutOfRange, + /// Numeric error code does not exist. + InvalidId, + }!Code { + if (string.len == 0 or (string[0] != 'E' and string[0] != 'W' and string[0] != 'D')) + return error.InvalidFormat; + const id = std.fmt.parseInt(u16, string[1..], 10) catch |err| switch (err) { + error.InvalidCharacter => return error.InvalidFormat, + error.Overflow => return error.OutOfRange, + }; + if (id > last_item) + return error.OutOfRange; + return std.meta.intToEnum(Diagnostics.Code, id) catch return error.InvalidId; + } + + pub fn format(code: Code, comptime fmt: []const u8, opt: std.fmt.FormatOptions, writer: anytype) !void { + _ = opt; + + if (comptime std.mem.eql(u8, fmt, "d")) { + const code_prefix = if (code.isError()) + "E" + else if (code.isWarning()) + "W" + else + "D"; + + try writer.print("{s}{d:0>4}", .{ code_prefix, @intFromEnum(code) }); + } else if (comptime std.mem.eql(u8, fmt, "s")) { + try writer.writeAll(code_strings.get(code)); + } else { + @compileError("Code fmt must be {s} (string variant) or {d} (numeric variant)!"); + } + // + } +}; + +const NoDiagnosticData = struct {}; + +const UnexpectedTokenMessage = struct { + actual: parser.Token, +}; + +const DuplicateIdentifier = struct { + identifier: []const u8, + previous_location: ptk.Location, +}; +const UndeclaredIdentifier = struct { identifier: []const u8 }; + +pub fn Data(comptime code: Code) type { + return switch (code) { + .out_of_memory => NoDiagnosticData, + .file_limit_exceeded => NoDiagnosticData, + .io_error => struct { error_code: intl.FormattableError }, + + .unexpected_token_eof => struct { + expected_type: parser.TokenType, + }, + .unexpected_token => struct { + expected_type: parser.TokenType, + actual: parser.Token, + }, + + .unexpected_toplevel_token => UnexpectedTokenMessage, + .unexpected_token_no_context => UnexpectedTokenMessage, + .unexpected_token_type_spec => UnexpectedTokenMessage, + .unexpected_token_mapping => UnexpectedTokenMessage, + .unexpected_token_production_list => UnexpectedTokenMessage, + .unexpected_token_production => UnexpectedTokenMessage, + .unexpected_token_pattern => UnexpectedTokenMessage, + + .unexpected_eof => NoDiagnosticData, + + .invalid_source_encoding => NoDiagnosticData, + .unexpected_character => struct { character: u21 }, + + .bad_string_escape => NoDiagnosticData, + .invalid_string_escape => struct { escape: u21 }, + .excess_tokens => struct { token_type: parser.TokenType }, + + .illegal_empty_group => NoDiagnosticData, + .empty_mapping => NoDiagnosticData, + + .integer_overflow => struct { + min: []const u8, + max: []const u8, + actual: []const u8, + }, + + .empty_typespec => NoDiagnosticData, + + .duplicate_identifier_rule => DuplicateIdentifier, + .duplicate_identifier_node => DuplicateIdentifier, + .duplicate_identifier_pattern => DuplicateIdentifier, + + .reference_to_undeclared_rule => UndeclaredIdentifier, + .reference_to_undeclared_node => UndeclaredIdentifier, + .reference_to_undeclared_pattern => UndeclaredIdentifier, + + .multiple_start_symbols => struct { + identifier: []const u8, + previous_location: ptk.Location, + }, + + .missing_start_symbol => NoDiagnosticData, + + .duplicate_compound_field => struct { + identifier: 
[]const u8, + previous_location: ptk.Location, + }, + + .context_reference_out_of_bounds => struct { + index: u32, + limit: u32, + }, + + .variant_does_not_exist => struct { + field: []const u8, + type_location: ptk.Location, + }, + + .record_field_does_not_exist => struct { + field: []const u8, + type_location: ptk.Location, + }, + .record_field_already_initialized => struct { + field: []const u8, + prev_init: ptk.Location, + }, + .record_field_not_initialized => struct { + field: []const u8, + field_location: ptk.Location, + }, + + .mapping_requires_typed_rule => NoDiagnosticData, + + .invalid_builtin_function => struct { + name: []const u8, + }, + + // else => @compileError(std.fmt.comptimePrint("Code {} has no diagnostic type associated!", .{code})), + }; +} + +pub const Message = struct { + level: ptk.Error.Level, + location: ptk.Location, + text: []const u8, +}; + +inner: ptk.Diagnostics, +codes: std.ArrayList(Code), + +pub fn init(allocator: std.mem.Allocator) Diagnostics { + return Diagnostics{ + .inner = ptk.Diagnostics.init(allocator), + .codes = std.ArrayList(Code).init(allocator), + }; +} + +pub fn deinit(diag: *Diagnostics) void { + diag.codes.deinit(); + diag.inner.deinit(); + diag.* = undefined; +} + +pub fn hasErrors(diag: Diagnostics) bool { + return diag.inner.hasErrors(); +} + +pub fn hasWarnings(diag: Diagnostics) bool { + return diag.inner.hasWarnings(); +} + +fn Formatter(comptime T: type) type { + return switch (T) { + // text and unicode: + []const u8 => struct { + // TODO: Distinguish between "string body" and "string literal" + + value: T, + + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + try writer.print("{}", .{std.zig.fmtEscapes(item.value)}); + } + }, + + u21 => struct { + value: T, + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + + if (item.value < 0x80) { + const ascii: u8 = @intCast(item.value); + + if (std.ascii.isPrint(ascii)) { + try writer.print("{c}", .{ascii}); + } else { + try writer.print("[nonprint: 0x{X:0>2}]", .{ascii}); + } + } else { + var buf: [4]u8 = undefined; + if (std.unicode.utf8Encode(item.value, &buf)) |len| { + try writer.print("{s}", .{buf[0..len]}); + } else |_| { + try writer.print("<U+{X:0>4}>", .{item.value}); + } + } + } + }, + + // enums: + parser.TokenType => struct { + value: parser.TokenType, + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + try writer.print("{s}", .{@tagName(item.value)}); + } + }, + + parser.Token => struct { + value: parser.Token, + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + try writer.print("{s} ('{}')", .{ + @tagName(item.value.type), + std.zig.fmtEscapes(item.value.text), + }); + } + }, + + ptk.Location => struct { + value: ptk.Location, + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + try writer.print("{}", .{item.value}); + } + }, + + intl.FormattableError => struct { + value: T, + + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + + inline for (@typeInfo(intl.FormattableError).ErrorSet.?)
|err| { + if (item.value == @field(intl.FormattableError, err.name)) { + try writer.writeAll(@field(intl.localization.errors, err.name)); + return; + } + } else unreachable; + } + }, + + // integers: + + u32 => struct { + value: T, + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + + try writer.print("{}", .{item.value}); + } + }, + + else => @compileError(std.fmt.comptimePrint("{s} is not a supported diagnostic type!", .{@typeName(T)})), + }; +} + +fn createFormatter(comptime T: type, value: T) Formatter(T) { + return Formatter(T){ .value = value }; +} + +fn FormattedData(comptime code: Code) type { + const Field = std.builtin.Type.StructField; + const D = Data(code); + + const src_fields = @typeInfo(D).Struct.fields; + + var dst_fields: [src_fields.len]Field = undefined; + + for (&dst_fields, src_fields) |*dst, src| { + dst.* = .{ + .name = src.name, + .type = Formatter(src.type), + .default_value = null, + .is_comptime = false, + .alignment = @alignOf(Formatter(src.type)), + }; + } + + return @Type(.{ + .Struct = .{ + .layout = .Auto, + .fields = &dst_fields, + .decls = &.{}, + .is_tuple = false, + }, + }); +} + +fn formatData(comptime code: Code, params: Data(code)) FormattedData(code) { + var formatted: FormattedData(code) = undefined; + inline for (std.meta.fields(Data(code))) |fld| { + @field(formatted, fld.name) = createFormatter(fld.type, @field(params, fld.name)); + } + return formatted; +} + +pub fn emit(diag: *Diagnostics, location: ptk.Location, comptime code: Code, params: Data(code)) error{OutOfMemory}!void { + const level = if (code.isError()) + ptk.Error.Level.@"error" + else if (code.isWarning()) + ptk.Error.Level.warning + else if (code.isNote()) + ptk.Error.Level.info + else + unreachable; + + const fmt_string = @field(intl.localization.diagnostics, @tagName(code)); + + var stack_fallback = std.heap.stackFallback(1024, diag.inner.memory.allocator()); + const stack_fallback_allocator = stack_fallback.get(); + + const formatted_params = formatData(code, params); + + const message_text = try std.fmt.allocPrint(stack_fallback_allocator, fmt_string, formatted_params); + defer stack_fallback_allocator.free(message_text); + + try diag.inner.emit(location, level, "{d}: {s}", .{ code, message_text }); + try diag.codes.append(code); +} + +pub fn render(diag: Diagnostics, stream: anytype) !void { + try diag.inner.print(stream); +} diff --git a/src/ptkgen/ast.zig b/src/ptkgen/ast.zig new file mode 100644 index 0000000..a650c35 --- /dev/null +++ b/src/ptkgen/ast.zig @@ -0,0 +1,174 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); + +const Location = ptk.Location; + +pub fn List(comptime T: type) type { + return struct { + pub const Item = T; + + pub const Node = std.TailQueue(T).Node; + + inner: std.TailQueue(T) = .{}, + + pub fn append(list: *@This(), item: *@This().Node) void { + list.inner.append(item); + } + + pub fn len(list: @This()) usize { + return list.inner.len; + } + + pub fn only(list: @This()) ?T { + return if (list.inner.len == 1) + list.inner.first.?.data + else + null; + } + }; +} + +pub fn Iterator(comptime T: type) type { + return struct { + node: ?*List(T).Node, + + pub fn next(iter: *@This()) ?*T { + const current = iter.node orelse return null; + iter.node = current.next; + return ¤t.data; + } + }; +} + +pub fn iterate(list: anytype) Iterator(@TypeOf(list).Item) { + return Iterator(@TypeOf(list).Item){ .node = list.inner.first }; +} + +pub fn 
Reference(comptime T: type) type { + return struct { + pub const Referenced = T; + + location: Location, + identifier: ptk.strings.String, + }; +} + +fn String(comptime Tag: anytype) type { + return struct { + pub const tag = Tag; + + location: Location, + value: ptk.strings.String, + }; +} + +pub const Identifier = String(.identifier); +pub const StringLiteral = String(.string); +pub const CodeLiteral = String(.code); +pub const UserDefinedIdentifier = String(.user_defined); + +pub const Document = List(TopLevelDeclaration); + +pub const TopLevelDeclaration = union(enum) { + start: RuleRef, + rule: Rule, + node: Node, + pattern: Pattern, +}; + +pub const NodeRef = Reference(Node); // !mynode +pub const RuleRef = Reference(Rule); // +pub const PatternRef = Reference(Pattern); // $mytoken + +pub const ValueRef = struct { // $0 + location: Location, + index: u32, +}; + +pub const Node = struct { // node = ...; + name: Identifier, + value: TypeSpec, +}; + +pub const Rule = struct { // rule ( : )? = ...; + name: Identifier, // + ast_type: ?TypeSpec, // if specified, defines the ast node of the rule + productions: List(MappedProduction), // all alternatives of the rule +}; + +pub const Pattern = struct { // token = ...; + name: Identifier, + data: Data, + invisible: bool, + + pub const Data = union(enum) { + literal: StringLiteral, // literal "+" + word: StringLiteral, // word "while" + regex: StringLiteral, // regex "string" + external: UserDefinedIdentifier, // @matchMe + }; +}; + +pub const MappedProduction = struct { // ... => value + production: Production, // the thing before "=>" + mapping: ?AstMapping, // the thing after "=>" +}; + +pub const Production = union(enum) { + literal: StringLiteral, // "text" + terminal: PatternRef, // $token + recursion: RuleRef, // + sequence: List(Production), // ... + optional: List(Production), // ( ... )? + repetition_zero: List(Production), // [ ... ]* + repetition_one: List(Production), // [ ... ]+ +}; + +pub const AstMapping = union(enum) { + record: List(FieldAssignment), // { field = ..., field = ... } + list: List(AstMapping), // { ..., ..., ... } + variant: VariantInitializer, // field: ... 
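+    // Examples of how grammar mappings from examples/ptkgen/ast-with-unions.ptk land in this union:
+    // `=> { is_const = $0, ... }` becomes .record, `=> ns: $0` becomes .variant, and
+    // `=> @parseInt($0)` becomes .user_function_call whose argument is a .context_reference.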
+ + literal: CodeLiteral, // field: value + context_reference: ValueRef, // $0 + user_reference: UserDefinedIdentifier, // @field + user_function_call: FunctionCall(UserDefinedIdentifier), // @builtin(a,b,c) + function_call: FunctionCall(Identifier), // identifier(a,b,c) +}; + +pub const VariantInitializer = struct { + field: Identifier, + value: *AstMapping, +}; + +pub fn FunctionCall(comptime Name: type) type { + return struct { + function: Name, + arguments: List(AstMapping), + }; +} + +pub const FieldAssignment = struct { + location: Location, + field: Identifier, + value: *AstMapping, +}; + +pub const TypeSpec = union(enum) { + reference: NodeRef, // !type + literal: CodeLiteral, // literal `bool` + custom: UserDefinedIdentifier, // custom `Custom` + record: CompoundType, // struct + variant: CompoundType, // union +}; + +pub const CompoundType = struct { + location: Location, + fields: List(Field), +}; + +pub const Field = struct { + location: Location, + name: Identifier, + type: TypeSpec, +}; diff --git a/src/ptkgen/dump/ast.zig b/src/ptkgen/dump/ast.zig new file mode 100644 index 0000000..9c5d675 --- /dev/null +++ b/src/ptkgen/dump/ast.zig @@ -0,0 +1,241 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); + +const ast = @import("../ast.zig"); +const parser = @import("../parser.zig"); + +pub fn dump(strings: *const ptk.strings.Pool, decls: parser.Document) void { + var printer = AstPrinter{ + .strings = strings, + }; + + printer.dumpRoot(decls.top_level_declarations); +} + +const AstPrinter = struct { + const print = std.debug.print; + + strings: *const ptk.strings.Pool, + + fn dumpRoot(printer: AstPrinter, decls: ast.List(ast.TopLevelDeclaration)) void { + print("ast dump:\n", .{}); + + var iter = ast.iterate(decls); + while (iter.next()) |decl| { + switch (decl.*) { + .start => |item| print("start <{}>;\n", .{printer.fmtId(item.identifier)}), + + .rule => |rule| { + print("rule {s}", .{printer.fmtId(rule.name.value)}); + + if (rule.ast_type) |ast_type| { + print(" : ", .{}); + printer.dumpAstType(ast_type); + } + + print(" = \n", .{}); + + var prods = ast.iterate(rule.productions); + var first = true; + while (prods.next()) |prod| { + defer first = false; + if (!first) { + print("\n | ", .{}); + } else { + print(" ", .{}); + } + printer.dumpMappedProd(prod.*); + } + + print("\n;\n", .{}); + }, + + .node => |node| { + print("node {s} = ", .{printer.fmtId(node.name.value)}); + printer.dumpAstType(node.value); + print(";\n", .{}); + }, + + .pattern => |pattern| { + print("pattern {s} = ", .{printer.fmtId(pattern.name.value)}); + + switch (pattern.data) { + .literal => |value| print("literal \"{}\"", .{printer.fmtString(value.value)}), + .word => |value| print("word \"{}\"", .{printer.fmtString(value.value)}), + .regex => |value| print("regex \"{}\"", .{printer.fmtString(value.value)}), + .external => |value| print("@{}", .{printer.fmtId(value.value)}), + } + + if (pattern.invisible) { + print(" skip", .{}); + } + print(";\n", .{}); + }, + } + } + } + + fn dumpAstType(printer: AstPrinter, typespec: ast.TypeSpec) void { + switch (typespec) { + .reference => |ref| print("!{}", .{printer.fmtId(ref.identifier)}), + .literal => |lit| print("`{s}`", .{printer.strings.get(lit.value)}), + .custom => |custom| print("@{}", .{printer.fmtId(custom.value)}), + .record, .variant => |compound| { + const multi_field = compound.fields.len() > 1; + + print("{s} ", .{@tagName(typespec)}); + var iter = ast.iterate(compound.fields); + + if (multi_field) { + var line_prefix: []const u8 
= "\n "; + while (iter.next()) |field| { + print("{s}{}: ", .{ line_prefix, printer.fmtId(field.name.value) }); + printer.dumpAstType(field.type); + + if (multi_field) { + line_prefix = ",\n "; + } + } + print("\n", .{}); + } else { + const field = iter.next().?; + + print("{}: ", .{printer.fmtId(field.name.value)}); + printer.dumpAstType(field.type); + } + }, + } + } + + fn dumpMappedProd(printer: AstPrinter, mapped_prod: ast.MappedProduction) void { + printer.dumpProd(mapped_prod.production); + + if (mapped_prod.mapping) |mapping| { + print(" => ", .{}); + printer.dumpMapping(mapping); + } + } + + fn dumpProd(printer: AstPrinter, production: ast.Production) void { + switch (production) { + .literal => |lit| print("\"{}\"", .{printer.fmtString(lit.value)}), + .terminal => |term| print("${}", .{printer.fmtId(term.identifier)}), + .recursion => |term| print("<{}>", .{printer.fmtId(term.identifier)}), + + .sequence, .optional, .repetition_zero, .repetition_one => |seq| { + print("(", .{}); + + var iter = ast.iterate(seq); + while (iter.next()) |item| { + print(" ", .{}); + printer.dumpProd(item.*); + } + + print(" )", .{}); + switch (production) { + .sequence => {}, + .optional => print("?", .{}), + .repetition_zero => print("*", .{}), + .repetition_one => print("+", .{}), + else => unreachable, + } + }, + } + } + + fn dumpMapping(printer: AstPrinter, mapping: ast.AstMapping) void { + switch (mapping) { + .record => |record| { + std.debug.assert(record.len() > 0); + + print("{{ ", .{}); + + var first = true; + var iter = ast.iterate(record); + while (iter.next()) |arg| { + if (!first) { + print(", ", .{}); + } + first = false; + + print("{} = ", .{printer.fmtId(arg.field.value)}); + + printer.dumpMapping(arg.value.*); + } + + print(" }}", .{}); + }, + + .list => |list| { + if (list.len() > 0) { + print("{{ ", .{}); + printer.dumpMappingList(list); + print(" }}", .{}); + } else { + print("{{}}", .{}); + } + }, + + .variant => |variant| { + print("{}: ", .{printer.fmtId(variant.field.value)}); + printer.dumpMapping(variant.value.*); + }, + + .literal => |literal| print("`{s}`", .{printer.strings.get(literal.value)}), + + .context_reference => |context_reference| print("${}", .{context_reference.index}), + + .user_reference => |user_reference| print("@{}", .{printer.fmtId(user_reference.value)}), + + .user_function_call => |user_function_call| { + print("@{}(", .{printer.fmtId(user_function_call.function.value)}); + printer.dumpMappingList(user_function_call.arguments); + print(")", .{}); + }, + + .function_call => |function_call| { + print("{}(", .{printer.fmtId(function_call.function.value)}); + printer.dumpMappingList(function_call.arguments); + print(")", .{}); + }, + } + } + + fn dumpMappingList(printer: AstPrinter, list: ast.List(ast.AstMapping)) void { + var first = true; + var iter = ast.iterate(list); + while (iter.next()) |arg| { + if (!first) { + print(", ", .{}); + } + first = false; + + printer.dumpMapping(arg.*); + } + } + + fn fmtString(printer: AstPrinter, str: ptk.strings.String) StringPrinter { + return StringPrinter{ .printer = printer, .str = str, .mode = .text }; + } + + fn fmtId(printer: AstPrinter, str: ptk.strings.String) StringPrinter { + return StringPrinter{ .printer = printer, .str = str, .mode = .id }; + } + + const StringPrinter = struct { + printer: AstPrinter, + str: ptk.strings.String, + mode: enum { id, text }, + + pub fn format(strpr: StringPrinter, fmt: []const u8, opt: std.fmt.FormatOptions, writer: anytype) !void { + _ = opt; + _ = fmt; + + const text = 
strpr.printer.strings.get(strpr.str); + switch (strpr.mode) { + .id => try writer.print("{}", .{std.zig.fmtId(text)}), + .text => try writer.print("{}", .{std.zig.fmtEscapes(text)}), + } + } + }; +}; diff --git a/src/ptkgen/dump/json.zig b/src/ptkgen/dump/json.zig new file mode 100644 index 0000000..0da58ee --- /dev/null +++ b/src/ptkgen/dump/json.zig @@ -0,0 +1,294 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); + +const sema = @import("../sema.zig"); +const parser = @import("../parser.zig"); + +pub fn createJsonValue( + arena: *std.heap.ArenaAllocator, + strings: *const ptk.strings.Pool, + grammar: sema.Grammar, +) !std.json.Value { + const allocator = arena.allocator(); + + var mapper = JsonMapper{ + .allocator = allocator, + .strings = strings, + }; + + var root = std.json.ObjectMap.init(allocator); + errdefer root.deinit(); + + if (grammar.start) |start| { + try root.put("start", mapper.jsonString(start.rule.name)); + } else { + try root.put("start", .null); + } + + { + var list = mapper.newArray(); + errdefer list.deinit(); + + var iter = grammar.literal_patterns.iterator(); + while (iter.next()) |kvp| { + try list.append(mapper.jsonString(kvp.value_ptr.*.data.literal_match)); + } + + try root.put("literal_patterns", .{ .array = list }); + } + + { + var patterns = std.json.ObjectMap.init(allocator); + errdefer patterns.deinit(); + + var iter = grammar.patterns.iterator(); + while (iter.next()) |kvp| { + const spattern: *sema.Pattern = kvp.value_ptr.*; + + var jpattern = std.json.ObjectMap.init(allocator); + errdefer jpattern.deinit(); + + // try jpattern.put("name", .{ .string = strings.get(spattern.name) }); + try jpattern.put("kind", .{ .string = @tagName(spattern.data) }); + switch (spattern.data) { + inline else => |val| try jpattern.put("data", mapper.jsonString(val)), + } + + try patterns.putNoClobber( + strings.get(kvp.key_ptr.*), + .{ .object = jpattern }, + ); + } + + try root.put("patterns", .{ .object = patterns }); + } + + { + var nodes = std.json.ObjectMap.init(allocator); + errdefer nodes.deinit(); + + var iter = grammar.nodes.iterator(); + while (iter.next()) |kvp| { + const snode: *sema.Node = kvp.value_ptr.*; + + var jtype = try mapper.convertType(snode.type); + + try nodes.putNoClobber( + strings.get(kvp.key_ptr.*), + jtype, + ); + } + + try root.put("ast_nodes", .{ .object = nodes }); + } + + { + var rules = std.json.ObjectMap.init(allocator); + errdefer rules.deinit(); + + var iter = grammar.rules.iterator(); + while (iter.next()) |kvp| { + const srule: *sema.Rule = kvp.value_ptr.*; + + var jrule = mapper.newObject(); + errdefer jrule.deinit(); + + if (srule.type) |rule_type| { + var jtype = try mapper.convertType(rule_type); + try jrule.putNoClobber("type", jtype); + } else { + try jrule.putNoClobber("type", .null); + } + + { + var jprods = mapper.newArray(); + errdefer jprods.deinit(); + + try jprods.resize(srule.productions.len); + + for (jprods.items, srule.productions) |*jmprod_val, mapped_production| { + var jmprod = mapper.newObject(); + errdefer jmprod.deinit(); + + var jprod = try mapper.convertProduction(mapped_production.production); + + try jmprod.putNoClobber("production", jprod); + + if (mapped_production.mapping) |mapping| { + var jmap = try mapper.convertMapping(mapping); + try jmprod.putNoClobber("mapping", jmap); + } else { + try jmprod.putNoClobber("mapping", .null); + } + + jmprod_val.* = .{ .object = jmprod }; + } + + try jrule.putNoClobber("mapped_productions", .{ .array = jprods }); + } + + try rules.putNoClobber( + 
strings.get(kvp.key_ptr.*), + .{ .object = jrule }, + ); + } + + try root.put("rules", .{ .object = rules }); + } + + return std.json.Value{ .object = root }; +} + +const JsonMapper = struct { + allocator: std.mem.Allocator, + strings: *const ptk.strings.Pool, + + fn convertProduction(mapper: JsonMapper, production: sema.Production) error{OutOfMemory}!std.json.Value { + var jtype = mapper.newObject(); + errdefer jtype.deinit(); + + try jtype.putNoClobber("kind", .{ .string = @tagName(production) }); + + const data: std.json.Value = switch (production) { + .terminal => |terminal| blk: { + if (terminal.is_literal) { + try jtype.put("kind", .{ .string = "literal-terminal" }); + } + break :blk mapper.jsonString(terminal.name); + }, + .recursion => |recursion| mapper.jsonString(recursion.name), + + .sequence => |sequence| blk: { + var list = mapper.newArray(); + errdefer list.deinit(); + + try list.resize(sequence.len); + + for (list.items, sequence) |*dst, src| { + dst.* = try mapper.convertProduction(src); + } + + break :blk .{ .array = list }; + }, + + .optional, .repetition_zero, .repetition_one => |optional| try mapper.convertProduction(optional.*), + }; + try jtype.putNoClobber("data", data); + + return .{ .object = jtype }; + } + + fn convertMapping(mapper: JsonMapper, mapping: sema.Mapping) error{OutOfMemory}!std.json.Value { + var jtype = mapper.newObject(); + errdefer jtype.deinit(); + + try jtype.putNoClobber("kind", .{ .string = @tagName(mapping) }); + + switch (mapping) { + .record_initializer => |record_initializer| { + var list = mapper.newArray(); + errdefer list.deinit(); + + try list.resize(record_initializer.fields.len); + + for (list.items, record_initializer.fields) |*dst, src| { + var jfield = mapper.newObject(); + errdefer jfield.deinit(); + + try jfield.putNoClobber("field", mapper.jsonString(src.field.name)); + try jfield.putNoClobber("value", try mapper.convertMapping(src.value)); + + dst.* = .{ .object = jfield }; + } + + try jtype.putNoClobber("fields", .{ .array = list }); + }, + .list_initializer => |list_initializer| { + var list = mapper.newArray(); + errdefer list.deinit(); + + try list.resize(list_initializer.items.len); + + for (list.items, list_initializer.items) |*dst, src| { + dst.* = try mapper.convertMapping(src); + } + + try jtype.putNoClobber("items", .{ .array = list }); + }, + .variant_initializer => |variant_initializer| { + try jtype.putNoClobber("field", mapper.jsonString(variant_initializer.field.name)); + try jtype.putNoClobber("value", try mapper.convertMapping(variant_initializer.value.*)); + }, + .user_function_call, .builtin_function_call => |function_call| { + var list = mapper.newArray(); + errdefer list.deinit(); + + try list.resize(function_call.arguments.len); + + for (list.items, function_call.arguments) |*dst, src| { + dst.* = try mapper.convertMapping(src); + } + + try jtype.putNoClobber("arguments", .{ .array = list }); + + try jtype.putNoClobber("function", mapper.jsonString(function_call.function)); + }, + + .code_literal, .user_literal => |literal| { + try jtype.putNoClobber("literal", mapper.jsonString(literal)); + }, + + .context_reference => |context_reference| { + try jtype.putNoClobber("index", .{ .integer = context_reference.index }); + }, + } + + return .{ .object = jtype }; + } + + fn convertType(mapper: JsonMapper, stype: *sema.Type) error{OutOfMemory}!std.json.Value { + const data: std.json.Value = switch (stype.*) { + .code_literal, .user_type => |literal| mapper.jsonString(literal), + .named => |named| 
mapper.jsonString(named.name), + + .optional => |inner| try mapper.convertType(inner), + + .record, .variant => |compound| blk: { + var fields = mapper.newObject(); + errdefer fields.deinit(); + + for (compound.fields.keys(), compound.fields.values()) |name, field| { + var field_type = try mapper.convertType(field.type); + try fields.putNoClobber( + mapper.strings.get(name), + field_type, + ); + } + + break :blk .{ .object = fields }; + }, + + .token => .null, + }; + + var jtype = mapper.newObject(); + errdefer jtype.deinit(); + + try jtype.putNoClobber("kind", .{ .string = @tagName(stype.*) }); + try jtype.putNoClobber("data", data); + + return .{ .object = jtype }; + } + + fn jsonString(mapper: JsonMapper, string: ptk.strings.String) std.json.Value { + return .{ .string = mapper.strings.get(string) }; + } + + fn newObject(mapper: JsonMapper) std.json.ObjectMap { + return std.json.ObjectMap.init(mapper.allocator); + } + + fn newArray(mapper: JsonMapper) std.json.Array { + return std.json.Array.init(mapper.allocator); + } +}; diff --git a/src/ptkgen/dump/sema.zig b/src/ptkgen/dump/sema.zig new file mode 100644 index 0000000..f338341 --- /dev/null +++ b/src/ptkgen/dump/sema.zig @@ -0,0 +1,173 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); + +const sema = @import("../sema.zig"); +const parser = @import("../parser.zig"); + +pub fn dump(strings: *const ptk.strings.Pool, grammar: sema.Grammar) void { + var printer = SemaPrinter{ + .strings = strings, + }; + + SemaPrinter.print("literal patterns:\n", .{}); + printer.dumpPatterns(grammar.literal_patterns); + + SemaPrinter.print("\nuser patterns:\n", .{}); + printer.dumpPatterns(grammar.patterns); + + SemaPrinter.print("\nstart rule: ", .{}); + if (grammar.start) |start| { + SemaPrinter.print("<{}>\n", .{printer.fmtId(start.rule.name)}); + } else { + SemaPrinter.print("-none-\n", .{}); + } + + SemaPrinter.print("\nast nodes:\n", .{}); + printer.dumpNodes(grammar.nodes); + + SemaPrinter.print("\nrules:\n", .{}); + printer.dumpRules(grammar.rules); +} + +const SemaPrinter = struct { + const print = std.debug.print; + + strings: *const ptk.strings.Pool, + + fn dumpPatterns(printer: SemaPrinter, patterns: sema.StringHashMap(*sema.Pattern)) void { + for (patterns.values()) |pattern| { + print("pattern {} = ", .{printer.fmtId(pattern.name)}); + + switch (pattern.data) { + inline else => |value, tag| print("{s} \"{}\"", .{ @tagName(tag), printer.fmtString(value) }), + } + + print(";\n", .{}); + } + } + + fn dumpNodes(printer: SemaPrinter, nodes: sema.StringHashMap(*sema.Node)) void { + for (nodes.values()) |node| { + print("node {} = ", .{printer.fmtId(node.name)}); + + printer.dumpType(node.type); + + print(";\n", .{}); + } + } + + fn dumpRules(printer: SemaPrinter, rules: sema.StringHashMap(*sema.Rule)) void { + for (rules.values()) |rule| { + print("rule {}", .{printer.fmtId(rule.name)}); + + if (rule.type) |rule_type| { + print(": ", .{}); + printer.dumpType(rule_type); + } + + print(" = ", .{}); + + for (rule.productions, 0..) 
|production, i| { + if (i > 0) print("\n | ", .{}); + printer.dumpMappedProduction(production); + } + + print(";\n", .{}); + } + } + + fn dumpMappedProduction(printer: SemaPrinter, mapped_prod: sema.MappedProduction) void { + printer.dumpProduction(mapped_prod.production); + + if (mapped_prod.mapping) |mapping| { + print(" -> ", .{}); + printer.dumpMapping(mapping); + } + } + + fn dumpProduction(printer: SemaPrinter, production: sema.Production) void { + switch (production) { + .terminal => |terminal| { + if (terminal.is_literal) { + print("\"{}\"", .{printer.fmtString(terminal.data.literal_match)}); + } else { + print("${}", .{printer.fmtId(terminal.name)}); + } + }, + .recursion => |recursion| print("<{}>", .{printer.fmtId(recursion.name)}), + .sequence => |sequence| { + for (sequence, 0..) |item, i| { + if (i > 0) + print(" ", .{}); + printer.dumpProduction(item); + } + }, + .optional => |optional| { + print("(", .{}); + printer.dumpProduction(optional.*); + print(")?", .{}); + }, + .repetition_zero => |repetition_zero| { + print("(", .{}); + printer.dumpProduction(repetition_zero.*); + print(")*", .{}); + }, + .repetition_one => |repetition_one| { + print("(", .{}); + printer.dumpProduction(repetition_one.*); + print(")+", .{}); + }, + } + } + + fn dumpMapping(printer: SemaPrinter, mapping: sema.Mapping) void { + _ = mapping; + _ = printer; + } + + fn dumpType(printer: SemaPrinter, stype: *sema.Type) void { + switch (stype.*) { + .code_literal => |literal| print("`{}`", .{printer.fmtString(literal)}), + .user_type => |literal| print("@{}", .{printer.fmtId(literal)}), + .optional => |inner| { + print("optional ", .{}); + printer.dumpType(inner); + }, + inline .record, .variant => |compound, tag| { + print("{s} ", .{@tagName(tag)}); + for (compound.fields.keys(), compound.fields.values(), 0..) 
|name, field, i| { + if (i > 0) + print(", ", .{}); + print("{}: ", .{printer.fmtId(name)}); + printer.dumpType(field.type); + } + }, + .named => |other| print("!{}", .{printer.fmtId(other.name)}), + } + } + + fn fmtString(printer: SemaPrinter, str: ptk.strings.String) StringPrinter { + return StringPrinter{ .printer = printer, .str = str, .mode = .text }; + } + + fn fmtId(printer: SemaPrinter, str: ptk.strings.String) StringPrinter { + return StringPrinter{ .printer = printer, .str = str, .mode = .id }; + } + + const StringPrinter = struct { + printer: SemaPrinter, + str: ptk.strings.String, + mode: enum { id, text }, + + pub fn format(strpr: StringPrinter, fmt: []const u8, opt: std.fmt.FormatOptions, writer: anytype) !void { + _ = opt; + _ = fmt; + + const text = strpr.printer.strings.get(strpr.str); + switch (strpr.mode) { + .id => try writer.print("{}", .{std.zig.fmtId(text)}), + .text => try writer.print("{}", .{std.zig.fmtEscapes(text)}), + } + } + }; +}; diff --git a/src/ptkgen/intl.zig b/src/ptkgen/intl.zig new file mode 100644 index 0000000..fa0e3d5 --- /dev/null +++ b/src/ptkgen/intl.zig @@ -0,0 +1,115 @@ +const std = @import("std"); + +const Diagnostics = @import("Diagnostics.zig"); + +pub const Language = enum { + en, +}; + +pub const language: Language = .en; + +pub const localization = @field(localizations, @tagName(language)); + +pub const localizations = struct { + pub const en = Localization.generate(@embedFile("intl/en.json")); +}; + +pub const FormattableError: type = blk: { + const list = @typeInfo(std.meta.fieldInfo(Localization, .errors).type).Struct.fields; + + var errors: [list.len]std.builtin.Type.Error = undefined; + for (&errors, list) |*dst, src| { + dst.* = .{ .name = src.name }; + } + + break :blk @Type(.{ + .ErrorSet = &errors, + }); +}; + +pub const DiagnosticStrings: type = blk: { + const list = @typeInfo(Diagnostics.Code).Enum.fields; + + var dst_fields: [list.len]std.builtin.Type.StructField = undefined; + for (&dst_fields, list) |*dst, src| { + dst.* = .{ + .name = src.name, + .type = []const u8, + .default_value = null, + .is_comptime = false, + .alignment = @alignOf([]const u8), + }; + } + + break :blk @Type(.{ + .Struct = .{ + .layout = .Auto, + .fields = &dst_fields, + .decls = &.{}, + .is_tuple = false, + }, + }); +}; + +pub const Localization = struct { + diagnostics: DiagnosticStrings, + + errors: struct { + Unexpected: []const u8, + + SyntaxError: []const u8, + SemanticError: []const u8, + + OutOfMemory: []const u8, + + InputOutput: []const u8, + AccessDenied: []const u8, + BrokenPipe: []const u8, + SystemResources: []const u8, + OperationAborted: []const u8, + WouldBlock: []const u8, + ConnectionResetByPeer: []const u8, + IsDir: []const u8, + ConnectionTimedOut: []const u8, + NotOpenForReading: []const u8, + NetNameDeleted: []const u8, + + FileTooBig: []const u8, + InvalidSourceEncoding: []const u8, + + DiskQuota: []const u8, + NoSpaceLeft: []const u8, + DeviceBusy: []const u8, + InvalidArgument: []const u8, + NotOpenForWriting: []const u8, + LockViolation: []const u8, + ProcessFdQuotaExceeded: []const u8, + SystemFdQuotaExceeded: []const u8, + SharingViolation: []const u8, + PathAlreadyExists: []const u8, + FileNotFound: []const u8, + PipeBusy: []const u8, + NameTooLong: []const u8, + InvalidUtf8: []const u8, + BadPathName: []const u8, + NetworkNotFound: []const u8, + InvalidHandle: []const u8, + SymLinkLoop: []const u8, + NoDevice: []const u8, + NotDir: []const u8, + FileLocksNotSupported: []const u8, + FileBusy: []const u8, + 
LinkQuotaExceeded: []const u8, + ReadOnlyFileSystem: []const u8, + RenameAcrossMountPoints: []const u8, + }, + + pub fn generate(comptime buffer: []const u8) Localization { + @setEvalBranchQuota(1_000_000); + + var alloc_buf: [4 * buffer.len]u8 = undefined; + var fba = std.heap.FixedBufferAllocator.init(&alloc_buf); + + return std.json.parseFromSliceLeaky(Localization, fba.allocator(), buffer, .{}) catch |err| @compileError(std.fmt.comptimePrint("failed to parse json: {}", .{err})); + } +}; diff --git a/src/ptkgen/intl/en.json b/src/ptkgen/intl/en.json new file mode 100644 index 0000000..00ec0ea --- /dev/null +++ b/src/ptkgen/intl/en.json @@ -0,0 +1,86 @@ +{ + "diagnostics": { + "out_of_memory": "Out of memory", + "file_limit_exceeded": "Input file exceeds maximum file size", + "io_error": "I/O error: {[error_code]}", + "invalid_source_encoding": "Invalid source code encoding detected", + "bad_string_escape": "Invalid string escape: Escape sequence at the end of string.", + "invalid_string_escape": "Invalid string escape '\\{[escape]}'.", + "excess_tokens": "Excess token at the end of the file: {[token_type]}-", + "illegal_empty_group": "Production sequence may not be empty.", + "integer_overflow": "Integer value {[actual]} out of range. Values must be between {[min]} and {[max]}.", + "empty_mapping": "Empty mappings are not allowed.", + "empty_typespec": "A type specifier is missing.", + "unexpected_token_eof": "Expected a token of type '{[expected_type]}', but the end of file was discovered.", + "unexpected_token": "Expected a token of type '{[expected_type]}', but found token {[actual]}.", + "unexpected_character": "Unexpected character '{[character]}' found.", + "unexpected_eof": "Unexpected end of file.", + "unexpected_toplevel_token": "Expected a top level declaration ('start', 'rule', 'node' or 'pattern'), but found token {[actual]}", + "unexpected_token_no_context": "Unexpected token '{[actual]}'.", + "unexpected_token_type_spec": "Expected a type specifier, but found '{[actual]}'.", + "unexpected_token_mapping": "Expected an AST mapping, but found '{[actual]}'.", + "unexpected_token_production_list": "Expected ';' or '|', but found '{[actual]}'.", + "unexpected_token_production": "Expected a production, but found '{[actual]}'.", + "unexpected_token_pattern": "Expected a pattern definition, but found '{[actual]}'.", + "duplicate_identifier_rule": "Rule {[identifier]} already defined here: {[previous_location]}", + "duplicate_identifier_node": "Node {[identifier]} already defined here: {[previous_location]}", + "duplicate_identifier_pattern": "Pattern {[identifier]} already defined here: {[previous_location]}", + "reference_to_undeclared_rule": "Reference to undeclared rule '{[identifier]}'.", + "reference_to_undeclared_node": "Reference to undeclared node '{[identifier]}'.", + "reference_to_undeclared_pattern": "Reference to undeclared pattern '{[identifier]}'.", + "missing_start_symbol": "Grammar file has no start symbol declared.", + "multiple_start_symbols": "Another start rule '({[identifier]})' was already declared here: {[previous_location]}", + "duplicate_compound_field": "Another field named '{[identifier]s}' was already declared here: {[previous_location]}", + "context_reference_out_of_bounds": "Context reference index out of bounds. {[index]} was given, but the highest possible index is {[limit]}.", + "variant_does_not_exist": "The variant field {[field]s} does not exist.
The variant type is declared here: {[type_location]}", + "record_field_does_not_exist": "The record field {[field]s} does not exist. The record type is declared here: {[type_location]}", + "record_field_already_initialized": "The record field {[field]s} is already initialized. Previous initialization: {[prev_init]}", + "record_field_not_initialized": "The record field {[field]s} was not initialized. Field declared here: {[field_location]}", + "mapping_requires_typed_rule": "The use of a rule mapping requires that the rule has an explicitly declared type.", + "invalid_builtin_function": "The builtin function {[name]s} does not exist!" + }, + "errors": { + "SyntaxError": "syntax error", + "SemanticError": "semantic error", + "Unexpected": "unexpected error encountered", + "OutOfMemory": "out of memory", + "InputOutput": "input output", + "AccessDenied": "access denied", + "BrokenPipe": "broken pipe", + "SystemResources": "system resources", + "OperationAborted": "operation aborted", + "WouldBlock": "would block", + "ConnectionResetByPeer": "connection reset by peer", + "IsDir": "path points to directory", + "ConnectionTimedOut": "connection timed out", + "NotOpenForReading": "not open for reading", + "NetNameDeleted": "net name deleted", + "FileTooBig": "Input file exceeds resources", + "InvalidSourceEncoding": "invalid source encoding", + "DiskQuota": "disk quota", + "NoSpaceLeft": "no space left", + "DeviceBusy": "device busy", + "InvalidArgument": "invalid argument", + "NotOpenForWriting": "not open for writing", + "LockViolation": "lock violation", + "ProcessFdQuotaExceeded": "process fd quota exceeded", + "SystemFdQuotaExceeded": "system fd quota exceeded", + "SharingViolation": "sharing violation", + "PathAlreadyExists": "path already exists", + "FileNotFound": "file not found", + "PipeBusy": "pipe busy", + "NameTooLong": "name too long", + "InvalidUtf8": "invalid utf8", + "BadPathName": "bad path name", + "NetworkNotFound": "network not found", + "InvalidHandle": "invalid handle", + "SymLinkLoop": "sym link loop", + "NoDevice": "no device", + "NotDir": "not dir", + "FileLocksNotSupported": "file locks not supported", + "FileBusy": "file busy", + "LinkQuotaExceeded": "link quota exceeded", + "ReadOnlyFileSystem": "read only file system", + "RenameAcrossMountPoints": "rename across mount points" + } +} \ No newline at end of file diff --git a/src/ptkgen/main.zig b/src/ptkgen/main.zig new file mode 100644 index 0000000..699b991 --- /dev/null +++ b/src/ptkgen/main.zig @@ -0,0 +1,403 @@ +//! +//! Parser Toolkit Grammar Compiler +//! + +const std = @import("std"); +const args_parser = @import("args"); +const ptk = @import("parser-toolkit"); + +const ast = @import("ast.zig"); +const sema = @import("sema.zig"); +const intl = @import("intl.zig"); +const parser = @import("parser.zig"); +const ast_dump = @import("dump/ast.zig"); +const sema_dump = @import("dump/sema.zig"); +const json_dump = @import("dump/json.zig"); + +const Diagnostics = @import("Diagnostics.zig"); + +comptime { + // reference for unit tests: + _ = parser; +} + +pub const Format = enum { + json, + // zig, +}; + +pub const CliOptions = struct { + help: bool = false, + output: ?[]const u8 = null, + test_mode: TestMode = .none, + trace: bool = false, + format: Format = .json, + + @"max-file-size": u32 = 4 * 1024, // 4 MB of source code is a lot! 
+ + dump: bool = false, + + pub const shorthands = .{ + .h = "help", + .o = "output", + .D = "dump", + }; + + pub const meta = .{ + .full_text = "Compiles a .ptk grammar file into Zig code.", + + .usage_summary = "[-h] [-o ] []", + + .option_docs = .{ + .help = "Prints this help.", + .output = "If given, will print the generated code into ", + + .test_mode = "(internal use only, required for testing)", + + .@"max-file-size" = "Maximum input file size in KiB (default: 4096)", + + .trace = "Prints a parse trace", + + .format = "Selects the output format of the grammar. Can be one of [ json, zig ]", + + .dump = "Dumps results from parser and sema to stderr.", + }, + }; +}; + +const TestMode = enum { + none, + parse_only, + no_codegen, +}; + +const AppError = error{OutOfMemory} || std.fs.File.WriteError; +pub fn main() AppError!u8 { + // errdefer |e| @compileLog(@TypeOf(e)); + + var stdout = std.io.getStdOut(); + var stdin = std.io.getStdIn(); + var stderr = std.io.getStdErr(); + + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer _ = gpa.deinit(); + + var arena = std.heap.ArenaAllocator.init(gpa.allocator()); + defer arena.deinit(); + + const dynamic_allocator = gpa.allocator(); + const static_allocator = arena.allocator(); + + var cli = args_parser.parseForCurrentProcess(CliOptions, static_allocator, .print) catch return 1; + defer cli.deinit(); + + if (cli.options.help) { + try args_parser.printHelp(CliOptions, cli.executable_name orelse "ptkgen", stdout.writer()); + return 0; + } + + var string_pool = try ptk.strings.Pool.init(dynamic_allocator); + defer string_pool.deinit(); + + var diagnostics = Diagnostics.init(dynamic_allocator); + defer diagnostics.deinit(); + + var input_file = switch (cli.positionals.len) { + 0 => stdin, + 1 => std.fs.cwd().openFile(cli.positionals[0], .{}) catch |err| { + try stderr.writer().print("failed to open file {s}: {s}\n", .{ + cli.positionals[0], + @errorName(err), + }); + return 1; + }, + else => { + try stderr.writeAll("Expects either a single positional file or none.\nSee --help for usage!\n"); + return 1; + }, + }; + defer input_file.close(); + + const file_name = if (cli.positionals.len > 0) + cli.positionals[0] + else + "stdint"; + + var expectations = std.ArrayList(TestExpectation).init(dynamic_allocator); + defer expectations.deinit(); + + const processing_ok = process_file: { + // 4 MB should be enough for now... + var source_code = input_file.readToEndAlloc(static_allocator, 1024 * cli.options.@"max-file-size") catch |err| { + try convertErrorToDiagnostics(&diagnostics, file_name, err); + break :process_file false; + }; + + defer static_allocator.free(source_code); + + if (cli.options.test_mode != .none) { + // in test mode, parse expectations from source code: + var lines = std.mem.tokenize(u8, source_code, "\n"); + while (lines.next()) |line| { + const prefix = "# expected:"; + if (std.mem.startsWith(u8, line, prefix)) { + var items = std.mem.tokenize(u8, line[prefix.len..], " \t,"); + while (items.next()) |error_code| { + const code = Diagnostics.Code.parse( + error_code, + ) catch @panic("invalid error code!"); + + try expectations.append(.{ .code = code }); + } + } + } + } + + compileFile( + dynamic_allocator, + &diagnostics, + &string_pool, + source_code, + file_name, + cli.options, + ) catch |err| { + try convertErrorToDiagnostics(&diagnostics, file_name, err); + break :process_file false; + }; + + // Todo: continue from here? 
+ + break :process_file true; + }; + + if (cli.options.test_mode == .none) { + try diagnostics.render(stderr.writer()); + + return if (processing_ok and !diagnostics.hasErrors()) + 0 // exit code for success + else + 1; // exit code for failure + } else { + // test fails through `error.TestExpectationMismatched`, not through diagnostics: + validateDiagnostics(dynamic_allocator, diagnostics, expectations.items) catch { + try stderr.writeAll("Full diagnostics:\n"); + try diagnostics.render(stderr.writer()); + + return 1; + }; + return 0; + } +} + +fn convertErrorToDiagnostics(diagnostics: *Diagnostics, file_name: []const u8, err: intl.FormattableError) error{OutOfMemory}!void { + switch (err) { + // syntax errors must produce diagnostics: + error.SyntaxError, error.SemanticError, error.InvalidSourceEncoding => std.debug.assert(diagnostics.hasErrors()), + + error.OutOfMemory => { + try diagnostics.emit(.{ + .source = file_name, + .line = 1, + .column = 1, + }, .out_of_memory, .{}); + }, + + error.FileTooBig => { + try diagnostics.emit(.{ + .source = file_name, + .line = 1, + .column = 1, + }, .file_limit_exceeded, .{}); + }, + + // input errors: + error.InputOutput, + error.AccessDenied, + error.BrokenPipe, + error.SystemResources, + error.OperationAborted, + error.WouldBlock, + error.ConnectionResetByPeer, + error.Unexpected, + error.IsDir, + error.ConnectionTimedOut, + error.NotOpenForReading, + error.NetNameDeleted, + + // output errors: + error.DiskQuota, + error.NoSpaceLeft, + error.DeviceBusy, + error.InvalidArgument, + error.NotOpenForWriting, + error.LockViolation, + error.ProcessFdQuotaExceeded, + error.SystemFdQuotaExceeded, + error.SharingViolation, + error.PathAlreadyExists, + error.FileNotFound, + error.PipeBusy, + error.NameTooLong, + error.InvalidUtf8, + error.BadPathName, + error.NetworkNotFound, + error.InvalidHandle, + error.SymLinkLoop, + error.NoDevice, + error.NotDir, + error.FileLocksNotSupported, + error.FileBusy, + error.LinkQuotaExceeded, + error.ReadOnlyFileSystem, + error.RenameAcrossMountPoints, + => |e| { + try diagnostics.emit(.{ + .source = file_name, + .line = 1, + .column = 1, + }, .io_error, .{ .error_code = e }); + }, + } +} + +const TestExpectation = struct { + code: Diagnostics.Code, +}; + +fn validateDiagnostics(allocator: std.mem.Allocator, diagnostics: Diagnostics, expectations: []const TestExpectation) !void { + var available = std.ArrayList(Diagnostics.Code).init(allocator); + defer available.deinit(); + + var expected = std.ArrayList(Diagnostics.Code).init(allocator); + defer expected.deinit(); + + try available.appendSlice(diagnostics.codes.items); + try expected.resize(expectations.len); + + for (expected.items, expectations) |*dst, src| { + dst.* = src.code; + } + + // Remove everything from expected and available that is present in both: + { + var i: usize = 0; + while (i < expected.items.len) { + const e = expected.items[i]; + + if (std.mem.indexOfScalar(Diagnostics.Code, available.items, e)) |index| { + _ = available.swapRemove(index); + _ = expected.swapRemove(i); + // std.log.info("found matching diagnostic {s}", .{@tagName(e)}); + } else { + i += 1; + } + } + } + + // Remove all non-errors from available, we do match on them with "-W4000" instead of forcing a expected W4000 into all files without start rules (or similar) + { + var i: usize = 0; + while (i < available.items.len) { + const code = available.items[i]; + if (!code.isError()) { + _ = available.swapRemove(i); + } else { + i += 1; + } + } + } + + const ok = (available.items.len == 
0) and (expected.items.len == 0); + + for (available.items) |code| { + std.log.err("unexpected diagnostic: {s} ({d})", .{ code, code }); + } + for (expected.items) |code| { + std.log.err("unmatched diagnostic: {s} ({d})", .{ code, code }); + } + + if (!ok) + return error.TestExpectationMismatched; +} + +fn compileFile( + allocator: std.mem.Allocator, + diagnostics: *Diagnostics, + string_pool: *ptk.strings.Pool, + source_code: []const u8, + file_name: []const u8, + options: CliOptions, +) !void { + var tree = try parser.parse( + .{ + .allocator = allocator, + .diagnostics = diagnostics, + .string_pool = string_pool, + .file_name = file_name, + .source_code = source_code, + .trace_enabled = options.trace, + }, + ); + defer tree.deinit(); + + if (options.test_mode == .parse_only) { + return; + } + + var grammar = try sema.analyze( + allocator, + diagnostics, + string_pool, + tree.top_level_declarations, + ); + defer grammar.deinit(); + + // TODO: Implement parsergen / tablegen / highlightergen + + if (options.dump) { + std.debug.print("ast dump:\n", .{}); + ast_dump.dump(string_pool, tree); + + std.debug.print("\n\nsema dump:\n", .{}); + sema_dump.dump(string_pool, grammar); + } + + if (options.test_mode != .none) + return; + + // Output generation: + { + const use_stdout = (options.output == null) or std.mem.eql(u8, options.output.?, "-"); + + var atomic_output_file: std.fs.AtomicFile = undefined; + if (!use_stdout) { + atomic_output_file = try std.fs.cwd().atomicFile(options.output.?, .{}); + } + defer if (!use_stdout) + atomic_output_file.deinit(); + + var output_file = if (use_stdout) + std.io.getStdOut() + else + atomic_output_file.file; + + // write to output_file here: + switch (options.format) { + .json => { + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + var json_repr: std.json.Value = try json_dump.createJsonValue( + &arena, + string_pool, + grammar, + ); + + try std.json.stringify(json_repr, .{}, output_file.writer()); + }, + } + + if (!use_stdout) + try atomic_output_file.finish(); + } +} diff --git a/src/ptkgen/parser.zig b/src/ptkgen/parser.zig new file mode 100644 index 0000000..7d26a61 --- /dev/null +++ b/src/ptkgen/parser.zig @@ -0,0 +1,1495 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); +const ast = @import("ast.zig"); + +const Diagnostics = @import("Diagnostics.zig"); + +const fmtEscapes = std.zig.fmtEscapes; + +const BAD_TYPE_SPEC: ast.TypeSpec = undefined; + +pub const Document = struct { + arena: std.heap.ArenaAllocator, + file_name: []const u8, + top_level_declarations: ast.Document, + + pub fn deinit(ts: *Document) void { + ts.arena.deinit(); + ts.* = undefined; + } +}; + +pub fn parse(opt: struct { + allocator: std.mem.Allocator, + diagnostics: *Diagnostics, + string_pool: *ptk.strings.Pool, + file_name: []const u8, + source_code: []const u8, + trace_enabled: bool, +}) !Document { + var arena = std.heap.ArenaAllocator.init(opt.allocator); + errdefer arena.deinit(); + + const file_name_copy = try arena.allocator().dupe(u8, opt.file_name); + + var tokenizer = Tokenizer.init(opt.source_code, file_name_copy); + + var parser = Parser{ + .core = ParserCore.init(&tokenizer), + .arena = arena.allocator(), + .pool = opt.string_pool, + .diagnostics = opt.diagnostics, + .trace_enabled = opt.trace_enabled, + }; + + const document_node = parser.acceptDocument() catch |err| switch (err) { + + // Unrecoverable syntax error, must have created diagnostics already + error.SyntaxError => |e| { + 
std.debug.assert(opt.diagnostics.hasErrors()); + + if (opt.trace_enabled) { + if (@errorReturnTrace()) |trace| { + std.debug.dumpStackTrace(trace.*); + } + } + + return e; + }, + error.InvalidSourceEncoding => |e| { + std.debug.assert(opt.diagnostics.hasErrors()); + + return e; + }, + + error.OutOfMemory => |e| return e, + }; + + if (tokenizer.next()) |token_or_null| { + if (token_or_null) |token| { + try opt.diagnostics.emit(token.location, .excess_tokens, .{ .token_type = token.type }); + return error.SyntaxError; + } + } else |_| { + try parser.emitUnexpectedCharacter(tokenizer.current_location, tokenizer.offset); + return error.SyntaxError; + } + + return Document{ + .arena = arena, + .file_name = file_name_copy, + .top_level_declarations = document_node, + }; +} + +pub const TokenType = enum { + // keywords + + start, + node, + rule, + pattern, + + record, + variant, + optional, + + literal, + word, + regex, + skip, + + // user values + + identifier, // foo-bar_bam + node_ref, // !node + rule_ref, // + token_ref, // $token + value_ref, // $0 + userval_ref, // @userval + + // values + + string_literal, // "string" + code_literal, // `code` + + // operators + + @"=", + @",", + @".", + @"*", + @"+", + @":", + @";", + @"|", + @"!", + @"?", + @"[", + @"]", + @"(", + @")", + @"{", + @"}", + @"=>", + + // auxiliary + + line_comment, + whitespace, +}; + +pub const Token = Tokenizer.Token; + +const ParserCore = ptk.ParserCore(Tokenizer, .{ .whitespace, .line_comment }); + +const Parser = struct { + const RS = ptk.RuleSet(TokenType); + const String = ptk.strings.String; + + core: ParserCore, + arena: std.mem.Allocator, + pool: *ptk.strings.Pool, + diagnostics: *Diagnostics, + + trace_enabled: bool, + trace_depth: u32 = 0, + + pub fn acceptDocument(parser: *Parser) FatalAcceptError!ast.Document { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + var doc = ast.Document{}; + + while (true) { + const decl_or_eof = try parser.acceptTopLevelDecl(); + + const decl = decl_or_eof orelse break; + + try parser.append(ast.TopLevelDeclaration, &doc, decl); + } + + return doc; + } + + fn acceptTopLevelDecl(parser: *Parser) FatalAcceptError!?ast.TopLevelDeclaration { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + if (parser.acceptStartDecl()) |root_rule| { + return .{ .start = root_rule }; + } else |err| try filterAcceptError(err); + + if (parser.acceptRule()) |rule| { + return .{ .rule = rule }; + } else |err| try filterAcceptError(err); + + if (parser.acceptNode()) |node| { + return .{ .node = node }; + } else |err| try filterAcceptError(err); + + if (parser.acceptPatternDefinition()) |pattern| { + return .{ .pattern = pattern }; + } else |err| try filterAcceptError(err); + + // Detect any excess tokens on the top level: + if (parser.core.nextToken()) |maybe_token| { + if (maybe_token) |token| { + try parser.emitDiagnostic(token.location, .unexpected_toplevel_token, .{ + .actual = token, + }); + return error.SyntaxError; + } else { + // This is actually the good path here, as only if we don't find any token or tokenization error, + // we reached the end of the file. 
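+                // In that case we simply fall through and return `null` at the end of this function.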
+ } + } else |err| switch (err) { + error.UnexpectedCharacter => { + try parser.emitUnexpectedCharacter(parser.core.tokenizer.current_location, parser.core.tokenizer.offset); + return error.SyntaxError; + }, + } + + return null; + } + + fn acceptStartDecl(parser: *Parser) AcceptError!ast.RuleRef { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + try parser.acceptLiteral(.start, .recover); + const init_rule = try parser.acceptRuleReference(.fail); + + try parser.acceptLiteral(.@";", .fail); + + return init_rule; + } + + fn acceptPatternDefinition(parser: *Parser) AcceptError!ast.Pattern { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + try parser.acceptLiteral(.pattern, .recover); + + const name = try parser.acceptIdentifier(.fail); + try parser.acceptLiteral(.@"=", .fail); + + const data = try parser.acceptPatternSpec(); + + const invisible = try parser.tryAcceptLiteral(.skip); + + try parser.acceptLiteral(.@";", .fail); + + return .{ + .name = name, + .data = data, + .invisible = invisible, + }; + } + + fn acceptPatternSpec(parser: *Parser) AcceptError!ast.Pattern.Data { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + var state = parser.save(); + errdefer parser.restore(state); + + if (try parser.tryAcceptLiteral(.literal)) { + const string = try parser.acceptStringLiteral(.fail); + return .{ .literal = string }; + } + + if (try parser.tryAcceptLiteral(.word)) { + const string = try parser.acceptStringLiteral(.fail); + return .{ .word = string }; + } + + if (try parser.tryAcceptLiteral(.regex)) { + const string = try parser.acceptStringLiteral(.fail); + return .{ .regex = string }; + } + + if (parser.acceptUserReference()) |ref| { + return .{ .external = ref }; + } else |err| try filterAcceptError(err); + + return parser.emitUnexpectedToken(.{ + .unexpected_token = .unexpected_token_pattern, + }); + } + + fn acceptNode(parser: *Parser) AcceptError!ast.Node { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + var state = parser.save(); + errdefer parser.restore(state); + + try parser.acceptLiteral(.node, .recover); + + const identifier = try parser.acceptIdentifier(.fail); + + try parser.acceptLiteral(.@"=", .fail); + + const value = try parser.acceptTypeSpec(); + + try parser.acceptLiteral(.@";", .fail); + + return .{ + .name = identifier, + .value = value, + }; + } + + fn acceptRule(parser: *Parser) AcceptError!ast.Rule { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + var state = parser.save(); + errdefer parser.restore(state); + + try parser.acceptLiteral(.rule, .recover); + + const identifier = try parser.acceptIdentifier(.fail); + + const rule_type = if (try parser.tryAcceptLiteral(.@":")) + try parser.acceptTypeSpec() + else + null; + + try parser.acceptLiteral(.@"=", .fail); + + var list: ast.List(ast.MappedProduction) = .{}; + + while (true) { + var production = try parser.acceptMappedProduction(); + + try parser.append(ast.MappedProduction, &list, production); + + // if a semicolon follows, we're done + if (try parser.tryAcceptLiteral(.@";")) { + break; + } + // if a pipe follows, we got more rules + else if (try parser.tryAcceptLiteral(.@"|")) { + continue; + } + // otherwise, it's a syntax error: + else { + return parser.emitUnexpectedToken(.{ + .unexpected_token = .unexpected_token_production_list, + }); + } + + try parser.acceptLiteral(.@"|", .fail); + } + + return ast.Rule{ + .ast_type = rule_type, + .productions = list, + .name = identifier, + }; + } + + fn acceptMappedProduction(parser: *Parser) 
AcceptError!ast.MappedProduction { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + var sequence = try parser.acceptProductionSequence(); + + const mapping = if (try parser.tryAcceptLiteral(.@"=>")) + try parser.acceptAstMapping(.fail) + else + null; + + return ast.MappedProduction{ + // Auto-flatten the "tree" here if the top level production is a "sequence" of one + .production = if (sequence.only()) |item| + item + else + .{ .sequence = sequence }, + .mapping = mapping, + }; + } + + fn acceptProductionSequence(parser: *Parser) AcceptError!ast.List(ast.Production) { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + var list: ast.List(ast.Production) = .{}; + + sequence_loop: while (true) { + if (parser.acceptProduction()) |prod| { + try parser.append(ast.Production, &list, prod); + } else |err| switch (err) { + error.UnexpectedTokenRecoverable => { + // we couldn't accept a production, so let's see if we're in a legal state here: + + const seekahead_reset = parser.save(); + + // all of the following might allow to terminate a list: + inline for (.{ .@")", .@";", .@"=>", .@"|" }) |legal_terminator| { + if (try parser.tryAcceptLiteral(legal_terminator)) { + // All of the above tokens + parser.restore(seekahead_reset); + break :sequence_loop; + } + } + + return parser.emitUnexpectedToken(.{ + .unexpected_token = .unexpected_token_production, + }); + }, + error.OutOfMemory, error.InvalidSourceEncoding, error.SyntaxError => |e| return e, + } + } + + if (list.len() == 0) { + // Empty list is a recoverable syntax error: + try parser.emitDiagnostic(null, .illegal_empty_group, .{}); + } + + return list; + } + + fn acceptProduction(parser: *Parser) AcceptError!ast.Production { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + if (try parser.tryAcceptLiteral(.@"(")) { + var sequence = try parser.acceptProductionSequence(); + try parser.acceptLiteral(.@")", .fail); + + if (try parser.tryAcceptLiteral(.@"?")) { + return .{ .optional = sequence }; + } else if (try parser.tryAcceptLiteral(.@"+")) { + return .{ .repetition_one = sequence }; + } else if (try parser.tryAcceptLiteral(.@"*")) { + return .{ .repetition_zero = sequence }; + } else { + return .{ .sequence = sequence }; + } + } + + if (parser.acceptStringLiteral(.recover)) |str| { + return ast.Production{ .literal = str }; + } else |err| try filterAcceptError(err); + + if (parser.acceptTokenReference(.recover)) |ref| { + return ast.Production{ .terminal = ref }; + } else |err| try filterAcceptError(err); + + if (parser.acceptRuleReference(.recover)) |ref| { + return ast.Production{ .recursion = ref }; + } else |err| try filterAcceptError(err); + + // We're done with out list + return error.UnexpectedTokenRecoverable; + } + + fn acceptAstMapping(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.AstMapping { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const state = parser.save(); + errdefer parser.restore(state); + + const position = parser.core.tokenizer.current_location; + + if (parser.acceptVariantInit()) |init| { + return .{ .variant = init }; + } else |err| try filterAcceptError(err); + + if (parser.acceptRecordInit()) |init| { + return .{ .record = init }; + } else |err| try filterAcceptError(err); + + if (parser.acceptListInit()) |init| { + return .{ .list = init }; + } else |err| try filterAcceptError(err); + + if (parser.acceptCodeLiteral()) |literal| { + return .{ .literal = literal }; + } else |err| try filterAcceptError(err); + + if (parser.acceptValueReference()) 
|literal| { + return .{ .context_reference = literal }; + } else |err| try filterAcceptError(err); + + if (parser.acceptBuiltinCall()) |call| { + return .{ .function_call = call }; + } else |err| try filterAcceptError(err); + + if (parser.acceptUserCall()) |call| { + return .{ .user_function_call = call }; + } else |err| try filterAcceptError(err); + + if (parser.acceptUserReference()) |ref| { + return .{ .user_reference = ref }; + } else |err| try filterAcceptError(err); + + if (try parser.tryAcceptLiteral(.@";") or try parser.tryAcceptLiteral(.@"|")) { + try parser.emitDiagnostic(position, .empty_mapping, .{}); + return error.SyntaxError; + } + + switch (accept_mode) { + .recover => return error.UnexpectedTokenRecoverable, + .fail => return parser.emitUnexpectedToken(.{ + .unexpected_token = .unexpected_token_mapping, + }), + } + } + + fn acceptVariantInit(parser: *Parser) AcceptError!ast.VariantInitializer { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const state = parser.save(); + errdefer parser.restore(state); + + const field = try parser.acceptIdentifier(.recover); + + try parser.acceptLiteral(.@":", .recover); + + const value = try parser.acceptAstMapping(.fail); + + const clone = try parser.arena.create(ast.AstMapping); + clone.* = value; + + return .{ + .field = field, + .value = clone, + }; + } + + fn acceptRecordInit(parser: *Parser) AcceptError!ast.List(ast.FieldAssignment) { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const state = parser.save(); + errdefer parser.restore(state); + + try parser.acceptLiteral(.@"{", .recover); + + var mode: AcceptMode = .recover; + + var list = ast.List(ast.FieldAssignment){}; + while (true) { + // First item might fail, then it's not a record initializer, but + // afterwards, all fields must comply + defer mode = .fail; + + const node = try parser.acceptFieldInit(mode); + + try parser.append(ast.FieldAssignment, &list, node); + + if (!try parser.tryAcceptLiteral(.@",")) { + break; + } + } + + try parser.acceptLiteral(.@"}", .fail); + + return list; + } + + fn acceptFieldInit(parser: *Parser, mode: AcceptMode) AcceptError!ast.FieldAssignment { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const state = parser.save(); + errdefer parser.restore(state); + + const location = parser.core.tokenizer.current_location; + + const field = try parser.acceptIdentifier(mode); + + try parser.acceptLiteral(.@"=", .fail); + + const value = try parser.acceptAstMapping(.fail); + + const clone = try parser.arena.create(ast.AstMapping); + clone.* = value; + + return .{ + .location = location, + .field = field, + .value = clone, + }; + } + + fn acceptListInit(parser: *Parser) AcceptError!ast.List(ast.AstMapping) { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const state = parser.save(); + errdefer parser.restore(state); + + try parser.acceptLiteral(.@"{", .recover); + + var items = try parser.acceptMappingList(); + + try parser.acceptLiteral(.@"}", .fail); + + return items; + } + + fn acceptCodeLiteral(parser: *Parser) AcceptError!ast.CodeLiteral { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const token = try parser.acceptToken(.code_literal, .recover); + + std.debug.assert(std.mem.startsWith(u8, token.text, "`")); + std.debug.assert(std.mem.endsWith(u8, token.text, "`")); + + var prefix_len: usize = 0; + while (token.text[prefix_len] == '`') { + prefix_len += 1; + } + + return ast.CodeLiteral{ + .location = token.location, + .value = try 
parser.pool.insert(token.text[prefix_len .. token.text.len - prefix_len]), + }; + } + + fn acceptValueReference(parser: *Parser) AcceptError!ast.ValueRef { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const token = try parser.acceptToken(.value_ref, .recover); + std.debug.assert(std.mem.startsWith(u8, token.text, "$")); + return ast.ValueRef{ + .location = token.location, + .index = std.fmt.parseInt(u32, token.text[1..], 10) catch |err| switch (err) { + error.InvalidCharacter => unreachable, // ensured by tokenizer, + error.Overflow => blk: { + try parser.emitDiagnostic(token.location, .integer_overflow, .{ + .min = comptime std.fmt.comptimePrint("{}", .{std.math.minInt(u32)}), + .max = comptime std.fmt.comptimePrint("{}", .{std.math.maxInt(u32)}), + .actual = token.text[1..], + }); + break :blk 0; + }, + }, + }; + } + + fn acceptBuiltinCall(parser: *Parser) AcceptError!ast.FunctionCall(ast.Identifier) { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const state = parser.save(); + errdefer parser.restore(state); + + const id = try parser.acceptIdentifier(.recover); + + try parser.acceptLiteral(.@"(", .fail); // a builtin function is the only legal way to use an identifier here, so we fail unrecoverably + + const list = try parser.acceptMappingList(); + + try parser.acceptLiteral(.@")", .fail); + + return .{ + .function = id, + .arguments = list, + }; + } + + fn acceptUserCall(parser: *Parser) AcceptError!ast.FunctionCall(ast.UserDefinedIdentifier) { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const state = parser.save(); + errdefer parser.restore(state); + + const id = try parser.acceptUserReference(); + + // If we only accept a user value, fail and fall back to regular user value acceptance later + try parser.acceptLiteral(.@"(", .recover); + + const list = try parser.acceptMappingList(); + + try parser.acceptLiteral(.@")", .fail); + + return .{ + .function = id, + .arguments = list, + }; + } + + fn acceptUserReference(parser: *Parser) AcceptError!ast.UserDefinedIdentifier { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const token = try parser.acceptToken(.userval_ref, .recover); + std.debug.assert(std.mem.startsWith(u8, token.text, "@")); + return ast.UserDefinedIdentifier{ + .location = token.location, + .value = try parser.pool.insert(token.text[1..]), + }; + } + + fn acceptMappingList(parser: *Parser) AcceptError!ast.List(ast.AstMapping) { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const list_state = parser.save(); + errdefer parser.restore(list_state); + + var list = ast.List(ast.AstMapping){}; + + var accept_mode: AcceptMode = .recover; + while (true) { + // first item is allowed to be failing, otherwise comma separation must be done! 
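+            // Note that this loop also accepts an empty list: if the very first mapping
+            // cannot be accepted, we roll back to `item_state` below and return no items.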
+ defer accept_mode = .fail; + + const item_state = parser.save(); + + if (parser.acceptAstMapping(accept_mode)) |mapping| { + try parser.append(ast.AstMapping, &list, mapping); + } else |err| { + try filterAcceptError(err); + parser.restore(item_state); // rollback to the previous item + break; + } + + if (!try parser.tryAcceptLiteral(.@",")) { + break; + } + } + + return list; + } + + fn acceptTypeSpec(parser: *Parser) AcceptError!ast.TypeSpec { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const list_state = parser.save(); + errdefer parser.restore(list_state); + + const position = parser.core.tokenizer.current_location; + + if (parser.acceptCodeLiteral()) |code| { + return .{ .literal = code }; + } else |err| try filterAcceptError(err); + + if (parser.acceptUserReference()) |ref| { + return .{ .custom = ref }; + } else |err| try filterAcceptError(err); + + if (parser.acceptNodeReference(.recover)) |ref| { + return .{ .reference = ref }; + } else |err| try filterAcceptError(err); + + if (parser.acceptCompoundType(.record)) |record| { + return .{ .record = record }; + } else |err| try filterAcceptError(err); + + if (parser.acceptCompoundType(.variant)) |variant| { + return .{ .variant = variant }; + } else |err| try filterAcceptError(err); + + const contiuation_pos = parser.save(); + if (try parser.tryAcceptLiteral(.@";") or try parser.tryAcceptLiteral(.@"|") or try parser.tryAcceptLiteral(.@"=")) { + try parser.emitDiagnostic(position, .empty_typespec, .{}); + + // restore the previous position, we just seeked a bit forward to make better + // errors here: + parser.restore(contiuation_pos); + + return BAD_TYPE_SPEC; + } + + // switch (accept_mode) { + // .recover => return error.UnexpectedTokenRecoverable, + // .fail => + return parser.emitUnexpectedToken(.{ + .unexpected_token = .unexpected_token_type_spec, + }); + // } + } + + fn acceptCompoundType(parser: *Parser, comptime designator: TokenType) AcceptError!ast.CompoundType { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const list_state = parser.save(); + errdefer parser.restore(list_state); + + const current_location = parser.core.tokenizer.current_location; + + // we can recover "struct"/"record", afterwards you must follow the rules + try parser.acceptLiteral(designator, .recover); + + var fields = ast.List(ast.Field){}; + + while (true) { + const field = try parser.acceptField(); + + try parser.append(ast.Field, &fields, field); + + if (try parser.tryAcceptLiteral(.@",")) { + // Comma means we're having another field + continue; + } else { + // Otherwise, the list is over. 
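+                // Whatever token follows the last field is left unconsumed for the caller.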
+ break; + } + } + + return .{ + .location = current_location, + .fields = fields, + }; + } + + fn acceptField(parser: *Parser) AcceptError!ast.Field { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const list_state = parser.save(); + errdefer parser.restore(list_state); + + const current_location = parser.core.tokenizer.current_location; + + const name = try parser.acceptIdentifier(.fail); + + try parser.acceptLiteral(.@":", .fail); + + const type_spec = try parser.acceptTypeSpec(); + + return .{ + .location = current_location, + .name = name, + .type = type_spec, + }; + } + + fn acceptStringLiteral(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.StringLiteral { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const token = try parser.acceptToken(.string_literal, accept_mode); + + std.debug.assert(token.text.len >= 2); + + return ast.StringLiteral{ + .location = token.location, + .value = try parser.unwrapString(token.location, token.text[1 .. token.text.len - 1]), + }; + } + + fn acceptIdentifier(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.Identifier { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const token = try parser.acceptToken(.identifier, accept_mode); + return ast.Identifier{ + .location = token.location, + .value = try parser.unwrapIdentifierString(token.location, token.text), + }; + } + + fn acceptRuleReference(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.RuleRef { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const token = try parser.acceptToken(.rule_ref, accept_mode); + std.debug.assert(std.mem.startsWith(u8, token.text, "<")); + std.debug.assert(std.mem.endsWith(u8, token.text, ">")); + return ast.RuleRef{ + .location = token.location, + .identifier = try parser.unwrapIdentifierString(token.location, token.text[1 .. token.text.len - 1]), + }; + } + + fn acceptTokenReference(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.PatternRef { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const token = try parser.acceptToken(.token_ref, accept_mode); + std.debug.assert(std.mem.startsWith(u8, token.text, "$")); + return ast.PatternRef{ + .location = token.location, + .identifier = try parser.unwrapIdentifierString(token.location, token.text[1..]), + }; + } + + fn acceptNodeReference(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.NodeRef { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const token = try parser.acceptToken(.node_ref, accept_mode); + std.debug.assert(std.mem.startsWith(u8, token.text, "!")); + return ast.NodeRef{ + .location = token.location, + .identifier = try parser.unwrapIdentifierString(token.location, token.text[1..]), + }; + } + + fn acceptLiteral(parser: *Parser, comptime token_type: TokenType, accept_mode: AcceptMode) AcceptError!void { + _ = try parser.acceptToken(token_type, accept_mode); + } + + fn tryAcceptLiteral(parser: *Parser, comptime token_type: TokenType) FatalAcceptError!bool { + _ = parser.acceptToken(token_type, .recover) catch |err| switch (err) { + error.UnexpectedTokenRecoverable => return false, + error.OutOfMemory, error.InvalidSourceEncoding, error.SyntaxError => |e| return e, + }; + return true; + } + + /// Tries to accept a given token and will emit a diagnostic if it fails. 
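+    /// With `accept_mode == .fail`, a mismatch emits a diagnostic and returns `error.SyntaxError`;
+    /// with `.recover`, it returns `error.UnexpectedTokenRecoverable` so the caller can try an alternative.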
+ fn acceptToken(parser: *Parser, comptime token_type: TokenType, accept_mode: AcceptMode) AcceptError!Token { + const saved_state = parser.save(); + errdefer parser.restore(saved_state); + + const source_offset = parser.core.tokenizer.offset; + const location = parser.core.tokenizer.current_location; + + if (parser.core.accept(RS.any)) |token| { + errdefer parser.emitTrace(.{ .token_reject = .{ .actual = token, .expected = token_type } }); + if (token.type != token_type) { + switch (accept_mode) { + .fail => { + try parser.emitDiagnostic(location, .unexpected_token, .{ + .expected_type = token_type, + .actual = token, + }); + return error.SyntaxError; + }, + .recover => return error.UnexpectedTokenRecoverable, + } + } + parser.emitTrace(.{ .token_accept = token }); + return token; + } else |err| switch (err) { + error.UnexpectedToken => unreachable, // RS.any will always accept the token + error.EndOfStream => switch (accept_mode) { + .fail => { + try parser.emitDiagnostic(location, .unexpected_token_eof, .{ .expected_type = token_type }); + return error.SyntaxError; + }, + .recover => return error.UnexpectedTokenRecoverable, + }, + error.UnexpectedCharacter => { + try parser.emitUnexpectedCharacter(location, source_offset); + return error.SyntaxError; + }, + } + } + + const AcceptMode = enum { + /// Will emit a syntax error with diagnostic + fail, + + /// Is recoverable + recover, + }; + + // management: + const TraceKind = union(enum) { + token_accept: Token, + token_reject: struct { actual: Token, expected: TokenType }, + rule: []const u8, + }; + + const Trace = struct { + depth: u32, + kind: TraceKind, + + pub fn format(trace: Trace, fmt: []const u8, opt: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = opt; + try writer.writeByteNTimes(' ', 4 * trace.depth); + try writer.print("{s}:", .{@tagName(trace.kind)}); + switch (trace.kind) { + .token_accept => |item| try writer.print("accept {}", .{item}), + .token_reject => |item| try writer.print("reject {}, expected '{s}'", .{ item.actual, @tagName(item.expected) }), + .rule => |item| try writer.print("{s}", .{item}), + } + } + }; + + fn traceEnterRule(parser: *Parser, loc: std.builtin.SourceLocation) void { + parser.emitTrace(.{ .rule = loc.fn_name }); + parser.trace_depth += 1; + } + + fn popTrace(parser: *Parser) void { + parser.trace_depth -= 1; + } + + fn emitTrace(parser: Parser, trace: TraceKind) void { + if (!parser.trace_enabled) { + return; + } + std.log.debug("rule trace: {}", .{Trace{ + .depth = parser.trace_depth, + .kind = trace, + }}); + } + + fn emitDiagnostic(parser: Parser, loc: ?ptk.Location, comptime code: Diagnostics.Code, data: Diagnostics.Data(code)) !void { + // Anything detected here is always an error + std.debug.assert(code.isError()); + try parser.diagnostics.emit(loc orelse parser.core.tokenizer.current_location, code, data); + } + + const UnexpectedTokenOptions = struct { + unexpected_token: Diagnostics.Code, + }; + fn emitUnexpectedToken(parser: *Parser, comptime opt: UnexpectedTokenOptions) AcceptError { + if (Diagnostics.Data(opt.unexpected_token) != Diagnostics.Data(.unexpected_token_no_context)) { + @compileError("Generic unexpected token must use the same type as 'unexpected_token_no_context' diagnostic."); + } + + const state = parser.save(); + defer parser.restore(state); + + const location = parser.core.tokenizer.current_location; + const offset = parser.core.tokenizer.offset; + + const token_or_null = parser.core.nextToken() catch |err| switch (err) { + error.UnexpectedCharacter => { 
+ try parser.emitUnexpectedCharacter(location, offset); + return error.SyntaxError; + }, + }; + + const token = token_or_null orelse { + try parser.emitDiagnostic(location, .unexpected_eof, .{}); + return error.SyntaxError; + }; + + try parser.emitDiagnostic(location, opt.unexpected_token, .{ + .actual = token, + }); + return error.SyntaxError; + } + + fn emitUnexpectedCharacter(parser: Parser, location: ptk.Location, source_offset: usize) !void { + var utf8_view = std.unicode.Utf8View.init(parser.core.tokenizer.source[source_offset..]) catch { + try parser.emitDiagnostic(location, .invalid_source_encoding, .{}); + return error.InvalidSourceEncoding; + }; + + var iter = utf8_view.iterator(); + + try parser.emitDiagnostic(location, .unexpected_character, .{ + .character = iter.nextCodepoint() orelse @panic("very unexpected end of file"), + }); + } + + fn unwrapIdentifierString(parser: *Parser, loc: ptk.Location, raw: []const u8) !ptk.strings.String { + std.debug.assert(raw.len > 0); + if (raw[0] == '@') { + std.debug.assert(raw[1] == '"'); + std.debug.assert(raw[raw.len - 1] == '"'); + // string-escaped identifier + return try parser.unwrapString(loc, raw[2 .. raw.len - 1]); + } else { + return try parser.pool.insert(raw); + } + } + + fn unwrapString(parser: *Parser, loc: ptk.Location, raw: []const u8) !ptk.strings.String { + var fallback = std.heap.stackFallback(512, parser.arena); + + var working_space = std.ArrayList(u8).init(fallback.get()); + defer working_space.deinit(); + + var i: usize = 0; + while (i < raw.len) { + const c = raw[i]; + if (c == '\\') { + i += 1; + if (i >= raw.len) { + try parser.emitDiagnostic(loc, .bad_string_escape, .{}); + return error.SyntaxError; + } + const escape = raw[i]; + const slice = switch (escape) { + 'n' => "\n", + 'r' => "\r", + '\"' => "\"", + '\'' => "\'", + '\\' => "\\", + + 'x' => @panic("Implement hex escape \\x??"), + 'u' => @panic("Implement unicode utf-8 escapes \\u{????}"), + + '0'...'3' => @panic("Implement octal escape \\???"), + + else => { + try parser.emitDiagnostic(loc, .invalid_string_escape, .{ .escape = escape }); + return error.SyntaxError; + }, + }; + try working_space.appendSlice(slice); + } else { + try working_space.append(c); + } + i += 1; + } + + return try parser.pool.insert(working_space.items); + } + + fn save(parser: Parser) ParserCore.State { + return parser.core.saveState(); + } + + fn restore(parser: *Parser, state: ParserCore.State) void { + parser.core.restoreState(state); + } + + fn internString(parser: *Parser, string: []const u8) !String { + return try parser.pool.insert(string); + } + + fn append(parser: *Parser, comptime T: type, list: *ast.List(T), item: T) !void { + const node = try parser.arena.create(ast.List(T).Node); + errdefer parser.arena.destroy(node); + + node.data = item; + + list.append(node); + } + + pub const FatalAcceptError = error{ + // We're out of memory accepting some rule. We cannot recover from this. + OutOfMemory, + + // Something could not be accepted. 
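+        // By convention, a diagnostic has already been emitted when this is returned.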
+ SyntaxError, + + // The source code contained invalid bytes + InvalidSourceEncoding, + }; + + pub const AcceptError = FatalAcceptError || error{ + // The token stream contains an unexpected token, this is a syntax error + UnexpectedTokenRecoverable, + }; + + fn filterAcceptError(err: AcceptError) FatalAcceptError!void { + return switch (err) { + error.UnexpectedTokenRecoverable => {}, + + error.OutOfMemory, + error.SyntaxError, + error.InvalidSourceEncoding, + => |e| return e, + }; + } +}; + +const match = ptk.matchers; +const Pattern = ptk.Pattern(TokenType); +const Tokenizer = ptk.Tokenizer(TokenType, &.{ + Pattern.create(.line_comment, match.sequenceOf(.{ match.literal("#"), match.takeNoneOf("\r\n") })), + + Pattern.create(.node, match.word("node")), + Pattern.create(.record, match.word("record")), + Pattern.create(.variant, match.word("variant")), + Pattern.create(.optional, match.word("optional")), + Pattern.create(.start, match.word("start")), + Pattern.create(.rule, match.word("rule")), + Pattern.create(.pattern, match.word("pattern")), + Pattern.create(.literal, match.word("literal")), + Pattern.create(.word, match.word("word")), + Pattern.create(.regex, match.word("regex")), + Pattern.create(.skip, match.word("skip")), + + Pattern.create(.string_literal, matchStringLiteral), + Pattern.create(.code_literal, matchCodeLiteral), + + // identifiers must come after keywords: + Pattern.create(.identifier, matchRawIdentifier), + Pattern.create(.node_ref, matchNodeRef), + Pattern.create(.rule_ref, matchRuleRef), + Pattern.create(.token_ref, matchTokenRef), + Pattern.create(.value_ref, matchValueRef), + Pattern.create(.userval_ref, matchBuiltinRef), + + Pattern.create(.@"=>", match.literal("=>")), + + Pattern.create(.@"=", match.literal("=")), + Pattern.create(.@",", match.literal(",")), + Pattern.create(.@".", match.literal(".")), + Pattern.create(.@"*", match.literal("*")), + Pattern.create(.@"+", match.literal("+")), + Pattern.create(.@":", match.literal(":")), + Pattern.create(.@";", match.literal(";")), + Pattern.create(.@"|", match.literal("|")), + Pattern.create(.@"!", match.literal("!")), + Pattern.create(.@"?", match.literal("?")), + Pattern.create(.@"[", match.literal("[")), + Pattern.create(.@"]", match.literal("]")), + Pattern.create(.@"(", match.literal("(")), + Pattern.create(.@")", match.literal(")")), + Pattern.create(.@"{", match.literal("{")), + Pattern.create(.@"}", match.literal("}")), + + // Whitespace is the "kitchen sink" at the end: + Pattern.create(.whitespace, match.takeAnyOf(" \r\n\t")), +}); + +/// Accepts a basic identifier without any prefix or suffix. 
+/// The regex that matches this pattern is roughly this: +/// +/// (@\"[^"]+\")|([A-Za-z_][A-Za-z0-9_\-]*) +/// +fn matchRawIdentifier(text: []const u8) usize { + if (text.len < 1) + return 0; + + if (std.mem.startsWith(u8, text, "@\"")) { + if (text.len < 3) + return 0; + + var i: usize = 2; // skip `@"` + while (i < text.len) : (i += 1) { + if (text[i] == '\"') + return i + 1; + if (text[i] == '\\') + i += 1; + } + + return 0; + } else { + const prefix_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"; + const suffix_chars = prefix_chars ++ "0123456789"; + const inner_chars = suffix_chars ++ "-"; + + if (std.mem.indexOfScalar(u8, prefix_chars, text[0]) == null) + return 0; // invalid start char + + // Suffix check is done in "postprocessing" by checking if any identifier ends with "-" + + var len: usize = 1; + while (len < text.len and std.mem.indexOfScalar(u8, inner_chars, text[len]) != null) { + len += 1; + } + + return len; + } + + return 0; +} + +test matchRawIdentifier { + try ptk.testing.validateMatcher(matchRawIdentifier, &.{ + // good: + "a", + "a-z", + "items10", + "_foo", + "_", + "_cheese-cake", + }, &.{ + // bad: + "-", + "-10", + "10", + "1-2", + "10items", + }); +} + +const matchNodeRef = match.sequenceOf(.{ match.literal("!"), matchRawIdentifier }); + +test matchNodeRef { + try ptk.testing.validateMatcher(matchNodeRef, &.{ + // good: + "!a", + "!foo_bar", + }, &.{ + // bad: + "a", + "!", + }); +} + +const matchRuleRef = match.sequenceOf(.{ match.literal("<"), matchRawIdentifier, match.literal(">") }); + +test matchRuleRef { + try ptk.testing.validateMatcher(matchRuleRef, &.{ + // good: + "", + "", + "", + "<@\"very exiting boy\">", + }, &.{ + // bad: + "", + }); +} + +const matchTokenRef = match.sequenceOf(.{ match.literal("$"), matchRawIdentifier }); + +test matchTokenRef { + try ptk.testing.validateMatcher(matchTokenRef, &.{ + // good: + "$token", + "$user-token", + "$user_token", + "$@\"wtf\"", + }, &.{ + // bad: + "$\"wtf\"", + "bad boy", + "bad-boy", + "$0", + "$100", + }); +} + +const matchValueRef = match.sequenceOf(.{ match.literal("$"), match.decimalNumber }); + +test matchValueRef { + try ptk.testing.validateMatcher(matchValueRef, &.{ + // good: + "$0", + "$10", + "$99999999", + }, &.{ + // bad: + "9", + "$", + "$foo", + }); +} + +const matchBuiltinRef = match.sequenceOf(.{ match.literal("@"), matchRawIdentifier }); + +test matchBuiltinRef { + try ptk.testing.validateMatcher(matchBuiltinRef, &.{ + // good: + "@token", + "@user-token", + "@user_token", + "@@\"wtf\"", + }, &.{ + // bad: + "@\"wtf\"", + "bad boy", + "bad-boy", + "@0", + "@100", + }); +} + +fn matchStringLiteral(text: []const u8) usize { + if (text.len < 2) + return 0; + + if (text[0] != '"') + return 0; + + var i: usize = 1; // skip `"` + while (i < text.len) : (i += 1) { + if (text[i] == '\"') + return i + 1; + if (text[i] == '\\') + i += 1; + } + + return 0; +} + +test matchStringLiteral { + try ptk.testing.validateMatcher(matchStringLiteral, &.{ + // good: + "\"\"", + "\"x\"", + "\" \"", + "\" hello \\\"world\\\"\"", + }, &.{ + // bad: + "\"", + "\"\\\"", + "\"", + "foo\"", + }); +} + +fn matchCodeLiteral(text: []const u8) usize { + var prefix_len: usize = 0; + while (prefix_len < text.len and text[prefix_len] == '`') { + prefix_len += 1; + } + + if (prefix_len == 0 or 2 * prefix_len >= text.len) + return 0; + + const body_len = std.mem.indexOf(u8, text[prefix_len..], text[0..prefix_len]) orelse return 0; + + return 2 * prefix_len + body_len; +} + +test matchCodeLiteral { + try 
ptk.testing.validateMatcher(matchCodeLiteral, &.{ + // good: + "`x`", + "`\"hello, World!\"`", + "`\n\n`", + "`\x00`", + "``you can write a `code` snippet like this!``", + }, &.{ + // bad: + "`", + "``", + "```hello, world!``", + }); +} + +test "parser string literal" { + const Test = struct { + pub fn run(expected: []const u8, code: []const u8) !void { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var diag = Diagnostics.init(std.testing.allocator); + defer diag.deinit(); + + var strings = try ptk.strings.Pool.init(std.testing.allocator); + defer strings.deinit(); + + var tokenizer = Tokenizer.init(code, "unittest"); + + var parser = Parser{ + .diagnostics = &diag, + .pool = &strings, + .core = ParserCore.init(&tokenizer), + .arena = arena.allocator(), + .trace_enabled = false, + }; + + const literal = try parser.acceptStringLiteral(.fail); + + const actual = strings.get(literal.value); + + try std.testing.expectEqualStrings(expected, actual); + } + }; + + // Empty string: + try Test.run("", + \\"" + ); + + // Regular string + try Test.run("hello, world!", + \\"hello, world!" + ); + + // Validate escape sequences: + try Test.run("\r", + \\"\r" + ); + try Test.run("\n", + \\"\n" + ); + try Test.run("\\", + \\"\\" + ); + try Test.run("\"", + \\"\"" + ); + try Test.run("\"hello, world!\"", + \\"\"hello, world!\"" + ); + try Test.run("A\'B", + \\"A\'B" + ); + // TODO: enable those tests for escape sequences! + // try Test.run("\x34", + // \\"\x34" + // ); + // try Test.run("A\xFFB", + // \\"A\xFFB" + // ); + // try Test.run("\x10\x22", + // \\"\x10\x22" + // ); + // try Test.run("A\x1BB", + // \\"A\033B" + // ); + // try Test.run("A\xFFB", + // \\"A\377B" + // ); + // try Test.run("A\x01B", + // \\"A\001B" + // ); + // try Test.run("[\u{1F4A9}]", + // \\"[\u{1F4A9}]" + // ); +} diff --git a/src/ptkgen/sema.zig b/src/ptkgen/sema.zig new file mode 100644 index 0000000..bd64364 --- /dev/null +++ b/src/ptkgen/sema.zig @@ -0,0 +1,1057 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); + +const logger = std.log.scoped(.ptk_sema); + +const ast = @import("ast.zig"); +const Diagnostics = @import("Diagnostics.zig"); + +pub const AnalyzeError = error{ OutOfMemory, SemanticError }; + +const String = ptk.strings.String; + +pub fn StringHashMap(comptime T: type) type { + return std.AutoArrayHashMap(String, T); +} + +pub const Grammar = struct { + arena: std.heap.ArenaAllocator, + + start: ?StartDeclaration, + + rules: StringHashMap(*Rule), + nodes: StringHashMap(*Node), + patterns: StringHashMap(*Pattern), + literal_patterns: StringHashMap(*Pattern), + + pub fn deinit(grammar: *Grammar) void { + grammar.rules.deinit(); + grammar.nodes.deinit(); + grammar.patterns.deinit(); + grammar.literal_patterns.deinit(); + grammar.arena.deinit(); + grammar.* = undefined; + } +}; + +pub const StartDeclaration = struct { + rule: *Rule, + location: ptk.Location, +}; + +pub const Rule = struct { + location: ptk.Location, + name: String, + + type: ?*Type, + productions: []MappedProduction, +}; + +/// A production of a rule that is able to map the parsed structure +/// into an AST node. +pub const MappedProduction = struct { + production: Production, + mapping: ?Mapping, +}; + +/// A production is a part of a grammar. Productions consume +/// tokens and generate structure from this. +pub const Production = union(enum) { + terminal: *Pattern, // literal and terminal ast nodes are wrapped to this + recursion: *Rule, // + sequence: []Production, // ... 
+ optional: *Production, // ( ... )? + repetition_zero: *Production, // [ ... ]* + repetition_one: *Production, // [ ... ]+ +}; + +pub const Mapping = union(enum) { + record_initializer: RecordInitializer, // { a = b, c = d, ... } + list_initializer: ListInitializer, // [ a, b, c, ... ] + variant_initializer: VariantInitializer, // field: ... + + user_function_call: FunctionCall, // @builtin(a,b,c) + builtin_function_call: FunctionCall, // identifier(a,b,c) + + code_literal: String, // `code` + user_literal: String, // @user_data + + context_reference: ContextReference, // $0 +}; + +pub const ContextReference = struct { + index: u32, + production: *Production, + type: *Type, +}; + +const RecordInitializer = struct { + type: *Type, + fields: []FieldInitializer, +}; + +const FieldInitializer = struct { + field: *Field, + value: Mapping, +}; + +const ListInitializer = struct { + type: *Type, + items: []Mapping, +}; + +const VariantInitializer = struct { + type: *Type, + field: *Field, + value: *Mapping, +}; + +const FunctionCall = struct { + return_type: ?*Type, + function: String, + arguments: []Mapping, +}; + +pub const Node = struct { + location: ptk.Location, + name: String, + + type: *Type, +}; + +pub const Pattern = struct { + location: ptk.Location, + name: String, + is_literal: bool, + data: Data, + + pub const Data = union(enum) { + literal_match: String, + word: String, + regex: String, + external: String, + }; +}; + +pub const Type = union(enum) { + // trivial types: + code_literal: String, + user_type: String, + + // anonymous compound types: + optional: *Type, + record: *CompoundType, + variant: *CompoundType, + + // ast nodes are basically "named types" and must be handled as such + named: *Node, + + // builtin types: + token, // points to a PTK token + + pub fn id(t: *const Type) TypeId { + return @as(TypeId, t.*); + } +}; + +pub const TypeId: type = std.meta.Tag(Type); + +pub const CompoundType = struct { + fields: StringHashMap(Field), +}; + +pub const Field = struct { + location: ptk.Location, + name: String, + type: *Type, +}; + +pub fn analyze(allocator: std.mem.Allocator, diagnostics: *Diagnostics, strings: *const ptk.strings.Pool, document: ast.Document) AnalyzeError!Grammar { + std.debug.assert(diagnostics.hasErrors() == false); + errdefer |err| if (err == error.SemanticError) + std.debug.assert(diagnostics.hasErrors()); + + var grammar = Grammar{ + .arena = std.heap.ArenaAllocator.init(allocator), + + .rules = StringHashMap(*Rule).init(allocator), + .nodes = StringHashMap(*Node).init(allocator), + .patterns = StringHashMap(*Pattern).init(allocator), + .literal_patterns = StringHashMap(*Pattern).init(allocator), + + .start = null, + }; + errdefer grammar.deinit(); + + var analyzer = Analyzer{ + .arena = grammar.arena.allocator(), + .diagnostics = diagnostics, + .strings = strings, + + .rule_to_ast = std.AutoHashMap(*Rule, *ast.Rule).init(allocator), + .node_to_ast = std.AutoHashMap(*Node, *ast.Node).init(allocator), + .pattern_to_ast = std.AutoHashMap(*Pattern, *ast.Pattern).init(allocator), + + .type_stash = Analyzer.TypeStash.init(allocator), + + .document = document, + + .target = &grammar, + }; + defer analyzer.deinit(); + + try innerAnalysis(&analyzer); + + if (grammar.start == null) { + try analyzer.emitDiagnostic(ptk.Location{ + .line = 0, + .column = 0, + .source = null, + }, .missing_start_symbol, .{}); + } + + return grammar; +} + +var BAD_TYPE_SENTINEL: Type = undefined; +var BAD_NODE_SENTINEL: Node = undefined; +var BAD_RULE_SENTINEL: Rule = undefined; +var 
BAD_PATTERN_SENTINEL: Pattern = undefined; +var BAD_PRODUCTION_SENTINEL: Production = undefined; +var BAD_FIELD_SENTINEL: Field = undefined; + +fn innerAnalysis(analyzer: *Analyzer) AnalyzeError!void { + // Phase 0: Validate productions on legality (coarse error checking) + // - Generates errors for badly constructed elements + try analyzer.validateAstRulesCoarse(); + + // Phase 1: Create all global declarations + // - Populates the declaration lookups + // - Generates errors for duplicate identifiers + try analyzer.createDeclarations(); + + // Phase 2: Instantiate all node types and patterns, determine start symbol + + try analyzer.iterateOn(.start, Analyzer.instantiateStartSymbol); + try analyzer.iterateOn(.pattern, Analyzer.instantiatePatterns); + try analyzer.iterateOn(.node, Analyzer.instantiateNodeTypes); + + // Phase 3: Validate generated types + try analyzer.iterateOn(.node, Analyzer.validateNodes); + + // Phase 4: Instantiate AST productions + try analyzer.iterateOn(.rule, Analyzer.instantiateRules); + + // Phase 5: Instantiate and validate AST mappings + try analyzer.iterateOn(.rule, Analyzer.instantiateMappings); // Create data structures + try analyzer.iterateOn(.rule, Analyzer.linkAndValidateMappedProductions); // Validate if data tr +} + +const Analyzer = struct { + const TypeStash = std.HashMap(*Type, void, TypeContext, std.hash_map.default_max_load_percentage); + + arena: std.mem.Allocator, + diagnostics: *Diagnostics, + strings: *const ptk.strings.Pool, + target: *Grammar, + + document: ast.Document, + + rule_to_ast: std.AutoHashMap(*Rule, *ast.Rule), + node_to_ast: std.AutoHashMap(*Node, *ast.Node), + pattern_to_ast: std.AutoHashMap(*Pattern, *ast.Pattern), + + type_stash: TypeStash, + + deduplicated_type_count: usize = 0, + + fn deinit(analyzer: *Analyzer) void { + analyzer.rule_to_ast.deinit(); + analyzer.node_to_ast.deinit(); + analyzer.pattern_to_ast.deinit(); + analyzer.type_stash.deinit(); + analyzer.* = undefined; + } + + const IterativeAnalysisError = error{RecoverableSemanticError} || AnalyzeError; + + fn iterateOn( + analyzer: *Analyzer, + comptime node_type: std.meta.FieldEnum(ast.TopLevelDeclaration), + comptime functor: fn (*Analyzer, *std.meta.FieldType(ast.TopLevelDeclaration, node_type)) IterativeAnalysisError!void, + ) AnalyzeError!void { + var iter = ast.iterate(analyzer.document); + while (iter.next()) |item| { + switch (item.*) { + @field(std.meta.Tag(ast.TopLevelDeclaration), @tagName(node_type)) => |*node| { + functor(analyzer, node) catch |err| switch (err) { + error.RecoverableSemanticError => {}, + else => |e| return e, + }; + }, + else => {}, + } + } + } + + fn validateAstRulesCoarse(analyzer: *Analyzer) !void { + var iter = ast.iterate(analyzer.document); + while (iter.next()) |item| { + switch (item.*) { + .start => |start| { + _ = start; + }, + + .rule => |rule| { + _ = rule; + }, + + .node => |node| { + _ = node; + }, + + .pattern => |pattern| { + _ = pattern; + }, + } + } + } + + /// Creates declarations in the target Grammar and makes sure all declared objects are reachable. + /// Emits diagnostics for duplicate declarations. 
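+    /// Only the name and location of each rule, node and pattern are filled in here;
+    /// their bodies are populated by the later instantiation phases.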
+ fn createDeclarations(analyzer: *Analyzer) !void { + var iter = ast.iterate(analyzer.document); + while (iter.next()) |item| { + switch (item.*) { + .start => {}, + + .rule => |*rule| { + const instance = try analyzer.declareElement( + Rule, + ast.Rule, + &analyzer.target.rules, + &analyzer.rule_to_ast, + rule, + rule.name, + .duplicate_identifier_rule, + ); + instance.* = .{ + .location = rule.name.location, + .name = rule.name.value, + + .type = undefined, // created in phase 4 + .productions = &.{}, // created in phase 5 + }; + }, + + .node => |*node| { + const instance = try analyzer.declareElement( + Node, + ast.Node, + &analyzer.target.nodes, + &analyzer.node_to_ast, + node, + node.name, + .duplicate_identifier_node, + ); + instance.* = .{ + .location = node.name.location, + .name = node.name.value, + + .type = undefined, // created in phase 2 + }; + }, + + .pattern => |*pattern| { + const instance = try analyzer.declareElement( + Pattern, + ast.Pattern, + &analyzer.target.patterns, + &analyzer.pattern_to_ast, + pattern, + pattern.name, + .duplicate_identifier_pattern, + ); + instance.* = .{ + .location = pattern.name.location, + .name = pattern.name.value, + .is_literal = false, + .data = undefined, // created in phase 2 + }; + }, + } + } + } + + /// Searches all start symbol declarations and stores a reference to the initial rule. + /// Will emit diagnostics for duplicate start symbol decls and invalid references. + fn instantiateStartSymbol(analyzer: *Analyzer, start: *ast.RuleRef) !void { + if (analyzer.target.start) |old_start| { + try analyzer.emitDiagnostic(start.location, .multiple_start_symbols, .{ + .identifier = analyzer.strings.get(old_start.rule.name), + .previous_location = old_start.location, + }); + // error return is further down below so we can also catch the undefined reference error + } + + const rule = analyzer.target.rules.get(start.identifier) orelse { + try analyzer.emitDiagnostic(start.location, .reference_to_undeclared_rule, .{ + .identifier = analyzer.strings.get(start.identifier), + }); + return error.RecoverableSemanticError; + }; + + if (analyzer.target.start != null) { + // return for the first if block + return error.RecoverableSemanticError; + } + + analyzer.target.start = .{ + .rule = rule, + .location = start.location, + }; + } + + /// Fully populate all content of the pattern declarations. Emits diagnostics for invalid patterns. + fn instantiatePatterns(analyzer: *Analyzer, ast_pattern: *ast.Pattern) !void { + const sema_pattern = analyzer.target.patterns.get(ast_pattern.name.value).?; + + sema_pattern.data = switch (ast_pattern.data) { + .literal => |value| .{ .literal_match = value.value }, + .word => |value| .{ .word = value.value }, + .regex => |value| .{ .regex = value.value }, + .external => |value| .{ .external = value.value }, + }; + + // TODO: Implement regex validation here! + } + + /// Instantiates and validates all node declarations. + /// Emits diagnostics for bad type declarations. 
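+    /// Only the type is resolved here; structural validation of the resolved types happens separately in `validateNodes` (phase 3).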
+ fn instantiateNodeTypes(analyzer: *Analyzer, ast_node: *ast.Node) !void { + const sema_node = analyzer.target.nodes.get(ast_node.name.value).?; + + sema_node.type = try analyzer.resolveType(&ast_node.value); + } + + fn validateNodes(analyzer: *Analyzer, ast_node: *ast.Node) !void { + const sema_node = analyzer.target.nodes.get(ast_node.name.value).?; + + try analyzer.validateType(sema_node.type); + } + + fn instantiateRules(analyzer: *Analyzer, ast_rule: *ast.Rule) !void { + const sema_rule = analyzer.target.rules.get(ast_rule.name.value).?; + + sema_rule.type = if (ast_rule.ast_type) |ast_type| + try analyzer.resolveType(&ast_type) + else + null; + + sema_rule.productions = try analyzer.target.arena.allocator().alloc(MappedProduction, ast_rule.productions.len()); + errdefer { + analyzer.target.arena.allocator().free(sema_rule.productions); + sema_rule.productions = &.{}; + } + + if (sema_rule.productions.len == 0) { + @panic("empty sema rule!"); + } + + var iter = ast.iterate(ast_rule.productions); + var index: usize = 0; + while (iter.next()) |ast_production| : (index += 1) { + const sema_production = &sema_rule.productions[index]; + + sema_production.* = MappedProduction{ + .production = try analyzer.translateProduction(ast_production.production), + .mapping = null, // Will be instantiated later + }; + } + } + + fn translateProduction(analyzer: *Analyzer, ast_prod: ast.Production) error{OutOfMemory}!Production { + switch (ast_prod) { + .literal => |literal| { + const gop = try analyzer.target.literal_patterns.getOrPut(literal.value); + if (!gop.found_existing) { + gop.value_ptr.* = try analyzer.target.arena.allocator().create(Pattern); + gop.value_ptr.*.* = .{ + .location = literal.location, // place of first use + .name = literal.value, + .data = .{ .literal_match = literal.value }, + .is_literal = true, + }; + } + return Production{ .terminal = gop.value_ptr.* }; + }, + .terminal => |terminal| { + if (analyzer.target.patterns.get(terminal.identifier)) |pattern| { + return Production{ .terminal = pattern }; + } else { + try analyzer.emitDiagnostic(terminal.location, .reference_to_undeclared_pattern, .{ + .identifier = analyzer.strings.get(terminal.identifier), + }); + return Production{ .terminal = &BAD_PATTERN_SENTINEL }; + } + }, + .recursion => |recursion| { + if (analyzer.target.rules.get(recursion.identifier)) |rule| { + return Production{ .recursion = rule }; + } else { + try analyzer.emitDiagnostic(recursion.location, .reference_to_undeclared_rule, .{ + .identifier = analyzer.strings.get(recursion.identifier), + }); + return Production{ .recursion = &BAD_RULE_SENTINEL }; + } + }, + .sequence => |sequence| { + if (sequence.len() == 0) + @panic("bad sequence: empty"); + + var seq = std.ArrayList(Production).init(analyzer.target.arena.allocator()); + defer seq.deinit(); + + try seq.ensureTotalCapacityPrecise(sequence.len()); + + var iter = ast.iterate(sequence); + while (iter.next()) |inner_prod| { + const inner_sema = try analyzer.translateProduction(inner_prod.*); + seq.appendAssumeCapacity(inner_sema); + } + + return Production{ + .sequence = seq.toOwnedSlice() catch @panic("bad capacity"), + }; + }, + .optional => |optional| { + const nested = try analyzer.target.arena.allocator().create(Production); + errdefer analyzer.target.arena.allocator().destroy(nested); + nested.* = try analyzer.translateProduction(.{ .sequence = optional }); + return .{ .optional = nested }; + }, + .repetition_zero => |repetition| { + const nested = try 
analyzer.target.arena.allocator().create(Production); + errdefer analyzer.target.arena.allocator().destroy(nested); + nested.* = try analyzer.translateProduction(.{ .sequence = repetition }); + return .{ .repetition_zero = nested }; + }, + .repetition_one => |repetition| { + const nested = try analyzer.target.arena.allocator().create(Production); + errdefer analyzer.target.arena.allocator().destroy(nested); + nested.* = try analyzer.translateProduction(.{ .sequence = repetition }); + return .{ .repetition_one = nested }; + }, + } + } + + fn instantiateMappings(analyzer: *Analyzer, ast_rule: *ast.Rule) !void { + const sem_rule: *Rule = analyzer.target.rules.get(ast_rule.name.value).?; + + var iter = ast.iterate(ast_rule.productions); + + for (sem_rule.productions) |*sem_prod| { + const ast_prod = iter.next().?; + sem_prod.mapping = if (ast_prod.mapping) |src_mapping| + try analyzer.translateMapping(src_mapping) + else + null; + } + std.debug.assert(iter.next() == null); + } + + fn translateMapping(analyzer: *Analyzer, ast_mapping: ast.AstMapping) error{OutOfMemory}!Mapping { + switch (ast_mapping) { + .literal => |ref| return Mapping{ .code_literal = ref.value }, + .user_reference => |ref| return Mapping{ .code_literal = ref.value }, + + .context_reference => |ast_context_reference| { + return Mapping{ + .context_reference = .{ + .index = ast_context_reference.index, + .production = &BAD_PRODUCTION_SENTINEL, + .type = &BAD_TYPE_SENTINEL, + }, + }; + }, + + inline .user_function_call, .function_call => |function_call| { + const function_name = function_call.function.value; + + var args = try analyzer.target.arena.allocator().alloc(Mapping, function_call.arguments.len()); + errdefer analyzer.target.arena.allocator().free(args); + + var iter = ast.iterate(function_call.arguments); + for (args) |*item| { + const src = iter.next().?; + item.* = try analyzer.translateMapping(src.*); + } + std.debug.assert(iter.next() == null); + + const fncall = FunctionCall{ + .arguments = args, + .function = function_name, + .return_type = null, + }; + + return switch (ast_mapping) { + .user_function_call => Mapping{ .user_function_call = fncall }, + .function_call => Mapping{ .builtin_function_call = fncall }, + else => unreachable, + }; + }, + + .variant => |ast_variant| { + const init_expr = try analyzer.translateMapping(ast_variant.value.*); + + // ast_variant.field.value + return Mapping{ + .variant_initializer = .{ + .type = &BAD_TYPE_SENTINEL, + .field = &BAD_FIELD_SENTINEL, + .value = try moveToHeap(&analyzer.target.arena, Mapping, init_expr), + }, + }; + }, + + .list => |ast_list| { + var items = try analyzer.target.arena.allocator().alloc(Mapping, ast_list.len()); + errdefer analyzer.target.arena.allocator().free(items); + + var iter = ast.iterate(ast_list); + for (items) |*item| { + const src = iter.next().?; + item.* = try analyzer.translateMapping(src.*); + } + std.debug.assert(iter.next() == null); + + return Mapping{ + .list_initializer = .{ + .items = items, + .type = &BAD_TYPE_SENTINEL, + }, + }; + }, + + .record => |ast_record| { + var fields = try analyzer.target.arena.allocator().alloc(FieldInitializer, ast_record.len()); + errdefer analyzer.target.arena.allocator().free(fields); + + var iter = ast.iterate(ast_record); + for (fields) |*item| { + const src = iter.next().?; + const field_name = src.field.value; + _ = field_name; + item.* = .{ + .field = &BAD_FIELD_SENTINEL, + .value = try analyzer.translateMapping(src.value.*), + }; + } + std.debug.assert(iter.next() == null); + + return 
Mapping{ + .record_initializer = .{ + .fields = fields, + .type = &BAD_TYPE_SENTINEL, + }, + }; + }, + } + } + + const TypeTransform = struct { + optional: bool = false, + sequence: bool = false, + + pub fn add(tt: TypeTransform, comptime field: enum { optional, sequence }) TypeTransform { + var copy = tt; + @field(copy, @tagName(field)) = true; + return copy; + } + + pub fn format(tt: TypeTransform, fmt: []const u8, opt: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = opt; + var list = std.BoundedArray([]const u8, 2){}; + + if (tt.optional) list.appendAssumeCapacity("opt"); + if (tt.sequence) list.appendAssumeCapacity("seq"); + + try writer.writeAll("TypeTransform("); + + if (list.len == 0) { + try writer.writeAll("none"); + } else { + for (list.slice(), 0..) |item, i| { + if (i > 0) + try writer.writeAll(","); + try writer.writeAll(item); + } + } + + try writer.writeAll(")"); + } + }; + + const IndexedProd = struct { + transform: TypeTransform, + production: *Production, + }; + + const ProductionIndex = std.ArrayList(IndexedProd); + + fn linkAndValidateMappedProductions(analyzer: *Analyzer, ast_rule: *ast.Rule) !void { + const sem_rule: *Rule = analyzer.target.rules.get(ast_rule.name.value).?; + + const has_any_mapping = for (sem_rule.productions) |prod| { + if (prod.mapping != null) + break true; + } else false; + + if (has_any_mapping and sem_rule.type == null) { + try analyzer.emitDiagnostic(sem_rule.location, .mapping_requires_typed_rule, .{}); + return; + } + + if (!has_any_mapping) { + // We're done here, nothing to link and validate. + return; + } + + const rule_type = sem_rule.type.?; + + var iter = ast.iterate(ast_rule.productions); + + var prod_index = ProductionIndex.init(analyzer.arena); + defer prod_index.deinit(); + + for (sem_rule.productions) |*sem_prod| { + const ast_prod = iter.next().?; + + if (ast_prod.mapping) |src_mapping| { + const dst_mapping = &sem_prod.mapping.?; + + // Rebuild index: + prod_index.shrinkRetainingCapacity(0); + try analyzer.rebuildProductionIndex(&prod_index, &sem_prod.production, .{}); + + std.debug.print("index:\n", .{}); + for (0.., prod_index.items) |index, item| { + std.debug.print("[{}]: {} {s}\n", .{ index, item.transform, @tagName(item.production.*) }); + } + + try analyzer.linkAndValidateMapping( + rule_type, + dst_mapping, + src_mapping, + prod_index.items, + ); + } else { + std.debug.assert(sem_prod.mapping == null); + } + } + + std.debug.assert(iter.next() == null); + } + + fn rebuildProductionIndex(analyzer: *Analyzer, prod_index: *ProductionIndex, production: *Production, transform: TypeTransform) error{OutOfMemory}!void { + switch (production.*) { + // Those are terminals and will be appended as-is: + .terminal => try prod_index.append(.{ .production = production, .transform = transform }), + .recursion => try prod_index.append(.{ .production = production, .transform = transform }), + + // Sequences are unwrapped: + .sequence => |list| for (list) |*inner_prod| { + try analyzer.rebuildProductionIndex(prod_index, inner_prod, transform); + }, + + // They just "recurse" into their inner workings, but annotate type changes: + .optional => |inner_prod| { + try analyzer.rebuildProductionIndex(prod_index, inner_prod, transform.add(.optional)); + }, + + .repetition_zero => |inner_prod| { + try analyzer.rebuildProductionIndex(prod_index, inner_prod, transform.add(.sequence)); + }, + + .repetition_one => |inner_prod| { + try analyzer.rebuildProductionIndex(prod_index, inner_prod, transform.add(.sequence)); + }, + } + } + + fn 
linkAndValidateMapping(analyzer: *Analyzer, type_context: *Type, sem_map: *Mapping, ast_map: ast.AstMapping, production_index: []const IndexedProd) !void { + _ = type_context; + + switch (sem_map.*) { + // Always fine, and terminate recursion: + .code_literal, .user_literal => {}, + + // Rule refs: + + .context_reference => |*context_reference| { + if (context_reference.index >= production_index.len) { + context_reference.production = &BAD_PRODUCTION_SENTINEL; + try analyzer.emitDiagnostic(ast_map.context_reference.location, .context_reference_out_of_bounds, .{ + .index = context_reference.index, + .limit = @as(u32, @truncate(production_index.len - 1)), // should never underflow as empty rules are illegal + }); + return; + } + + context_reference.production = production_index[context_reference.index].production; + + const base_type: *Type = switch (context_reference.production.*) { + // + .terminal => blk: { + var proto: Type = .token; + const canon = try analyzer.getCanonicalType(&proto); + std.debug.assert(canon != &proto); + break :blk canon; + }, + + // Invocations of other + .recursion => |rule| rule.type, + + .sequence, + .optional, + .repetition_zero, + .repetition_one, + => unreachable, // we should not be able to reach those + + }; + + // TODO: Transform type for context reference + + context_reference.type = base_type; + }, + + // Calls: + + .user_function_call => |*user_function_call| { + _ = user_function_call; + }, + + .builtin_function_call => |*builtin_function_call| { + _ = builtin_function_call; + }, + + // Compounds: + + .record_initializer => |*record_initializer| { + _ = record_initializer; + }, + + .list_initializer => |*list_initializer| { + _ = list_initializer; + }, + + .variant_initializer => |*variant_initializer| { + _ = variant_initializer; + }, + } + } + + /// Checks if the given type is semantically ok or emits compiler errors if not. + fn validateType(analyzer: *Analyzer, type_node: *Type) error{OutOfMemory}!void { + if (type_node == &BAD_TYPE_SENTINEL) { + @panic("bad sentinel"); + } + + switch (type_node.*) { + .code_literal, .user_type => {}, // always fine + .optional => |child_type| try analyzer.validateType(child_type), + .record, .variant => |compound_type| { + var fields = compound_type.fields.iterator(); + while (fields.next()) |kv| { + const field_type = kv.value_ptr.type; + try analyzer.validateType(field_type); + } + }, + .named => |node| { + if (node == &BAD_NODE_SENTINEL) { + @panic("bad node!"); + } + }, + } + } + + /// Constructs a new compound type from the given AST declaration. Will emit diagnostics + /// on error and returns an incomplete type if errors happened. 
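+    /// Duplicate field names are reported via `duplicate_compound_field`; only the first declaration of a field is kept.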
+    fn createCompoundType(analyzer: *Analyzer, def: ast.CompoundType) !*CompoundType {
+        const ct = try analyzer.target.arena.allocator().create(CompoundType);
+        errdefer analyzer.target.arena.allocator().destroy(ct);
+
+        ct.* = CompoundType{
+            .fields = StringHashMap(Field).init(analyzer.target.arena.allocator()),
+        };
+        errdefer ct.fields.deinit();
+
+        try ct.fields.ensureTotalCapacity(def.fields.len());
+
+        var iter = ast.iterate(def.fields);
+        while (iter.next()) |field_def| {
+            const field_type = try analyzer.resolveType(&field_def.type);
+            const gop_result = ct.fields.getOrPutAssumeCapacity(field_def.name.value);
+
+            if (gop_result.found_existing) {
+                try analyzer.emitDiagnostic(field_def.location, .duplicate_compound_field, .{
+                    .previous_location = gop_result.value_ptr.location,
+                    .identifier = analyzer.strings.get(field_def.name.value),
+                });
+                continue;
+            }
+
+            gop_result.value_ptr.* = .{
+                .type = field_type,
+                .location = field_def.location,
+                .name = field_def.name.value,
+            };
+        }
+
+        return ct;
+    }
+
+    fn destroyCompoundType(analyzer: *Analyzer, ct: *CompoundType) void {
+        ct.fields.deinit();
+        ct.* = undefined;
+        analyzer.target.arena.allocator().destroy(ct);
+    }
+
+    fn resolveType(analyzer: *Analyzer, type_node: *const ast.TypeSpec) error{OutOfMemory}!*Type {
+        var compound_type: ?*CompoundType = null;
+        var proto_type: Type = switch (type_node.*) {
+            .reference => |def| .{
+                .named = analyzer.target.nodes.get(def.identifier) orelse blk: {
+                    try analyzer.emitDiagnostic(def.location, .reference_to_undeclared_node, .{
+                        .identifier = analyzer.strings.get(def.identifier),
+                    });
+                    break :blk &BAD_NODE_SENTINEL;
+                },
+            },
+            .literal => |def| Type{ .code_literal = def.value },
+            .custom => |def| Type{ .user_type = def.value },
+            .record => |def| blk: {
+                compound_type = try analyzer.createCompoundType(def);
+                break :blk .{ .record = compound_type.? };
+            },
+            .variant => |def| blk: {
+                compound_type = try analyzer.createCompoundType(def);
+                break :blk .{ .variant = compound_type.? };
+            },
+        };
+        errdefer if (compound_type) |ct|
+            analyzer.destroyCompoundType(ct);
+
+        return try analyzer.getCanonicalType(&proto_type);
+    }
+
+    fn getCanonicalType(analyzer: *Analyzer, proto_type: *Type) error{OutOfMemory}!*Type {
+        if (analyzer.getUniqueTypeHandle(proto_type)) |resolved_type| {
+            analyzer.deduplicated_type_count += 1;
+            // logger.debug("deduplicated a {s}", .{@tagName(resolved_type.*)});
+            return resolved_type;
+        }
+
+        const new_type = try analyzer.target.arena.allocator().create(Type);
+        errdefer analyzer.target.arena.allocator().destroy(new_type);
+
+        new_type.* = proto_type.*;
+
+        try analyzer.type_stash.putNoClobber(new_type, {});
+
+        return new_type;
+    }
+
+    fn getUniqueTypeHandle(analyzer: Analyzer, proto_type: *Type) ?*Type {
+        if (analyzer.type_stash.getKey(proto_type)) |key| {
+            return key;
+        }
+        return null;
+    }
+
+    const DeclarationError = error{
+        OutOfMemory,
+        SemanticError,
+    };
+    fn declareElement(
+        analyzer: *Analyzer,
+        comptime Element: type,
+        comptime AstNode: type,
+        set: *StringHashMap(*Element),
+        ast_map: *std.AutoHashMap(*Element, *AstNode),
+        ast_node: *AstNode,
+        name: ast.Identifier,
+        comptime diagnostic: Diagnostics.Code,
+    ) DeclarationError!*Element {
+        const gop = try set.getOrPut(name.value);
+        if (gop.found_existing) {
+            // emit diagnostic here
+            try analyzer.emitDiagnostic(name.location, diagnostic, .{
+                .identifier = analyzer.strings.get(name.value),
+                .previous_location = gop.value_ptr.*.*.location,
+            });
+            return error.SemanticError;
+        }
+        errdefer _ = set.swapRemove(name.value);
+
+        const item = try analyzer.arena.create(Element);
+        errdefer analyzer.arena.destroy(item);
+
+        item.* = undefined;
+
+        gop.value_ptr.* = item;
+
+        try ast_map.putNoClobber(item, ast_node);
+
+        return item;
+    }
+
+    fn emitDiagnostic(analyzer: *Analyzer, location: ptk.Location, comptime code: Diagnostics.Code, params: Diagnostics.Data(code)) !void {
+        try analyzer.diagnostics.emit(location, code, params);
+    }
+};
+
+const TypeContext = struct {
+    const HashFn = std.hash.Fnv1a_64;
+
+    pub fn eql(ctx: TypeContext, lhs: *Type, rhs: *Type) bool {
+        _ = ctx;
+        if (lhs == rhs)
+            return true;
+        if (lhs.id() != rhs.id())
+            return false;
+        switch (lhs.*) {
+            inline .code_literal, .user_type, .optional, .named => |val, tag| return val == @field(rhs, @tagName(tag)),
+            .record, .variant => return false, // they are compared by identity only
+        }
+    }
+
+    pub fn hash(ctx: TypeContext, t: *Type) u64 {
+        _ = ctx;
+        var hasher = HashFn.init();
+        hasher.update(@tagName(t.*));
+        switch (t.*) {
+            .code_literal => |lit| hasher.update(&std.mem.toBytes(@intFromEnum(lit))),
+            .user_type => |lit| hasher.update(&std.mem.toBytes(@intFromEnum(lit))),
+            .optional => |child| hasher.update(&std.mem.toBytes(child)),
+            .named => |node| hasher.update(&std.mem.toBytes(node)),
+            .record, .variant => hasher.update(&std.mem.toBytes(t)),
+        }
+        return hasher.final();
+    }
+};
+
+fn moveToHeap(arena: *std.heap.ArenaAllocator, comptime T: type, template: T) error{OutOfMemory}!*T {
+    const dupe = try arena.allocator().create(T);
+    dupe.* = template;
+    return dupe;
+}
+
+pub const BuiltinFunction = struct {
+    name: []const u8,
+};
+
+pub const builtins = struct {
+    pub const foo = BuiltinFunction{ .name = "foo" };
+};
diff --git a/src/Diagnostics.zig b/src/toolkit/Diagnostics.zig
similarity index 84%
rename from src/Diagnostics.zig
rename to src/toolkit/Diagnostics.zig
index 0a93c19..bf3a842
--- a/src/Diagnostics.zig
+++ b/src/toolkit/Diagnostics.zig
@@ -38,8 +38,15 @@ pub fn emit(self: *Self,
location: Location, level: Error.Level, comptime fmt: [ const str = try std.fmt.allocPrintZ(allocator, fmt, args); errdefer allocator.free(str); + var cloned_location = location; + if (location.source) |source| { + cloned_location.source = try allocator.dupe(u8, source); + } + errdefer if (cloned_location.source) |source| + allocator.free(source); + try self.errors.append(allocator, Error{ - .location = location, + .location = cloned_location, .level = level, .message = str, }); diff --git a/src/Error.zig b/src/toolkit/Error.zig similarity index 100% rename from src/Error.zig rename to src/toolkit/Error.zig diff --git a/src/Location.zig b/src/toolkit/Location.zig similarity index 100% rename from src/Location.zig rename to src/toolkit/Location.zig diff --git a/src/StringCache.zig b/src/toolkit/StringCache.zig similarity index 100% rename from src/StringCache.zig rename to src/toolkit/StringCache.zig diff --git a/src/main.zig b/src/toolkit/main.zig similarity index 78% rename from src/main.zig rename to src/toolkit/main.zig index 784dec5..9a5d40b 100644 --- a/src/main.zig +++ b/src/toolkit/main.zig @@ -17,9 +17,15 @@ pub const RuleSet = pcore.RuleSet; pub const Error = @import("Error.zig"); pub const Diagnostics = @import("Diagnostics.zig"); pub const StringCache = @import("StringCache.zig"); +pub const strings = @import("strings.zig"); -test { +pub const testing = struct { + pub const validateMatcher = tok.testMatcher; +}; + +comptime { _ = Location; _ = tok; _ = pcore; + _ = strings; } diff --git a/src/parser_core.zig b/src/toolkit/parser_core.zig similarity index 99% rename from src/parser_core.zig rename to src/toolkit/parser_core.zig index 394d679..9bfcf42 100644 --- a/src/parser_core.zig +++ b/src/toolkit/parser_core.zig @@ -52,6 +52,7 @@ pub fn ParserCore(comptime TokenizerT: type, comptime ignore_list: anytype) type } pub const AcceptError = error{ EndOfStream, UnexpectedToken } || Tokenizer.NextError; + /// Accepts a token that matches `rule`. Otherwise returns /// - `error.EndOfStream` when no tokens are available /// - `error.UnexpectedToken` when an invalid token was encountered diff --git a/src/toolkit/strings.zig b/src/toolkit/strings.zig new file mode 100644 index 0000000..9c41933 --- /dev/null +++ b/src/toolkit/strings.zig @@ -0,0 +1,156 @@ +pub const std = @import("std"); + +pub const String = enum(u32) { + empty, + + _, + + pub fn format(string: String, fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + if (string == .empty) { + try writer.writeAll("String(empty)"); + } else { + try writer.print("String({})", .{ + @intFromEnum(string), + }); + } + } +}; + +/// A string pool that can store up to 4 GB of text and deduplicate instances. +/// +/// Use this to reduce the memory footprint of your AST and allow quick comparison of strings +/// by using the `String` type instead of doing a `std.mem.eql`. +pub const Pool = struct { + data: std.ArrayList(u8), + count: usize = 0, + + pub fn init(allocator: std.mem.Allocator) !Pool { + var pool = Pool{ + .data = std.ArrayList(u8).init(allocator), + }; + errdefer pool.deinit(); + + std.debug.assert(try pool.insert("") == .empty); + + return pool; + } + + pub fn deinit(pool: *Pool) void { + pool.data.deinit(); + pool.* = undefined; + } + + pub fn insert(pool: *Pool, string: []const u8) error{OutOfMemory}!String { + std.debug.assert(std.mem.indexOfScalar(u8, string, 0) == null); // Interned strings must not contain NUL! 
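+        // Strings are stored back to back, each terminated by a NUL byte; try to reuse the offset of an existing occurrence before appending new data.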
+
+        const storage = pool.data.items;
+
+        var search_index: usize = 0;
+        while (search_index < storage.len) {
+            const index = std.mem.indexOfPos(u8, storage, search_index, string) orelse break;
+
+            if (index + string.len + 1 > storage.len)
+                break;
+
+            if (storage[index + string.len] == 0)
+                return @enumFromInt(index);
+
+            // starts with `string`, but doesn't end with NUL.
+            search_index = index + string.len;
+        }
+
+        const index = storage.len;
+
+        if (index > std.math.maxInt(u32)) {
+            return error.OutOfMemory;
+        }
+
+        try pool.data.ensureUnusedCapacity(string.len + 1); // invalidates storage
+        pool.data.appendSliceAssumeCapacity(string);
+        pool.data.appendAssumeCapacity(0);
+        pool.count += 1;
+
+        return @enumFromInt(index);
+    }
+
+    /// Returns the string in the pool.
+    pub fn get(pool: *const Pool, string: String) [:0]const u8 {
+        const storage = pool.data.items;
+        const index: usize = @intFromEnum(string);
+        std.debug.assert(index < storage.len);
+        const slice = std.mem.sliceTo(storage[index..], 0);
+        return slice.ptr[0..slice.len :0];
+    }
+
+    pub fn format(pool: Pool, fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
+        _ = fmt;
+        _ = options;
+        try writer.print("StringPool(count={}, size={:.2})", .{
+            pool.count,
+            std.fmt.fmtIntSizeBin(pool.data.items.len),
+        });
+    }
+};
+
+/// Very simplistic string deduplicator that returns the same slice for equal strings.
+/// It only performs deduplication, no fancy storage strategy.
+pub const Dedupe = struct {
+    arena: std.heap.ArenaAllocator,
+    items: std.StringHashMapUnmanaged(void),
+
+    pub fn init(allocator: std.mem.Allocator) Dedupe {
+        return Dedupe{
+            .arena = std.heap.ArenaAllocator.init(allocator),
+            .items = .{},
+        };
+    }
+
+    pub fn deinit(cache: *Dedupe) void {
+        cache.items.deinit(cache.arena.child_allocator);
+        cache.arena.deinit();
+        cache.* = undefined;
+    }
+
+    /// Gets or inserts a string into the cache. `string` may be a short-lived value;
+    /// the returned slice is guaranteed to have the lifetime of the string cache.
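+    /// Internally the map is keyed on the caller's slice via `getOrPut`; on first insertion the key is replaced with an arena-owned copy.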
+ pub fn fetch(cache: *Dedupe, string: []const u8) ![]const u8 { + const allocator = cache.arena.child_allocator; + const gop = try cache.items.getOrPut(allocator, string); + if (!gop.found_existing) { + errdefer _ = cache.items.remove(string); + gop.key_ptr.* = try cache.arena.allocator().dupe(u8, string); + } + return gop.key_ptr.*; + } +}; + +test Pool { + var pool = try Pool.init(std.testing.allocator); + defer pool.deinit(); + + try std.testing.expectEqualStrings("", pool.get(.empty)); + + try std.testing.expectEqual(String.empty, try pool.insert("")); + + const a = try pool.insert("hello, world!"); + const b = try pool.insert("world!"); // suffix of a + const c = try pool.insert("world"); // non-suffix + + // All strings must be unique: + try std.testing.expect(a != b); + try std.testing.expect(a != c); + try std.testing.expect(b != c); + + // But must retain their qualities: + try std.testing.expectEqualStrings("hello, world!", pool.get(a)); + try std.testing.expectEqualStrings("world!", pool.get(b)); + try std.testing.expectEqualStrings("world", pool.get(c)); + + // sequential inserts may never return different values: + try std.testing.expectEqual(a, try pool.insert("hello, world!")); + try std.testing.expectEqual(a, try pool.insert("hello, world!")); + try std.testing.expectEqual(a, try pool.insert("hello, world!")); + try std.testing.expectEqual(a, try pool.insert("hello, world!")); +} diff --git a/src/token.zig b/src/toolkit/token.zig similarity index 56% rename from src/token.zig rename to src/toolkit/token.zig index 60ae8fa..028272c 100644 --- a/src/token.zig +++ b/src/toolkit/token.zig @@ -14,5 +14,15 @@ pub fn Token(comptime Type: type) type { /// The type of the token that was matched by a matching function type: Type, + + pub fn format(token: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try writer.print("Token {{ .type = {}, .text = \"{}\", .location = {} }}", .{ + token.type, + std.zig.fmtEscapes(token.text), + token.location, + }); + } }; } diff --git a/src/tokenizer.zig b/src/toolkit/tokenizer.zig similarity index 89% rename from src/tokenizer.zig rename to src/toolkit/tokenizer.zig index 1ee859c..ec20f18 100644 --- a/src/tokenizer.zig +++ b/src/toolkit/tokenizer.zig @@ -3,7 +3,9 @@ const std = @import("std"); const Location = @import("Location.zig"); const GenericToken = @import("token.zig").Token; -pub const Matcher = *const fn (str: []const u8) ?usize; +/// This is a function that will either accept a `text` as a token +/// of a non-zero length or returns `0` if the text does not match the token. +pub const Matcher = *const fn (text: []const u8) usize; pub fn Pattern(comptime TokenType: type) type { return struct { @@ -66,14 +68,13 @@ pub fn Tokenizer(comptime TokenTypeT: type, comptime patterns: []const Pattern(T if (rest.len == 0) return null; const maybe_token = for (patterns) |pat| { - if (pat.match(rest)) |len| { - if (len > 0) { - break Token{ - .location = self.current_location, - .text = rest[0..len], - .type = pat.type, - }; - } + const len = pat.match(rest); + if (len > 0) { + break Token{ + .location = self.current_location, + .text = rest[0..len], + .type = pat.type, + }; } } else null; if (maybe_token) |token| { @@ -91,11 +92,11 @@ pub const matchers = struct { /// Matches the literal `text`. 
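+    /// Returns `text.len` when `str` starts with `text`, otherwise 0.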
pub fn literal(comptime text: []const u8) Matcher { return struct { - fn match(str: []const u8) ?usize { + fn match(str: []const u8) usize { return if (std.mem.startsWith(u8, str, text)) text.len else - null; + 0; } }.match; } @@ -103,17 +104,17 @@ pub const matchers = struct { /// Matches any "word" that is "text\b" pub fn word(comptime text: []const u8) Matcher { return struct { - fn match(input: []const u8) ?usize { + fn match(input: []const u8) usize { if (std.mem.startsWith(u8, input, text)) { if (text.len == input.len) return text.len; const c = input[text.len]; if (std.ascii.isAlphanumeric(c) or (c == '_')) // matches regex \w\W - return null; + return 0; return text.len; } - return null; + return 0; } }.match; } @@ -121,7 +122,7 @@ pub const matchers = struct { /// Takes characters while they are any of the given `chars`. pub fn takeAnyOf(comptime chars: []const u8) Matcher { return struct { - fn match(str: []const u8) ?usize { + fn match(str: []const u8) usize { for (str, 0..) |c, i| { if (std.mem.indexOfScalar(u8, chars, c) == null) { return i; @@ -140,7 +141,7 @@ pub const matchers = struct { }; return struct { - fn match(str: []const u8) ?usize { + fn match(str: []const u8) usize { for (str, 0..) |c, i| { const lc = std.ascii.toLower(c); if (std.mem.indexOfScalar(u8, lower_chars, lc) == null) { @@ -155,7 +156,7 @@ pub const matchers = struct { /// Takes characters while they are not any of the given `chars`. pub fn takeNoneOf(comptime chars: []const u8) Matcher { return struct { - fn match(str: []const u8) ?usize { + fn match(str: []const u8) usize { for (str, 0..) |c, i| { if (std.mem.indexOfScalar(u8, chars, c) != null) { return i; @@ -168,10 +169,12 @@ pub const matchers = struct { pub fn withPrefix(comptime prefix: []const u8, comptime matcher: Matcher) Matcher { return struct { - fn match(str: []const u8) ?usize { + fn match(str: []const u8) usize { if (!std.mem.startsWith(u8, str, prefix)) - return null; - const pattern_len = matcher(str[prefix.len..]) orelse return null; + return 0; + const pattern_len = matcher(str[prefix.len..]); + if (pattern_len == 0) + return 0; return prefix.len + pattern_len; } }.match; @@ -183,12 +186,12 @@ pub const matchers = struct { if (sequence.len == 0) @compileError("Empty sequence not allowed!"); return struct { - fn match(input: []const u8) ?usize { + fn match(input: []const u8) usize { var total_len: usize = 0; for (sequence) |seq_match| { - const len = seq_match(input[total_len..]) orelse return null; + const len = seq_match(input[total_len..]); if (len == 0) - return null; + return 0; total_len += len; } return total_len; @@ -198,7 +201,7 @@ pub const matchers = struct { // pre-shipped typical patterns - pub fn identifier(str: []const u8) ?usize { + pub fn identifier(str: []const u8) usize { const first_char = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; const all_chars = first_char ++ "0123456789"; for (str, 0..) |c, i| { @@ -209,7 +212,7 @@ pub const matchers = struct { return str.len; } - pub fn whitespace(str: []const u8) ?usize { + pub fn whitespace(str: []const u8) usize { for (str, 0..) 
|c, i| { if (!std.ascii.isWhitespace(c)) return i; @@ -217,12 +220,12 @@ pub const matchers = struct { return str.len; } - pub fn linefeed(str: []const u8) ?usize { + pub fn linefeed(str: []const u8) usize { if (std.mem.startsWith(u8, str, "\r\n")) return 2; if (std.mem.startsWith(u8, str, "\n")) return 1; - return null; + return 0; } pub fn numberOfBase(comptime base: comptime_int) Matcher { @@ -321,12 +324,11 @@ test "save/restore tokenization" { try std.testing.expectEqual(Location{ .source = null, .line = 2, .column = 1 }, id1.location); } -fn testMatcher(match: Matcher, good: []const []const u8, bad: []const []const u8) !void { +pub fn testMatcher(match: Matcher, good: []const []const u8, bad: []const []const u8) !void { + std.debug.assert(good.len > 0); + std.debug.assert(bad.len > 0); for (good) |str| { - const v = match(str) orelse { - std.log.err("Didn't match pattern '{s}'", .{str}); - return error.MissedGoodPattern; - }; + const v = match(str); if (v == 0) { std.log.err("Didn't match pattern '{s}'", .{str}); return error.MissedGoodPattern; @@ -334,7 +336,7 @@ fn testMatcher(match: Matcher, good: []const []const u8, bad: []const []const u8 } for (bad) |str| { const v = match(str); - if (v != null and v.? > 0) { + if (v > 0) { std.log.err("Matched pattern '{s}'", .{str}); return error.MissedBadPattern; } diff --git a/test/analysis/accept/expect-warn-missing-start.ptk b/test/analysis/accept/expect-warn-missing-start.ptk new file mode 100644 index 0000000..f31365e --- /dev/null +++ b/test/analysis/accept/expect-warn-missing-start.ptk @@ -0,0 +1 @@ +# expected: W4000 diff --git a/test/analysis/accept/map-simple-builtin-fncall-0.ptk b/test/analysis/accept/map-simple-builtin-fncall-0.ptk new file mode 100644 index 0000000..b4d4eec --- /dev/null +++ b/test/analysis/accept/map-simple-builtin-fncall-0.ptk @@ -0,0 +1 @@ +rule basic = "hello" => builtin(); \ No newline at end of file diff --git a/test/analysis/accept/map-simple-builtin-fncall-1.ptk b/test/analysis/accept/map-simple-builtin-fncall-1.ptk new file mode 100644 index 0000000..21ebc7f --- /dev/null +++ b/test/analysis/accept/map-simple-builtin-fncall-1.ptk @@ -0,0 +1 @@ +rule basic = "hello" => builtin(`1`); \ No newline at end of file diff --git a/test/analysis/accept/map-simple-builtin-fncall-4.ptk b/test/analysis/accept/map-simple-builtin-fncall-4.ptk new file mode 100644 index 0000000..09e4372 --- /dev/null +++ b/test/analysis/accept/map-simple-builtin-fncall-4.ptk @@ -0,0 +1 @@ +rule basic = "hello" => builtin(`1`, `2`, `3`, `4`); \ No newline at end of file diff --git a/test/analysis/accept/map-simple-code-literal.ptk b/test/analysis/accept/map-simple-code-literal.ptk new file mode 100644 index 0000000..475f0a4 --- /dev/null +++ b/test/analysis/accept/map-simple-code-literal.ptk @@ -0,0 +1 @@ +rule basic = "hello" => `code`; \ No newline at end of file diff --git a/test/analysis/accept/map-simple-list-0.ptk b/test/analysis/accept/map-simple-list-0.ptk new file mode 100644 index 0000000..dffe97f --- /dev/null +++ b/test/analysis/accept/map-simple-list-0.ptk @@ -0,0 +1 @@ +rule basic = "hello" => { }; \ No newline at end of file diff --git a/test/analysis/accept/map-simple-list-1.ptk b/test/analysis/accept/map-simple-list-1.ptk new file mode 100644 index 0000000..ab4e2c2 --- /dev/null +++ b/test/analysis/accept/map-simple-list-1.ptk @@ -0,0 +1 @@ +rule basic = "hello" => { `1` }; \ No newline at end of file diff --git a/test/analysis/accept/map-simple-list-4.ptk b/test/analysis/accept/map-simple-list-4.ptk new file mode 
100644 index 0000000..3f970b9 --- /dev/null +++ b/test/analysis/accept/map-simple-list-4.ptk @@ -0,0 +1 @@ +rule basic = "hello" => { `1`, `2`, `3`, `4` }; \ No newline at end of file diff --git a/test/analysis/accept/map-simple-record-0.ptk b/test/analysis/accept/map-simple-record-0.ptk new file mode 100644 index 0000000..8f1a98c --- /dev/null +++ b/test/analysis/accept/map-simple-record-0.ptk @@ -0,0 +1 @@ +rule basic = "hello" => { }; diff --git a/test/analysis/accept/map-simple-record-1.ptk b/test/analysis/accept/map-simple-record-1.ptk new file mode 100644 index 0000000..4cf6bfd --- /dev/null +++ b/test/analysis/accept/map-simple-record-1.ptk @@ -0,0 +1 @@ +rule basic = "hello" => { field = `1` }; diff --git a/test/analysis/accept/map-simple-record-4.ptk b/test/analysis/accept/map-simple-record-4.ptk new file mode 100644 index 0000000..5f03773 --- /dev/null +++ b/test/analysis/accept/map-simple-record-4.ptk @@ -0,0 +1 @@ +rule basic = "hello" => { x = `1`, y = `2`, z = `3`, w = `4` }; diff --git a/test/analysis/accept/map-simple-ruleref.ptk b/test/analysis/accept/map-simple-ruleref.ptk new file mode 100644 index 0000000..4e0bc07 --- /dev/null +++ b/test/analysis/accept/map-simple-ruleref.ptk @@ -0,0 +1 @@ +rule basic = "hello" => $0; \ No newline at end of file diff --git a/test/analysis/accept/map-simple-user-fncall-0.ptk b/test/analysis/accept/map-simple-user-fncall-0.ptk new file mode 100644 index 0000000..82eb16e --- /dev/null +++ b/test/analysis/accept/map-simple-user-fncall-0.ptk @@ -0,0 +1 @@ +rule basic = "hello" => @userFn(); \ No newline at end of file diff --git a/test/analysis/accept/map-simple-user-fncall-1.ptk b/test/analysis/accept/map-simple-user-fncall-1.ptk new file mode 100644 index 0000000..b6b55fe --- /dev/null +++ b/test/analysis/accept/map-simple-user-fncall-1.ptk @@ -0,0 +1 @@ +rule basic = "hello" => @userFn(`1`); \ No newline at end of file diff --git a/test/analysis/accept/map-simple-user-fncall-4.ptk b/test/analysis/accept/map-simple-user-fncall-4.ptk new file mode 100644 index 0000000..ab0bcb2 --- /dev/null +++ b/test/analysis/accept/map-simple-user-fncall-4.ptk @@ -0,0 +1 @@ +rule basic = "hello" => @userFn(`1`, `2`, `3`, `4`); \ No newline at end of file diff --git a/test/analysis/accept/map-simple-user-literal.ptk b/test/analysis/accept/map-simple-user-literal.ptk new file mode 100644 index 0000000..afef9ad --- /dev/null +++ b/test/analysis/accept/map-simple-user-literal.ptk @@ -0,0 +1 @@ +rule basic = "hello" => @externalThingy; \ No newline at end of file diff --git a/test/analysis/accept/map-simple-variant.ptk b/test/analysis/accept/map-simple-variant.ptk new file mode 100644 index 0000000..229b3cb --- /dev/null +++ b/test/analysis/accept/map-simple-variant.ptk @@ -0,0 +1 @@ +rule basic = "hello" => field: `code`; \ No newline at end of file diff --git a/test/analysis/accept/match-group-many-item.ptk b/test/analysis/accept/match-group-many-item.ptk new file mode 100644 index 0000000..5e1e31f --- /dev/null +++ b/test/analysis/accept/match-group-many-item.ptk @@ -0,0 +1 @@ +rule mode = ( "first" "second" "third" ); \ No newline at end of file diff --git a/test/analysis/accept/match-group-many-sequence.ptk b/test/analysis/accept/match-group-many-sequence.ptk new file mode 100644 index 0000000..40902e7 --- /dev/null +++ b/test/analysis/accept/match-group-many-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "one" "two" "three" ) "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-group-nested.ptk 
b/test/analysis/accept/match-group-nested.ptk new file mode 100644 index 0000000..d35091c --- /dev/null +++ b/test/analysis/accept/match-group-nested.ptk @@ -0,0 +1 @@ +rule mode = "L0:0" ( "L1:0" ( "L2:0" "L2:1" "L2:2" ) "L1:2" ) "L0:2"; \ No newline at end of file diff --git a/test/analysis/accept/match-group-one-item.ptk b/test/analysis/accept/match-group-one-item.ptk new file mode 100644 index 0000000..faa24e7 --- /dev/null +++ b/test/analysis/accept/match-group-one-item.ptk @@ -0,0 +1 @@ +rule mode = ( "item" ); \ No newline at end of file diff --git a/test/analysis/accept/match-group-one-sequence.ptk b/test/analysis/accept/match-group-one-sequence.ptk new file mode 100644 index 0000000..e34f909 --- /dev/null +++ b/test/analysis/accept/match-group-one-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "second" ) "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-literal-rule.ptk b/test/analysis/accept/match-literal-rule.ptk new file mode 100644 index 0000000..a0b8dc0 --- /dev/null +++ b/test/analysis/accept/match-literal-rule.ptk @@ -0,0 +1 @@ +rule basic = "basic"; \ No newline at end of file diff --git a/test/analysis/accept/match-literal-sequence-variant.ptk b/test/analysis/accept/match-literal-sequence-variant.ptk new file mode 100644 index 0000000..842274e --- /dev/null +++ b/test/analysis/accept/match-literal-sequence-variant.ptk @@ -0,0 +1,4 @@ +rule mode = + "basic" "item" + | "extended" "item" +; \ No newline at end of file diff --git a/test/analysis/accept/match-literal-sequence.ptk b/test/analysis/accept/match-literal-sequence.ptk new file mode 100644 index 0000000..245add7 --- /dev/null +++ b/test/analysis/accept/match-literal-sequence.ptk @@ -0,0 +1 @@ +rule basic = "basic" "words" "after" "another"; \ No newline at end of file diff --git a/test/analysis/accept/match-literal-variants.ptk b/test/analysis/accept/match-literal-variants.ptk new file mode 100644 index 0000000..28ff569 --- /dev/null +++ b/test/analysis/accept/match-literal-variants.ptk @@ -0,0 +1 @@ +rule mode = "basic" | "extended"; \ No newline at end of file diff --git a/test/analysis/accept/match-optional-many-item.ptk b/test/analysis/accept/match-optional-many-item.ptk new file mode 100644 index 0000000..fb4b409 --- /dev/null +++ b/test/analysis/accept/match-optional-many-item.ptk @@ -0,0 +1 @@ +rule mode = ( "first" "second" "third" )?; \ No newline at end of file diff --git a/test/analysis/accept/match-optional-many-sequence.ptk b/test/analysis/accept/match-optional-many-sequence.ptk new file mode 100644 index 0000000..2c49812 --- /dev/null +++ b/test/analysis/accept/match-optional-many-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "one" "two" "three" )? "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-optional-nested.ptk b/test/analysis/accept/match-optional-nested.ptk new file mode 100644 index 0000000..18bf0d9 --- /dev/null +++ b/test/analysis/accept/match-optional-nested.ptk @@ -0,0 +1 @@ +rule mode = "L0:0" ( "L1:0" ( "L2:0" "L2:1" "L2:2" )? "L1:2" )? 
"L0:2"; \ No newline at end of file diff --git a/test/analysis/accept/match-optional-one-item.ptk b/test/analysis/accept/match-optional-one-item.ptk new file mode 100644 index 0000000..3c5ccc0 --- /dev/null +++ b/test/analysis/accept/match-optional-one-item.ptk @@ -0,0 +1 @@ +rule mode = ( "item" )?; \ No newline at end of file diff --git a/test/analysis/accept/match-optional-one-sequence.ptk b/test/analysis/accept/match-optional-one-sequence.ptk new file mode 100644 index 0000000..c5fd167 --- /dev/null +++ b/test/analysis/accept/match-optional-one-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "second" )? "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_one-many-item.ptk b/test/analysis/accept/match-rep_one-many-item.ptk new file mode 100644 index 0000000..89961d7 --- /dev/null +++ b/test/analysis/accept/match-rep_one-many-item.ptk @@ -0,0 +1 @@ +rule mode = ( "first" "second" "third" )+; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_one-many-sequence.ptk b/test/analysis/accept/match-rep_one-many-sequence.ptk new file mode 100644 index 0000000..0568546 --- /dev/null +++ b/test/analysis/accept/match-rep_one-many-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "one" "two" "three" )+ "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_one-nested.ptk b/test/analysis/accept/match-rep_one-nested.ptk new file mode 100644 index 0000000..99fbc2f --- /dev/null +++ b/test/analysis/accept/match-rep_one-nested.ptk @@ -0,0 +1 @@ +rule mode = "L0:0" ( "L1:0" ( "L2:0" "L2:1" "L2:2" )+ "L1:2" )+ "L0:2"; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_one-one-item.ptk b/test/analysis/accept/match-rep_one-one-item.ptk new file mode 100644 index 0000000..7f273d5 --- /dev/null +++ b/test/analysis/accept/match-rep_one-one-item.ptk @@ -0,0 +1 @@ +rule mode = ( "item" )+; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_one-one-sequence.ptk b/test/analysis/accept/match-rep_one-one-sequence.ptk new file mode 100644 index 0000000..64af460 --- /dev/null +++ b/test/analysis/accept/match-rep_one-one-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "second" )+ "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_zero-many-item.ptk b/test/analysis/accept/match-rep_zero-many-item.ptk new file mode 100644 index 0000000..5d9b366 --- /dev/null +++ b/test/analysis/accept/match-rep_zero-many-item.ptk @@ -0,0 +1 @@ +rule mode = ( "first" "second" "third" )*; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_zero-many-sequence.ptk b/test/analysis/accept/match-rep_zero-many-sequence.ptk new file mode 100644 index 0000000..cadf2c5 --- /dev/null +++ b/test/analysis/accept/match-rep_zero-many-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "one" "two" "three" )* "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_zero-nested.ptk b/test/analysis/accept/match-rep_zero-nested.ptk new file mode 100644 index 0000000..fee0799 --- /dev/null +++ b/test/analysis/accept/match-rep_zero-nested.ptk @@ -0,0 +1 @@ +rule mode = "L0:0" ( "L1:0" ( "L2:0" "L2:1" "L2:2" )* "L1:2" )* "L0:2"; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_zero-one-item.ptk b/test/analysis/accept/match-rep_zero-one-item.ptk new file mode 100644 index 0000000..d058aee --- /dev/null +++ b/test/analysis/accept/match-rep_zero-one-item.ptk @@ -0,0 +1 @@ +rule mode = ( "item" )*; \ No newline at end of file diff --git 
a/test/analysis/accept/match-rep_zero-one-sequence.ptk b/test/analysis/accept/match-rep_zero-one-sequence.ptk new file mode 100644 index 0000000..34e3a06 --- /dev/null +++ b/test/analysis/accept/match-rep_zero-one-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "second" )* "third"; \ No newline at end of file diff --git a/test/analysis/accept/pattern-custom-skip.ptk b/test/analysis/accept/pattern-custom-skip.ptk new file mode 100644 index 0000000..83f23c7 --- /dev/null +++ b/test/analysis/accept/pattern-custom-skip.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = @externalFunction; + diff --git a/test/analysis/accept/pattern-custom.ptk b/test/analysis/accept/pattern-custom.ptk new file mode 100644 index 0000000..83f23c7 --- /dev/null +++ b/test/analysis/accept/pattern-custom.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = @externalFunction; + diff --git a/test/analysis/accept/pattern-literal-skip.ptk b/test/analysis/accept/pattern-literal-skip.ptk new file mode 100644 index 0000000..a5efb6c --- /dev/null +++ b/test/analysis/accept/pattern-literal-skip.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = literal "a-word" skip; + diff --git a/test/analysis/accept/pattern-literal.ptk b/test/analysis/accept/pattern-literal.ptk new file mode 100644 index 0000000..4964d2c --- /dev/null +++ b/test/analysis/accept/pattern-literal.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = literal "a-word"; + diff --git a/test/analysis/accept/pattern-regex-skip.ptk b/test/analysis/accept/pattern-regex-skip.ptk new file mode 100644 index 0000000..b9e45ec --- /dev/null +++ b/test/analysis/accept/pattern-regex-skip.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = regex "a-word" skip; + diff --git a/test/analysis/accept/pattern-regex.ptk b/test/analysis/accept/pattern-regex.ptk new file mode 100644 index 0000000..4ec3715 --- /dev/null +++ b/test/analysis/accept/pattern-regex.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = regex "a-word"; + diff --git a/test/analysis/accept/pattern-word-skip.ptk b/test/analysis/accept/pattern-word-skip.ptk new file mode 100644 index 0000000..07a0e07 --- /dev/null +++ b/test/analysis/accept/pattern-word-skip.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = word "a-word" skip; + diff --git a/test/analysis/accept/pattern-word.ptk b/test/analysis/accept/pattern-word.ptk new file mode 100644 index 0000000..07a0e07 --- /dev/null +++ b/test/analysis/accept/pattern-word.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = word "a-word" skip; + diff --git a/test/analysis/accept/start-decl.ptk b/test/analysis/accept/start-decl.ptk new file mode 100644 index 0000000..404f545 --- /dev/null +++ b/test/analysis/accept/start-decl.ptk @@ -0,0 +1,7 @@ + + + +start ; + +rule magic = "magic"; + diff --git a/test/analysis/reject/duplicate-field-record.ptk b/test/analysis/reject/duplicate-field-record.ptk new file mode 100644 index 0000000..3a64f2a --- /dev/null +++ b/test/analysis/reject/duplicate-field-record.ptk @@ -0,0 +1,7 @@ +# expected: E1307 + +node bad = record + x: `bool`, + y: `bool`, + x: `bool` +; \ No newline at end of file diff --git a/test/analysis/reject/duplicate-field-variant.ptk b/test/analysis/reject/duplicate-field-variant.ptk new file mode 100644 index 0000000..377a38a --- /dev/null +++ b/test/analysis/reject/duplicate-field-variant.ptk @@ -0,0 +1,7 @@ +# expected: E1307 + +node bad = variant + x: `bool`, + y: `bool`, + x: `bool` +; \ No newline at end of file diff --git a/test/analysis/reject/duplicate-node.ptk b/test/analysis/reject/duplicate-node.ptk new file mode 100644 index 0000000..0f67291 --- /dev/null +++ 
b/test/analysis/reject/duplicate-node.ptk
@@ -0,0 +1,3 @@
+# expected: E1301
+node foo = `bool`;
+node foo = `bool`;
\ No newline at end of file
diff --git a/test/analysis/reject/duplicate-pattern.ptk b/test/analysis/reject/duplicate-pattern.ptk
new file mode 100644
index 0000000..4302396
--- /dev/null
+++ b/test/analysis/reject/duplicate-pattern.ptk
@@ -0,0 +1,3 @@
+# expected: E1302
+pattern foo = literal "bla";
+pattern foo = literal "bla";
\ No newline at end of file
diff --git a/test/analysis/reject/duplicate-rule.ptk b/test/analysis/reject/duplicate-rule.ptk
new file mode 100644
index 0000000..eff3ee6
--- /dev/null
+++ b/test/analysis/reject/duplicate-rule.ptk
@@ -0,0 +1,3 @@
+# expected: E1300
+rule foo = "";
+rule foo = "";
\ No newline at end of file
diff --git a/test/analysis/reject/duplicate-start.ptk b/test/analysis/reject/duplicate-start.ptk
new file mode 100644
index 0000000..52c55cf
--- /dev/null
+++ b/test/analysis/reject/duplicate-start.ptk
@@ -0,0 +1,10 @@
+# expected: E1306
+
+start ;
+
+rule magic = "magic";
+
+rule disco = "disco";
+
+start ;
+
diff --git a/test/analysis/reject/duplicate-undeclared-start.ptk b/test/analysis/reject/duplicate-undeclared-start.ptk
new file mode 100644
index 0000000..8b53833
--- /dev/null
+++ b/test/analysis/reject/duplicate-undeclared-start.ptk
@@ -0,0 +1,8 @@
+# expected: E1303, E1306
+
+start ;
+
+rule magic = "magic";
+
+start ;
+
diff --git a/test/analysis/reject/map-ruleref-oob.ptk b/test/analysis/reject/map-ruleref-oob.ptk
new file mode 100644
index 0000000..8af2ba4
--- /dev/null
+++ b/test/analysis/reject/map-ruleref-oob.ptk
@@ -0,0 +1,2 @@
+# expected: E1308
+rule basic = "hello" => $1;
\ No newline at end of file
diff --git a/test/analysis/reject/production-undeclared-pattern-ref.ptk b/test/analysis/reject/production-undeclared-pattern-ref.ptk
new file mode 100644
index 0000000..10e66f0
--- /dev/null
+++ b/test/analysis/reject/production-undeclared-pattern-ref.ptk
@@ -0,0 +1,3 @@
+# expected: E1305
+
+rule foo = $pat;
\ No newline at end of file
diff --git a/test/analysis/reject/production-undeclared-rule-ref.ptk b/test/analysis/reject/production-undeclared-rule-ref.ptk
new file mode 100644
index 0000000..a5525cc
--- /dev/null
+++ b/test/analysis/reject/production-undeclared-rule-ref.ptk
@@ -0,0 +1,3 @@
+# expected: E1303
+
+rule foo = ;
\ No newline at end of file
diff --git a/test/analysis/reject/undeclared-start.ptk b/test/analysis/reject/undeclared-start.ptk
new file mode 100644
index 0000000..5a97c96
--- /dev/null
+++ b/test/analysis/reject/undeclared-start.ptk
@@ -0,0 +1,2 @@
+# expected: E1303, W4000
+start ;
\ No newline at end of file
diff --git a/test/parser/accept/basic-rule-ref.ptk b/test/parser/accept/basic-rule-ref.ptk
new file mode 100644
index 0000000..e31192c
--- /dev/null
+++ b/test/parser/accept/basic-rule-ref.ptk
@@ -0,0 +1 @@
+rule output = ;
\ No newline at end of file
diff --git a/test/parser/accept/basic-token-ref.ptk b/test/parser/accept/basic-token-ref.ptk
new file mode 100644
index 0000000..29f9ce7
--- /dev/null
+++ b/test/parser/accept/basic-token-ref.ptk
@@ -0,0 +1 @@
+rule output = $terminal;
\ No newline at end of file
diff --git a/test/parser/accept/document-start.ptk b/test/parser/accept/document-start.ptk
new file mode 100644
index 0000000..0623db6
--- /dev/null
+++ b/test/parser/accept/document-start.ptk
@@ -0,0 +1 @@
+start ;
\ No newline at end of file
diff --git a/test/parser/accept/empty-with-comment-linefeed.ptk b/test/parser/accept/empty-with-comment-linefeed.ptk
new file mode 100644
index 0000000..a1e7613
--- /dev/null
+++ b/test/parser/accept/empty-with-comment-linefeed.ptk
@@ -0,0 +1 @@
+# hello, world!
diff --git a/test/parser/accept/empty-with-comment.ptk b/test/parser/accept/empty-with-comment.ptk
new file mode 100644
index 0000000..0017949
--- /dev/null
+++ b/test/parser/accept/empty-with-comment.ptk
@@ -0,0 +1 @@
+# hello, world!
\ No newline at end of file
diff --git a/test/parser/accept/empty.ptk b/test/parser/accept/empty.ptk
new file mode 100644
index 0000000..e69de29
diff --git a/test/parser/accept/identifiers.ptk b/test/parser/accept/identifiers.ptk
new file mode 100644
index 0000000..3c4baaa
--- /dev/null
+++ b/test/parser/accept/identifiers.ptk
@@ -0,0 +1,8 @@
+
+rule a = "whatever";
+rule _ = "whatever";
+rule a0 = "whatever";
+rule a-z = "whatever";
+rule _10 = "whatever";
+rule @"x" = "whatever";
+rule @"hello, world!" = "whatever";
diff --git a/test/parser/accept/mapping-array-a0.ptk b/test/parser/accept/mapping-array-a0.ptk
new file mode 100644
index 0000000..3ef8c33
--- /dev/null
+++ b/test/parser/accept/mapping-array-a0.ptk
@@ -0,0 +1 @@
+rule r = "" => { };
\ No newline at end of file
diff --git a/test/parser/accept/mapping-array-a1.ptk b/test/parser/accept/mapping-array-a1.ptk
new file mode 100644
index 0000000..48a6912
--- /dev/null
+++ b/test/parser/accept/mapping-array-a1.ptk
@@ -0,0 +1 @@
+rule r = "" => { $0 };
\ No newline at end of file
diff --git a/test/parser/accept/mapping-array-a5.ptk b/test/parser/accept/mapping-array-a5.ptk
new file mode 100644
index 0000000..a46ab16
--- /dev/null
+++ b/test/parser/accept/mapping-array-a5.ptk
@@ -0,0 +1 @@
+rule r = "" => { $0, $1, $2, $3, $4 };
\ No newline at end of file
diff --git a/test/parser/accept/mapping-array-nested.ptk b/test/parser/accept/mapping-array-nested.ptk
new file mode 100644
index 0000000..be8a59a
--- /dev/null
+++ b/test/parser/accept/mapping-array-nested.ptk
@@ -0,0 +1 @@
+rule r = "" => { $0, { $10, $11, $12 }, $2 };
\ No newline at end of file
diff --git a/test/parser/accept/mapping-builtin-function-a0.ptk b/test/parser/accept/mapping-builtin-function-a0.ptk
new file mode 100644
index 0000000..478e220
--- /dev/null
+++ b/test/parser/accept/mapping-builtin-function-a0.ptk
@@ -0,0 +1 @@
+rule r = "" => tostring();
\ No newline at end of file
diff --git a/test/parser/accept/mapping-builtin-function-a1.ptk b/test/parser/accept/mapping-builtin-function-a1.ptk
new file mode 100644
index 0000000..58e9623
--- /dev/null
+++ b/test/parser/accept/mapping-builtin-function-a1.ptk
@@ -0,0 +1 @@
+rule r = "" => tostring($0);
\ No newline at end of file
diff --git a/test/parser/accept/mapping-builtin-function-a5.ptk b/test/parser/accept/mapping-builtin-function-a5.ptk
new file mode 100644
index 0000000..acf6f75
--- /dev/null
+++ b/test/parser/accept/mapping-builtin-function-a5.ptk
@@ -0,0 +1 @@
+rule r = "" => tostring($0, $1, $2, $3, $4);
\ No newline at end of file
diff --git a/test/parser/accept/mapping-builtin-function-nest.ptk b/test/parser/accept/mapping-builtin-function-nest.ptk
new file mode 100644
index 0000000..c7457fe
--- /dev/null
+++ b/test/parser/accept/mapping-builtin-function-nest.ptk
@@ -0,0 +1 @@
+rule r = "" => tostring($0, tostring($1), $4);
\ No newline at end of file
diff --git a/test/parser/accept/mapping-code-literal.ptk b/test/parser/accept/mapping-code-literal.ptk
new file mode 100644
index 0000000..b18e2b9
--- /dev/null
+++ b/test/parser/accept/mapping-code-literal.ptk
@@ -0,0 +1 @@
+rule r = "" => `.item`;
\ No newline at end of file
diff --git a/test/parser/accept/mapping-record-init-f1.ptk b/test/parser/accept/mapping-record-init-f1.ptk
new file mode 100644
index 0000000..dcce273
--- /dev/null
+++ b/test/parser/accept/mapping-record-init-f1.ptk
@@ -0,0 +1 @@
+rule r = "" => { x = $0 };
\ No newline at end of file
diff --git a/test/parser/accept/mapping-record-init-f3.ptk b/test/parser/accept/mapping-record-init-f3.ptk
new file mode 100644
index 0000000..22d7640
--- /dev/null
+++ b/test/parser/accept/mapping-record-init-f3.ptk
@@ -0,0 +1 @@
+rule r = "" => { x = $0, y = $1, z = $2 };
\ No newline at end of file
diff --git a/test/parser/accept/mapping-user-function-a0.ptk b/test/parser/accept/mapping-user-function-a0.ptk
new file mode 100644
index 0000000..12d6fce
--- /dev/null
+++ b/test/parser/accept/mapping-user-function-a0.ptk
@@ -0,0 +1 @@
+rule r = "" => @convert();
\ No newline at end of file
diff --git a/test/parser/accept/mapping-user-function-a1.ptk b/test/parser/accept/mapping-user-function-a1.ptk
new file mode 100644
index 0000000..0c51664
--- /dev/null
+++ b/test/parser/accept/mapping-user-function-a1.ptk
@@ -0,0 +1 @@
+rule r = "" => @convert($0);
\ No newline at end of file
diff --git a/test/parser/accept/mapping-user-function-a5.ptk b/test/parser/accept/mapping-user-function-a5.ptk
new file mode 100644
index 0000000..684e3f3
--- /dev/null
+++ b/test/parser/accept/mapping-user-function-a5.ptk
@@ -0,0 +1 @@
+rule r = "" => @convert($0, $1, $2, $3, $4);
\ No newline at end of file
diff --git a/test/parser/accept/mapping-user-function-nest.ptk b/test/parser/accept/mapping-user-function-nest.ptk
new file mode 100644
index 0000000..f78963b
--- /dev/null
+++ b/test/parser/accept/mapping-user-function-nest.ptk
@@ -0,0 +1 @@
+rule r = "" => @convert($0, tostring($1), $4);
\ No newline at end of file
diff --git a/test/parser/accept/mapping-user-value.ptk b/test/parser/accept/mapping-user-value.ptk
new file mode 100644
index 0000000..2183ab2
--- /dev/null
+++ b/test/parser/accept/mapping-user-value.ptk
@@ -0,0 +1 @@
+rule r = "" => @value;
\ No newline at end of file
diff --git a/test/parser/accept/mapping-value-ref.ptk b/test/parser/accept/mapping-value-ref.ptk
new file mode 100644
index 0000000..b2293b8
--- /dev/null
+++ b/test/parser/accept/mapping-value-ref.ptk
@@ -0,0 +1 @@
+rule r = "" => $0;
\ No newline at end of file
diff --git a/test/parser/accept/mapping-variant-init.ptk b/test/parser/accept/mapping-variant-init.ptk
new file mode 100644
index 0000000..0fc50e8
--- /dev/null
+++ b/test/parser/accept/mapping-variant-init.ptk
@@ -0,0 +1 @@
+rule r = "" => child: $0;
\ No newline at end of file
diff --git a/test/parser/accept/node-alias.ptk b/test/parser/accept/node-alias.ptk
new file mode 100644
index 0000000..468dbc0
--- /dev/null
+++ b/test/parser/accept/node-alias.ptk
@@ -0,0 +1 @@
+node Alias = !OtherType;
\ No newline at end of file
diff --git a/test/parser/accept/node-custom.ptk b/test/parser/accept/node-custom.ptk
new file mode 100644
index 0000000..da3a508
--- /dev/null
+++ b/test/parser/accept/node-custom.ptk
@@ -0,0 +1 @@
+node String = @StringIdentifier;
\ No newline at end of file
diff --git a/test/parser/accept/node-literal.ptk b/test/parser/accept/node-literal.ptk
new file mode 100644
index 0000000..d2e3530
--- /dev/null
+++ b/test/parser/accept/node-literal.ptk
@@ -0,0 +1 @@
+node String = `[]const u8`;
\ No newline at end of file
diff --git a/test/parser/accept/node-record-f1.ptk b/test/parser/accept/node-record-f1.ptk
new file mode 100644
index 0000000..8b8db7d
--- /dev/null
+++ b/test/parser/accept/node-record-f1.ptk
@@ -0,0 +1 @@
+node Struct = record field: `bool`;
\ No newline at end of file
diff --git a/test/parser/accept/node-record-f4.ptk b/test/parser/accept/node-record-f4.ptk
new file mode 100644
index 0000000..28b3356
--- /dev/null
+++ b/test/parser/accept/node-record-f4.ptk
@@ -0,0 +1,6 @@
+node Struct = record
+ x: `i32`,
+ y: `i32`,
+ z: `i32`,
+ location: !Location
+;
\ No newline at end of file
diff --git a/test/parser/accept/node-variant-f1.ptk b/test/parser/accept/node-variant-f1.ptk
new file mode 100644
index 0000000..0f675d8
--- /dev/null
+++ b/test/parser/accept/node-variant-f1.ptk
@@ -0,0 +1 @@
+node Struct = variant field: `bool`;
\ No newline at end of file
diff --git a/test/parser/accept/node-variant-f4.ptk b/test/parser/accept/node-variant-f4.ptk
new file mode 100644
index 0000000..e346aea
--- /dev/null
+++ b/test/parser/accept/node-variant-f4.ptk
@@ -0,0 +1,6 @@
+node Struct = variant
+ x: `i32`,
+ y: `i32`,
+ z: `i32`,
+ location: !Location
+;
\ No newline at end of file
diff --git a/test/parser/accept/optional-nospace.ptk b/test/parser/accept/optional-nospace.ptk
new file mode 100644
index 0000000..c72723f
--- /dev/null
+++ b/test/parser/accept/optional-nospace.ptk
@@ -0,0 +1 @@
+rule group=("word")?;
\ No newline at end of file
diff --git a/test/parser/accept/optional-space.ptk b/test/parser/accept/optional-space.ptk
new file mode 100644
index 0000000..b95fdab
--- /dev/null
+++ b/test/parser/accept/optional-space.ptk
@@ -0,0 +1 @@
+rule group = ( "word" ) ? ;
\ No newline at end of file
diff --git a/test/parser/accept/rep_one-nospace.ptk b/test/parser/accept/rep_one-nospace.ptk
new file mode 100644
index 0000000..9a8646d
--- /dev/null
+++ b/test/parser/accept/rep_one-nospace.ptk
@@ -0,0 +1 @@
+rule group=("word")+;
\ No newline at end of file
diff --git a/test/parser/accept/rep_one-space.ptk b/test/parser/accept/rep_one-space.ptk
new file mode 100644
index 0000000..c624039
--- /dev/null
+++ b/test/parser/accept/rep_one-space.ptk
@@ -0,0 +1 @@
+rule group = ( "word" ) + ;
\ No newline at end of file
diff --git a/test/parser/accept/rep_zero-nospace.ptk b/test/parser/accept/rep_zero-nospace.ptk
new file mode 100644
index 0000000..3bfb157
--- /dev/null
+++ b/test/parser/accept/rep_zero-nospace.ptk
@@ -0,0 +1 @@
+rule group=("word")*;
\ No newline at end of file
diff --git a/test/parser/accept/rep_zero-space.ptk b/test/parser/accept/rep_zero-space.ptk
new file mode 100644
index 0000000..3696d95
--- /dev/null
+++ b/test/parser/accept/rep_zero-space.ptk
@@ -0,0 +1 @@
+rule group = ( "word" ) * ;
\ No newline at end of file
diff --git a/test/parser/accept/rule-primitive-sequence.ptk b/test/parser/accept/rule-primitive-sequence.ptk
new file mode 100644
index 0000000..0067313
--- /dev/null
+++ b/test/parser/accept/rule-primitive-sequence.ptk
@@ -0,0 +1 @@
+rule sequence = "literal" $terminal "literal" $terminal ;
\ No newline at end of file
diff --git a/test/parser/accept/rule-typespec-custom.ptk b/test/parser/accept/rule-typespec-custom.ptk
new file mode 100644
index 0000000..3df8de4
--- /dev/null
+++ b/test/parser/accept/rule-typespec-custom.ptk
@@ -0,0 +1 @@
+rule r : @Point = "";
\ No newline at end of file
diff --git a/test/parser/accept/rule-typespec-literal.ptk b/test/parser/accept/rule-typespec-literal.ptk
new file mode 100644
index 0000000..7a700d7
--- /dev/null
+++ b/test/parser/accept/rule-typespec-literal.ptk
@@ -0,0 +1 @@
+rule r : `bool` = "";
\ No newline at end of file
diff --git a/test/parser/accept/rule-typespec-ref.ptk b/test/parser/accept/rule-typespec-ref.ptk
new file mode 100644
index 0000000..1af0072
--- /dev/null
+++ b/test/parser/accept/rule-typespec-ref.ptk
@@ -0,0 +1 @@
+rule r : !farpointer = "";
\ No newline at end of file
diff --git a/test/parser/reject/bad-mapping-invalid-token.ptk b/test/parser/reject/bad-mapping-invalid-token.ptk
new file mode 100644
index 0000000..aada416
--- /dev/null
+++ b/test/parser/reject/bad-mapping-invalid-token.ptk
@@ -0,0 +1,2 @@
+# expected: E1111
+rule group = "value" => "bad" ;
\ No newline at end of file
diff --git a/test/parser/reject/bad-mapping-too-long.ptk b/test/parser/reject/bad-mapping-too-long.ptk
new file mode 100644
index 0000000..057dcd5
--- /dev/null
+++ b/test/parser/reject/bad-mapping-too-long.ptk
@@ -0,0 +1,2 @@
+# expected: E1112
+rule group = "value" => $0 whatever ;
\ No newline at end of file
diff --git a/test/parser/reject/empty-group.ptk b/test/parser/reject/empty-group.ptk
new file mode 100644
index 0000000..2860712
--- /dev/null
+++ b/test/parser/reject/empty-group.ptk
@@ -0,0 +1,2 @@
+# expected: E1200
+rule group = ( );
\ No newline at end of file
diff --git a/test/parser/reject/empty-mapping.ptk b/test/parser/reject/empty-mapping.ptk
new file mode 100644
index 0000000..6479ae9
--- /dev/null
+++ b/test/parser/reject/empty-mapping.ptk
@@ -0,0 +1,2 @@
+# expected: E1201
+rule group = "value" => ;
\ No newline at end of file
diff --git a/test/parser/reject/empty-optional.ptk b/test/parser/reject/empty-optional.ptk
new file mode 100644
index 0000000..82ac677
--- /dev/null
+++ b/test/parser/reject/empty-optional.ptk
@@ -0,0 +1,2 @@
+# expected: E1200
+rule group = ( )?;
\ No newline at end of file
diff --git a/test/parser/reject/empty-rep_one.ptk b/test/parser/reject/empty-rep_one.ptk
new file mode 100644
index 0000000..82ac677
--- /dev/null
+++ b/test/parser/reject/empty-rep_one.ptk
@@ -0,0 +1,2 @@
+# expected: E1200
+rule group = ( )?;
\ No newline at end of file
diff --git a/test/parser/reject/empty-rep_zero.ptk b/test/parser/reject/empty-rep_zero.ptk
new file mode 100644
index 0000000..82ac677
--- /dev/null
+++ b/test/parser/reject/empty-rep_zero.ptk
@@ -0,0 +1,2 @@
+# expected: E1200
+rule group = ( )?;
\ No newline at end of file
diff --git a/test/parser/reject/empty-rule.ptk b/test/parser/reject/empty-rule.ptk
new file mode 100644
index 0000000..8d32fe9
--- /dev/null
+++ b/test/parser/reject/empty-rule.ptk
@@ -0,0 +1,2 @@
+# expected: E1200
+rule group = ;
\ No newline at end of file
diff --git a/test/parser/reject/node-no-type.ptk b/test/parser/reject/node-no-type.ptk
new file mode 100644
index 0000000..9a6b774
--- /dev/null
+++ b/test/parser/reject/node-no-type.ptk
@@ -0,0 +1,2 @@
+# expected: E1203
+node foo = ;
\ No newline at end of file
diff --git a/test/parser/reject/pattern-unexpected-token.ptk b/test/parser/reject/pattern-unexpected-token.ptk
new file mode 100644
index 0000000..158522d
--- /dev/null
+++ b/test/parser/reject/pattern-unexpected-token.ptk
@@ -0,0 +1,4 @@
+# expected: E1114
+
+pattern a_word = `illegal`;
+
diff --git a/test/parser/reject/rule-bad-prod.ptk b/test/parser/reject/rule-bad-prod.ptk
new file mode 100644
index 0000000..f5bf832
--- /dev/null
+++ b/test/parser/reject/rule-bad-prod.ptk
@@ -0,0 +1,2 @@
+# expected: E1113
+rule foo = `illegal here`;
\ No newline at end of file
diff --git a/test/parser/reject/rule-no-type-no-prod.ptk b/test/parser/reject/rule-no-type-no-prod.ptk
new file mode 100644
index 0000000..bbd4401
--- /dev/null
+++ b/test/parser/reject/rule-no-type-no-prod.ptk
@@ -0,0 +1,2 @@
+# expected: E1203, E1200
+rule foo : = ;
\ No newline at end of file
diff --git a/test/parser/reject/rule-no-type.ptk b/test/parser/reject/rule-no-type.ptk
new file mode 100644
index 0000000..6ab328d
--- /dev/null
+++ b/test/parser/reject/rule-no-type.ptk
@@ -0,0 +1,2 @@
+# expected: E1203
+rule foo : = "code";
\ No newline at end of file
diff --git a/test/parser/reject/unexpected-token-string.ptk b/test/parser/reject/unexpected-token-string.ptk
new file mode 100644
index 0000000..4848c41
--- /dev/null
+++ b/test/parser/reject/unexpected-token-string.ptk
@@ -0,0 +1,2 @@
+# expected: E1108
+"bad toplevel token!"
\ No newline at end of file