diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..17d6464 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use_nix \ No newline at end of file diff --git a/build.zig b/build.zig index 00a2907..1b9b0ce 100644 --- a/build.zig +++ b/build.zig @@ -1,31 +1,240 @@ const std = @import("std"); pub fn build(b: *std.build.Builder) void { + // build options: + + const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{}); - _ = b.addModule("parser-toolkit", .{ - .source_file = .{ .path = "src/main.zig" }, + const test_step = b.step("test", "Run library tests"); + const examples_step = b.step("examples", "Builds and installs examples"); + const run_calc_step = b.step("run-calculator", "Runs calculator example"); + + const all_step = b.step("all", "Builds everything, tests everything"); + all_step.dependOn(b.getInstallStep()); + all_step.dependOn(test_step); + all_step.dependOn(examples_step); + + // dependencies + + const args_dep = b.dependency("args", .{}); + + // external modules + + const args_mod = args_dep.module("args"); + + // internal modules + + const ptk_mod = b.addModule("parser-toolkit", .{ + .source_file = .{ .path = "src/toolkit/main.zig" }, .dependencies = &.{}, }); - var main_tests = b.addTest(.{ - .root_source_file = .{ .path = "src/main.zig" }, - .optimize = optimize, - }); + // Applications + const ptkdef_exe = blk: { + const ptkdef = b.addExecutable(.{ + .name = "ptkgen", + .root_source_file = .{ .path = "src/ptkgen/main.zig" }, + .optimize = optimize, + .target = target, + }); - const test_step = b.step("test", "Run library tests"); - test_step.dependOn(&b.addRunArtifact(main_tests).step); + ptkdef.addModule("parser-toolkit", ptk_mod); + ptkdef.addModule("args", args_mod); - const calculator_example = b.addExecutable(.{ - .root_source_file = .{ .path = "examples/calculator.zig" }, - .name = "calculator", - .optimize = optimize, - }); + b.installArtifact(ptkdef); - b.installArtifact(calculator_example); - calculator_example.addAnonymousModule("parser-toolkit", .{ - .source_file = .{ .path = "src/main.zig" }, - }); + break :blk ptkdef; + }; + + // test suite + { + // unit tests for ptk: + var ptk_tests = b.addTest(.{ + .root_source_file = ptk_mod.source_file, + .optimize = optimize, + }); + for (ptk_mod.dependencies.keys()) |dep_name| { + ptk_tests.addModule(dep_name, ptk_mod.dependencies.get(dep_name).?); + } + test_step.dependOn(&b.addRunArtifact(ptk_tests).step); + + // unit tests for ptkgen: + var ptkgen_tests = b.addTest(.{ + .root_source_file = .{ .path = "src/ptkgen/main.zig" }, + .optimize = optimize, + }); + ptkgen_tests.addModule("parser-toolkit", ptk_mod); + test_step.dependOn(&b.addRunArtifact(ptkgen_tests).step); + + // Integration tests for ptkgen: + for (parser_accept_files ++ parser_reject_files) |file| { + const run = b.addRunArtifact(ptkdef_exe); + run.addArg("--test_mode=parse_only"); + run.addFileArg(.{ .path = file }); + test_step.dependOn(&run.step); + } + + // Integration tests for ptkgen: + for (analyis_accept_files ++ analyis_reject_files) |file| { + const run = b.addRunArtifact(ptkdef_exe); + run.addArg("--test_mode=no_codegen"); + run.addFileArg(.{ .path = file }); + test_step.dependOn(&run.step); + } + } + + // examples + { + const calculator_example = b.addExecutable(.{ + .root_source_file = .{ .path = "examples/calculator.zig" }, + .name = "calculator", + .optimize = optimize, + }); + calculator_example.addModule("parser-toolkit", ptk_mod); + examples_step.dependOn(&b.addInstallArtifact(calculator_example, 
.{}).step); - b.step("run", "Runs the calculator example").dependOn(&b.addRunArtifact(calculator_example).step); + run_calc_step.dependOn(&b.addRunArtifact(calculator_example).step); + } } + +const example_files = [_][]const u8{ + "examples/ptkgen/grammar.ptk", + "examples/ptkgen/ast-with-unions.ptk", +}; + +const analyis_accept_files = [_][]const u8{ + "test/analysis/accept/match-literal-rule.ptk", + "test/analysis/accept/match-literal-sequence.ptk", + "test/analysis/accept/match-literal-variants.ptk", + "test/analysis/accept/match-literal-sequence-variant.ptk", + "test/analysis/accept/match-group-one-item.ptk", + "test/analysis/accept/match-group-one-sequence.ptk", + "test/analysis/accept/match-group-many-item.ptk", + "test/analysis/accept/match-group-many-sequence.ptk", + "test/analysis/accept/match-group-nested.ptk", + "test/analysis/accept/match-optional-one-item.ptk", + "test/analysis/accept/match-optional-one-sequence.ptk", + "test/analysis/accept/match-optional-many-item.ptk", + "test/analysis/accept/match-optional-many-sequence.ptk", + "test/analysis/accept/match-optional-nested.ptk", + "test/analysis/accept/match-rep_zero-one-item.ptk", + "test/analysis/accept/match-rep_zero-one-sequence.ptk", + "test/analysis/accept/match-rep_zero-many-item.ptk", + "test/analysis/accept/match-rep_zero-many-sequence.ptk", + "test/analysis/accept/match-rep_zero-nested.ptk", + "test/analysis/accept/match-rep_one-one-item.ptk", + "test/analysis/accept/match-rep_one-one-sequence.ptk", + "test/analysis/accept/match-rep_one-many-item.ptk", + "test/analysis/accept/match-rep_one-many-sequence.ptk", + "test/analysis/accept/match-rep_one-nested.ptk", + + "test/analysis/accept/start-decl.ptk", + + "test/analysis/accept/pattern-custom.ptk", + "test/analysis/accept/pattern-literal.ptk", + "test/analysis/accept/pattern-regex.ptk", + "test/analysis/accept/pattern-word.ptk", + + "test/analysis/accept/pattern-word-skip.ptk", + "test/analysis/accept/pattern-regex-skip.ptk", + "test/analysis/accept/pattern-literal-skip.ptk", + "test/analysis/accept/pattern-custom-skip.ptk", +} ++ example_files; + +const analyis_reject_files = [_][]const u8{ + "test/analysis/reject/duplicate-node.ptk", + "test/analysis/reject/duplicate-pattern.ptk", + "test/analysis/reject/duplicate-rule.ptk", + + "test/analysis/accept/expect-warn-missing-start.ptk", + + "test/analysis/reject/undeclared-start.ptk", + "test/analysis/reject/duplicate-undeclared-start.ptk", + "test/analysis/reject/duplicate-start.ptk", + + "test/analysis/reject/duplicate-field-record.ptk", + "test/analysis/reject/duplicate-field-variant.ptk", + + "test/analysis/reject/production-undeclared-pattern-ref.ptk", + "test/analysis/reject/production-undeclared-rule-ref.ptk", +}; + +const parser_accept_files = [_][]const u8{ + "test/parser/accept/empty.ptk", + "test/parser/accept/empty-with-comment-linefeed.ptk", + "test/parser/accept/empty-with-comment.ptk", + "test/parser/accept/identifiers.ptk", + + "test/parser/accept/optional-nospace.ptk", + "test/parser/accept/optional-space.ptk", + "test/parser/accept/rep_one-nospace.ptk", + "test/parser/accept/rep_one-space.ptk", + "test/parser/accept/rep_zero-nospace.ptk", + "test/parser/accept/rep_zero-space.ptk", + + "test/parser/accept/basic-rule-ref.ptk", + "test/parser/accept/basic-token-ref.ptk", + "test/parser/accept/rule-primitive-sequence.ptk", + + "test/parser/accept/document-start.ptk", + + "test/parser/accept/mapping-value-ref.ptk", + "test/parser/accept/mapping-code-literal.ptk", +
"test/parser/accept/mapping-user-value.ptk", + + "test/parser/accept/mapping-builtin-function-a0.ptk", + "test/parser/accept/mapping-builtin-function-a1.ptk", + "test/parser/accept/mapping-builtin-function-a5.ptk", + "test/parser/accept/mapping-builtin-function-nest.ptk", + + "test/parser/accept/mapping-user-function-a0.ptk", + "test/parser/accept/mapping-user-function-a1.ptk", + "test/parser/accept/mapping-user-function-a5.ptk", + "test/parser/accept/mapping-user-function-nest.ptk", + + "test/parser/accept/mapping-array-a0.ptk", + "test/parser/accept/mapping-array-a1.ptk", + "test/parser/accept/mapping-array-a5.ptk", + "test/parser/accept/mapping-array-nested.ptk", + + "test/parser/accept/mapping-variant-init.ptk", + + "test/parser/accept/mapping-record-init-f1.ptk", + "test/parser/accept/mapping-record-init-f3.ptk", + + "test/parser/accept/rule-typespec-custom.ptk", + "test/parser/accept/rule-typespec-ref.ptk", + "test/parser/accept/rule-typespec-literal.ptk", + + "test/parser/accept/node-alias.ptk", + "test/parser/accept/node-custom.ptk", + "test/parser/accept/node-literal.ptk", + + "test/parser/accept/node-record-f1.ptk", + "test/parser/accept/node-record-f4.ptk", + + "test/parser/accept/node-variant-f4.ptk", + "test/parser/accept/node-variant-f1.ptk", +} ++ analyis_accept_files; + +const parser_reject_files = [_][]const u8{ + "test/parser/reject/empty-rule.ptk", + "test/parser/reject/empty-group.ptk", + "test/parser/reject/empty-optional.ptk", + "test/parser/reject/empty-rep_one.ptk", + "test/parser/reject/empty-rep_zero.ptk", + + "test/parser/reject/unexpected-token-string.ptk", + + "test/parser/reject/empty-mapping.ptk", + "test/parser/reject/bad-mapping-invalid-token.ptk", + "test/parser/reject/bad-mapping-too-long.ptk", + + "test/parser/reject/node-no-type.ptk", + "test/parser/reject/rule-no-type.ptk", + "test/parser/reject/rule-no-type-no-prod.ptk", + "test/parser/reject/rule-bad-prod.ptk", + + "test/parser/reject/pattern-unexpected-token.ptk", +}; diff --git a/build.zig.zon b/build.zig.zon new file mode 100644 index 0000000..5cbec5c --- /dev/null +++ b/build.zig.zon @@ -0,0 +1,10 @@ +.{ + .name = "parser-toolkit", + .version = "0.2.0", + .dependencies = .{ + .args = .{ + .url = "https://github.com/MasterQ32/zig-args/archive/7989929d055ef7618e60de84cc54644046516fdb.tar.gz", + .hash = "12207752d975a7f5d7cc65662ed1c6b117da8dec6d1bd7af9a39e1b65d90bf86e833", + }, + }, +} diff --git a/design/ptkdefv/design.md b/design/ptkdefv/design.md new file mode 100644 index 0000000..e017c98 --- /dev/null +++ b/design/ptkdefv/design.md @@ -0,0 +1,4 @@ +# Parser Generator Language + +Create basic recursive descent parsers with "well-known" patterns that output a Zig AST data structure. + diff --git a/design/ptkdefv/grammar.ptk b/design/ptkdefv/grammar.ptk new file mode 100644 index 0000000..e50f519 --- /dev/null +++ b/design/ptkdefv/grammar.ptk @@ -0,0 +1,43 @@ + + +root ; # <...> is a "rule reference" + +token identifier = regex "[A-Za-z_][A-Za-z0-9_]*"; # defines token "identifier" to match this regex + +token line-comment = regex "//[^\n]*" skip; # ignores this token when parsing, but tokenizer recognizes it +token whitespace = regex "[ \t\r\n]" skip; + +rule document = + # [ ... ] is a loop construct, can appear several times + [ ] [ ]* +; + +rule toplevel-decl = + # | is a "either/or" scenario, with precedence from left to right (first come, first serve) + | | +; + +rule interface-decl = + "interface" $identifier "(" ... ")" ";"; +; + +rule module-decl = + "module" $identifier "(" ... 
")" "{" ... "}" ";"; +; + +rule using = + # "bla" is a literal token + # $bla is an explicitly defined token reference + # ...? is an optional part of a parse + "using" ";" ( "as" $identifier )? +; + +rule namespace-decl = + "namespace" ";" +; + +rule compound-identifier = + $identifier [ "." $identifier ]* +; + + diff --git a/design/ptkdefv/mapping-concept-01.ptk b/design/ptkdefv/mapping-concept-01.ptk new file mode 100644 index 0000000..9e4ccf9 --- /dev/null +++ b/design/ptkdefv/mapping-concept-01.ptk @@ -0,0 +1,37 @@ + +# "!id" is a type reference +# "$id" is a token reference +# "" is a rule reference + +# maps type "array" to a slice/arraylist of whatever "int" is +node array = sequence !int; + +# "int" is the Zig type "i32" +node int = literal "i32"; + +# the initial rule is "list", also determines the root type of the ast +start ; + +# "decimal" token is a decimal number sequence token +token decimal = regex "\d+"; + +# "list" is a sequence of decimals with comma separated, potential trailing comma, +# enclosed by square brackets +rule list = "[" [ $decimal "," ] $decimal? "]"; +# $0 $1______________ $2_______ $3 + +# the rule "list" is mapped to the type "array" +# as a sequence of the second element (unwrapped into items) and +# the third item appended. square brackets in a map are the "construct array operator". +# if the array is not sequence of optionals, optional items are skipped in construction +map !array = [ $1..., $2 ]; + +# the "decimal" token is mapped to i32 by invoking a Zig function called +# "parse" that takes the token as input and returns "i32": +map $decimal !int = @parse($0); + + + + + + diff --git a/docs/grammar.md b/docs/grammar.md new file mode 100644 index 0000000..0d3d2b5 --- /dev/null +++ b/docs/grammar.md @@ -0,0 +1,41 @@ +# Parser Toolkit Grammar + +## Syntax + +```rb + +@Identifier # references Identifier from the user context. can be used for types, functions, values + # references another rule named Rule +!Node # references another ast node called Node + + +``` + +## Types + +```rb +literal `text` # pastes text into the code +optional ... # makes ... an optional type + +struct # constructs a structure type, having two fields: + field: !type, + field: !type + +union # constructs a type for alternatives, here with two variants: + Foo: !type, # alternative called Foo + Bar: !type # alternative called Bar + +``` + +## Strings + +- `\x00 ... \xFF` => Hexadecimal escape +- `\000 ... \377` => Octal escape +- `\n` => LF (0x0A) +- `\r` => CR (0x0D) +- `\'` => single quote (0x27) +- `\"` => double quote (0x22) +- `\\` => back slash (0x5C) +- `\u{????}` => UTF-8 encoded codepoint + + diff --git a/docs/semantics.md b/docs/semantics.md new file mode 100644 index 0000000..7d23443 --- /dev/null +++ b/docs/semantics.md @@ -0,0 +1,23 @@ +# PtkGen Semantics + +## Context References + +tl;dr: `$n` can access the elements of the top-level productions of a rule. + +```rb +rule r = "hello" "world" => $0; # access "hello" +rule r = "hello" "world" => $1; # access "world" +``` + +### Index Resolution + +1. Flatten hierarchy +2. Use index in flattened list + +```rb +rule r = a b c d e f g h; # [ a b c d e f g h ] => flat sequence +rule r = a b ( c d e )? f g h; # [ a b c? d? e? 
f g h ] => `c`, `d`, `e` get promoted to optional) +rule r = a b ( c d e )* f g h; # [ a b [[c d e]] f g h ] => `c d e` get promoted to list of lists ([[c d e], [c d e], ...]) +rule r = a b ( c d e )+ f g h; # [ a b [[c d e]] f g h ] => `c d e` get promoted to list of lists ([[c d e], [c d e], ...]) +rule r = a b ( c d e ) f g h; # [ a b c d e f g h ] => `c d e` gets flattened into the master list +``` diff --git a/examples/ptkgen/ast-with-unions.ptk b/examples/ptkgen/ast-with-unions.ptk new file mode 100644 index 0000000..369c9c9 --- /dev/null +++ b/examples/ptkgen/ast-with-unions.ptk @@ -0,0 +1,70 @@ +# parse a construct like this into a single type: +# var name = value; +# const name = value; +# var name: type = value; +# const name: type = value; + +node declaration = record + is_const: `bool`, + name: !identifier, + # type: optional !type, + value: !value +; + +node identifier = `[]const u8`; +node type = @TypeId; # enum { int, float, string } +node value = @Value; + +start ; + +rule decl : !declaration = + ( ":" )? "=" => { + is_const = $0, + name = $1, + type = $2, + value = $4 + } +# $0_________ $1__ $2_____________ $3 $4_____ +; + +rule decl-type : `bool` = + "var" => `false` + | "const" => `true` +; + +rule id : !identifier = "name" => tostring($0); + +rule type : !type = + "int" => `.int` + | "float" => `.float` + | "string" => `.string` +; + +rule value : !value = + "10" => @parseInt($0) + | "3.14" => @parseFloat($0) + | "\"nice\"" => @parseStringLiteral($0) +; + + + +# Unions have can only have a single option active at a time +node TLDeclaration = variant + ns : !namespace, + interface : !interface, + module : !module +; + +node namespace = @Namespace; +node interface = @Interface; +node module = @Module; + +rule toplevel-decl : !TLDeclaration = + => ns: $0 # this is syntax for a union field selector as unions are not compounds + | => interface: $0 + | => module: $0 +; + +rule namespace-group = "to be done"; +rule interface-decl = "to be done"; +rule module-decl = "to be done"; diff --git a/examples/ptkgen/grammar.ptk b/examples/ptkgen/grammar.ptk new file mode 100644 index 0000000..6a6d95a --- /dev/null +++ b/examples/ptkgen/grammar.ptk @@ -0,0 +1,85 @@ + +start ; + +rule document = ( )* ; + +rule top_level = + + | + | + | +; + +rule start_decl = "start" $rule_ref ";" ; + +rule pattern_decl = "pattern" $identifier "=" ( "skip" )? ";" ; + +rule pattern_spec = + "literal" $string_literal + | "word" $string_literal + | "regex" $string_literal + | $userval +; + +rule node_decl = "node" $identifier "=" ";" ; + +rule rule_decl = "rule" $identifier ( ":" )? "=" ";" ; + +rule mapped_productions = ( "|" )* ; + +rule mapped_production = ( "=>" )? ; + +rule production_sequence = ( )+; + +rule production = + $string_literal + | $rule_ref + | $token_ref + | "(" ")" "?" + | "(" ")" "*" + | "(" ")" "+" + | "(" ")" +; + +rule mapping = + $identifier ":" # variant init + | # regular init +; + +rule mapped_value = + # { field = , field = , ... } + | # { , , ... } + | $code_literal # `code` + | $value_ref # $0 + | $identifier "(" ")" # builtin(...) + | $userval "(" ")" # @func(...) + | $userval # @value +; + +rule record_ctor = + "{" ( "," )* "}" +; + +rule assign_field = + $identifier "=" $mapped_value +; + +rule list_ctor = "{" ( )? 
"}"; + +rule value_list = + ( "," )* +; + +# TODO: + +rule type = "empty"; + +pattern rule_ref = literal ""; +pattern identifier = regex "[A-Za-z_][A-Za-z0-9_]*"; +pattern string_literal = literal ""; +pattern userval = literal ""; +pattern token_ref = literal ""; +pattern code_literal = literal ""; +pattern value_ref = literal ""; +pattern mapped_value = literal ""; + diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000..664d354 --- /dev/null +++ b/shell.nix @@ -0,0 +1,11 @@ +{ pkgs ? import { } }: +pkgs.mkShell { + nativeBuildInputs = [ + # zig + pkgs.zig_0_11 + ]; + buildInputs = [ ]; + shellHook = '' + # put your shell hook here + ''; +} diff --git a/src/ptkgen/Diagnostics.zig b/src/ptkgen/Diagnostics.zig new file mode 100644 index 0000000..6559663 --- /dev/null +++ b/src/ptkgen/Diagnostics.zig @@ -0,0 +1,468 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); + +const intl = @import("intl.zig"); +const parser = @import("parser.zig"); + +const Diagnostics = @This(); + +pub const Code = enum(u16) { + pub const first_error = 1000; + pub const first_warning = 4000; + pub const first_note = 8000; + pub const last_item = 9999; + + // generic failures (1000-1099): + out_of_memory = 1000, + file_limit_exceeded = 1001, + io_error = 1002, + + // non-recoverable syntax errors (1100-1199): + invalid_source_encoding = 1100, + unexpected_token_eof = 1101, + unexpected_token = 1102, + unexpected_character = 1103, + unexpected_eof = 1104, + bad_string_escape = 1105, + invalid_string_escape = 1106, + excess_tokens = 1107, + unexpected_toplevel_token = 1108, + unexpected_token_no_context = 1109, + unexpected_token_type_spec = 1110, + unexpected_token_mapping = 1111, + unexpected_token_production_list = 1112, + unexpected_token_production = 1113, + unexpected_token_pattern = 1114, + + // recoverable syntax errors (1200-1299): + illegal_empty_group = 1200, + empty_mapping = 1201, + integer_overflow = 1202, + empty_typespec = 1203, + + // semantic errors (1300-1399): + + duplicate_identifier_rule = 1300, + duplicate_identifier_node = 1301, + duplicate_identifier_pattern = 1302, + + reference_to_undeclared_rule = 1303, + reference_to_undeclared_node = 1304, + reference_to_undeclared_pattern = 1305, + + multiple_start_symbols = 1306, + + duplicate_compound_field = 1307, + + context_reference_out_of_bounds = 1308, + + variant_does_not_exist = 1309, + + record_field_does_not_exist = 1310, + record_field_already_initialized = 1311, + record_field_not_initialized = 1312, + + mapping_requires_typed_rule = 1313, + + invalid_builtin_function = 1314, + + // semantic warnings (4000-4099): + + missing_start_symbol = 4000, + + comptime { + std.debug.assert(first_error < first_warning); + std.debug.assert(first_warning < first_note); + std.debug.assert(first_note < last_item); + } + + const max_item_len = blk: { + var len = 0; + for (@typeInfo(Code).Enum.fields) |fld| { + len = @max(len, fld.name); + } + break :blk len; + }; + + const code_strings = blk: { + @setEvalBranchQuota(10_000); + var map = std.EnumArray(Code, []const u8).initUndefined(); + + for (std.enums.values(Code)) |code| { + const tag = @tagName(code); + + // perform kebab conversion: + var buf: [tag.len]u8 = tag[0..tag.len].*; + for (&buf) |*c| { + if (c.* == '_') + c.* = '-'; + } + + map.set(code, &buf); + } + + break :blk map; + }; + + pub fn isError(code: Code) bool { + const int = @intFromEnum(code); + return @intFromEnum(code) >= first_error and int < first_warning; + } + + pub fn isWarning(code: Code) bool { + 
const int = @intFromEnum(code); + return int >= first_warning and int < first_note; + } + + pub fn isNote(code: Code) bool { + const int = @intFromEnum(code); + return int >= first_note and int < last_item; + } + + pub fn parse(string: []const u8) error{ + /// Format is not recognized + InvalidFormat, + /// Numeric error code is out of range. + OutOfRange, + /// Numeric error code does not exist. + InvalidId, + }!Code { + if (string.len == 0 or (string[0] != 'E' and string[0] != 'W' and string[0] != 'D')) + return error.InvalidFormat; + const id = std.fmt.parseInt(u16, string[1..], 10) catch |err| switch (err) { + error.InvalidCharacter => return error.InvalidFormat, + error.Overflow => return error.OutOfRange, + }; + if (id > last_item) + return error.OutOfRange; + return std.meta.intToEnum(Diagnostics.Code, id) catch return error.InvalidId; + } + + pub fn format(code: Code, comptime fmt: []const u8, opt: std.fmt.FormatOptions, writer: anytype) !void { + _ = opt; + + if (comptime std.mem.eql(u8, fmt, "d")) { + const code_prefix = if (code.isError()) + "E" + else if (code.isWarning()) + "W" + else + "D"; + + try writer.print("{s}{d:0>4}", .{ code_prefix, @intFromEnum(code) }); + } else if (comptime std.mem.eql(u8, fmt, "s")) { + try writer.writeAll(code_strings.get(code)); + } else { + @compileError("Code fmt must be {s} (string variant) or {d} (numeric variant)!"); + } + // + } +}; + +const NoDiagnosticData = struct {}; + +const UnexpectedTokenMessage = struct { + actual: parser.Token, +}; + +const DuplicateIdentifier = struct { + identifier: []const u8, + previous_location: ptk.Location, +}; +const UndeclaredIdentifier = struct { identifier: []const u8 }; + +pub fn Data(comptime code: Code) type { + return switch (code) { + .out_of_memory => NoDiagnosticData, + .file_limit_exceeded => NoDiagnosticData, + .io_error => struct { error_code: intl.FormattableError }, + + .unexpected_token_eof => struct { + expected_type: parser.TokenType, + }, + .unexpected_token => struct { + expected_type: parser.TokenType, + actual: parser.Token, + }, + + .unexpected_toplevel_token => UnexpectedTokenMessage, + .unexpected_token_no_context => UnexpectedTokenMessage, + .unexpected_token_type_spec => UnexpectedTokenMessage, + .unexpected_token_mapping => UnexpectedTokenMessage, + .unexpected_token_production_list => UnexpectedTokenMessage, + .unexpected_token_production => UnexpectedTokenMessage, + .unexpected_token_pattern => UnexpectedTokenMessage, + + .unexpected_eof => NoDiagnosticData, + + .invalid_source_encoding => NoDiagnosticData, + .unexpected_character => struct { character: u21 }, + + .bad_string_escape => NoDiagnosticData, + .invalid_string_escape => struct { escape: u21 }, + .excess_tokens => struct { token_type: parser.TokenType }, + + .illegal_empty_group => NoDiagnosticData, + .empty_mapping => NoDiagnosticData, + + .integer_overflow => struct { + min: []const u8, + max: []const u8, + actual: []const u8, + }, + + .empty_typespec => NoDiagnosticData, + + .duplicate_identifier_rule => DuplicateIdentifier, + .duplicate_identifier_node => DuplicateIdentifier, + .duplicate_identifier_pattern => DuplicateIdentifier, + + .reference_to_undeclared_rule => UndeclaredIdentifier, + .reference_to_undeclared_node => UndeclaredIdentifier, + .reference_to_undeclared_pattern => UndeclaredIdentifier, + + .multiple_start_symbols => struct { + identifier: []const u8, + previous_location: ptk.Location, + }, + + .missing_start_symbol => NoDiagnosticData, + + .duplicate_compound_field => struct { + identifier: 
[]const u8, + previous_location: ptk.Location, + }, + + .context_reference_out_of_bounds => struct { + index: u32, + limit: u32, + }, + + .variant_does_not_exist => struct { + field: []const u8, + type_location: ptk.Location, + }, + + .record_field_does_not_exist => struct { + field: []const u8, + type_location: ptk.Location, + }, + .record_field_already_initialized => struct { + field: []const u8, + prev_init: ptk.Location, + }, + .record_field_not_initialized => struct { + field: []const u8, + field_location: ptk.Location, + }, + + .mapping_requires_typed_rule => NoDiagnosticData, + + .invalid_builtin_function => struct { + name: []const u8, + }, + + // else => @compileError(std.fmt.comptimePrint("Code {} has no diagnostic type associated!", .{code})), + }; +} + +pub const Message = struct { + level: ptk.Error.Level, + location: ptk.Location, + text: []const u8, +}; + +inner: ptk.Diagnostics, +codes: std.ArrayList(Code), + +pub fn init(allocator: std.mem.Allocator) Diagnostics { + return Diagnostics{ + .inner = ptk.Diagnostics.init(allocator), + .codes = std.ArrayList(Code).init(allocator), + }; +} + +pub fn deinit(diag: *Diagnostics) void { + diag.codes.deinit(); + diag.inner.deinit(); + diag.* = undefined; +} + +pub fn hasErrors(diag: Diagnostics) bool { + return diag.inner.hasErrors(); +} + +pub fn hasWarnings(diag: Diagnostics) bool { + return diag.inner.hasWarnings(); +} + +fn Formatter(comptime T: type) type { + return switch (T) { + // text and unicode: + []const u8 => struct { + // TODO: Distinguish between "string body" and "string literal" + + value: T, + + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + try writer.print("{}", .{std.zig.fmtEscapes(item.value)}); + } + }, + + u21 => struct { + value: T, + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + + if (item.value < 0x80) { + const ascii: u8 = @intCast(item.value); + + if (std.ascii.isPrint(ascii)) { + try writer.print("{c}", .{ascii}); + } else { + try writer.print("[nonprint: 0x{X:0>2}]", .{ascii}); + } + } else { + var buf: [4]u8 = undefined; + if (std.unicode.utf8Encode(item.value, &buf)) |len| { + try writer.print("{s}", .{buf[0..len]}); + } else |_| { + try writer.print("<U+{X:0>4}>", .{item.value}); + } + } + } + }, + + // enums: + parser.TokenType => struct { + value: parser.TokenType, + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + try writer.print("{s}", .{@tagName(item.value)}); + } + }, + + parser.Token => struct { + value: parser.Token, + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + try writer.print("{s} ('{}')", .{ + @tagName(item.value.type), + std.zig.fmtEscapes(item.value.text), + }); + } + }, + + ptk.Location => struct { + value: ptk.Location, + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + try writer.print("{}", .{item.value}); + } + }, + + intl.FormattableError => struct { + value: T, + + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + + inline for (@typeInfo(intl.FormattableError).ErrorSet.?)
|err| { + if (item.value == @field(intl.FormattableError, err.name)) { + try writer.writeAll(@field(intl.localization.errors, err.name)); + return; + } + } else unreachable; + } + }, + + // integers: + + u32 => struct { + value: T, + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + + try writer.print("{}", .{item.value}); + } + }, + + else => @compileError(std.fmt.comptimePrint("{s} is not a supported diagnostic type!", .{@typeName(T)})), + }; +} + +fn createFormatter(comptime T: type, value: T) Formatter(T) { + return Formatter(T){ .value = value }; +} + +fn FormattedData(comptime code: Code) type { + const Field = std.builtin.Type.StructField; + const D = Data(code); + + const src_fields = @typeInfo(D).Struct.fields; + + var dst_fields: [src_fields.len]Field = undefined; + + for (&dst_fields, src_fields) |*dst, src| { + dst.* = .{ + .name = src.name, + .type = Formatter(src.type), + .default_value = null, + .is_comptime = false, + .alignment = @alignOf(Formatter(src.type)), + }; + } + + return @Type(.{ + .Struct = .{ + .layout = .Auto, + .fields = &dst_fields, + .decls = &.{}, + .is_tuple = false, + }, + }); +} + +fn formatData(comptime code: Code, params: Data(code)) FormattedData(code) { + var formatted: FormattedData(code) = undefined; + inline for (std.meta.fields(Data(code))) |fld| { + @field(formatted, fld.name) = createFormatter(fld.type, @field(params, fld.name)); + } + return formatted; +} + +pub fn emit(diag: *Diagnostics, location: ptk.Location, comptime code: Code, params: Data(code)) error{OutOfMemory}!void { + const level = if (code.isError()) + ptk.Error.Level.@"error" + else if (code.isWarning()) + ptk.Error.Level.warning + else if (code.isNote()) + ptk.Error.Level.info + else + unreachable; + + const fmt_string = @field(intl.localization.diagnostics, @tagName(code)); + + var stack_fallback = std.heap.stackFallback(1024, diag.inner.memory.allocator()); + const stack_fallback_allocator = stack_fallback.get(); + + const formatted_params = formatData(code, params); + + const message_text = try std.fmt.allocPrint(stack_fallback_allocator, fmt_string, formatted_params); + defer stack_fallback_allocator.free(message_text); + + try diag.inner.emit(location, level, "{d}: {s}", .{ code, message_text }); + try diag.codes.append(code); +} + +pub fn render(diag: Diagnostics, stream: anytype) !void { + try diag.inner.print(stream); +} diff --git a/src/ptkgen/ast.zig b/src/ptkgen/ast.zig new file mode 100644 index 0000000..a650c35 --- /dev/null +++ b/src/ptkgen/ast.zig @@ -0,0 +1,174 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); + +const Location = ptk.Location; + +pub fn List(comptime T: type) type { + return struct { + pub const Item = T; + + pub const Node = std.TailQueue(T).Node; + + inner: std.TailQueue(T) = .{}, + + pub fn append(list: *@This(), item: *@This().Node) void { + list.inner.append(item); + } + + pub fn len(list: @This()) usize { + return list.inner.len; + } + + pub fn only(list: @This()) ?T { + return if (list.inner.len == 1) + list.inner.first.?.data + else + null; + } + }; +} + +pub fn Iterator(comptime T: type) type { + return struct { + node: ?*List(T).Node, + + pub fn next(iter: *@This()) ?*T { + const current = iter.node orelse return null; + iter.node = current.next; + return ¤t.data; + } + }; +} + +pub fn iterate(list: anytype) Iterator(@TypeOf(list).Item) { + return Iterator(@TypeOf(list).Item){ .node = list.inner.first }; +} + +pub fn 
Reference(comptime T: type) type { + return struct { + pub const Referenced = T; + + location: Location, + identifier: ptk.strings.String, + }; +} + +fn String(comptime Tag: anytype) type { + return struct { + pub const tag = Tag; + + location: Location, + value: ptk.strings.String, + }; +} + +pub const Identifier = String(.identifier); +pub const StringLiteral = String(.string); +pub const CodeLiteral = String(.code); +pub const UserDefinedIdentifier = String(.user_defined); + +pub const Document = List(TopLevelDeclaration); + +pub const TopLevelDeclaration = union(enum) { + start: RuleRef, + rule: Rule, + node: Node, + pattern: Pattern, +}; + +pub const NodeRef = Reference(Node); // !mynode +pub const RuleRef = Reference(Rule); // +pub const PatternRef = Reference(Pattern); // $mytoken + +pub const ValueRef = struct { // $0 + location: Location, + index: u32, +}; + +pub const Node = struct { // node = ...; + name: Identifier, + value: TypeSpec, +}; + +pub const Rule = struct { // rule ( : )? = ...; + name: Identifier, // + ast_type: ?TypeSpec, // if specified, defines the ast node of the rule + productions: List(MappedProduction), // all alternatives of the rule +}; + +pub const Pattern = struct { // token = ...; + name: Identifier, + data: Data, + invisible: bool, + + pub const Data = union(enum) { + literal: StringLiteral, // literal "+" + word: StringLiteral, // word "while" + regex: StringLiteral, // regex "string" + external: UserDefinedIdentifier, // @matchMe + }; +}; + +pub const MappedProduction = struct { // ... => value + production: Production, // the thing before "=>" + mapping: ?AstMapping, // the thing after "=>" +}; + +pub const Production = union(enum) { + literal: StringLiteral, // "text" + terminal: PatternRef, // $token + recursion: RuleRef, // + sequence: List(Production), // ... + optional: List(Production), // ( ... )? + repetition_zero: List(Production), // [ ... ]* + repetition_one: List(Production), // [ ... ]+ +}; + +pub const AstMapping = union(enum) { + record: List(FieldAssignment), // { field = ..., field = ... } + list: List(AstMapping), // { ..., ..., ... } + variant: VariantInitializer, // field: ... 
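+    // Examples of how grammar mappings from examples/ptkgen/ast-with-unions.ptk land in this union:
+    // `=> { is_const = $0, ... }` becomes .record, `=> ns: $0` becomes .variant, and
+    // `=> @parseInt($0)` becomes .user_function_call whose argument is a .context_reference.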
+ + literal: CodeLiteral, // field: value + context_reference: ValueRef, // $0 + user_reference: UserDefinedIdentifier, // @field + user_function_call: FunctionCall(UserDefinedIdentifier), // @builtin(a,b,c) + function_call: FunctionCall(Identifier), // identifier(a,b,c) +}; + +pub const VariantInitializer = struct { + field: Identifier, + value: *AstMapping, +}; + +pub fn FunctionCall(comptime Name: type) type { + return struct { + function: Name, + arguments: List(AstMapping), + }; +} + +pub const FieldAssignment = struct { + location: Location, + field: Identifier, + value: *AstMapping, +}; + +pub const TypeSpec = union(enum) { + reference: NodeRef, // !type + literal: CodeLiteral, // literal `bool` + custom: UserDefinedIdentifier, // custom `Custom` + record: CompoundType, // struct + variant: CompoundType, // union +}; + +pub const CompoundType = struct { + location: Location, + fields: List(Field), +}; + +pub const Field = struct { + location: Location, + name: Identifier, + type: TypeSpec, +}; diff --git a/src/ptkgen/dump/ast.zig b/src/ptkgen/dump/ast.zig new file mode 100644 index 0000000..9c5d675 --- /dev/null +++ b/src/ptkgen/dump/ast.zig @@ -0,0 +1,241 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); + +const ast = @import("../ast.zig"); +const parser = @import("../parser.zig"); + +pub fn dump(strings: *const ptk.strings.Pool, decls: parser.Document) void { + var printer = AstPrinter{ + .strings = strings, + }; + + printer.dumpRoot(decls.top_level_declarations); +} + +const AstPrinter = struct { + const print = std.debug.print; + + strings: *const ptk.strings.Pool, + + fn dumpRoot(printer: AstPrinter, decls: ast.List(ast.TopLevelDeclaration)) void { + print("ast dump:\n", .{}); + + var iter = ast.iterate(decls); + while (iter.next()) |decl| { + switch (decl.*) { + .start => |item| print("start <{}>;\n", .{printer.fmtId(item.identifier)}), + + .rule => |rule| { + print("rule {s}", .{printer.fmtId(rule.name.value)}); + + if (rule.ast_type) |ast_type| { + print(" : ", .{}); + printer.dumpAstType(ast_type); + } + + print(" = \n", .{}); + + var prods = ast.iterate(rule.productions); + var first = true; + while (prods.next()) |prod| { + defer first = false; + if (!first) { + print("\n | ", .{}); + } else { + print(" ", .{}); + } + printer.dumpMappedProd(prod.*); + } + + print("\n;\n", .{}); + }, + + .node => |node| { + print("node {s} = ", .{printer.fmtId(node.name.value)}); + printer.dumpAstType(node.value); + print(";\n", .{}); + }, + + .pattern => |pattern| { + print("pattern {s} = ", .{printer.fmtId(pattern.name.value)}); + + switch (pattern.data) { + .literal => |value| print("literal \"{}\"", .{printer.fmtString(value.value)}), + .word => |value| print("word \"{}\"", .{printer.fmtString(value.value)}), + .regex => |value| print("regex \"{}\"", .{printer.fmtString(value.value)}), + .external => |value| print("@{}", .{printer.fmtId(value.value)}), + } + + if (pattern.invisible) { + print(" skip", .{}); + } + print(";\n", .{}); + }, + } + } + } + + fn dumpAstType(printer: AstPrinter, typespec: ast.TypeSpec) void { + switch (typespec) { + .reference => |ref| print("!{}", .{printer.fmtId(ref.identifier)}), + .literal => |lit| print("`{s}`", .{printer.strings.get(lit.value)}), + .custom => |custom| print("@{}", .{printer.fmtId(custom.value)}), + .record, .variant => |compound| { + const multi_field = compound.fields.len() > 1; + + print("{s} ", .{@tagName(typespec)}); + var iter = ast.iterate(compound.fields); + + if (multi_field) { + var line_prefix: []const u8 
= "\n "; + while (iter.next()) |field| { + print("{s}{}: ", .{ line_prefix, printer.fmtId(field.name.value) }); + printer.dumpAstType(field.type); + + if (multi_field) { + line_prefix = ",\n "; + } + } + print("\n", .{}); + } else { + const field = iter.next().?; + + print("{}: ", .{printer.fmtId(field.name.value)}); + printer.dumpAstType(field.type); + } + }, + } + } + + fn dumpMappedProd(printer: AstPrinter, mapped_prod: ast.MappedProduction) void { + printer.dumpProd(mapped_prod.production); + + if (mapped_prod.mapping) |mapping| { + print(" => ", .{}); + printer.dumpMapping(mapping); + } + } + + fn dumpProd(printer: AstPrinter, production: ast.Production) void { + switch (production) { + .literal => |lit| print("\"{}\"", .{printer.fmtString(lit.value)}), + .terminal => |term| print("${}", .{printer.fmtId(term.identifier)}), + .recursion => |term| print("<{}>", .{printer.fmtId(term.identifier)}), + + .sequence, .optional, .repetition_zero, .repetition_one => |seq| { + print("(", .{}); + + var iter = ast.iterate(seq); + while (iter.next()) |item| { + print(" ", .{}); + printer.dumpProd(item.*); + } + + print(" )", .{}); + switch (production) { + .sequence => {}, + .optional => print("?", .{}), + .repetition_zero => print("*", .{}), + .repetition_one => print("+", .{}), + else => unreachable, + } + }, + } + } + + fn dumpMapping(printer: AstPrinter, mapping: ast.AstMapping) void { + switch (mapping) { + .record => |record| { + std.debug.assert(record.len() > 0); + + print("{{ ", .{}); + + var first = true; + var iter = ast.iterate(record); + while (iter.next()) |arg| { + if (!first) { + print(", ", .{}); + } + first = false; + + print("{} = ", .{printer.fmtId(arg.field.value)}); + + printer.dumpMapping(arg.value.*); + } + + print(" }}", .{}); + }, + + .list => |list| { + if (list.len() > 0) { + print("{{ ", .{}); + printer.dumpMappingList(list); + print(" }}", .{}); + } else { + print("{{}}", .{}); + } + }, + + .variant => |variant| { + print("{}: ", .{printer.fmtId(variant.field.value)}); + printer.dumpMapping(variant.value.*); + }, + + .literal => |literal| print("`{s}`", .{printer.strings.get(literal.value)}), + + .context_reference => |context_reference| print("${}", .{context_reference.index}), + + .user_reference => |user_reference| print("@{}", .{printer.fmtId(user_reference.value)}), + + .user_function_call => |user_function_call| { + print("@{}(", .{printer.fmtId(user_function_call.function.value)}); + printer.dumpMappingList(user_function_call.arguments); + print(")", .{}); + }, + + .function_call => |function_call| { + print("{}(", .{printer.fmtId(function_call.function.value)}); + printer.dumpMappingList(function_call.arguments); + print(")", .{}); + }, + } + } + + fn dumpMappingList(printer: AstPrinter, list: ast.List(ast.AstMapping)) void { + var first = true; + var iter = ast.iterate(list); + while (iter.next()) |arg| { + if (!first) { + print(", ", .{}); + } + first = false; + + printer.dumpMapping(arg.*); + } + } + + fn fmtString(printer: AstPrinter, str: ptk.strings.String) StringPrinter { + return StringPrinter{ .printer = printer, .str = str, .mode = .text }; + } + + fn fmtId(printer: AstPrinter, str: ptk.strings.String) StringPrinter { + return StringPrinter{ .printer = printer, .str = str, .mode = .id }; + } + + const StringPrinter = struct { + printer: AstPrinter, + str: ptk.strings.String, + mode: enum { id, text }, + + pub fn format(strpr: StringPrinter, fmt: []const u8, opt: std.fmt.FormatOptions, writer: anytype) !void { + _ = opt; + _ = fmt; + + const text = 
strpr.printer.strings.get(strpr.str); + switch (strpr.mode) { + .id => try writer.print("{}", .{std.zig.fmtId(text)}), + .text => try writer.print("{}", .{std.zig.fmtEscapes(text)}), + } + } + }; +}; diff --git a/src/ptkgen/dump/json.zig b/src/ptkgen/dump/json.zig new file mode 100644 index 0000000..0da58ee --- /dev/null +++ b/src/ptkgen/dump/json.zig @@ -0,0 +1,294 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); + +const sema = @import("../sema.zig"); +const parser = @import("../parser.zig"); + +pub fn createJsonValue( + arena: *std.heap.ArenaAllocator, + strings: *const ptk.strings.Pool, + grammar: sema.Grammar, +) !std.json.Value { + const allocator = arena.allocator(); + + var mapper = JsonMapper{ + .allocator = allocator, + .strings = strings, + }; + + var root = std.json.ObjectMap.init(allocator); + errdefer root.deinit(); + + if (grammar.start) |start| { + try root.put("start", mapper.jsonString(start.rule.name)); + } else { + try root.put("start", .null); + } + + { + var list = mapper.newArray(); + errdefer list.deinit(); + + var iter = grammar.literal_patterns.iterator(); + while (iter.next()) |kvp| { + try list.append(mapper.jsonString(kvp.value_ptr.*.data.literal_match)); + } + + try root.put("literal_patterns", .{ .array = list }); + } + + { + var patterns = std.json.ObjectMap.init(allocator); + errdefer patterns.deinit(); + + var iter = grammar.patterns.iterator(); + while (iter.next()) |kvp| { + const spattern: *sema.Pattern = kvp.value_ptr.*; + + var jpattern = std.json.ObjectMap.init(allocator); + errdefer jpattern.deinit(); + + // try jpattern.put("name", .{ .string = strings.get(spattern.name) }); + try jpattern.put("kind", .{ .string = @tagName(spattern.data) }); + switch (spattern.data) { + inline else => |val| try jpattern.put("data", mapper.jsonString(val)), + } + + try patterns.putNoClobber( + strings.get(kvp.key_ptr.*), + .{ .object = jpattern }, + ); + } + + try root.put("patterns", .{ .object = patterns }); + } + + { + var nodes = std.json.ObjectMap.init(allocator); + errdefer nodes.deinit(); + + var iter = grammar.nodes.iterator(); + while (iter.next()) |kvp| { + const snode: *sema.Node = kvp.value_ptr.*; + + var jtype = try mapper.convertType(snode.type); + + try nodes.putNoClobber( + strings.get(kvp.key_ptr.*), + jtype, + ); + } + + try root.put("ast_nodes", .{ .object = nodes }); + } + + { + var rules = std.json.ObjectMap.init(allocator); + errdefer rules.deinit(); + + var iter = grammar.rules.iterator(); + while (iter.next()) |kvp| { + const srule: *sema.Rule = kvp.value_ptr.*; + + var jrule = mapper.newObject(); + errdefer jrule.deinit(); + + if (srule.type) |rule_type| { + var jtype = try mapper.convertType(rule_type); + try jrule.putNoClobber("type", jtype); + } else { + try jrule.putNoClobber("type", .null); + } + + { + var jprods = mapper.newArray(); + errdefer jprods.deinit(); + + try jprods.resize(srule.productions.len); + + for (jprods.items, srule.productions) |*jmprod_val, mapped_production| { + var jmprod = mapper.newObject(); + errdefer jmprod.deinit(); + + var jprod = try mapper.convertProduction(mapped_production.production); + + try jmprod.putNoClobber("production", jprod); + + if (mapped_production.mapping) |mapping| { + var jmap = try mapper.convertMapping(mapping); + try jmprod.putNoClobber("mapping", jmap); + } else { + try jmprod.putNoClobber("mapping", .null); + } + + jmprod_val.* = .{ .object = jmprod }; + } + + try jrule.putNoClobber("mapped_productions", .{ .array = jprods }); + } + + try rules.putNoClobber( + 
strings.get(kvp.key_ptr.*), + .{ .object = jrule }, + ); + } + + try root.put("rules", .{ .object = rules }); + } + + return std.json.Value{ .object = root }; +} + +const JsonMapper = struct { + allocator: std.mem.Allocator, + strings: *const ptk.strings.Pool, + + fn convertProduction(mapper: JsonMapper, production: sema.Production) error{OutOfMemory}!std.json.Value { + var jtype = mapper.newObject(); + errdefer jtype.deinit(); + + try jtype.putNoClobber("kind", .{ .string = @tagName(production) }); + + const data: std.json.Value = switch (production) { + .terminal => |terminal| blk: { + if (terminal.is_literal) { + try jtype.put("kind", .{ .string = "literal-terminal" }); + } + break :blk mapper.jsonString(terminal.name); + }, + .recursion => |recursion| mapper.jsonString(recursion.name), + + .sequence => |sequence| blk: { + var list = mapper.newArray(); + errdefer list.deinit(); + + try list.resize(sequence.len); + + for (list.items, sequence) |*dst, src| { + dst.* = try mapper.convertProduction(src); + } + + break :blk .{ .array = list }; + }, + + .optional, .repetition_zero, .repetition_one => |optional| try mapper.convertProduction(optional.*), + }; + try jtype.putNoClobber("data", data); + + return .{ .object = jtype }; + } + + fn convertMapping(mapper: JsonMapper, mapping: sema.Mapping) error{OutOfMemory}!std.json.Value { + var jtype = mapper.newObject(); + errdefer jtype.deinit(); + + try jtype.putNoClobber("kind", .{ .string = @tagName(mapping) }); + + switch (mapping) { + .record_initializer => |record_initializer| { + var list = mapper.newArray(); + errdefer list.deinit(); + + try list.resize(record_initializer.fields.len); + + for (list.items, record_initializer.fields) |*dst, src| { + var jfield = mapper.newObject(); + errdefer jfield.deinit(); + + try jfield.putNoClobber("field", mapper.jsonString(src.field.name)); + try jfield.putNoClobber("value", try mapper.convertMapping(src.value)); + + dst.* = .{ .object = jfield }; + } + + try jtype.putNoClobber("fields", .{ .array = list }); + }, + .list_initializer => |list_initializer| { + var list = mapper.newArray(); + errdefer list.deinit(); + + try list.resize(list_initializer.items.len); + + for (list.items, list_initializer.items) |*dst, src| { + dst.* = try mapper.convertMapping(src); + } + + try jtype.putNoClobber("items", .{ .array = list }); + }, + .variant_initializer => |variant_initializer| { + try jtype.putNoClobber("field", mapper.jsonString(variant_initializer.field.name)); + try jtype.putNoClobber("value", try mapper.convertMapping(variant_initializer.value.*)); + }, + .user_function_call, .builtin_function_call => |function_call| { + var list = mapper.newArray(); + errdefer list.deinit(); + + try list.resize(function_call.arguments.len); + + for (list.items, function_call.arguments) |*dst, src| { + dst.* = try mapper.convertMapping(src); + } + + try jtype.putNoClobber("arguments", .{ .array = list }); + + try jtype.putNoClobber("function", mapper.jsonString(function_call.function)); + }, + + .code_literal, .user_literal => |literal| { + try jtype.putNoClobber("literal", mapper.jsonString(literal)); + }, + + .context_reference => |context_reference| { + try jtype.putNoClobber("index", .{ .integer = context_reference.index }); + }, + } + + return .{ .object = jtype }; + } + + fn convertType(mapper: JsonMapper, stype: *sema.Type) error{OutOfMemory}!std.json.Value { + const data: std.json.Value = switch (stype.*) { + .code_literal, .user_type => |literal| mapper.jsonString(literal), + .named => |named| 
mapper.jsonString(named.name), + + .optional => |inner| try mapper.convertType(inner), + + .record, .variant => |compound| blk: { + var fields = mapper.newObject(); + errdefer fields.deinit(); + + for (compound.fields.keys(), compound.fields.values()) |name, field| { + var field_type = try mapper.convertType(field.type); + try fields.putNoClobber( + mapper.strings.get(name), + field_type, + ); + } + + break :blk .{ .object = fields }; + }, + + .token => .null, + }; + + var jtype = mapper.newObject(); + errdefer jtype.deinit(); + + try jtype.putNoClobber("kind", .{ .string = @tagName(stype.*) }); + try jtype.putNoClobber("data", data); + + return .{ .object = jtype }; + } + + fn jsonString(mapper: JsonMapper, string: ptk.strings.String) std.json.Value { + return .{ .string = mapper.strings.get(string) }; + } + + fn newObject(mapper: JsonMapper) std.json.ObjectMap { + return std.json.ObjectMap.init(mapper.allocator); + } + + fn newArray(mapper: JsonMapper) std.json.Array { + return std.json.Array.init(mapper.allocator); + } +}; diff --git a/src/ptkgen/dump/sema.zig b/src/ptkgen/dump/sema.zig new file mode 100644 index 0000000..f338341 --- /dev/null +++ b/src/ptkgen/dump/sema.zig @@ -0,0 +1,173 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); + +const sema = @import("../sema.zig"); +const parser = @import("../parser.zig"); + +pub fn dump(strings: *const ptk.strings.Pool, grammar: sema.Grammar) void { + var printer = SemaPrinter{ + .strings = strings, + }; + + SemaPrinter.print("literal patterns:\n", .{}); + printer.dumpPatterns(grammar.literal_patterns); + + SemaPrinter.print("\nuser patterns:\n", .{}); + printer.dumpPatterns(grammar.patterns); + + SemaPrinter.print("\nstart rule: ", .{}); + if (grammar.start) |start| { + SemaPrinter.print("<{}>\n", .{printer.fmtId(start.rule.name)}); + } else { + SemaPrinter.print("-none-\n", .{}); + } + + SemaPrinter.print("\nast nodes:\n", .{}); + printer.dumpNodes(grammar.nodes); + + SemaPrinter.print("\nrules:\n", .{}); + printer.dumpRules(grammar.rules); +} + +const SemaPrinter = struct { + const print = std.debug.print; + + strings: *const ptk.strings.Pool, + + fn dumpPatterns(printer: SemaPrinter, patterns: sema.StringHashMap(*sema.Pattern)) void { + for (patterns.values()) |pattern| { + print("pattern {} = ", .{printer.fmtId(pattern.name)}); + + switch (pattern.data) { + inline else => |value, tag| print("{s} \"{}\"", .{ @tagName(tag), printer.fmtString(value) }), + } + + print(";\n", .{}); + } + } + + fn dumpNodes(printer: SemaPrinter, nodes: sema.StringHashMap(*sema.Node)) void { + for (nodes.values()) |node| { + print("node {} = ", .{printer.fmtId(node.name)}); + + printer.dumpType(node.type); + + print(";\n", .{}); + } + } + + fn dumpRules(printer: SemaPrinter, rules: sema.StringHashMap(*sema.Rule)) void { + for (rules.values()) |rule| { + print("rule {}", .{printer.fmtId(rule.name)}); + + if (rule.type) |rule_type| { + print(": ", .{}); + printer.dumpType(rule_type); + } + + print(" = ", .{}); + + for (rule.productions, 0..) 
|production, i| { + if (i > 0) print("\n | ", .{}); + printer.dumpMappedProduction(production); + } + + print(";\n", .{}); + } + } + + fn dumpMappedProduction(printer: SemaPrinter, mapped_prod: sema.MappedProduction) void { + printer.dumpProduction(mapped_prod.production); + + if (mapped_prod.mapping) |mapping| { + print(" -> ", .{}); + printer.dumpMapping(mapping); + } + } + + fn dumpProduction(printer: SemaPrinter, production: sema.Production) void { + switch (production) { + .terminal => |terminal| { + if (terminal.is_literal) { + print("\"{}\"", .{printer.fmtString(terminal.data.literal_match)}); + } else { + print("${}", .{printer.fmtId(terminal.name)}); + } + }, + .recursion => |recursion| print("<{}>", .{printer.fmtId(recursion.name)}), + .sequence => |sequence| { + for (sequence, 0..) |item, i| { + if (i > 0) + print(" ", .{}); + printer.dumpProduction(item); + } + }, + .optional => |optional| { + print("(", .{}); + printer.dumpProduction(optional.*); + print(")?", .{}); + }, + .repetition_zero => |repetition_zero| { + print("(", .{}); + printer.dumpProduction(repetition_zero.*); + print(")*", .{}); + }, + .repetition_one => |repetition_one| { + print("(", .{}); + printer.dumpProduction(repetition_one.*); + print(")+", .{}); + }, + } + } + + fn dumpMapping(printer: SemaPrinter, mapping: sema.Mapping) void { + _ = mapping; + _ = printer; + } + + fn dumpType(printer: SemaPrinter, stype: *sema.Type) void { + switch (stype.*) { + .code_literal => |literal| print("`{}`", .{printer.fmtString(literal)}), + .user_type => |literal| print("@{}", .{printer.fmtId(literal)}), + .optional => |inner| { + print("optional ", .{}); + printer.dumpType(inner); + }, + inline .record, .variant => |compound, tag| { + print("{s} ", .{@tagName(tag)}); + for (compound.fields.keys(), compound.fields.values(), 0..) 
|name, field, i| { + if (i > 0) + print(", ", .{}); + print("{}: ", .{printer.fmtId(name)}); + printer.dumpType(field.type); + } + }, + .named => |other| print("!{}", .{printer.fmtId(other.name)}), + } + } + + fn fmtString(printer: SemaPrinter, str: ptk.strings.String) StringPrinter { + return StringPrinter{ .printer = printer, .str = str, .mode = .text }; + } + + fn fmtId(printer: SemaPrinter, str: ptk.strings.String) StringPrinter { + return StringPrinter{ .printer = printer, .str = str, .mode = .id }; + } + + const StringPrinter = struct { + printer: SemaPrinter, + str: ptk.strings.String, + mode: enum { id, text }, + + pub fn format(strpr: StringPrinter, fmt: []const u8, opt: std.fmt.FormatOptions, writer: anytype) !void { + _ = opt; + _ = fmt; + + const text = strpr.printer.strings.get(strpr.str); + switch (strpr.mode) { + .id => try writer.print("{}", .{std.zig.fmtId(text)}), + .text => try writer.print("{}", .{std.zig.fmtEscapes(text)}), + } + } + }; +}; diff --git a/src/ptkgen/intl.zig b/src/ptkgen/intl.zig new file mode 100644 index 0000000..fa0e3d5 --- /dev/null +++ b/src/ptkgen/intl.zig @@ -0,0 +1,115 @@ +const std = @import("std"); + +const Diagnostics = @import("Diagnostics.zig"); + +pub const Language = enum { + en, +}; + +pub const language: Language = .en; + +pub const localization = @field(localizations, @tagName(language)); + +pub const localizations = struct { + pub const en = Localization.generate(@embedFile("intl/en.json")); +}; + +pub const FormattableError: type = blk: { + const list = @typeInfo(std.meta.fieldInfo(Localization, .errors).type).Struct.fields; + + var errors: [list.len]std.builtin.Type.Error = undefined; + for (&errors, list) |*dst, src| { + dst.* = .{ .name = src.name }; + } + + break :blk @Type(.{ + .ErrorSet = &errors, + }); +}; + +pub const DiagnosticStrings: type = blk: { + const list = @typeInfo(Diagnostics.Code).Enum.fields; + + var dst_fields: [list.len]std.builtin.Type.StructField = undefined; + for (&dst_fields, list) |*dst, src| { + dst.* = .{ + .name = src.name, + .type = []const u8, + .default_value = null, + .is_comptime = false, + .alignment = @alignOf([]const u8), + }; + } + + break :blk @Type(.{ + .Struct = .{ + .layout = .Auto, + .fields = &dst_fields, + .decls = &.{}, + .is_tuple = false, + }, + }); +}; + +pub const Localization = struct { + diagnostics: DiagnosticStrings, + + errors: struct { + Unexpected: []const u8, + + SyntaxError: []const u8, + SemanticError: []const u8, + + OutOfMemory: []const u8, + + InputOutput: []const u8, + AccessDenied: []const u8, + BrokenPipe: []const u8, + SystemResources: []const u8, + OperationAborted: []const u8, + WouldBlock: []const u8, + ConnectionResetByPeer: []const u8, + IsDir: []const u8, + ConnectionTimedOut: []const u8, + NotOpenForReading: []const u8, + NetNameDeleted: []const u8, + + FileTooBig: []const u8, + InvalidSourceEncoding: []const u8, + + DiskQuota: []const u8, + NoSpaceLeft: []const u8, + DeviceBusy: []const u8, + InvalidArgument: []const u8, + NotOpenForWriting: []const u8, + LockViolation: []const u8, + ProcessFdQuotaExceeded: []const u8, + SystemFdQuotaExceeded: []const u8, + SharingViolation: []const u8, + PathAlreadyExists: []const u8, + FileNotFound: []const u8, + PipeBusy: []const u8, + NameTooLong: []const u8, + InvalidUtf8: []const u8, + BadPathName: []const u8, + NetworkNotFound: []const u8, + InvalidHandle: []const u8, + SymLinkLoop: []const u8, + NoDevice: []const u8, + NotDir: []const u8, + FileLocksNotSupported: []const u8, + FileBusy: []const u8, + 
LinkQuotaExceeded: []const u8, + ReadOnlyFileSystem: []const u8, + RenameAcrossMountPoints: []const u8, + }, + + pub fn generate(comptime buffer: []const u8) Localization { + @setEvalBranchQuota(1_000_000); + + var alloc_buf: [4 * buffer.len]u8 = undefined; + var fba = std.heap.FixedBufferAllocator.init(&alloc_buf); + + return std.json.parseFromSliceLeaky(Localization, fba.allocator(), buffer, .{}) catch |err| @compileError(std.fmt.comptimePrint("failed to parse json: {}", .{err})); + } +}; diff --git a/src/ptkgen/intl/en.json b/src/ptkgen/intl/en.json new file mode 100644 index 0000000..00ec0ea --- /dev/null +++ b/src/ptkgen/intl/en.json @@ -0,0 +1,86 @@ +{ + "diagnostics": { + "out_of_memory": "Out of memory", + "file_limit_exceeded": "Input file exceeds maximum file size", + "io_error": "I/O error: {[error_code]}", + "invalid_source_encoding": "Invalid source code encoding detected", + "bad_string_escape": "Invalid string escape: Escape sequence at the end of string.", + "invalid_string_escape": "Invalid string escape '\\{[escape]}'.", + "excess_tokens": "Excess token at the end of the file: {[token_type]}-", + "illegal_empty_group": "Production sequence may not be empty.", + "integer_overflow": "Integer value {[actual]} out of range. Values must be between {[min]} and {[max]}.", + "empty_mapping": "Empty mappings are not allowed.", + "empty_typespec": "A type specifier is missing.", + "unexpected_token_eof": "Expected a token of type '{[expected_type]}', but the end of file was discovered.", + "unexpected_token": "Expected a token of type '{[expected_type]}', but found token {[actual]}.", + "unexpected_character": "Unexpected character '{[character]}' found.", + "unexpected_eof": "Unexpected end of file.", + "unexpected_toplevel_token": "Expected a top level declaration ('start', 'rule', 'node' or 'pattern'), but found token {[actual]}", + "unexpected_token_no_context": "Unexpected token '{[actual]}'.", + "unexpected_token_type_spec": "Expected a type specifier, but found '{[actual]}'.", + "unexpected_token_mapping": "Expected an AST mapping, but found '{[actual]}'.", + "unexpected_token_production_list": "Expected ';' or '|', but found '{[actual]}'.", + "unexpected_token_production": "Expected a production, but found '{[actual]}'.", + "unexpected_token_pattern": "Expected a pattern definition, but found '{[actual]}'.", + "duplicate_identifier_rule": "Rule {[identifier]} already defined here: {[previous_location]}", + "duplicate_identifier_node": "Node {[identifier]} already defined here: {[previous_location]}", + "duplicate_identifier_pattern": "Pattern {[identifier]} already defined here: {[previous_location]}", + "reference_to_undeclared_rule": "Reference to undeclared rule '{[identifier]}'.", + "reference_to_undeclared_node": "Reference to undeclared node '{[identifier]}'.", + "reference_to_undeclared_pattern": "Reference to undeclared pattern '{[identifier]}'.", + "missing_start_symbol": "Grammar file has no start symbol declared.", + "multiple_start_symbols": "Another start rule '({[identifier]})' was already declared here: {[previous_location]}", + "duplicate_compound_field": "Another field named '{[identifier]s}' was already declared here: {[previous_location]}", + "context_reference_out_of_bounds": "Context reference index out of bounds. {[index]} was given, but the highest possible index is {[limit]}.", + "variant_does_not_exist": "The variant field {[field]s} does not exist.
The variant type is declared here: {[type_location]}", + "record_field_does_not_exist": "The record field {[field]s} does not exist. The record type is declared here: {[type_location]}", + "record_field_already_initialized": "The record field {[field]s} is already initialized. Previous initialization: {[prev_init]}", + "record_field_not_initialized": "The record field {[field]s} was not initialized. Field declared here: {[field_location]}", + "mapping_requires_typed_rule": "The use of a rule mapping requires that the rule has an explicitly declared type.", + "invalid_builtin_function": "The builtin function {[name]s} does not exist!" + }, + "errors": { + "SyntaxError": "syntax error", + "SemanticError": "semantic error", + "Unexpected": "unexpected error encountered", + "OutOfMemory": "out of memory", + "InputOutput": "input output", + "AccessDenied": "access denied", + "BrokenPipe": "broken pipe", + "SystemResources": "system resources", + "OperationAborted": "operation aborted", + "WouldBlock": "would block", + "ConnectionResetByPeer": "connection reset by peer", + "IsDir": "path points to directory", + "ConnectionTimedOut": "connection timed out", + "NotOpenForReading": "not open for reading", + "NetNameDeleted": "net name deleted", + "FileTooBig": "Input file exceeds resources", + "InvalidSourceEncoding": "invalid source encoding", + "DiskQuota": "disk quota", + "NoSpaceLeft": "no space left", + "DeviceBusy": "device busy", + "InvalidArgument": "invalid argument", + "NotOpenForWriting": "not open for writing", + "LockViolation": "lock violation", + "ProcessFdQuotaExceeded": "process fd quota exceeded", + "SystemFdQuotaExceeded": "system fd quota exceeded", + "SharingViolation": "sharing violation", + "PathAlreadyExists": "path already exists", + "FileNotFound": "file not found", + "PipeBusy": "pipe busy", + "NameTooLong": "name too long", + "InvalidUtf8": "invalid utf8", + "BadPathName": "bad path name", + "NetworkNotFound": "network not found", + "InvalidHandle": "invalid handle", + "SymLinkLoop": "sym link loop", + "NoDevice": "no device", + "NotDir": "not dir", + "FileLocksNotSupported": "file locks not supported", + "FileBusy": "file busy", + "LinkQuotaExceeded": "link quota exceeded", + "ReadOnlyFileSystem": "read only file system", + "RenameAcrossMountPoints": "rename across mount points" + } +} \ No newline at end of file diff --git a/src/ptkgen/main.zig b/src/ptkgen/main.zig new file mode 100644 index 0000000..699b991 --- /dev/null +++ b/src/ptkgen/main.zig @@ -0,0 +1,403 @@ +//! +//! Parser Toolkit Grammar Compiler +//! + +const std = @import("std"); +const args_parser = @import("args"); +const ptk = @import("parser-toolkit"); + +const ast = @import("ast.zig"); +const sema = @import("sema.zig"); +const intl = @import("intl.zig"); +const parser = @import("parser.zig"); +const ast_dump = @import("dump/ast.zig"); +const sema_dump = @import("dump/sema.zig"); +const json_dump = @import("dump/json.zig"); + +const Diagnostics = @import("Diagnostics.zig"); + +comptime { + // reference for unit tests: + _ = parser; +} + +pub const Format = enum { + json, + // zig, +}; + +pub const CliOptions = struct { + help: bool = false, + output: ?[]const u8 = null, + test_mode: TestMode = .none, + trace: bool = false, + format: Format = .json, + + @"max-file-size": u32 = 4 * 1024, // 4 MB of source code is a lot! 
+ + dump: bool = false, + + pub const shorthands = .{ + .h = "help", + .o = "output", + .D = "dump", + }; + + pub const meta = .{ + .full_text = "Compiles a .ptk grammar file into Zig code.", + + .usage_summary = "[-h] [-o ] []", + + .option_docs = .{ + .help = "Prints this help.", + .output = "If given, will print the generated code into ", + + .test_mode = "(internal use only, required for testing)", + + .@"max-file-size" = "Maximum input file size in KiB (default: 4096)", + + .trace = "Prints a parse trace", + + .format = "Selects the output format of the grammar. Can be one of [ json, zig ]", + + .dump = "Dumps results from parser and sema to stderr.", + }, + }; +}; + +const TestMode = enum { + none, + parse_only, + no_codegen, +}; + +const AppError = error{OutOfMemory} || std.fs.File.WriteError; +pub fn main() AppError!u8 { + // errdefer |e| @compileLog(@TypeOf(e)); + + var stdout = std.io.getStdOut(); + var stdin = std.io.getStdIn(); + var stderr = std.io.getStdErr(); + + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer _ = gpa.deinit(); + + var arena = std.heap.ArenaAllocator.init(gpa.allocator()); + defer arena.deinit(); + + const dynamic_allocator = gpa.allocator(); + const static_allocator = arena.allocator(); + + var cli = args_parser.parseForCurrentProcess(CliOptions, static_allocator, .print) catch return 1; + defer cli.deinit(); + + if (cli.options.help) { + try args_parser.printHelp(CliOptions, cli.executable_name orelse "ptkgen", stdout.writer()); + return 0; + } + + var string_pool = try ptk.strings.Pool.init(dynamic_allocator); + defer string_pool.deinit(); + + var diagnostics = Diagnostics.init(dynamic_allocator); + defer diagnostics.deinit(); + + var input_file = switch (cli.positionals.len) { + 0 => stdin, + 1 => std.fs.cwd().openFile(cli.positionals[0], .{}) catch |err| { + try stderr.writer().print("failed to open file {s}: {s}\n", .{ + cli.positionals[0], + @errorName(err), + }); + return 1; + }, + else => { + try stderr.writeAll("Expects either a single positional file or none.\nSee --help for usage!\n"); + return 1; + }, + }; + defer input_file.close(); + + const file_name = if (cli.positionals.len > 0) + cli.positionals[0] + else + "stdint"; + + var expectations = std.ArrayList(TestExpectation).init(dynamic_allocator); + defer expectations.deinit(); + + const processing_ok = process_file: { + // 4 MB should be enough for now... + var source_code = input_file.readToEndAlloc(static_allocator, 1024 * cli.options.@"max-file-size") catch |err| { + try convertErrorToDiagnostics(&diagnostics, file_name, err); + break :process_file false; + }; + + defer static_allocator.free(source_code); + + if (cli.options.test_mode != .none) { + // in test mode, parse expectations from source code: + var lines = std.mem.tokenize(u8, source_code, "\n"); + while (lines.next()) |line| { + const prefix = "# expected:"; + if (std.mem.startsWith(u8, line, prefix)) { + var items = std.mem.tokenize(u8, line[prefix.len..], " \t,"); + while (items.next()) |error_code| { + const code = Diagnostics.Code.parse( + error_code, + ) catch @panic("invalid error code!"); + + try expectations.append(.{ .code = code }); + } + } + } + } + + compileFile( + dynamic_allocator, + &diagnostics, + &string_pool, + source_code, + file_name, + cli.options, + ) catch |err| { + try convertErrorToDiagnostics(&diagnostics, file_name, err); + break :process_file false; + }; + + // Todo: continue from here? 
+ + break :process_file true; + }; + + if (cli.options.test_mode == .none) { + try diagnostics.render(stderr.writer()); + + return if (processing_ok and !diagnostics.hasErrors()) + 0 // exit code for success + else + 1; // exit code for failure + } else { + // test fails through `error.TestExpectationMismatched`, not through diagnostics: + validateDiagnostics(dynamic_allocator, diagnostics, expectations.items) catch { + try stderr.writeAll("Full diagnostics:\n"); + try diagnostics.render(stderr.writer()); + + return 1; + }; + return 0; + } +} + +fn convertErrorToDiagnostics(diagnostics: *Diagnostics, file_name: []const u8, err: intl.FormattableError) error{OutOfMemory}!void { + switch (err) { + // syntax errors must produce diagnostics: + error.SyntaxError, error.SemanticError, error.InvalidSourceEncoding => std.debug.assert(diagnostics.hasErrors()), + + error.OutOfMemory => { + try diagnostics.emit(.{ + .source = file_name, + .line = 1, + .column = 1, + }, .out_of_memory, .{}); + }, + + error.FileTooBig => { + try diagnostics.emit(.{ + .source = file_name, + .line = 1, + .column = 1, + }, .file_limit_exceeded, .{}); + }, + + // input errors: + error.InputOutput, + error.AccessDenied, + error.BrokenPipe, + error.SystemResources, + error.OperationAborted, + error.WouldBlock, + error.ConnectionResetByPeer, + error.Unexpected, + error.IsDir, + error.ConnectionTimedOut, + error.NotOpenForReading, + error.NetNameDeleted, + + // output errors: + error.DiskQuota, + error.NoSpaceLeft, + error.DeviceBusy, + error.InvalidArgument, + error.NotOpenForWriting, + error.LockViolation, + error.ProcessFdQuotaExceeded, + error.SystemFdQuotaExceeded, + error.SharingViolation, + error.PathAlreadyExists, + error.FileNotFound, + error.PipeBusy, + error.NameTooLong, + error.InvalidUtf8, + error.BadPathName, + error.NetworkNotFound, + error.InvalidHandle, + error.SymLinkLoop, + error.NoDevice, + error.NotDir, + error.FileLocksNotSupported, + error.FileBusy, + error.LinkQuotaExceeded, + error.ReadOnlyFileSystem, + error.RenameAcrossMountPoints, + => |e| { + try diagnostics.emit(.{ + .source = file_name, + .line = 1, + .column = 1, + }, .io_error, .{ .error_code = e }); + }, + } +} + +const TestExpectation = struct { + code: Diagnostics.Code, +}; + +fn validateDiagnostics(allocator: std.mem.Allocator, diagnostics: Diagnostics, expectations: []const TestExpectation) !void { + var available = std.ArrayList(Diagnostics.Code).init(allocator); + defer available.deinit(); + + var expected = std.ArrayList(Diagnostics.Code).init(allocator); + defer expected.deinit(); + + try available.appendSlice(diagnostics.codes.items); + try expected.resize(expectations.len); + + for (expected.items, expectations) |*dst, src| { + dst.* = src.code; + } + + // Remove everything from expected and available that is present in both: + { + var i: usize = 0; + while (i < expected.items.len) { + const e = expected.items[i]; + + if (std.mem.indexOfScalar(Diagnostics.Code, available.items, e)) |index| { + _ = available.swapRemove(index); + _ = expected.swapRemove(i); + // std.log.info("found matching diagnostic {s}", .{@tagName(e)}); + } else { + i += 1; + } + } + } + + // Remove all non-errors from available, we do match on them with "-W4000" instead of forcing a expected W4000 into all files without start rules (or similar) + { + var i: usize = 0; + while (i < available.items.len) { + const code = available.items[i]; + if (!code.isError()) { + _ = available.swapRemove(i); + } else { + i += 1; + } + } + } + + const ok = (available.items.len == 
0) and (expected.items.len == 0); + + for (available.items) |code| { + std.log.err("unexpected diagnostic: {s} ({d})", .{ code, code }); + } + for (expected.items) |code| { + std.log.err("unmatched diagnostic: {s} ({d})", .{ code, code }); + } + + if (!ok) + return error.TestExpectationMismatched; +} + +fn compileFile( + allocator: std.mem.Allocator, + diagnostics: *Diagnostics, + string_pool: *ptk.strings.Pool, + source_code: []const u8, + file_name: []const u8, + options: CliOptions, +) !void { + var tree = try parser.parse( + .{ + .allocator = allocator, + .diagnostics = diagnostics, + .string_pool = string_pool, + .file_name = file_name, + .source_code = source_code, + .trace_enabled = options.trace, + }, + ); + defer tree.deinit(); + + if (options.test_mode == .parse_only) { + return; + } + + var grammar = try sema.analyze( + allocator, + diagnostics, + string_pool, + tree.top_level_declarations, + ); + defer grammar.deinit(); + + // TODO: Implement parsergen / tablegen / highlightergen + + if (options.dump) { + std.debug.print("ast dump:\n", .{}); + ast_dump.dump(string_pool, tree); + + std.debug.print("\n\nsema dump:\n", .{}); + sema_dump.dump(string_pool, grammar); + } + + if (options.test_mode != .none) + return; + + // Output generation: + { + const use_stdout = (options.output == null) or std.mem.eql(u8, options.output.?, "-"); + + var atomic_output_file: std.fs.AtomicFile = undefined; + if (!use_stdout) { + atomic_output_file = try std.fs.cwd().atomicFile(options.output.?, .{}); + } + defer if (!use_stdout) + atomic_output_file.deinit(); + + var output_file = if (use_stdout) + std.io.getStdOut() + else + atomic_output_file.file; + + // write to output_file here: + switch (options.format) { + .json => { + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + var json_repr: std.json.Value = try json_dump.createJsonValue( + &arena, + string_pool, + grammar, + ); + + try std.json.stringify(json_repr, .{}, output_file.writer()); + }, + } + + if (!use_stdout) + try atomic_output_file.finish(); + } +} diff --git a/src/ptkgen/parser.zig b/src/ptkgen/parser.zig new file mode 100644 index 0000000..7d26a61 --- /dev/null +++ b/src/ptkgen/parser.zig @@ -0,0 +1,1495 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); +const ast = @import("ast.zig"); + +const Diagnostics = @import("Diagnostics.zig"); + +const fmtEscapes = std.zig.fmtEscapes; + +const BAD_TYPE_SPEC: ast.TypeSpec = undefined; + +pub const Document = struct { + arena: std.heap.ArenaAllocator, + file_name: []const u8, + top_level_declarations: ast.Document, + + pub fn deinit(ts: *Document) void { + ts.arena.deinit(); + ts.* = undefined; + } +}; + +pub fn parse(opt: struct { + allocator: std.mem.Allocator, + diagnostics: *Diagnostics, + string_pool: *ptk.strings.Pool, + file_name: []const u8, + source_code: []const u8, + trace_enabled: bool, +}) !Document { + var arena = std.heap.ArenaAllocator.init(opt.allocator); + errdefer arena.deinit(); + + const file_name_copy = try arena.allocator().dupe(u8, opt.file_name); + + var tokenizer = Tokenizer.init(opt.source_code, file_name_copy); + + var parser = Parser{ + .core = ParserCore.init(&tokenizer), + .arena = arena.allocator(), + .pool = opt.string_pool, + .diagnostics = opt.diagnostics, + .trace_enabled = opt.trace_enabled, + }; + + const document_node = parser.acceptDocument() catch |err| switch (err) { + + // Unrecoverable syntax error, must have created diagnostics already + error.SyntaxError => |e| { + 
std.debug.assert(opt.diagnostics.hasErrors()); + + if (opt.trace_enabled) { + if (@errorReturnTrace()) |trace| { + std.debug.dumpStackTrace(trace.*); + } + } + + return e; + }, + error.InvalidSourceEncoding => |e| { + std.debug.assert(opt.diagnostics.hasErrors()); + + return e; + }, + + error.OutOfMemory => |e| return e, + }; + + if (tokenizer.next()) |token_or_null| { + if (token_or_null) |token| { + try opt.diagnostics.emit(token.location, .excess_tokens, .{ .token_type = token.type }); + return error.SyntaxError; + } + } else |_| { + try parser.emitUnexpectedCharacter(tokenizer.current_location, tokenizer.offset); + return error.SyntaxError; + } + + return Document{ + .arena = arena, + .file_name = file_name_copy, + .top_level_declarations = document_node, + }; +} + +pub const TokenType = enum { + // keywords + + start, + node, + rule, + pattern, + + record, + variant, + optional, + + literal, + word, + regex, + skip, + + // user values + + identifier, // foo-bar_bam + node_ref, // !node + rule_ref, // + token_ref, // $token + value_ref, // $0 + userval_ref, // @userval + + // values + + string_literal, // "string" + code_literal, // `code` + + // operators + + @"=", + @",", + @".", + @"*", + @"+", + @":", + @";", + @"|", + @"!", + @"?", + @"[", + @"]", + @"(", + @")", + @"{", + @"}", + @"=>", + + // auxiliary + + line_comment, + whitespace, +}; + +pub const Token = Tokenizer.Token; + +const ParserCore = ptk.ParserCore(Tokenizer, .{ .whitespace, .line_comment }); + +const Parser = struct { + const RS = ptk.RuleSet(TokenType); + const String = ptk.strings.String; + + core: ParserCore, + arena: std.mem.Allocator, + pool: *ptk.strings.Pool, + diagnostics: *Diagnostics, + + trace_enabled: bool, + trace_depth: u32 = 0, + + pub fn acceptDocument(parser: *Parser) FatalAcceptError!ast.Document { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + var doc = ast.Document{}; + + while (true) { + const decl_or_eof = try parser.acceptTopLevelDecl(); + + const decl = decl_or_eof orelse break; + + try parser.append(ast.TopLevelDeclaration, &doc, decl); + } + + return doc; + } + + fn acceptTopLevelDecl(parser: *Parser) FatalAcceptError!?ast.TopLevelDeclaration { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + if (parser.acceptStartDecl()) |root_rule| { + return .{ .start = root_rule }; + } else |err| try filterAcceptError(err); + + if (parser.acceptRule()) |rule| { + return .{ .rule = rule }; + } else |err| try filterAcceptError(err); + + if (parser.acceptNode()) |node| { + return .{ .node = node }; + } else |err| try filterAcceptError(err); + + if (parser.acceptPatternDefinition()) |pattern| { + return .{ .pattern = pattern }; + } else |err| try filterAcceptError(err); + + // Detect any excess tokens on the top level: + if (parser.core.nextToken()) |maybe_token| { + if (maybe_token) |token| { + try parser.emitDiagnostic(token.location, .unexpected_toplevel_token, .{ + .actual = token, + }); + return error.SyntaxError; + } else { + // This is actually the good path here, as only if we don't find any token or tokenization error, + // we reached the end of the file. 
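+                // In that case we simply fall through and return `null` at the end of this function.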
+ } + } else |err| switch (err) { + error.UnexpectedCharacter => { + try parser.emitUnexpectedCharacter(parser.core.tokenizer.current_location, parser.core.tokenizer.offset); + return error.SyntaxError; + }, + } + + return null; + } + + fn acceptStartDecl(parser: *Parser) AcceptError!ast.RuleRef { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + try parser.acceptLiteral(.start, .recover); + const init_rule = try parser.acceptRuleReference(.fail); + + try parser.acceptLiteral(.@";", .fail); + + return init_rule; + } + + fn acceptPatternDefinition(parser: *Parser) AcceptError!ast.Pattern { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + try parser.acceptLiteral(.pattern, .recover); + + const name = try parser.acceptIdentifier(.fail); + try parser.acceptLiteral(.@"=", .fail); + + const data = try parser.acceptPatternSpec(); + + const invisible = try parser.tryAcceptLiteral(.skip); + + try parser.acceptLiteral(.@";", .fail); + + return .{ + .name = name, + .data = data, + .invisible = invisible, + }; + } + + fn acceptPatternSpec(parser: *Parser) AcceptError!ast.Pattern.Data { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + var state = parser.save(); + errdefer parser.restore(state); + + if (try parser.tryAcceptLiteral(.literal)) { + const string = try parser.acceptStringLiteral(.fail); + return .{ .literal = string }; + } + + if (try parser.tryAcceptLiteral(.word)) { + const string = try parser.acceptStringLiteral(.fail); + return .{ .word = string }; + } + + if (try parser.tryAcceptLiteral(.regex)) { + const string = try parser.acceptStringLiteral(.fail); + return .{ .regex = string }; + } + + if (parser.acceptUserReference()) |ref| { + return .{ .external = ref }; + } else |err| try filterAcceptError(err); + + return parser.emitUnexpectedToken(.{ + .unexpected_token = .unexpected_token_pattern, + }); + } + + fn acceptNode(parser: *Parser) AcceptError!ast.Node { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + var state = parser.save(); + errdefer parser.restore(state); + + try parser.acceptLiteral(.node, .recover); + + const identifier = try parser.acceptIdentifier(.fail); + + try parser.acceptLiteral(.@"=", .fail); + + const value = try parser.acceptTypeSpec(); + + try parser.acceptLiteral(.@";", .fail); + + return .{ + .name = identifier, + .value = value, + }; + } + + fn acceptRule(parser: *Parser) AcceptError!ast.Rule { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + var state = parser.save(); + errdefer parser.restore(state); + + try parser.acceptLiteral(.rule, .recover); + + const identifier = try parser.acceptIdentifier(.fail); + + const rule_type = if (try parser.tryAcceptLiteral(.@":")) + try parser.acceptTypeSpec() + else + null; + + try parser.acceptLiteral(.@"=", .fail); + + var list: ast.List(ast.MappedProduction) = .{}; + + while (true) { + var production = try parser.acceptMappedProduction(); + + try parser.append(ast.MappedProduction, &list, production); + + // if a semicolon follows, we're done + if (try parser.tryAcceptLiteral(.@";")) { + break; + } + // if a pipe follows, we got more rules + else if (try parser.tryAcceptLiteral(.@"|")) { + continue; + } + // otherwise, it's a syntax error: + else { + return parser.emitUnexpectedToken(.{ + .unexpected_token = .unexpected_token_production_list, + }); + } + + try parser.acceptLiteral(.@"|", .fail); + } + + return ast.Rule{ + .ast_type = rule_type, + .productions = list, + .name = identifier, + }; + } + + fn acceptMappedProduction(parser: *Parser) 
AcceptError!ast.MappedProduction { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + var sequence = try parser.acceptProductionSequence(); + + const mapping = if (try parser.tryAcceptLiteral(.@"=>")) + try parser.acceptAstMapping(.fail) + else + null; + + return ast.MappedProduction{ + // Auto-flatten the "tree" here if the top level production is a "sequence" of one + .production = if (sequence.only()) |item| + item + else + .{ .sequence = sequence }, + .mapping = mapping, + }; + } + + fn acceptProductionSequence(parser: *Parser) AcceptError!ast.List(ast.Production) { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + var list: ast.List(ast.Production) = .{}; + + sequence_loop: while (true) { + if (parser.acceptProduction()) |prod| { + try parser.append(ast.Production, &list, prod); + } else |err| switch (err) { + error.UnexpectedTokenRecoverable => { + // we couldn't accept a production, so let's see if we're in a legal state here: + + const seekahead_reset = parser.save(); + + // all of the following might allow to terminate a list: + inline for (.{ .@")", .@";", .@"=>", .@"|" }) |legal_terminator| { + if (try parser.tryAcceptLiteral(legal_terminator)) { + // All of the above tokens + parser.restore(seekahead_reset); + break :sequence_loop; + } + } + + return parser.emitUnexpectedToken(.{ + .unexpected_token = .unexpected_token_production, + }); + }, + error.OutOfMemory, error.InvalidSourceEncoding, error.SyntaxError => |e| return e, + } + } + + if (list.len() == 0) { + // Empty list is a recoverable syntax error: + try parser.emitDiagnostic(null, .illegal_empty_group, .{}); + } + + return list; + } + + fn acceptProduction(parser: *Parser) AcceptError!ast.Production { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + if (try parser.tryAcceptLiteral(.@"(")) { + var sequence = try parser.acceptProductionSequence(); + try parser.acceptLiteral(.@")", .fail); + + if (try parser.tryAcceptLiteral(.@"?")) { + return .{ .optional = sequence }; + } else if (try parser.tryAcceptLiteral(.@"+")) { + return .{ .repetition_one = sequence }; + } else if (try parser.tryAcceptLiteral(.@"*")) { + return .{ .repetition_zero = sequence }; + } else { + return .{ .sequence = sequence }; + } + } + + if (parser.acceptStringLiteral(.recover)) |str| { + return ast.Production{ .literal = str }; + } else |err| try filterAcceptError(err); + + if (parser.acceptTokenReference(.recover)) |ref| { + return ast.Production{ .terminal = ref }; + } else |err| try filterAcceptError(err); + + if (parser.acceptRuleReference(.recover)) |ref| { + return ast.Production{ .recursion = ref }; + } else |err| try filterAcceptError(err); + + // We're done with out list + return error.UnexpectedTokenRecoverable; + } + + fn acceptAstMapping(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.AstMapping { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const state = parser.save(); + errdefer parser.restore(state); + + const position = parser.core.tokenizer.current_location; + + if (parser.acceptVariantInit()) |init| { + return .{ .variant = init }; + } else |err| try filterAcceptError(err); + + if (parser.acceptRecordInit()) |init| { + return .{ .record = init }; + } else |err| try filterAcceptError(err); + + if (parser.acceptListInit()) |init| { + return .{ .list = init }; + } else |err| try filterAcceptError(err); + + if (parser.acceptCodeLiteral()) |literal| { + return .{ .literal = literal }; + } else |err| try filterAcceptError(err); + + if (parser.acceptValueReference()) 
|literal| { + return .{ .context_reference = literal }; + } else |err| try filterAcceptError(err); + + if (parser.acceptBuiltinCall()) |call| { + return .{ .function_call = call }; + } else |err| try filterAcceptError(err); + + if (parser.acceptUserCall()) |call| { + return .{ .user_function_call = call }; + } else |err| try filterAcceptError(err); + + if (parser.acceptUserReference()) |ref| { + return .{ .user_reference = ref }; + } else |err| try filterAcceptError(err); + + if (try parser.tryAcceptLiteral(.@";") or try parser.tryAcceptLiteral(.@"|")) { + try parser.emitDiagnostic(position, .empty_mapping, .{}); + return error.SyntaxError; + } + + switch (accept_mode) { + .recover => return error.UnexpectedTokenRecoverable, + .fail => return parser.emitUnexpectedToken(.{ + .unexpected_token = .unexpected_token_mapping, + }), + } + } + + fn acceptVariantInit(parser: *Parser) AcceptError!ast.VariantInitializer { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const state = parser.save(); + errdefer parser.restore(state); + + const field = try parser.acceptIdentifier(.recover); + + try parser.acceptLiteral(.@":", .recover); + + const value = try parser.acceptAstMapping(.fail); + + const clone = try parser.arena.create(ast.AstMapping); + clone.* = value; + + return .{ + .field = field, + .value = clone, + }; + } + + fn acceptRecordInit(parser: *Parser) AcceptError!ast.List(ast.FieldAssignment) { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const state = parser.save(); + errdefer parser.restore(state); + + try parser.acceptLiteral(.@"{", .recover); + + var mode: AcceptMode = .recover; + + var list = ast.List(ast.FieldAssignment){}; + while (true) { + // First item might fail, then it's not a record initializer, but + // afterwards, all fields must comply + defer mode = .fail; + + const node = try parser.acceptFieldInit(mode); + + try parser.append(ast.FieldAssignment, &list, node); + + if (!try parser.tryAcceptLiteral(.@",")) { + break; + } + } + + try parser.acceptLiteral(.@"}", .fail); + + return list; + } + + fn acceptFieldInit(parser: *Parser, mode: AcceptMode) AcceptError!ast.FieldAssignment { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const state = parser.save(); + errdefer parser.restore(state); + + const location = parser.core.tokenizer.current_location; + + const field = try parser.acceptIdentifier(mode); + + try parser.acceptLiteral(.@"=", .fail); + + const value = try parser.acceptAstMapping(.fail); + + const clone = try parser.arena.create(ast.AstMapping); + clone.* = value; + + return .{ + .location = location, + .field = field, + .value = clone, + }; + } + + fn acceptListInit(parser: *Parser) AcceptError!ast.List(ast.AstMapping) { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const state = parser.save(); + errdefer parser.restore(state); + + try parser.acceptLiteral(.@"{", .recover); + + var items = try parser.acceptMappingList(); + + try parser.acceptLiteral(.@"}", .fail); + + return items; + } + + fn acceptCodeLiteral(parser: *Parser) AcceptError!ast.CodeLiteral { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const token = try parser.acceptToken(.code_literal, .recover); + + std.debug.assert(std.mem.startsWith(u8, token.text, "`")); + std.debug.assert(std.mem.endsWith(u8, token.text, "`")); + + var prefix_len: usize = 0; + while (token.text[prefix_len] == '`') { + prefix_len += 1; + } + + return ast.CodeLiteral{ + .location = token.location, + .value = try 
parser.pool.insert(token.text[prefix_len .. token.text.len - prefix_len]), + }; + } + + fn acceptValueReference(parser: *Parser) AcceptError!ast.ValueRef { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const token = try parser.acceptToken(.value_ref, .recover); + std.debug.assert(std.mem.startsWith(u8, token.text, "$")); + return ast.ValueRef{ + .location = token.location, + .index = std.fmt.parseInt(u32, token.text[1..], 10) catch |err| switch (err) { + error.InvalidCharacter => unreachable, // ensured by tokenizer, + error.Overflow => blk: { + try parser.emitDiagnostic(token.location, .integer_overflow, .{ + .min = comptime std.fmt.comptimePrint("{}", .{std.math.minInt(u32)}), + .max = comptime std.fmt.comptimePrint("{}", .{std.math.maxInt(u32)}), + .actual = token.text[1..], + }); + break :blk 0; + }, + }, + }; + } + + fn acceptBuiltinCall(parser: *Parser) AcceptError!ast.FunctionCall(ast.Identifier) { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const state = parser.save(); + errdefer parser.restore(state); + + const id = try parser.acceptIdentifier(.recover); + + try parser.acceptLiteral(.@"(", .fail); // a builtin function is the only legal way to use an identifier here, so we fail unrecoverably + + const list = try parser.acceptMappingList(); + + try parser.acceptLiteral(.@")", .fail); + + return .{ + .function = id, + .arguments = list, + }; + } + + fn acceptUserCall(parser: *Parser) AcceptError!ast.FunctionCall(ast.UserDefinedIdentifier) { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const state = parser.save(); + errdefer parser.restore(state); + + const id = try parser.acceptUserReference(); + + // If we only accept a user value, fail and fall back to regular user value acceptance later + try parser.acceptLiteral(.@"(", .recover); + + const list = try parser.acceptMappingList(); + + try parser.acceptLiteral(.@")", .fail); + + return .{ + .function = id, + .arguments = list, + }; + } + + fn acceptUserReference(parser: *Parser) AcceptError!ast.UserDefinedIdentifier { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const token = try parser.acceptToken(.userval_ref, .recover); + std.debug.assert(std.mem.startsWith(u8, token.text, "@")); + return ast.UserDefinedIdentifier{ + .location = token.location, + .value = try parser.pool.insert(token.text[1..]), + }; + } + + fn acceptMappingList(parser: *Parser) AcceptError!ast.List(ast.AstMapping) { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const list_state = parser.save(); + errdefer parser.restore(list_state); + + var list = ast.List(ast.AstMapping){}; + + var accept_mode: AcceptMode = .recover; + while (true) { + // first item is allowed to be failing, otherwise comma separation must be done! 
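+            // Note that this loop also accepts an empty list: if the very first mapping
+            // cannot be accepted, we roll back to `item_state` below and return no items.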
+ defer accept_mode = .fail; + + const item_state = parser.save(); + + if (parser.acceptAstMapping(accept_mode)) |mapping| { + try parser.append(ast.AstMapping, &list, mapping); + } else |err| { + try filterAcceptError(err); + parser.restore(item_state); // rollback to the previous item + break; + } + + if (!try parser.tryAcceptLiteral(.@",")) { + break; + } + } + + return list; + } + + fn acceptTypeSpec(parser: *Parser) AcceptError!ast.TypeSpec { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const list_state = parser.save(); + errdefer parser.restore(list_state); + + const position = parser.core.tokenizer.current_location; + + if (parser.acceptCodeLiteral()) |code| { + return .{ .literal = code }; + } else |err| try filterAcceptError(err); + + if (parser.acceptUserReference()) |ref| { + return .{ .custom = ref }; + } else |err| try filterAcceptError(err); + + if (parser.acceptNodeReference(.recover)) |ref| { + return .{ .reference = ref }; + } else |err| try filterAcceptError(err); + + if (parser.acceptCompoundType(.record)) |record| { + return .{ .record = record }; + } else |err| try filterAcceptError(err); + + if (parser.acceptCompoundType(.variant)) |variant| { + return .{ .variant = variant }; + } else |err| try filterAcceptError(err); + + const contiuation_pos = parser.save(); + if (try parser.tryAcceptLiteral(.@";") or try parser.tryAcceptLiteral(.@"|") or try parser.tryAcceptLiteral(.@"=")) { + try parser.emitDiagnostic(position, .empty_typespec, .{}); + + // restore the previous position, we just seeked a bit forward to make better + // errors here: + parser.restore(contiuation_pos); + + return BAD_TYPE_SPEC; + } + + // switch (accept_mode) { + // .recover => return error.UnexpectedTokenRecoverable, + // .fail => + return parser.emitUnexpectedToken(.{ + .unexpected_token = .unexpected_token_type_spec, + }); + // } + } + + fn acceptCompoundType(parser: *Parser, comptime designator: TokenType) AcceptError!ast.CompoundType { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const list_state = parser.save(); + errdefer parser.restore(list_state); + + const current_location = parser.core.tokenizer.current_location; + + // we can recover "struct"/"record", afterwards you must follow the rules + try parser.acceptLiteral(designator, .recover); + + var fields = ast.List(ast.Field){}; + + while (true) { + const field = try parser.acceptField(); + + try parser.append(ast.Field, &fields, field); + + if (try parser.tryAcceptLiteral(.@",")) { + // Comma means we're having another field + continue; + } else { + // Otherwise, the list is over. 
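+                // Whatever token follows the last field is left unconsumed for the caller.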
+ break; + } + } + + return .{ + .location = current_location, + .fields = fields, + }; + } + + fn acceptField(parser: *Parser) AcceptError!ast.Field { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const list_state = parser.save(); + errdefer parser.restore(list_state); + + const current_location = parser.core.tokenizer.current_location; + + const name = try parser.acceptIdentifier(.fail); + + try parser.acceptLiteral(.@":", .fail); + + const type_spec = try parser.acceptTypeSpec(); + + return .{ + .location = current_location, + .name = name, + .type = type_spec, + }; + } + + fn acceptStringLiteral(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.StringLiteral { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const token = try parser.acceptToken(.string_literal, accept_mode); + + std.debug.assert(token.text.len >= 2); + + return ast.StringLiteral{ + .location = token.location, + .value = try parser.unwrapString(token.location, token.text[1 .. token.text.len - 1]), + }; + } + + fn acceptIdentifier(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.Identifier { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const token = try parser.acceptToken(.identifier, accept_mode); + return ast.Identifier{ + .location = token.location, + .value = try parser.unwrapIdentifierString(token.location, token.text), + }; + } + + fn acceptRuleReference(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.RuleRef { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const token = try parser.acceptToken(.rule_ref, accept_mode); + std.debug.assert(std.mem.startsWith(u8, token.text, "<")); + std.debug.assert(std.mem.endsWith(u8, token.text, ">")); + return ast.RuleRef{ + .location = token.location, + .identifier = try parser.unwrapIdentifierString(token.location, token.text[1 .. token.text.len - 1]), + }; + } + + fn acceptTokenReference(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.PatternRef { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const token = try parser.acceptToken(.token_ref, accept_mode); + std.debug.assert(std.mem.startsWith(u8, token.text, "$")); + return ast.PatternRef{ + .location = token.location, + .identifier = try parser.unwrapIdentifierString(token.location, token.text[1..]), + }; + } + + fn acceptNodeReference(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.NodeRef { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const token = try parser.acceptToken(.node_ref, accept_mode); + std.debug.assert(std.mem.startsWith(u8, token.text, "!")); + return ast.NodeRef{ + .location = token.location, + .identifier = try parser.unwrapIdentifierString(token.location, token.text[1..]), + }; + } + + fn acceptLiteral(parser: *Parser, comptime token_type: TokenType, accept_mode: AcceptMode) AcceptError!void { + _ = try parser.acceptToken(token_type, accept_mode); + } + + fn tryAcceptLiteral(parser: *Parser, comptime token_type: TokenType) FatalAcceptError!bool { + _ = parser.acceptToken(token_type, .recover) catch |err| switch (err) { + error.UnexpectedTokenRecoverable => return false, + error.OutOfMemory, error.InvalidSourceEncoding, error.SyntaxError => |e| return e, + }; + return true; + } + + /// Tries to accept a given token and will emit a diagnostic if it fails. 
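+    /// With `accept_mode == .fail`, a mismatch emits a diagnostic and returns `error.SyntaxError`;
+    /// with `.recover`, it returns `error.UnexpectedTokenRecoverable` so the caller can try an alternative.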
+ fn acceptToken(parser: *Parser, comptime token_type: TokenType, accept_mode: AcceptMode) AcceptError!Token { + const saved_state = parser.save(); + errdefer parser.restore(saved_state); + + const source_offset = parser.core.tokenizer.offset; + const location = parser.core.tokenizer.current_location; + + if (parser.core.accept(RS.any)) |token| { + errdefer parser.emitTrace(.{ .token_reject = .{ .actual = token, .expected = token_type } }); + if (token.type != token_type) { + switch (accept_mode) { + .fail => { + try parser.emitDiagnostic(location, .unexpected_token, .{ + .expected_type = token_type, + .actual = token, + }); + return error.SyntaxError; + }, + .recover => return error.UnexpectedTokenRecoverable, + } + } + parser.emitTrace(.{ .token_accept = token }); + return token; + } else |err| switch (err) { + error.UnexpectedToken => unreachable, // RS.any will always accept the token + error.EndOfStream => switch (accept_mode) { + .fail => { + try parser.emitDiagnostic(location, .unexpected_token_eof, .{ .expected_type = token_type }); + return error.SyntaxError; + }, + .recover => return error.UnexpectedTokenRecoverable, + }, + error.UnexpectedCharacter => { + try parser.emitUnexpectedCharacter(location, source_offset); + return error.SyntaxError; + }, + } + } + + const AcceptMode = enum { + /// Will emit a syntax error with diagnostic + fail, + + /// Is recoverable + recover, + }; + + // management: + const TraceKind = union(enum) { + token_accept: Token, + token_reject: struct { actual: Token, expected: TokenType }, + rule: []const u8, + }; + + const Trace = struct { + depth: u32, + kind: TraceKind, + + pub fn format(trace: Trace, fmt: []const u8, opt: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = opt; + try writer.writeByteNTimes(' ', 4 * trace.depth); + try writer.print("{s}:", .{@tagName(trace.kind)}); + switch (trace.kind) { + .token_accept => |item| try writer.print("accept {}", .{item}), + .token_reject => |item| try writer.print("reject {}, expected '{s}'", .{ item.actual, @tagName(item.expected) }), + .rule => |item| try writer.print("{s}", .{item}), + } + } + }; + + fn traceEnterRule(parser: *Parser, loc: std.builtin.SourceLocation) void { + parser.emitTrace(.{ .rule = loc.fn_name }); + parser.trace_depth += 1; + } + + fn popTrace(parser: *Parser) void { + parser.trace_depth -= 1; + } + + fn emitTrace(parser: Parser, trace: TraceKind) void { + if (!parser.trace_enabled) { + return; + } + std.log.debug("rule trace: {}", .{Trace{ + .depth = parser.trace_depth, + .kind = trace, + }}); + } + + fn emitDiagnostic(parser: Parser, loc: ?ptk.Location, comptime code: Diagnostics.Code, data: Diagnostics.Data(code)) !void { + // Anything detected here is always an error + std.debug.assert(code.isError()); + try parser.diagnostics.emit(loc orelse parser.core.tokenizer.current_location, code, data); + } + + const UnexpectedTokenOptions = struct { + unexpected_token: Diagnostics.Code, + }; + fn emitUnexpectedToken(parser: *Parser, comptime opt: UnexpectedTokenOptions) AcceptError { + if (Diagnostics.Data(opt.unexpected_token) != Diagnostics.Data(.unexpected_token_no_context)) { + @compileError("Generic unexpected token must use the same type as 'unexpected_token_no_context' diagnostic."); + } + + const state = parser.save(); + defer parser.restore(state); + + const location = parser.core.tokenizer.current_location; + const offset = parser.core.tokenizer.offset; + + const token_or_null = parser.core.nextToken() catch |err| switch (err) { + error.UnexpectedCharacter => { 
+ try parser.emitUnexpectedCharacter(location, offset); + return error.SyntaxError; + }, + }; + + const token = token_or_null orelse { + try parser.emitDiagnostic(location, .unexpected_eof, .{}); + return error.SyntaxError; + }; + + try parser.emitDiagnostic(location, opt.unexpected_token, .{ + .actual = token, + }); + return error.SyntaxError; + } + + fn emitUnexpectedCharacter(parser: Parser, location: ptk.Location, source_offset: usize) !void { + var utf8_view = std.unicode.Utf8View.init(parser.core.tokenizer.source[source_offset..]) catch { + try parser.emitDiagnostic(location, .invalid_source_encoding, .{}); + return error.InvalidSourceEncoding; + }; + + var iter = utf8_view.iterator(); + + try parser.emitDiagnostic(location, .unexpected_character, .{ + .character = iter.nextCodepoint() orelse @panic("very unexpected end of file"), + }); + } + + fn unwrapIdentifierString(parser: *Parser, loc: ptk.Location, raw: []const u8) !ptk.strings.String { + std.debug.assert(raw.len > 0); + if (raw[0] == '@') { + std.debug.assert(raw[1] == '"'); + std.debug.assert(raw[raw.len - 1] == '"'); + // string-escaped identifier + return try parser.unwrapString(loc, raw[2 .. raw.len - 1]); + } else { + return try parser.pool.insert(raw); + } + } + + fn unwrapString(parser: *Parser, loc: ptk.Location, raw: []const u8) !ptk.strings.String { + var fallback = std.heap.stackFallback(512, parser.arena); + + var working_space = std.ArrayList(u8).init(fallback.get()); + defer working_space.deinit(); + + var i: usize = 0; + while (i < raw.len) { + const c = raw[i]; + if (c == '\\') { + i += 1; + if (i >= raw.len) { + try parser.emitDiagnostic(loc, .bad_string_escape, .{}); + return error.SyntaxError; + } + const escape = raw[i]; + const slice = switch (escape) { + 'n' => "\n", + 'r' => "\r", + '\"' => "\"", + '\'' => "\'", + '\\' => "\\", + + 'x' => @panic("Implement hex escape \\x??"), + 'u' => @panic("Implement unicode utf-8 escapes \\u{????}"), + + '0'...'3' => @panic("Implement octal escape \\???"), + + else => { + try parser.emitDiagnostic(loc, .invalid_string_escape, .{ .escape = escape }); + return error.SyntaxError; + }, + }; + try working_space.appendSlice(slice); + } else { + try working_space.append(c); + } + i += 1; + } + + return try parser.pool.insert(working_space.items); + } + + fn save(parser: Parser) ParserCore.State { + return parser.core.saveState(); + } + + fn restore(parser: *Parser, state: ParserCore.State) void { + parser.core.restoreState(state); + } + + fn internString(parser: *Parser, string: []const u8) !String { + return try parser.pool.insert(string); + } + + fn append(parser: *Parser, comptime T: type, list: *ast.List(T), item: T) !void { + const node = try parser.arena.create(ast.List(T).Node); + errdefer parser.arena.destroy(node); + + node.data = item; + + list.append(node); + } + + pub const FatalAcceptError = error{ + // We're out of memory accepting some rule. We cannot recover from this. + OutOfMemory, + + // Something could not be accepted. 
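+        // By convention, a diagnostic has already been emitted when this is returned.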
+ SyntaxError, + + // The source code contained invalid bytes + InvalidSourceEncoding, + }; + + pub const AcceptError = FatalAcceptError || error{ + // The token stream contains an unexpected token, this is a syntax error + UnexpectedTokenRecoverable, + }; + + fn filterAcceptError(err: AcceptError) FatalAcceptError!void { + return switch (err) { + error.UnexpectedTokenRecoverable => {}, + + error.OutOfMemory, + error.SyntaxError, + error.InvalidSourceEncoding, + => |e| return e, + }; + } +}; + +const match = ptk.matchers; +const Pattern = ptk.Pattern(TokenType); +const Tokenizer = ptk.Tokenizer(TokenType, &.{ + Pattern.create(.line_comment, match.sequenceOf(.{ match.literal("#"), match.takeNoneOf("\r\n") })), + + Pattern.create(.node, match.word("node")), + Pattern.create(.record, match.word("record")), + Pattern.create(.variant, match.word("variant")), + Pattern.create(.optional, match.word("optional")), + Pattern.create(.start, match.word("start")), + Pattern.create(.rule, match.word("rule")), + Pattern.create(.pattern, match.word("pattern")), + Pattern.create(.literal, match.word("literal")), + Pattern.create(.word, match.word("word")), + Pattern.create(.regex, match.word("regex")), + Pattern.create(.skip, match.word("skip")), + + Pattern.create(.string_literal, matchStringLiteral), + Pattern.create(.code_literal, matchCodeLiteral), + + // identifiers must come after keywords: + Pattern.create(.identifier, matchRawIdentifier), + Pattern.create(.node_ref, matchNodeRef), + Pattern.create(.rule_ref, matchRuleRef), + Pattern.create(.token_ref, matchTokenRef), + Pattern.create(.value_ref, matchValueRef), + Pattern.create(.userval_ref, matchBuiltinRef), + + Pattern.create(.@"=>", match.literal("=>")), + + Pattern.create(.@"=", match.literal("=")), + Pattern.create(.@",", match.literal(",")), + Pattern.create(.@".", match.literal(".")), + Pattern.create(.@"*", match.literal("*")), + Pattern.create(.@"+", match.literal("+")), + Pattern.create(.@":", match.literal(":")), + Pattern.create(.@";", match.literal(";")), + Pattern.create(.@"|", match.literal("|")), + Pattern.create(.@"!", match.literal("!")), + Pattern.create(.@"?", match.literal("?")), + Pattern.create(.@"[", match.literal("[")), + Pattern.create(.@"]", match.literal("]")), + Pattern.create(.@"(", match.literal("(")), + Pattern.create(.@")", match.literal(")")), + Pattern.create(.@"{", match.literal("{")), + Pattern.create(.@"}", match.literal("}")), + + // Whitespace is the "kitchen sink" at the end: + Pattern.create(.whitespace, match.takeAnyOf(" \r\n\t")), +}); + +/// Accepts a basic identifier without any prefix or suffix. 
+/// The regex that matches this pattern is roughly this: +/// +/// (@\"[^"]+\")|([A-Za-z_][A-Za-z0-9_\-]*) +/// +fn matchRawIdentifier(text: []const u8) usize { + if (text.len < 1) + return 0; + + if (std.mem.startsWith(u8, text, "@\"")) { + if (text.len < 3) + return 0; + + var i: usize = 2; // skip `@"` + while (i < text.len) : (i += 1) { + if (text[i] == '\"') + return i + 1; + if (text[i] == '\\') + i += 1; + } + + return 0; + } else { + const prefix_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"; + const suffix_chars = prefix_chars ++ "0123456789"; + const inner_chars = suffix_chars ++ "-"; + + if (std.mem.indexOfScalar(u8, prefix_chars, text[0]) == null) + return 0; // invalid start char + + // Suffix check is done in "postprocessing" by checking if any identifier ends with "-" + + var len: usize = 1; + while (len < text.len and std.mem.indexOfScalar(u8, inner_chars, text[len]) != null) { + len += 1; + } + + return len; + } + + return 0; +} + +test matchRawIdentifier { + try ptk.testing.validateMatcher(matchRawIdentifier, &.{ + // good: + "a", + "a-z", + "items10", + "_foo", + "_", + "_cheese-cake", + }, &.{ + // bad: + "-", + "-10", + "10", + "1-2", + "10items", + }); +} + +const matchNodeRef = match.sequenceOf(.{ match.literal("!"), matchRawIdentifier }); + +test matchNodeRef { + try ptk.testing.validateMatcher(matchNodeRef, &.{ + // good: + "!a", + "!foo_bar", + }, &.{ + // bad: + "a", + "!", + }); +} + +const matchRuleRef = match.sequenceOf(.{ match.literal("<"), matchRawIdentifier, match.literal(">") }); + +test matchRuleRef { + try ptk.testing.validateMatcher(matchRuleRef, &.{ + // good: + "", + "", + "", + "<@\"very exiting boy\">", + }, &.{ + // bad: + "", + }); +} + +const matchTokenRef = match.sequenceOf(.{ match.literal("$"), matchRawIdentifier }); + +test matchTokenRef { + try ptk.testing.validateMatcher(matchTokenRef, &.{ + // good: + "$token", + "$user-token", + "$user_token", + "$@\"wtf\"", + }, &.{ + // bad: + "$\"wtf\"", + "bad boy", + "bad-boy", + "$0", + "$100", + }); +} + +const matchValueRef = match.sequenceOf(.{ match.literal("$"), match.decimalNumber }); + +test matchValueRef { + try ptk.testing.validateMatcher(matchValueRef, &.{ + // good: + "$0", + "$10", + "$99999999", + }, &.{ + // bad: + "9", + "$", + "$foo", + }); +} + +const matchBuiltinRef = match.sequenceOf(.{ match.literal("@"), matchRawIdentifier }); + +test matchBuiltinRef { + try ptk.testing.validateMatcher(matchBuiltinRef, &.{ + // good: + "@token", + "@user-token", + "@user_token", + "@@\"wtf\"", + }, &.{ + // bad: + "@\"wtf\"", + "bad boy", + "bad-boy", + "@0", + "@100", + }); +} + +fn matchStringLiteral(text: []const u8) usize { + if (text.len < 2) + return 0; + + if (text[0] != '"') + return 0; + + var i: usize = 1; // skip `"` + while (i < text.len) : (i += 1) { + if (text[i] == '\"') + return i + 1; + if (text[i] == '\\') + i += 1; + } + + return 0; +} + +test matchStringLiteral { + try ptk.testing.validateMatcher(matchStringLiteral, &.{ + // good: + "\"\"", + "\"x\"", + "\" \"", + "\" hello \\\"world\\\"\"", + }, &.{ + // bad: + "\"", + "\"\\\"", + "\"", + "foo\"", + }); +} + +fn matchCodeLiteral(text: []const u8) usize { + var prefix_len: usize = 0; + while (prefix_len < text.len and text[prefix_len] == '`') { + prefix_len += 1; + } + + if (prefix_len == 0 or 2 * prefix_len >= text.len) + return 0; + + const body_len = std.mem.indexOf(u8, text[prefix_len..], text[0..prefix_len]) orelse return 0; + + return 2 * prefix_len + body_len; +} + +test matchCodeLiteral { + try 
ptk.testing.validateMatcher(matchCodeLiteral, &.{ + // good: + "`x`", + "`\"hello, World!\"`", + "`\n\n`", + "`\x00`", + "``you can write a `code` snippet like this!``", + }, &.{ + // bad: + "`", + "``", + "```hello, world!``", + }); +} + +test "parser string literal" { + const Test = struct { + pub fn run(expected: []const u8, code: []const u8) !void { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var diag = Diagnostics.init(std.testing.allocator); + defer diag.deinit(); + + var strings = try ptk.strings.Pool.init(std.testing.allocator); + defer strings.deinit(); + + var tokenizer = Tokenizer.init(code, "unittest"); + + var parser = Parser{ + .diagnostics = &diag, + .pool = &strings, + .core = ParserCore.init(&tokenizer), + .arena = arena.allocator(), + .trace_enabled = false, + }; + + const literal = try parser.acceptStringLiteral(.fail); + + const actual = strings.get(literal.value); + + try std.testing.expectEqualStrings(expected, actual); + } + }; + + // Empty string: + try Test.run("", + \\"" + ); + + // Regular string + try Test.run("hello, world!", + \\"hello, world!" + ); + + // Validate escape sequences: + try Test.run("\r", + \\"\r" + ); + try Test.run("\n", + \\"\n" + ); + try Test.run("\\", + \\"\\" + ); + try Test.run("\"", + \\"\"" + ); + try Test.run("\"hello, world!\"", + \\"\"hello, world!\"" + ); + try Test.run("A\'B", + \\"A\'B" + ); + // TODO: enable those tests for escape sequences! + // try Test.run("\x34", + // \\"\x34" + // ); + // try Test.run("A\xFFB", + // \\"A\xFFB" + // ); + // try Test.run("\x10\x22", + // \\"\x10\x22" + // ); + // try Test.run("A\x1BB", + // \\"A\033B" + // ); + // try Test.run("A\xFFB", + // \\"A\377B" + // ); + // try Test.run("A\x01B", + // \\"A\001B" + // ); + // try Test.run("[\u{1F4A9}]", + // \\"[\u{1F4A9}]" + // ); +} diff --git a/src/ptkgen/sema.zig b/src/ptkgen/sema.zig new file mode 100644 index 0000000..bd64364 --- /dev/null +++ b/src/ptkgen/sema.zig @@ -0,0 +1,1057 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); + +const logger = std.log.scoped(.ptk_sema); + +const ast = @import("ast.zig"); +const Diagnostics = @import("Diagnostics.zig"); + +pub const AnalyzeError = error{ OutOfMemory, SemanticError }; + +const String = ptk.strings.String; + +pub fn StringHashMap(comptime T: type) type { + return std.AutoArrayHashMap(String, T); +} + +pub const Grammar = struct { + arena: std.heap.ArenaAllocator, + + start: ?StartDeclaration, + + rules: StringHashMap(*Rule), + nodes: StringHashMap(*Node), + patterns: StringHashMap(*Pattern), + literal_patterns: StringHashMap(*Pattern), + + pub fn deinit(grammar: *Grammar) void { + grammar.rules.deinit(); + grammar.nodes.deinit(); + grammar.patterns.deinit(); + grammar.literal_patterns.deinit(); + grammar.arena.deinit(); + grammar.* = undefined; + } +}; + +pub const StartDeclaration = struct { + rule: *Rule, + location: ptk.Location, +}; + +pub const Rule = struct { + location: ptk.Location, + name: String, + + type: ?*Type, + productions: []MappedProduction, +}; + +/// A production of a rule that is able to map the parsed structure +/// into an AST node. +pub const MappedProduction = struct { + production: Production, + mapping: ?Mapping, +}; + +/// A production is a part of a grammar. Productions consume +/// tokens and generate structure from this. +pub const Production = union(enum) { + terminal: *Pattern, // literal and terminal ast nodes are wrapped to this + recursion: *Rule, // + sequence: []Production, // ... 
+ optional: *Production, // ( ... )? + repetition_zero: *Production, // [ ... ]* + repetition_one: *Production, // [ ... ]+ +}; + +pub const Mapping = union(enum) { + record_initializer: RecordInitializer, // { a = b, c = d, ... } + list_initializer: ListInitializer, // [ a, b, c, ... ] + variant_initializer: VariantInitializer, // field: ... + + user_function_call: FunctionCall, // @builtin(a,b,c) + builtin_function_call: FunctionCall, // identifier(a,b,c) + + code_literal: String, // `code` + user_literal: String, // @user_data + + context_reference: ContextReference, // $0 +}; + +pub const ContextReference = struct { + index: u32, + production: *Production, + type: *Type, +}; + +const RecordInitializer = struct { + type: *Type, + fields: []FieldInitializer, +}; + +const FieldInitializer = struct { + field: *Field, + value: Mapping, +}; + +const ListInitializer = struct { + type: *Type, + items: []Mapping, +}; + +const VariantInitializer = struct { + type: *Type, + field: *Field, + value: *Mapping, +}; + +const FunctionCall = struct { + return_type: ?*Type, + function: String, + arguments: []Mapping, +}; + +pub const Node = struct { + location: ptk.Location, + name: String, + + type: *Type, +}; + +pub const Pattern = struct { + location: ptk.Location, + name: String, + is_literal: bool, + data: Data, + + pub const Data = union(enum) { + literal_match: String, + word: String, + regex: String, + external: String, + }; +}; + +pub const Type = union(enum) { + // trivial types: + code_literal: String, + user_type: String, + + // anonymous compound types: + optional: *Type, + record: *CompoundType, + variant: *CompoundType, + + // ast nodes are basically "named types" and must be handled as such + named: *Node, + + // builtin types: + token, // points to a PTK token + + pub fn id(t: *const Type) TypeId { + return @as(TypeId, t.*); + } +}; + +pub const TypeId: type = std.meta.Tag(Type); + +pub const CompoundType = struct { + fields: StringHashMap(Field), +}; + +pub const Field = struct { + location: ptk.Location, + name: String, + type: *Type, +}; + +pub fn analyze(allocator: std.mem.Allocator, diagnostics: *Diagnostics, strings: *const ptk.strings.Pool, document: ast.Document) AnalyzeError!Grammar { + std.debug.assert(diagnostics.hasErrors() == false); + errdefer |err| if (err == error.SemanticError) + std.debug.assert(diagnostics.hasErrors()); + + var grammar = Grammar{ + .arena = std.heap.ArenaAllocator.init(allocator), + + .rules = StringHashMap(*Rule).init(allocator), + .nodes = StringHashMap(*Node).init(allocator), + .patterns = StringHashMap(*Pattern).init(allocator), + .literal_patterns = StringHashMap(*Pattern).init(allocator), + + .start = null, + }; + errdefer grammar.deinit(); + + var analyzer = Analyzer{ + .arena = grammar.arena.allocator(), + .diagnostics = diagnostics, + .strings = strings, + + .rule_to_ast = std.AutoHashMap(*Rule, *ast.Rule).init(allocator), + .node_to_ast = std.AutoHashMap(*Node, *ast.Node).init(allocator), + .pattern_to_ast = std.AutoHashMap(*Pattern, *ast.Pattern).init(allocator), + + .type_stash = Analyzer.TypeStash.init(allocator), + + .document = document, + + .target = &grammar, + }; + defer analyzer.deinit(); + + try innerAnalysis(&analyzer); + + if (grammar.start == null) { + try analyzer.emitDiagnostic(ptk.Location{ + .line = 0, + .column = 0, + .source = null, + }, .missing_start_symbol, .{}); + } + + return grammar; +} + +var BAD_TYPE_SENTINEL: Type = undefined; +var BAD_NODE_SENTINEL: Node = undefined; +var BAD_RULE_SENTINEL: Rule = undefined; +var 
BAD_PATTERN_SENTINEL: Pattern = undefined; +var BAD_PRODUCTION_SENTINEL: Production = undefined; +var BAD_FIELD_SENTINEL: Field = undefined; + +fn innerAnalysis(analyzer: *Analyzer) AnalyzeError!void { + // Phase 0: Validate productions on legality (coarse error checking) + // - Generates errors for badly constructed elements + try analyzer.validateAstRulesCoarse(); + + // Phase 1: Create all global declarations + // - Populates the declaration lookups + // - Generates errors for duplicate identifiers + try analyzer.createDeclarations(); + + // Phase 2: Instantiate all node types and patterns, determine start symbol + + try analyzer.iterateOn(.start, Analyzer.instantiateStartSymbol); + try analyzer.iterateOn(.pattern, Analyzer.instantiatePatterns); + try analyzer.iterateOn(.node, Analyzer.instantiateNodeTypes); + + // Phase 3: Validate generated types + try analyzer.iterateOn(.node, Analyzer.validateNodes); + + // Phase 4: Instantiate AST productions + try analyzer.iterateOn(.rule, Analyzer.instantiateRules); + + // Phase 5: Instantiate and validate AST mappings + try analyzer.iterateOn(.rule, Analyzer.instantiateMappings); // Create data structures + try analyzer.iterateOn(.rule, Analyzer.linkAndValidateMappedProductions); // Validate if data tr +} + +const Analyzer = struct { + const TypeStash = std.HashMap(*Type, void, TypeContext, std.hash_map.default_max_load_percentage); + + arena: std.mem.Allocator, + diagnostics: *Diagnostics, + strings: *const ptk.strings.Pool, + target: *Grammar, + + document: ast.Document, + + rule_to_ast: std.AutoHashMap(*Rule, *ast.Rule), + node_to_ast: std.AutoHashMap(*Node, *ast.Node), + pattern_to_ast: std.AutoHashMap(*Pattern, *ast.Pattern), + + type_stash: TypeStash, + + deduplicated_type_count: usize = 0, + + fn deinit(analyzer: *Analyzer) void { + analyzer.rule_to_ast.deinit(); + analyzer.node_to_ast.deinit(); + analyzer.pattern_to_ast.deinit(); + analyzer.type_stash.deinit(); + analyzer.* = undefined; + } + + const IterativeAnalysisError = error{RecoverableSemanticError} || AnalyzeError; + + fn iterateOn( + analyzer: *Analyzer, + comptime node_type: std.meta.FieldEnum(ast.TopLevelDeclaration), + comptime functor: fn (*Analyzer, *std.meta.FieldType(ast.TopLevelDeclaration, node_type)) IterativeAnalysisError!void, + ) AnalyzeError!void { + var iter = ast.iterate(analyzer.document); + while (iter.next()) |item| { + switch (item.*) { + @field(std.meta.Tag(ast.TopLevelDeclaration), @tagName(node_type)) => |*node| { + functor(analyzer, node) catch |err| switch (err) { + error.RecoverableSemanticError => {}, + else => |e| return e, + }; + }, + else => {}, + } + } + } + + fn validateAstRulesCoarse(analyzer: *Analyzer) !void { + var iter = ast.iterate(analyzer.document); + while (iter.next()) |item| { + switch (item.*) { + .start => |start| { + _ = start; + }, + + .rule => |rule| { + _ = rule; + }, + + .node => |node| { + _ = node; + }, + + .pattern => |pattern| { + _ = pattern; + }, + } + } + } + + /// Creates declarations in the target Grammar and makes sure all declared objects are reachable. + /// Emits diagnostics for duplicate declarations. 
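+    /// Only the name and location of each rule, node and pattern are filled in here;
+    /// their bodies are populated by the later instantiation phases.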
+ fn createDeclarations(analyzer: *Analyzer) !void { + var iter = ast.iterate(analyzer.document); + while (iter.next()) |item| { + switch (item.*) { + .start => {}, + + .rule => |*rule| { + const instance = try analyzer.declareElement( + Rule, + ast.Rule, + &analyzer.target.rules, + &analyzer.rule_to_ast, + rule, + rule.name, + .duplicate_identifier_rule, + ); + instance.* = .{ + .location = rule.name.location, + .name = rule.name.value, + + .type = undefined, // created in phase 4 + .productions = &.{}, // created in phase 5 + }; + }, + + .node => |*node| { + const instance = try analyzer.declareElement( + Node, + ast.Node, + &analyzer.target.nodes, + &analyzer.node_to_ast, + node, + node.name, + .duplicate_identifier_node, + ); + instance.* = .{ + .location = node.name.location, + .name = node.name.value, + + .type = undefined, // created in phase 2 + }; + }, + + .pattern => |*pattern| { + const instance = try analyzer.declareElement( + Pattern, + ast.Pattern, + &analyzer.target.patterns, + &analyzer.pattern_to_ast, + pattern, + pattern.name, + .duplicate_identifier_pattern, + ); + instance.* = .{ + .location = pattern.name.location, + .name = pattern.name.value, + .is_literal = false, + .data = undefined, // created in phase 2 + }; + }, + } + } + } + + /// Searches all start symbol declarations and stores a reference to the initial rule. + /// Will emit diagnostics for duplicate start symbol decls and invalid references. + fn instantiateStartSymbol(analyzer: *Analyzer, start: *ast.RuleRef) !void { + if (analyzer.target.start) |old_start| { + try analyzer.emitDiagnostic(start.location, .multiple_start_symbols, .{ + .identifier = analyzer.strings.get(old_start.rule.name), + .previous_location = old_start.location, + }); + // error return is further down below so we can also catch the undefined reference error + } + + const rule = analyzer.target.rules.get(start.identifier) orelse { + try analyzer.emitDiagnostic(start.location, .reference_to_undeclared_rule, .{ + .identifier = analyzer.strings.get(start.identifier), + }); + return error.RecoverableSemanticError; + }; + + if (analyzer.target.start != null) { + // return for the first if block + return error.RecoverableSemanticError; + } + + analyzer.target.start = .{ + .rule = rule, + .location = start.location, + }; + } + + /// Fully populate all content of the pattern declarations. Emits diagnostics for invalid patterns. + fn instantiatePatterns(analyzer: *Analyzer, ast_pattern: *ast.Pattern) !void { + const sema_pattern = analyzer.target.patterns.get(ast_pattern.name.value).?; + + sema_pattern.data = switch (ast_pattern.data) { + .literal => |value| .{ .literal_match = value.value }, + .word => |value| .{ .word = value.value }, + .regex => |value| .{ .regex = value.value }, + .external => |value| .{ .external = value.value }, + }; + + // TODO: Implement regex validation here! + } + + /// Instantiates and validates all node declarations. + /// Emits diagnostics for bad type declarations. 
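+    /// Only the type is resolved here; structural validation of the resolved types happens separately in `validateNodes` (phase 3).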
+ fn instantiateNodeTypes(analyzer: *Analyzer, ast_node: *ast.Node) !void { + const sema_node = analyzer.target.nodes.get(ast_node.name.value).?; + + sema_node.type = try analyzer.resolveType(&ast_node.value); + } + + fn validateNodes(analyzer: *Analyzer, ast_node: *ast.Node) !void { + const sema_node = analyzer.target.nodes.get(ast_node.name.value).?; + + try analyzer.validateType(sema_node.type); + } + + fn instantiateRules(analyzer: *Analyzer, ast_rule: *ast.Rule) !void { + const sema_rule = analyzer.target.rules.get(ast_rule.name.value).?; + + sema_rule.type = if (ast_rule.ast_type) |ast_type| + try analyzer.resolveType(&ast_type) + else + null; + + sema_rule.productions = try analyzer.target.arena.allocator().alloc(MappedProduction, ast_rule.productions.len()); + errdefer { + analyzer.target.arena.allocator().free(sema_rule.productions); + sema_rule.productions = &.{}; + } + + if (sema_rule.productions.len == 0) { + @panic("empty sema rule!"); + } + + var iter = ast.iterate(ast_rule.productions); + var index: usize = 0; + while (iter.next()) |ast_production| : (index += 1) { + const sema_production = &sema_rule.productions[index]; + + sema_production.* = MappedProduction{ + .production = try analyzer.translateProduction(ast_production.production), + .mapping = null, // Will be instantiated later + }; + } + } + + fn translateProduction(analyzer: *Analyzer, ast_prod: ast.Production) error{OutOfMemory}!Production { + switch (ast_prod) { + .literal => |literal| { + const gop = try analyzer.target.literal_patterns.getOrPut(literal.value); + if (!gop.found_existing) { + gop.value_ptr.* = try analyzer.target.arena.allocator().create(Pattern); + gop.value_ptr.*.* = .{ + .location = literal.location, // place of first use + .name = literal.value, + .data = .{ .literal_match = literal.value }, + .is_literal = true, + }; + } + return Production{ .terminal = gop.value_ptr.* }; + }, + .terminal => |terminal| { + if (analyzer.target.patterns.get(terminal.identifier)) |pattern| { + return Production{ .terminal = pattern }; + } else { + try analyzer.emitDiagnostic(terminal.location, .reference_to_undeclared_pattern, .{ + .identifier = analyzer.strings.get(terminal.identifier), + }); + return Production{ .terminal = &BAD_PATTERN_SENTINEL }; + } + }, + .recursion => |recursion| { + if (analyzer.target.rules.get(recursion.identifier)) |rule| { + return Production{ .recursion = rule }; + } else { + try analyzer.emitDiagnostic(recursion.location, .reference_to_undeclared_rule, .{ + .identifier = analyzer.strings.get(recursion.identifier), + }); + return Production{ .recursion = &BAD_RULE_SENTINEL }; + } + }, + .sequence => |sequence| { + if (sequence.len() == 0) + @panic("bad sequence: empty"); + + var seq = std.ArrayList(Production).init(analyzer.target.arena.allocator()); + defer seq.deinit(); + + try seq.ensureTotalCapacityPrecise(sequence.len()); + + var iter = ast.iterate(sequence); + while (iter.next()) |inner_prod| { + const inner_sema = try analyzer.translateProduction(inner_prod.*); + seq.appendAssumeCapacity(inner_sema); + } + + return Production{ + .sequence = seq.toOwnedSlice() catch @panic("bad capacity"), + }; + }, + .optional => |optional| { + const nested = try analyzer.target.arena.allocator().create(Production); + errdefer analyzer.target.arena.allocator().destroy(nested); + nested.* = try analyzer.translateProduction(.{ .sequence = optional }); + return .{ .optional = nested }; + }, + .repetition_zero => |repetition| { + const nested = try 
analyzer.target.arena.allocator().create(Production); + errdefer analyzer.target.arena.allocator().destroy(nested); + nested.* = try analyzer.translateProduction(.{ .sequence = repetition }); + return .{ .repetition_zero = nested }; + }, + .repetition_one => |repetition| { + const nested = try analyzer.target.arena.allocator().create(Production); + errdefer analyzer.target.arena.allocator().destroy(nested); + nested.* = try analyzer.translateProduction(.{ .sequence = repetition }); + return .{ .repetition_one = nested }; + }, + } + } + + fn instantiateMappings(analyzer: *Analyzer, ast_rule: *ast.Rule) !void { + const sem_rule: *Rule = analyzer.target.rules.get(ast_rule.name.value).?; + + var iter = ast.iterate(ast_rule.productions); + + for (sem_rule.productions) |*sem_prod| { + const ast_prod = iter.next().?; + sem_prod.mapping = if (ast_prod.mapping) |src_mapping| + try analyzer.translateMapping(src_mapping) + else + null; + } + std.debug.assert(iter.next() == null); + } + + fn translateMapping(analyzer: *Analyzer, ast_mapping: ast.AstMapping) error{OutOfMemory}!Mapping { + switch (ast_mapping) { + .literal => |ref| return Mapping{ .code_literal = ref.value }, + .user_reference => |ref| return Mapping{ .code_literal = ref.value }, + + .context_reference => |ast_context_reference| { + return Mapping{ + .context_reference = .{ + .index = ast_context_reference.index, + .production = &BAD_PRODUCTION_SENTINEL, + .type = &BAD_TYPE_SENTINEL, + }, + }; + }, + + inline .user_function_call, .function_call => |function_call| { + const function_name = function_call.function.value; + + var args = try analyzer.target.arena.allocator().alloc(Mapping, function_call.arguments.len()); + errdefer analyzer.target.arena.allocator().free(args); + + var iter = ast.iterate(function_call.arguments); + for (args) |*item| { + const src = iter.next().?; + item.* = try analyzer.translateMapping(src.*); + } + std.debug.assert(iter.next() == null); + + const fncall = FunctionCall{ + .arguments = args, + .function = function_name, + .return_type = null, + }; + + return switch (ast_mapping) { + .user_function_call => Mapping{ .user_function_call = fncall }, + .function_call => Mapping{ .builtin_function_call = fncall }, + else => unreachable, + }; + }, + + .variant => |ast_variant| { + const init_expr = try analyzer.translateMapping(ast_variant.value.*); + + // ast_variant.field.value + return Mapping{ + .variant_initializer = .{ + .type = &BAD_TYPE_SENTINEL, + .field = &BAD_FIELD_SENTINEL, + .value = try moveToHeap(&analyzer.target.arena, Mapping, init_expr), + }, + }; + }, + + .list => |ast_list| { + var items = try analyzer.target.arena.allocator().alloc(Mapping, ast_list.len()); + errdefer analyzer.target.arena.allocator().free(items); + + var iter = ast.iterate(ast_list); + for (items) |*item| { + const src = iter.next().?; + item.* = try analyzer.translateMapping(src.*); + } + std.debug.assert(iter.next() == null); + + return Mapping{ + .list_initializer = .{ + .items = items, + .type = &BAD_TYPE_SENTINEL, + }, + }; + }, + + .record => |ast_record| { + var fields = try analyzer.target.arena.allocator().alloc(FieldInitializer, ast_record.len()); + errdefer analyzer.target.arena.allocator().free(fields); + + var iter = ast.iterate(ast_record); + for (fields) |*item| { + const src = iter.next().?; + const field_name = src.field.value; + _ = field_name; + item.* = .{ + .field = &BAD_FIELD_SENTINEL, + .value = try analyzer.translateMapping(src.value.*), + }; + } + std.debug.assert(iter.next() == null); + + return 
Mapping{ + .record_initializer = .{ + .fields = fields, + .type = &BAD_TYPE_SENTINEL, + }, + }; + }, + } + } + + const TypeTransform = struct { + optional: bool = false, + sequence: bool = false, + + pub fn add(tt: TypeTransform, comptime field: enum { optional, sequence }) TypeTransform { + var copy = tt; + @field(copy, @tagName(field)) = true; + return copy; + } + + pub fn format(tt: TypeTransform, fmt: []const u8, opt: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = opt; + var list = std.BoundedArray([]const u8, 2){}; + + if (tt.optional) list.appendAssumeCapacity("opt"); + if (tt.sequence) list.appendAssumeCapacity("seq"); + + try writer.writeAll("TypeTransform("); + + if (list.len == 0) { + try writer.writeAll("none"); + } else { + for (list.slice(), 0..) |item, i| { + if (i > 0) + try writer.writeAll(","); + try writer.writeAll(item); + } + } + + try writer.writeAll(")"); + } + }; + + const IndexedProd = struct { + transform: TypeTransform, + production: *Production, + }; + + const ProductionIndex = std.ArrayList(IndexedProd); + + fn linkAndValidateMappedProductions(analyzer: *Analyzer, ast_rule: *ast.Rule) !void { + const sem_rule: *Rule = analyzer.target.rules.get(ast_rule.name.value).?; + + const has_any_mapping = for (sem_rule.productions) |prod| { + if (prod.mapping != null) + break true; + } else false; + + if (has_any_mapping and sem_rule.type == null) { + try analyzer.emitDiagnostic(sem_rule.location, .mapping_requires_typed_rule, .{}); + return; + } + + if (!has_any_mapping) { + // We're done here, nothing to link and validate. + return; + } + + const rule_type = sem_rule.type.?; + + var iter = ast.iterate(ast_rule.productions); + + var prod_index = ProductionIndex.init(analyzer.arena); + defer prod_index.deinit(); + + for (sem_rule.productions) |*sem_prod| { + const ast_prod = iter.next().?; + + if (ast_prod.mapping) |src_mapping| { + const dst_mapping = &sem_prod.mapping.?; + + // Rebuild index: + prod_index.shrinkRetainingCapacity(0); + try analyzer.rebuildProductionIndex(&prod_index, &sem_prod.production, .{}); + + std.debug.print("index:\n", .{}); + for (0.., prod_index.items) |index, item| { + std.debug.print("[{}]: {} {s}\n", .{ index, item.transform, @tagName(item.production.*) }); + } + + try analyzer.linkAndValidateMapping( + rule_type, + dst_mapping, + src_mapping, + prod_index.items, + ); + } else { + std.debug.assert(sem_prod.mapping == null); + } + } + + std.debug.assert(iter.next() == null); + } + + fn rebuildProductionIndex(analyzer: *Analyzer, prod_index: *ProductionIndex, production: *Production, transform: TypeTransform) error{OutOfMemory}!void { + switch (production.*) { + // Those are terminals and will be appended as-is: + .terminal => try prod_index.append(.{ .production = production, .transform = transform }), + .recursion => try prod_index.append(.{ .production = production, .transform = transform }), + + // Sequences are unwrapped: + .sequence => |list| for (list) |*inner_prod| { + try analyzer.rebuildProductionIndex(prod_index, inner_prod, transform); + }, + + // They just "recurse" into their inner workings, but annotate type changes: + .optional => |inner_prod| { + try analyzer.rebuildProductionIndex(prod_index, inner_prod, transform.add(.optional)); + }, + + .repetition_zero => |inner_prod| { + try analyzer.rebuildProductionIndex(prod_index, inner_prod, transform.add(.sequence)); + }, + + .repetition_one => |inner_prod| { + try analyzer.rebuildProductionIndex(prod_index, inner_prod, transform.add(.sequence)); + }, + } + } + + fn 
linkAndValidateMapping(analyzer: *Analyzer, type_context: *Type, sem_map: *Mapping, ast_map: ast.AstMapping, production_index: []const IndexedProd) !void { + _ = type_context; + + switch (sem_map.*) { + // Always fine, and terminate recursion: + .code_literal, .user_literal => {}, + + // Rule refs: + + .context_reference => |*context_reference| { + if (context_reference.index >= production_index.len) { + context_reference.production = &BAD_PRODUCTION_SENTINEL; + try analyzer.emitDiagnostic(ast_map.context_reference.location, .context_reference_out_of_bounds, .{ + .index = context_reference.index, + .limit = @as(u32, @truncate(production_index.len - 1)), // should never underflow as empty rules are illegal + }); + return; + } + + context_reference.production = production_index[context_reference.index].production; + + const base_type: *Type = switch (context_reference.production.*) { + // + .terminal => blk: { + var proto: Type = .token; + const canon = try analyzer.getCanonicalType(&proto); + std.debug.assert(canon != &proto); + break :blk canon; + }, + + // Invocations of other + .recursion => |rule| rule.type, + + .sequence, + .optional, + .repetition_zero, + .repetition_one, + => unreachable, // we should not be able to reach those + + }; + + // TODO: Transform type for context reference + + context_reference.type = base_type; + }, + + // Calls: + + .user_function_call => |*user_function_call| { + _ = user_function_call; + }, + + .builtin_function_call => |*builtin_function_call| { + _ = builtin_function_call; + }, + + // Compounds: + + .record_initializer => |*record_initializer| { + _ = record_initializer; + }, + + .list_initializer => |*list_initializer| { + _ = list_initializer; + }, + + .variant_initializer => |*variant_initializer| { + _ = variant_initializer; + }, + } + } + + /// Checks if the given type is semantically ok or emits compiler errors if not. + fn validateType(analyzer: *Analyzer, type_node: *Type) error{OutOfMemory}!void { + if (type_node == &BAD_TYPE_SENTINEL) { + @panic("bad sentinel"); + } + + switch (type_node.*) { + .code_literal, .user_type => {}, // always fine + .optional => |child_type| try analyzer.validateType(child_type), + .record, .variant => |compound_type| { + var fields = compound_type.fields.iterator(); + while (fields.next()) |kv| { + const field_type = kv.value_ptr.type; + try analyzer.validateType(field_type); + } + }, + .named => |node| { + if (node == &BAD_NODE_SENTINEL) { + @panic("bad node!"); + } + }, + } + } + + /// Constructs a new compound type from the given AST declaration. Will emit diagnostics + /// on error and returns an incomplete type if errors happened. 
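+    /// Duplicate field names are reported via `duplicate_compound_field`; only the first declaration of a field is kept.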
+    fn createCompoundType(analyzer: *Analyzer, def: ast.CompoundType) !*CompoundType {
+        const ct = try analyzer.target.arena.allocator().create(CompoundType);
+        errdefer analyzer.target.arena.allocator().destroy(ct);
+
+        ct.* = CompoundType{
+            .fields = StringHashMap(Field).init(analyzer.target.arena.allocator()),
+        };
+        errdefer ct.fields.deinit();
+
+        try ct.fields.ensureTotalCapacity(def.fields.len());
+
+        var iter = ast.iterate(def.fields);
+        while (iter.next()) |field_def| {
+            const field_type = try analyzer.resolveType(&field_def.type);
+            const gop_result = ct.fields.getOrPutAssumeCapacity(field_def.name.value);
+
+            if (gop_result.found_existing) {
+                try analyzer.emitDiagnostic(field_def.location, .duplicate_compound_field, .{
+                    .previous_location = gop_result.value_ptr.location,
+                    .identifier = analyzer.strings.get(field_def.name.value),
+                });
+                continue;
+            }
+
+            gop_result.value_ptr.* = .{
+                .type = field_type,
+                .location = field_def.location,
+                .name = field_def.name.value,
+            };
+        }
+
+        return ct;
+    }
+
+    fn destroyCompoundType(analyzer: *Analyzer, ct: *CompoundType) void {
+        ct.fields.deinit();
+        ct.* = undefined;
+        analyzer.target.arena.allocator().destroy(ct);
+    }
+
+    fn resolveType(analyzer: *Analyzer, type_node: *const ast.TypeSpec) error{OutOfMemory}!*Type {
+        var compound_type: ?*CompoundType = null;
+        var proto_type: Type = switch (type_node.*) {
+            .reference => |def| .{
+                .named = analyzer.target.nodes.get(def.identifier) orelse blk: {
+                    try analyzer.emitDiagnostic(def.location, .reference_to_undeclared_node, .{
+                        .identifier = analyzer.strings.get(def.identifier),
+                    });
+                    break :blk &BAD_NODE_SENTINEL;
+                },
+            },
+            .literal => |def| Type{ .code_literal = def.value },
+            .custom => |def| Type{ .user_type = def.value },
+            .record => |def| blk: {
+                compound_type = try analyzer.createCompoundType(def);
+                break :blk .{ .record = compound_type.? };
+            },
+            .variant => |def| blk: {
+                compound_type = try analyzer.createCompoundType(def);
+                break :blk .{ .variant = compound_type.? };
+            },
+        };
+        errdefer if (compound_type) |ct|
+            analyzer.destroyCompoundType(ct);
+
+        return try analyzer.getCanonicalType(&proto_type);
+    }
+
+    fn getCanonicalType(analyzer: *Analyzer, proto_type: *Type) error{OutOfMemory}!*Type {
+        if (analyzer.getUniqueTypeHandle(proto_type)) |resolved_type| {
+            analyzer.deduplicated_type_count += 1;
+            // logger.debug("deduplicated a {s}", .{@tagName(resolved_type.*)});
+            return resolved_type;
+        }
+
+        const new_type = try analyzer.target.arena.allocator().create(Type);
+        errdefer analyzer.target.arena.allocator().destroy(new_type);
+
+        new_type.* = proto_type.*;
+
+        try analyzer.type_stash.putNoClobber(new_type, {});
+
+        return new_type;
+    }
+
+    fn getUniqueTypeHandle(analyzer: Analyzer, proto_type: *Type) ?*Type {
+        if (analyzer.type_stash.getKey(proto_type)) |key| {
+            return key;
+        }
+        return null;
+    }
+
+    const DeclarationError = error{
+        OutOfMemory,
+        SemanticError,
+    };
+    fn declareElement(
+        analyzer: *Analyzer,
+        comptime Element: type,
+        comptime AstNode: type,
+        set: *StringHashMap(*Element),
+        ast_map: *std.AutoHashMap(*Element, *AstNode),
+        ast_node: *AstNode,
+        name: ast.Identifier,
+        comptime diagnostic: Diagnostics.Code,
+    ) DeclarationError!*Element {
+        const gop = try set.getOrPut(name.value);
+        if (gop.found_existing) {
+            // emit diagnostic here
+            try analyzer.emitDiagnostic(name.location, diagnostic, .{
+                .identifier = analyzer.strings.get(name.value),
+                .previous_location = gop.value_ptr.*.*.location,
+            });
+            return error.SemanticError;
+        }
+        errdefer _ = set.swapRemove(name.value);
+
+        const item = try analyzer.arena.create(Element);
+        errdefer analyzer.arena.destroy(item);
+
+        item.* = undefined;
+
+        gop.value_ptr.* = item;
+
+        try ast_map.putNoClobber(item, ast_node);
+
+        return item;
+    }
+
+    fn emitDiagnostic(analyzer: *Analyzer, location: ptk.Location, comptime code: Diagnostics.Code, params: Diagnostics.Data(code)) !void {
+        try analyzer.diagnostics.emit(location, code, params);
+    }
+};
+
+const TypeContext = struct {
+    const HashFn = std.hash.Fnv1a_64;
+
+    pub fn eql(ctx: TypeContext, lhs: *Type, rhs: *Type) bool {
+        _ = ctx;
+        if (lhs == rhs)
+            return true;
+        if (lhs.id() != rhs.id())
+            return false;
+        switch (lhs.*) {
+            inline .code_literal, .user_type, .optional, .named => |val, tag| return val == @field(rhs, @tagName(tag)),
+            .record, .variant => return false, // they are compared by identity only
+        }
+    }
+
+    pub fn hash(ctx: TypeContext, t: *Type) u64 {
+        _ = ctx;
+        var hasher = HashFn.init();
+        hasher.update(@tagName(t.*));
+        switch (t.*) {
+            .code_literal => |lit| hasher.update(&std.mem.toBytes(@intFromEnum(lit))),
+            .user_type => |lit| hasher.update(&std.mem.toBytes(@intFromEnum(lit))),
+            .optional => |child| hasher.update(&std.mem.toBytes(child)),
+            .named => |node| hasher.update(&std.mem.toBytes(node)),
+            .record, .variant => hasher.update(&std.mem.toBytes(t)),
+        }
+        return hasher.final();
+    }
+};
+
+fn moveToHeap(arena: *std.heap.ArenaAllocator, comptime T: type, template: T) error{OutOfMemory}!*T {
+    const dupe = try arena.allocator().create(T);
+    dupe.* = template;
+    return dupe;
+}
+
+pub const BuiltinFunction = struct {
+    name: []const u8,
+};
+
+pub const builtins = struct {
+    pub const foo = BuiltinFunction{ .name = "foo" };
+};
diff --git a/src/Diagnostics.zig b/src/toolkit/Diagnostics.zig
similarity index 84%
rename from src/Diagnostics.zig
rename to src/toolkit/Diagnostics.zig
index 0a93c19..bf3a842
--- a/src/Diagnostics.zig
+++ b/src/toolkit/Diagnostics.zig
@@ -38,8 +38,15 @@ pub fn emit(self: *Self,
location: Location, level: Error.Level, comptime fmt: [ const str = try std.fmt.allocPrintZ(allocator, fmt, args); errdefer allocator.free(str); + var cloned_location = location; + if (location.source) |source| { + cloned_location.source = try allocator.dupe(u8, source); + } + errdefer if (cloned_location.source) |source| + allocator.free(source); + try self.errors.append(allocator, Error{ - .location = location, + .location = cloned_location, .level = level, .message = str, }); diff --git a/src/Error.zig b/src/toolkit/Error.zig similarity index 100% rename from src/Error.zig rename to src/toolkit/Error.zig diff --git a/src/Location.zig b/src/toolkit/Location.zig similarity index 100% rename from src/Location.zig rename to src/toolkit/Location.zig diff --git a/src/StringCache.zig b/src/toolkit/StringCache.zig similarity index 100% rename from src/StringCache.zig rename to src/toolkit/StringCache.zig diff --git a/src/main.zig b/src/toolkit/main.zig similarity index 78% rename from src/main.zig rename to src/toolkit/main.zig index 784dec5..9a5d40b 100644 --- a/src/main.zig +++ b/src/toolkit/main.zig @@ -17,9 +17,15 @@ pub const RuleSet = pcore.RuleSet; pub const Error = @import("Error.zig"); pub const Diagnostics = @import("Diagnostics.zig"); pub const StringCache = @import("StringCache.zig"); +pub const strings = @import("strings.zig"); -test { +pub const testing = struct { + pub const validateMatcher = tok.testMatcher; +}; + +comptime { _ = Location; _ = tok; _ = pcore; + _ = strings; } diff --git a/src/parser_core.zig b/src/toolkit/parser_core.zig similarity index 99% rename from src/parser_core.zig rename to src/toolkit/parser_core.zig index 394d679..9bfcf42 100644 --- a/src/parser_core.zig +++ b/src/toolkit/parser_core.zig @@ -52,6 +52,7 @@ pub fn ParserCore(comptime TokenizerT: type, comptime ignore_list: anytype) type } pub const AcceptError = error{ EndOfStream, UnexpectedToken } || Tokenizer.NextError; + /// Accepts a token that matches `rule`. Otherwise returns /// - `error.EndOfStream` when no tokens are available /// - `error.UnexpectedToken` when an invalid token was encountered diff --git a/src/toolkit/strings.zig b/src/toolkit/strings.zig new file mode 100644 index 0000000..9c41933 --- /dev/null +++ b/src/toolkit/strings.zig @@ -0,0 +1,156 @@ +pub const std = @import("std"); + +pub const String = enum(u32) { + empty, + + _, + + pub fn format(string: String, fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + if (string == .empty) { + try writer.writeAll("String(empty)"); + } else { + try writer.print("String({})", .{ + @intFromEnum(string), + }); + } + } +}; + +/// A string pool that can store up to 4 GB of text and deduplicate instances. +/// +/// Use this to reduce the memory footprint of your AST and allow quick comparison of strings +/// by using the `String` type instead of doing a `std.mem.eql`. +pub const Pool = struct { + data: std.ArrayList(u8), + count: usize = 0, + + pub fn init(allocator: std.mem.Allocator) !Pool { + var pool = Pool{ + .data = std.ArrayList(u8).init(allocator), + }; + errdefer pool.deinit(); + + std.debug.assert(try pool.insert("") == .empty); + + return pool; + } + + pub fn deinit(pool: *Pool) void { + pool.data.deinit(); + pool.* = undefined; + } + + pub fn insert(pool: *Pool, string: []const u8) error{OutOfMemory}!String { + std.debug.assert(std.mem.indexOfScalar(u8, string, 0) == null); // Interned strings must not contain NUL! 
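+        // Strings are stored back to back, each terminated by a NUL byte; try to reuse the offset of an existing occurrence before appending new data.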
+
+        const storage = pool.data.items;
+
+        var search_index: usize = 0;
+        while (search_index < storage.len) {
+            const index = std.mem.indexOfPos(u8, storage, search_index, string) orelse break;
+
+            if (index + string.len + 1 > storage.len)
+                break;
+
+            if (storage[index + string.len] == 0)
+                return @enumFromInt(index);
+
+            // starts with `string`, but doesn't end with NUL.
+            search_index = index + string.len;
+        }
+
+        const index = storage.len;
+
+        if (index > std.math.maxInt(u32)) {
+            return error.OutOfMemory;
+        }
+
+        try pool.data.ensureUnusedCapacity(string.len + 1); // invalidates storage
+        pool.data.appendSliceAssumeCapacity(string);
+        pool.data.appendAssumeCapacity(0);
+        pool.count += 1;
+
+        return @enumFromInt(index);
+    }
+
+    /// Returns the string in the pool.
+    pub fn get(pool: *const Pool, string: String) [:0]const u8 {
+        const storage = pool.data.items;
+        const index: usize = @intFromEnum(string);
+        std.debug.assert(index < storage.len);
+        const slice = std.mem.sliceTo(storage[index..], 0);
+        return slice.ptr[0..slice.len :0];
+    }
+
+    pub fn format(pool: Pool, fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
+        _ = fmt;
+        _ = options;
+        try writer.print("StringPool(count={}, size={:.2})", .{
+            pool.count,
+            std.fmt.fmtIntSizeBin(pool.data.items.len),
+        });
+    }
+};
+
+/// Very simplistic string deduplicator that returns the same slice for equal strings.
+/// It only performs deduplication, no fancy storage strategy.
+pub const Dedupe = struct {
+    arena: std.heap.ArenaAllocator,
+    items: std.StringHashMapUnmanaged(void),
+
+    pub fn init(allocator: std.mem.Allocator) Dedupe {
+        return Dedupe{
+            .arena = std.heap.ArenaAllocator.init(allocator),
+            .items = .{},
+        };
+    }
+
+    pub fn deinit(cache: *Dedupe) void {
+        cache.items.deinit(cache.arena.child_allocator);
+        cache.arena.deinit();
+        cache.* = undefined;
+    }
+
+    /// Gets or inserts a string into the cache. `string` may be a short-lived value;
+    /// the returned slice is guaranteed to have the lifetime of the string cache.
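+    /// Internally the map is keyed on the caller's slice via `getOrPut`; on first insertion the key is replaced with an arena-owned copy.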
+ pub fn fetch(cache: *Dedupe, string: []const u8) ![]const u8 { + const allocator = cache.arena.child_allocator; + const gop = try cache.items.getOrPut(allocator, string); + if (!gop.found_existing) { + errdefer _ = cache.items.remove(string); + gop.key_ptr.* = try cache.arena.allocator().dupe(u8, string); + } + return gop.key_ptr.*; + } +}; + +test Pool { + var pool = try Pool.init(std.testing.allocator); + defer pool.deinit(); + + try std.testing.expectEqualStrings("", pool.get(.empty)); + + try std.testing.expectEqual(String.empty, try pool.insert("")); + + const a = try pool.insert("hello, world!"); + const b = try pool.insert("world!"); // suffix of a + const c = try pool.insert("world"); // non-suffix + + // All strings must be unique: + try std.testing.expect(a != b); + try std.testing.expect(a != c); + try std.testing.expect(b != c); + + // But must retain their qualities: + try std.testing.expectEqualStrings("hello, world!", pool.get(a)); + try std.testing.expectEqualStrings("world!", pool.get(b)); + try std.testing.expectEqualStrings("world", pool.get(c)); + + // sequential inserts may never return different values: + try std.testing.expectEqual(a, try pool.insert("hello, world!")); + try std.testing.expectEqual(a, try pool.insert("hello, world!")); + try std.testing.expectEqual(a, try pool.insert("hello, world!")); + try std.testing.expectEqual(a, try pool.insert("hello, world!")); +} diff --git a/src/token.zig b/src/toolkit/token.zig similarity index 56% rename from src/token.zig rename to src/toolkit/token.zig index 60ae8fa..028272c 100644 --- a/src/token.zig +++ b/src/toolkit/token.zig @@ -14,5 +14,15 @@ pub fn Token(comptime Type: type) type { /// The type of the token that was matched by a matching function type: Type, + + pub fn format(token: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try writer.print("Token {{ .type = {}, .text = \"{}\", .location = {} }}", .{ + token.type, + std.zig.fmtEscapes(token.text), + token.location, + }); + } }; } diff --git a/src/tokenizer.zig b/src/toolkit/tokenizer.zig similarity index 89% rename from src/tokenizer.zig rename to src/toolkit/tokenizer.zig index 1ee859c..ec20f18 100644 --- a/src/tokenizer.zig +++ b/src/toolkit/tokenizer.zig @@ -3,7 +3,9 @@ const std = @import("std"); const Location = @import("Location.zig"); const GenericToken = @import("token.zig").Token; -pub const Matcher = *const fn (str: []const u8) ?usize; +/// This is a function that will either accept a `text` as a token +/// of a non-zero length or returns `0` if the text does not match the token. +pub const Matcher = *const fn (text: []const u8) usize; pub fn Pattern(comptime TokenType: type) type { return struct { @@ -66,14 +68,13 @@ pub fn Tokenizer(comptime TokenTypeT: type, comptime patterns: []const Pattern(T if (rest.len == 0) return null; const maybe_token = for (patterns) |pat| { - if (pat.match(rest)) |len| { - if (len > 0) { - break Token{ - .location = self.current_location, - .text = rest[0..len], - .type = pat.type, - }; - } + const len = pat.match(rest); + if (len > 0) { + break Token{ + .location = self.current_location, + .text = rest[0..len], + .type = pat.type, + }; } } else null; if (maybe_token) |token| { @@ -91,11 +92,11 @@ pub const matchers = struct { /// Matches the literal `text`. 
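+    /// Returns `text.len` when `str` starts with `text`, otherwise 0.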
pub fn literal(comptime text: []const u8) Matcher { return struct { - fn match(str: []const u8) ?usize { + fn match(str: []const u8) usize { return if (std.mem.startsWith(u8, str, text)) text.len else - null; + 0; } }.match; } @@ -103,17 +104,17 @@ pub const matchers = struct { /// Matches any "word" that is "text\b" pub fn word(comptime text: []const u8) Matcher { return struct { - fn match(input: []const u8) ?usize { + fn match(input: []const u8) usize { if (std.mem.startsWith(u8, input, text)) { if (text.len == input.len) return text.len; const c = input[text.len]; if (std.ascii.isAlphanumeric(c) or (c == '_')) // matches regex \w\W - return null; + return 0; return text.len; } - return null; + return 0; } }.match; } @@ -121,7 +122,7 @@ pub const matchers = struct { /// Takes characters while they are any of the given `chars`. pub fn takeAnyOf(comptime chars: []const u8) Matcher { return struct { - fn match(str: []const u8) ?usize { + fn match(str: []const u8) usize { for (str, 0..) |c, i| { if (std.mem.indexOfScalar(u8, chars, c) == null) { return i; @@ -140,7 +141,7 @@ pub const matchers = struct { }; return struct { - fn match(str: []const u8) ?usize { + fn match(str: []const u8) usize { for (str, 0..) |c, i| { const lc = std.ascii.toLower(c); if (std.mem.indexOfScalar(u8, lower_chars, lc) == null) { @@ -155,7 +156,7 @@ pub const matchers = struct { /// Takes characters while they are not any of the given `chars`. pub fn takeNoneOf(comptime chars: []const u8) Matcher { return struct { - fn match(str: []const u8) ?usize { + fn match(str: []const u8) usize { for (str, 0..) |c, i| { if (std.mem.indexOfScalar(u8, chars, c) != null) { return i; @@ -168,10 +169,12 @@ pub const matchers = struct { pub fn withPrefix(comptime prefix: []const u8, comptime matcher: Matcher) Matcher { return struct { - fn match(str: []const u8) ?usize { + fn match(str: []const u8) usize { if (!std.mem.startsWith(u8, str, prefix)) - return null; - const pattern_len = matcher(str[prefix.len..]) orelse return null; + return 0; + const pattern_len = matcher(str[prefix.len..]); + if (pattern_len == 0) + return 0; return prefix.len + pattern_len; } }.match; @@ -183,12 +186,12 @@ pub const matchers = struct { if (sequence.len == 0) @compileError("Empty sequence not allowed!"); return struct { - fn match(input: []const u8) ?usize { + fn match(input: []const u8) usize { var total_len: usize = 0; for (sequence) |seq_match| { - const len = seq_match(input[total_len..]) orelse return null; + const len = seq_match(input[total_len..]); if (len == 0) - return null; + return 0; total_len += len; } return total_len; @@ -198,7 +201,7 @@ pub const matchers = struct { // pre-shipped typical patterns - pub fn identifier(str: []const u8) ?usize { + pub fn identifier(str: []const u8) usize { const first_char = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; const all_chars = first_char ++ "0123456789"; for (str, 0..) |c, i| { @@ -209,7 +212,7 @@ pub const matchers = struct { return str.len; } - pub fn whitespace(str: []const u8) ?usize { + pub fn whitespace(str: []const u8) usize { for (str, 0..) 
|c, i| { if (!std.ascii.isWhitespace(c)) return i; @@ -217,12 +220,12 @@ pub const matchers = struct { return str.len; } - pub fn linefeed(str: []const u8) ?usize { + pub fn linefeed(str: []const u8) usize { if (std.mem.startsWith(u8, str, "\r\n")) return 2; if (std.mem.startsWith(u8, str, "\n")) return 1; - return null; + return 0; } pub fn numberOfBase(comptime base: comptime_int) Matcher { @@ -321,12 +324,11 @@ test "save/restore tokenization" { try std.testing.expectEqual(Location{ .source = null, .line = 2, .column = 1 }, id1.location); } -fn testMatcher(match: Matcher, good: []const []const u8, bad: []const []const u8) !void { +pub fn testMatcher(match: Matcher, good: []const []const u8, bad: []const []const u8) !void { + std.debug.assert(good.len > 0); + std.debug.assert(bad.len > 0); for (good) |str| { - const v = match(str) orelse { - std.log.err("Didn't match pattern '{s}'", .{str}); - return error.MissedGoodPattern; - }; + const v = match(str); if (v == 0) { std.log.err("Didn't match pattern '{s}'", .{str}); return error.MissedGoodPattern; @@ -334,7 +336,7 @@ fn testMatcher(match: Matcher, good: []const []const u8, bad: []const []const u8 } for (bad) |str| { const v = match(str); - if (v != null and v.? > 0) { + if (v > 0) { std.log.err("Matched pattern '{s}'", .{str}); return error.MissedBadPattern; } diff --git a/test/analysis/accept/expect-warn-missing-start.ptk b/test/analysis/accept/expect-warn-missing-start.ptk new file mode 100644 index 0000000..f31365e --- /dev/null +++ b/test/analysis/accept/expect-warn-missing-start.ptk @@ -0,0 +1 @@ +# expected: W4000 diff --git a/test/analysis/accept/map-simple-builtin-fncall-0.ptk b/test/analysis/accept/map-simple-builtin-fncall-0.ptk new file mode 100644 index 0000000..b4d4eec --- /dev/null +++ b/test/analysis/accept/map-simple-builtin-fncall-0.ptk @@ -0,0 +1 @@ +rule basic = "hello" => builtin(); \ No newline at end of file diff --git a/test/analysis/accept/map-simple-builtin-fncall-1.ptk b/test/analysis/accept/map-simple-builtin-fncall-1.ptk new file mode 100644 index 0000000..21ebc7f --- /dev/null +++ b/test/analysis/accept/map-simple-builtin-fncall-1.ptk @@ -0,0 +1 @@ +rule basic = "hello" => builtin(`1`); \ No newline at end of file diff --git a/test/analysis/accept/map-simple-builtin-fncall-4.ptk b/test/analysis/accept/map-simple-builtin-fncall-4.ptk new file mode 100644 index 0000000..09e4372 --- /dev/null +++ b/test/analysis/accept/map-simple-builtin-fncall-4.ptk @@ -0,0 +1 @@ +rule basic = "hello" => builtin(`1`, `2`, `3`, `4`); \ No newline at end of file diff --git a/test/analysis/accept/map-simple-code-literal.ptk b/test/analysis/accept/map-simple-code-literal.ptk new file mode 100644 index 0000000..475f0a4 --- /dev/null +++ b/test/analysis/accept/map-simple-code-literal.ptk @@ -0,0 +1 @@ +rule basic = "hello" => `code`; \ No newline at end of file diff --git a/test/analysis/accept/map-simple-list-0.ptk b/test/analysis/accept/map-simple-list-0.ptk new file mode 100644 index 0000000..dffe97f --- /dev/null +++ b/test/analysis/accept/map-simple-list-0.ptk @@ -0,0 +1 @@ +rule basic = "hello" => { }; \ No newline at end of file diff --git a/test/analysis/accept/map-simple-list-1.ptk b/test/analysis/accept/map-simple-list-1.ptk new file mode 100644 index 0000000..ab4e2c2 --- /dev/null +++ b/test/analysis/accept/map-simple-list-1.ptk @@ -0,0 +1 @@ +rule basic = "hello" => { `1` }; \ No newline at end of file diff --git a/test/analysis/accept/map-simple-list-4.ptk b/test/analysis/accept/map-simple-list-4.ptk new file mode 
100644 index 0000000..3f970b9 --- /dev/null +++ b/test/analysis/accept/map-simple-list-4.ptk @@ -0,0 +1 @@ +rule basic = "hello" => { `1`, `2`, `3`, `4` }; \ No newline at end of file diff --git a/test/analysis/accept/map-simple-record-0.ptk b/test/analysis/accept/map-simple-record-0.ptk new file mode 100644 index 0000000..8f1a98c --- /dev/null +++ b/test/analysis/accept/map-simple-record-0.ptk @@ -0,0 +1 @@ +rule basic = "hello" => { }; diff --git a/test/analysis/accept/map-simple-record-1.ptk b/test/analysis/accept/map-simple-record-1.ptk new file mode 100644 index 0000000..4cf6bfd --- /dev/null +++ b/test/analysis/accept/map-simple-record-1.ptk @@ -0,0 +1 @@ +rule basic = "hello" => { field = `1` }; diff --git a/test/analysis/accept/map-simple-record-4.ptk b/test/analysis/accept/map-simple-record-4.ptk new file mode 100644 index 0000000..5f03773 --- /dev/null +++ b/test/analysis/accept/map-simple-record-4.ptk @@ -0,0 +1 @@ +rule basic = "hello" => { x = `1`, y = `2`, z = `3`, w = `4` }; diff --git a/test/analysis/accept/map-simple-ruleref.ptk b/test/analysis/accept/map-simple-ruleref.ptk new file mode 100644 index 0000000..4e0bc07 --- /dev/null +++ b/test/analysis/accept/map-simple-ruleref.ptk @@ -0,0 +1 @@ +rule basic = "hello" => $0; \ No newline at end of file diff --git a/test/analysis/accept/map-simple-user-fncall-0.ptk b/test/analysis/accept/map-simple-user-fncall-0.ptk new file mode 100644 index 0000000..82eb16e --- /dev/null +++ b/test/analysis/accept/map-simple-user-fncall-0.ptk @@ -0,0 +1 @@ +rule basic = "hello" => @userFn(); \ No newline at end of file diff --git a/test/analysis/accept/map-simple-user-fncall-1.ptk b/test/analysis/accept/map-simple-user-fncall-1.ptk new file mode 100644 index 0000000..b6b55fe --- /dev/null +++ b/test/analysis/accept/map-simple-user-fncall-1.ptk @@ -0,0 +1 @@ +rule basic = "hello" => @userFn(`1`); \ No newline at end of file diff --git a/test/analysis/accept/map-simple-user-fncall-4.ptk b/test/analysis/accept/map-simple-user-fncall-4.ptk new file mode 100644 index 0000000..ab0bcb2 --- /dev/null +++ b/test/analysis/accept/map-simple-user-fncall-4.ptk @@ -0,0 +1 @@ +rule basic = "hello" => @userFn(`1`, `2`, `3`, `4`); \ No newline at end of file diff --git a/test/analysis/accept/map-simple-user-literal.ptk b/test/analysis/accept/map-simple-user-literal.ptk new file mode 100644 index 0000000..afef9ad --- /dev/null +++ b/test/analysis/accept/map-simple-user-literal.ptk @@ -0,0 +1 @@ +rule basic = "hello" => @externalThingy; \ No newline at end of file diff --git a/test/analysis/accept/map-simple-variant.ptk b/test/analysis/accept/map-simple-variant.ptk new file mode 100644 index 0000000..229b3cb --- /dev/null +++ b/test/analysis/accept/map-simple-variant.ptk @@ -0,0 +1 @@ +rule basic = "hello" => field: `code`; \ No newline at end of file diff --git a/test/analysis/accept/match-group-many-item.ptk b/test/analysis/accept/match-group-many-item.ptk new file mode 100644 index 0000000..5e1e31f --- /dev/null +++ b/test/analysis/accept/match-group-many-item.ptk @@ -0,0 +1 @@ +rule mode = ( "first" "second" "third" ); \ No newline at end of file diff --git a/test/analysis/accept/match-group-many-sequence.ptk b/test/analysis/accept/match-group-many-sequence.ptk new file mode 100644 index 0000000..40902e7 --- /dev/null +++ b/test/analysis/accept/match-group-many-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "one" "two" "three" ) "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-group-nested.ptk 
b/test/analysis/accept/match-group-nested.ptk new file mode 100644 index 0000000..d35091c --- /dev/null +++ b/test/analysis/accept/match-group-nested.ptk @@ -0,0 +1 @@ +rule mode = "L0:0" ( "L1:0" ( "L2:0" "L2:1" "L2:2" ) "L1:2" ) "L0:2"; \ No newline at end of file diff --git a/test/analysis/accept/match-group-one-item.ptk b/test/analysis/accept/match-group-one-item.ptk new file mode 100644 index 0000000..faa24e7 --- /dev/null +++ b/test/analysis/accept/match-group-one-item.ptk @@ -0,0 +1 @@ +rule mode = ( "item" ); \ No newline at end of file diff --git a/test/analysis/accept/match-group-one-sequence.ptk b/test/analysis/accept/match-group-one-sequence.ptk new file mode 100644 index 0000000..e34f909 --- /dev/null +++ b/test/analysis/accept/match-group-one-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "second" ) "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-literal-rule.ptk b/test/analysis/accept/match-literal-rule.ptk new file mode 100644 index 0000000..a0b8dc0 --- /dev/null +++ b/test/analysis/accept/match-literal-rule.ptk @@ -0,0 +1 @@ +rule basic = "basic"; \ No newline at end of file diff --git a/test/analysis/accept/match-literal-sequence-variant.ptk b/test/analysis/accept/match-literal-sequence-variant.ptk new file mode 100644 index 0000000..842274e --- /dev/null +++ b/test/analysis/accept/match-literal-sequence-variant.ptk @@ -0,0 +1,4 @@ +rule mode = + "basic" "item" + | "extended" "item" +; \ No newline at end of file diff --git a/test/analysis/accept/match-literal-sequence.ptk b/test/analysis/accept/match-literal-sequence.ptk new file mode 100644 index 0000000..245add7 --- /dev/null +++ b/test/analysis/accept/match-literal-sequence.ptk @@ -0,0 +1 @@ +rule basic = "basic" "words" "after" "another"; \ No newline at end of file diff --git a/test/analysis/accept/match-literal-variants.ptk b/test/analysis/accept/match-literal-variants.ptk new file mode 100644 index 0000000..28ff569 --- /dev/null +++ b/test/analysis/accept/match-literal-variants.ptk @@ -0,0 +1 @@ +rule mode = "basic" | "extended"; \ No newline at end of file diff --git a/test/analysis/accept/match-optional-many-item.ptk b/test/analysis/accept/match-optional-many-item.ptk new file mode 100644 index 0000000..fb4b409 --- /dev/null +++ b/test/analysis/accept/match-optional-many-item.ptk @@ -0,0 +1 @@ +rule mode = ( "first" "second" "third" )?; \ No newline at end of file diff --git a/test/analysis/accept/match-optional-many-sequence.ptk b/test/analysis/accept/match-optional-many-sequence.ptk new file mode 100644 index 0000000..2c49812 --- /dev/null +++ b/test/analysis/accept/match-optional-many-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "one" "two" "three" )? "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-optional-nested.ptk b/test/analysis/accept/match-optional-nested.ptk new file mode 100644 index 0000000..18bf0d9 --- /dev/null +++ b/test/analysis/accept/match-optional-nested.ptk @@ -0,0 +1 @@ +rule mode = "L0:0" ( "L1:0" ( "L2:0" "L2:1" "L2:2" )? "L1:2" )? 
"L0:2"; \ No newline at end of file diff --git a/test/analysis/accept/match-optional-one-item.ptk b/test/analysis/accept/match-optional-one-item.ptk new file mode 100644 index 0000000..3c5ccc0 --- /dev/null +++ b/test/analysis/accept/match-optional-one-item.ptk @@ -0,0 +1 @@ +rule mode = ( "item" )?; \ No newline at end of file diff --git a/test/analysis/accept/match-optional-one-sequence.ptk b/test/analysis/accept/match-optional-one-sequence.ptk new file mode 100644 index 0000000..c5fd167 --- /dev/null +++ b/test/analysis/accept/match-optional-one-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "second" )? "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_one-many-item.ptk b/test/analysis/accept/match-rep_one-many-item.ptk new file mode 100644 index 0000000..89961d7 --- /dev/null +++ b/test/analysis/accept/match-rep_one-many-item.ptk @@ -0,0 +1 @@ +rule mode = ( "first" "second" "third" )+; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_one-many-sequence.ptk b/test/analysis/accept/match-rep_one-many-sequence.ptk new file mode 100644 index 0000000..0568546 --- /dev/null +++ b/test/analysis/accept/match-rep_one-many-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "one" "two" "three" )+ "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_one-nested.ptk b/test/analysis/accept/match-rep_one-nested.ptk new file mode 100644 index 0000000..99fbc2f --- /dev/null +++ b/test/analysis/accept/match-rep_one-nested.ptk @@ -0,0 +1 @@ +rule mode = "L0:0" ( "L1:0" ( "L2:0" "L2:1" "L2:2" )+ "L1:2" )+ "L0:2"; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_one-one-item.ptk b/test/analysis/accept/match-rep_one-one-item.ptk new file mode 100644 index 0000000..7f273d5 --- /dev/null +++ b/test/analysis/accept/match-rep_one-one-item.ptk @@ -0,0 +1 @@ +rule mode = ( "item" )+; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_one-one-sequence.ptk b/test/analysis/accept/match-rep_one-one-sequence.ptk new file mode 100644 index 0000000..64af460 --- /dev/null +++ b/test/analysis/accept/match-rep_one-one-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "second" )+ "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_zero-many-item.ptk b/test/analysis/accept/match-rep_zero-many-item.ptk new file mode 100644 index 0000000..5d9b366 --- /dev/null +++ b/test/analysis/accept/match-rep_zero-many-item.ptk @@ -0,0 +1 @@ +rule mode = ( "first" "second" "third" )*; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_zero-many-sequence.ptk b/test/analysis/accept/match-rep_zero-many-sequence.ptk new file mode 100644 index 0000000..cadf2c5 --- /dev/null +++ b/test/analysis/accept/match-rep_zero-many-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "one" "two" "three" )* "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_zero-nested.ptk b/test/analysis/accept/match-rep_zero-nested.ptk new file mode 100644 index 0000000..fee0799 --- /dev/null +++ b/test/analysis/accept/match-rep_zero-nested.ptk @@ -0,0 +1 @@ +rule mode = "L0:0" ( "L1:0" ( "L2:0" "L2:1" "L2:2" )* "L1:2" )* "L0:2"; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_zero-one-item.ptk b/test/analysis/accept/match-rep_zero-one-item.ptk new file mode 100644 index 0000000..d058aee --- /dev/null +++ b/test/analysis/accept/match-rep_zero-one-item.ptk @@ -0,0 +1 @@ +rule mode = ( "item" )*; \ No newline at end of file diff --git 
a/test/analysis/accept/match-rep_zero-one-sequence.ptk b/test/analysis/accept/match-rep_zero-one-sequence.ptk new file mode 100644 index 0000000..34e3a06 --- /dev/null +++ b/test/analysis/accept/match-rep_zero-one-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "second" )* "third"; \ No newline at end of file diff --git a/test/analysis/accept/pattern-custom-skip.ptk b/test/analysis/accept/pattern-custom-skip.ptk new file mode 100644 index 0000000..83f23c7 --- /dev/null +++ b/test/analysis/accept/pattern-custom-skip.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = @externalFunction; + diff --git a/test/analysis/accept/pattern-custom.ptk b/test/analysis/accept/pattern-custom.ptk new file mode 100644 index 0000000..83f23c7 --- /dev/null +++ b/test/analysis/accept/pattern-custom.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = @externalFunction; + diff --git a/test/analysis/accept/pattern-literal-skip.ptk b/test/analysis/accept/pattern-literal-skip.ptk new file mode 100644 index 0000000..a5efb6c --- /dev/null +++ b/test/analysis/accept/pattern-literal-skip.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = literal "a-word" skip; + diff --git a/test/analysis/accept/pattern-literal.ptk b/test/analysis/accept/pattern-literal.ptk new file mode 100644 index 0000000..4964d2c --- /dev/null +++ b/test/analysis/accept/pattern-literal.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = literal "a-word"; + diff --git a/test/analysis/accept/pattern-regex-skip.ptk b/test/analysis/accept/pattern-regex-skip.ptk new file mode 100644 index 0000000..b9e45ec --- /dev/null +++ b/test/analysis/accept/pattern-regex-skip.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = regex "a-word" skip; + diff --git a/test/analysis/accept/pattern-regex.ptk b/test/analysis/accept/pattern-regex.ptk new file mode 100644 index 0000000..4ec3715 --- /dev/null +++ b/test/analysis/accept/pattern-regex.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = regex "a-word"; + diff --git a/test/analysis/accept/pattern-word-skip.ptk b/test/analysis/accept/pattern-word-skip.ptk new file mode 100644 index 0000000..07a0e07 --- /dev/null +++ b/test/analysis/accept/pattern-word-skip.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = word "a-word" skip; + diff --git a/test/analysis/accept/pattern-word.ptk b/test/analysis/accept/pattern-word.ptk new file mode 100644 index 0000000..07a0e07 --- /dev/null +++ b/test/analysis/accept/pattern-word.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = word "a-word" skip; + diff --git a/test/analysis/accept/start-decl.ptk b/test/analysis/accept/start-decl.ptk new file mode 100644 index 0000000..404f545 --- /dev/null +++ b/test/analysis/accept/start-decl.ptk @@ -0,0 +1,7 @@ + + + +start ; + +rule magic = "magic"; + diff --git a/test/analysis/reject/duplicate-field-record.ptk b/test/analysis/reject/duplicate-field-record.ptk new file mode 100644 index 0000000..3a64f2a --- /dev/null +++ b/test/analysis/reject/duplicate-field-record.ptk @@ -0,0 +1,7 @@ +# expected: E1307 + +node bad = record + x: `bool`, + y: `bool`, + x: `bool` +; \ No newline at end of file diff --git a/test/analysis/reject/duplicate-field-variant.ptk b/test/analysis/reject/duplicate-field-variant.ptk new file mode 100644 index 0000000..377a38a --- /dev/null +++ b/test/analysis/reject/duplicate-field-variant.ptk @@ -0,0 +1,7 @@ +# expected: E1307 + +node bad = variant + x: `bool`, + y: `bool`, + x: `bool` +; \ No newline at end of file diff --git a/test/analysis/reject/duplicate-node.ptk b/test/analysis/reject/duplicate-node.ptk new file mode 100644 index 0000000..0f67291 --- /dev/null +++ 
b/test/analysis/reject/duplicate-node.ptk
@@ -0,0 +1,3 @@
+# expected: E1301
+node foo = `bool`;
+node foo = `bool`;
\ No newline at end of file
diff --git a/test/analysis/reject/duplicate-pattern.ptk b/test/analysis/reject/duplicate-pattern.ptk
new file mode 100644
index 0000000..4302396
--- /dev/null
+++ b/test/analysis/reject/duplicate-pattern.ptk
@@ -0,0 +1,3 @@
+# expected: E1302
+pattern foo = literal "bla";
+pattern foo = literal "bla";
\ No newline at end of file
diff --git a/test/analysis/reject/duplicate-rule.ptk b/test/analysis/reject/duplicate-rule.ptk
new file mode 100644
index 0000000..eff3ee6
--- /dev/null
+++ b/test/analysis/reject/duplicate-rule.ptk
@@ -0,0 +1,3 @@
+# expected: E1300
+rule foo = "";
+rule foo = "";
\ No newline at end of file
diff --git a/test/analysis/reject/duplicate-start.ptk b/test/analysis/reject/duplicate-start.ptk
new file mode 100644
index 0000000..52c55cf
--- /dev/null
+++ b/test/analysis/reject/duplicate-start.ptk
@@ -0,0 +1,10 @@
+# expected: E1306
+
+start ;
+
+rule magic = "magic";
+
+rule disco = "disco";
+
+start ;
+
diff --git a/test/analysis/reject/duplicate-undeclared-start.ptk b/test/analysis/reject/duplicate-undeclared-start.ptk
new file mode 100644
index 0000000..8b53833
--- /dev/null
+++ b/test/analysis/reject/duplicate-undeclared-start.ptk
@@ -0,0 +1,8 @@
+# expected: E1303, E1306
+
+start ;
+
+rule magic = "magic";
+
+start ;
+
diff --git a/test/analysis/reject/map-ruleref-oob.ptk b/test/analysis/reject/map-ruleref-oob.ptk
new file mode 100644
index 0000000..8af2ba4
--- /dev/null
+++ b/test/analysis/reject/map-ruleref-oob.ptk
@@ -0,0 +1,2 @@
+# expected: E1308
+rule basic = "hello" => $1;
\ No newline at end of file
diff --git a/test/analysis/reject/production-undeclared-pattern-ref.ptk b/test/analysis/reject/production-undeclared-pattern-ref.ptk
new file mode 100644
index 0000000..10e66f0
--- /dev/null
+++ b/test/analysis/reject/production-undeclared-pattern-ref.ptk
@@ -0,0 +1,3 @@
+# expected: E1305
+
+rule foo = $pat;
\ No newline at end of file
diff --git a/test/analysis/reject/production-undeclared-rule-ref.ptk b/test/analysis/reject/production-undeclared-rule-ref.ptk
new file mode 100644
index 0000000..a5525cc
--- /dev/null
+++ b/test/analysis/reject/production-undeclared-rule-ref.ptk
@@ -0,0 +1,3 @@
+# expected: E1303
+
+rule foo = ;
\ No newline at end of file
diff --git a/test/analysis/reject/undeclared-start.ptk b/test/analysis/reject/undeclared-start.ptk
new file mode 100644
index 0000000..5a97c96
--- /dev/null
+++ b/test/analysis/reject/undeclared-start.ptk
@@ -0,0 +1,2 @@
+# expected: E1303, W4000
+start ;
\ No newline at end of file
diff --git a/test/parser/accept/basic-rule-ref.ptk b/test/parser/accept/basic-rule-ref.ptk
new file mode 100644
index 0000000..e31192c
--- /dev/null
+++ b/test/parser/accept/basic-rule-ref.ptk
@@ -0,0 +1 @@
+rule output = ;
\ No newline at end of file
diff --git a/test/parser/accept/basic-token-ref.ptk b/test/parser/accept/basic-token-ref.ptk
new file mode 100644
index 0000000..29f9ce7
--- /dev/null
+++ b/test/parser/accept/basic-token-ref.ptk
@@ -0,0 +1 @@
+rule output = $terminal;
\ No newline at end of file
diff --git a/test/parser/accept/document-start.ptk b/test/parser/accept/document-start.ptk
new file mode 100644
index 0000000..0623db6
--- /dev/null
+++ b/test/parser/accept/document-start.ptk
@@ -0,0 +1 @@
+start ;
\ No newline at end of file
diff --git a/test/parser/accept/empty-with-comment-linefeed.ptk b/test/parser/accept/empty-with-comment-linefeed.ptk
new file mode 100644
index 0000000..a1e7613
--- /dev/null
+++ b/test/parser/accept/empty-with-comment-linefeed.ptk
@@ -0,0 +1 @@
+# hello, world!
diff --git a/test/parser/accept/empty-with-comment.ptk b/test/parser/accept/empty-with-comment.ptk
new file mode 100644
index 0000000..0017949
--- /dev/null
+++ b/test/parser/accept/empty-with-comment.ptk
@@ -0,0 +1 @@
+# hello, world!
\ No newline at end of file
diff --git a/test/parser/accept/empty.ptk b/test/parser/accept/empty.ptk
new file mode 100644
index 0000000..e69de29
diff --git a/test/parser/accept/identifiers.ptk b/test/parser/accept/identifiers.ptk
new file mode 100644
index 0000000..3c4baaa
--- /dev/null
+++ b/test/parser/accept/identifiers.ptk
@@ -0,0 +1,8 @@
+
+rule a = "whatever";
+rule _ = "whatever";
+rule a0 = "whatever";
+rule a-z = "whatever";
+rule _10 = "whatever";
+rule @"x" = "whatever";
+rule @"hello, world!" = "whatever";
diff --git a/test/parser/accept/mapping-array-a0.ptk b/test/parser/accept/mapping-array-a0.ptk
new file mode 100644
index 0000000..3ef8c33
--- /dev/null
+++ b/test/parser/accept/mapping-array-a0.ptk
@@ -0,0 +1 @@
+rule r = "" => { };
\ No newline at end of file
diff --git a/test/parser/accept/mapping-array-a1.ptk b/test/parser/accept/mapping-array-a1.ptk
new file mode 100644
index 0000000..48a6912
--- /dev/null
+++ b/test/parser/accept/mapping-array-a1.ptk
@@ -0,0 +1 @@
+rule r = "" => { $0 };
\ No newline at end of file
diff --git a/test/parser/accept/mapping-array-a5.ptk b/test/parser/accept/mapping-array-a5.ptk
new file mode 100644
index 0000000..a46ab16
--- /dev/null
+++ b/test/parser/accept/mapping-array-a5.ptk
@@ -0,0 +1 @@
+rule r = "" => { $0, $1, $2, $3, $4 };
\ No newline at end of file
diff --git a/test/parser/accept/mapping-array-nested.ptk b/test/parser/accept/mapping-array-nested.ptk
new file mode 100644
index 0000000..be8a59a
--- /dev/null
+++ b/test/parser/accept/mapping-array-nested.ptk
@@ -0,0 +1 @@
+rule r = "" => { $0, { $10, $11, $12 }, $2 };
\ No newline at end of file
diff --git a/test/parser/accept/mapping-builtin-function-a0.ptk b/test/parser/accept/mapping-builtin-function-a0.ptk
new file mode 100644
index 0000000..478e220
--- /dev/null
+++ b/test/parser/accept/mapping-builtin-function-a0.ptk
@@ -0,0 +1 @@
+rule r = "" => tostring();
\ No newline at end of file
diff --git a/test/parser/accept/mapping-builtin-function-a1.ptk b/test/parser/accept/mapping-builtin-function-a1.ptk
new file mode 100644
index 0000000..58e9623
--- /dev/null
+++ b/test/parser/accept/mapping-builtin-function-a1.ptk
@@ -0,0 +1 @@
+rule r = "" => tostring($0);
\ No newline at end of file
diff --git a/test/parser/accept/mapping-builtin-function-a5.ptk b/test/parser/accept/mapping-builtin-function-a5.ptk
new file mode 100644
index 0000000..acf6f75
--- /dev/null
+++ b/test/parser/accept/mapping-builtin-function-a5.ptk
@@ -0,0 +1 @@
+rule r = "" => tostring($0, $1, $2, $3, $4);
\ No newline at end of file
diff --git a/test/parser/accept/mapping-builtin-function-nest.ptk b/test/parser/accept/mapping-builtin-function-nest.ptk
new file mode 100644
index 0000000..c7457fe
--- /dev/null
+++ b/test/parser/accept/mapping-builtin-function-nest.ptk
@@ -0,0 +1 @@
+rule r = "" => tostring($0, tostring($1), $4);
\ No newline at end of file
diff --git a/test/parser/accept/mapping-code-literal.ptk b/test/parser/accept/mapping-code-literal.ptk
new file mode 100644
index 0000000..b18e2b9
--- /dev/null
+++ b/test/parser/accept/mapping-code-literal.ptk
@@ -0,0 +1 @@
+rule r = "" => `.item`;
\ No newline at end of file
diff --git a/test/parser/accept/mapping-record-init-f1.ptk b/test/parser/accept/mapping-record-init-f1.ptk
new file mode 100644
index 0000000..dcce273
--- /dev/null
+++ b/test/parser/accept/mapping-record-init-f1.ptk
@@ -0,0 +1 @@
+rule r = "" => { x = $0 };
\ No newline at end of file
diff --git a/test/parser/accept/mapping-record-init-f3.ptk b/test/parser/accept/mapping-record-init-f3.ptk
new file mode 100644
index 0000000..22d7640
--- /dev/null
+++ b/test/parser/accept/mapping-record-init-f3.ptk
@@ -0,0 +1 @@
+rule r = "" => { x = $0, y = $1, z = $2 };
\ No newline at end of file
diff --git a/test/parser/accept/mapping-user-function-a0.ptk b/test/parser/accept/mapping-user-function-a0.ptk
new file mode 100644
index 0000000..12d6fce
--- /dev/null
+++ b/test/parser/accept/mapping-user-function-a0.ptk
@@ -0,0 +1 @@
+rule r = "" => @convert();
\ No newline at end of file
diff --git a/test/parser/accept/mapping-user-function-a1.ptk b/test/parser/accept/mapping-user-function-a1.ptk
new file mode 100644
index 0000000..0c51664
--- /dev/null
+++ b/test/parser/accept/mapping-user-function-a1.ptk
@@ -0,0 +1 @@
+rule r = "" => @convert($0);
\ No newline at end of file
diff --git a/test/parser/accept/mapping-user-function-a5.ptk b/test/parser/accept/mapping-user-function-a5.ptk
new file mode 100644
index 0000000..684e3f3
--- /dev/null
+++ b/test/parser/accept/mapping-user-function-a5.ptk
@@ -0,0 +1 @@
+rule r = "" => @convert($0, $1, $2, $3, $4);
\ No newline at end of file
diff --git a/test/parser/accept/mapping-user-function-nest.ptk b/test/parser/accept/mapping-user-function-nest.ptk
new file mode 100644
index 0000000..f78963b
--- /dev/null
+++ b/test/parser/accept/mapping-user-function-nest.ptk
@@ -0,0 +1 @@
+rule r = "" => @convert($0, tostring($1), $4);
\ No newline at end of file
diff --git a/test/parser/accept/mapping-user-value.ptk b/test/parser/accept/mapping-user-value.ptk
new file mode 100644
index 0000000..2183ab2
--- /dev/null
+++ b/test/parser/accept/mapping-user-value.ptk
@@ -0,0 +1 @@
+rule r = "" => @value;
\ No newline at end of file
diff --git a/test/parser/accept/mapping-value-ref.ptk b/test/parser/accept/mapping-value-ref.ptk
new file mode 100644
index 0000000..b2293b8
--- /dev/null
+++ b/test/parser/accept/mapping-value-ref.ptk
@@ -0,0 +1 @@
+rule r = "" => $0;
\ No newline at end of file
diff --git a/test/parser/accept/mapping-variant-init.ptk b/test/parser/accept/mapping-variant-init.ptk
new file mode 100644
index 0000000..0fc50e8
--- /dev/null
+++ b/test/parser/accept/mapping-variant-init.ptk
@@ -0,0 +1 @@
+rule r = "" => child: $0;
\ No newline at end of file
diff --git a/test/parser/accept/node-alias.ptk b/test/parser/accept/node-alias.ptk
new file mode 100644
index 0000000..468dbc0
--- /dev/null
+++ b/test/parser/accept/node-alias.ptk
@@ -0,0 +1 @@
+node Alias = !OtherType;
\ No newline at end of file
diff --git a/test/parser/accept/node-custom.ptk b/test/parser/accept/node-custom.ptk
new file mode 100644
index 0000000..da3a508
--- /dev/null
+++ b/test/parser/accept/node-custom.ptk
@@ -0,0 +1 @@
+node String = @StringIdentifier;
\ No newline at end of file
diff --git a/test/parser/accept/node-literal.ptk b/test/parser/accept/node-literal.ptk
new file mode 100644
index 0000000..d2e3530
--- /dev/null
+++ b/test/parser/accept/node-literal.ptk
@@ -0,0 +1 @@
+node String = `[]const u8`;
\ No newline at end of file
diff --git a/test/parser/accept/node-record-f1.ptk b/test/parser/accept/node-record-f1.ptk
new file mode 100644
index 0000000..8b8db7d
--- /dev/null
+++ b/test/parser/accept/node-record-f1.ptk
@@ -0,0 +1 @@
+node Struct = record field: `bool`;
\ No newline at end of file
diff --git a/test/parser/accept/node-record-f4.ptk b/test/parser/accept/node-record-f4.ptk
new file mode 100644
index 0000000..28b3356
--- /dev/null
+++ b/test/parser/accept/node-record-f4.ptk
@@ -0,0 +1,6 @@
+node Struct = record
+ x: `i32`,
+ y: `i32`,
+ z: `i32`,
+ location: !Location
+;
\ No newline at end of file
diff --git a/test/parser/accept/node-variant-f1.ptk b/test/parser/accept/node-variant-f1.ptk
new file mode 100644
index 0000000..0f675d8
--- /dev/null
+++ b/test/parser/accept/node-variant-f1.ptk
@@ -0,0 +1 @@
+node Struct = variant field: `bool`;
\ No newline at end of file
diff --git a/test/parser/accept/node-variant-f4.ptk b/test/parser/accept/node-variant-f4.ptk
new file mode 100644
index 0000000..e346aea
--- /dev/null
+++ b/test/parser/accept/node-variant-f4.ptk
@@ -0,0 +1,6 @@
+node Struct = variant
+ x: `i32`,
+ y: `i32`,
+ z: `i32`,
+ location: !Location
+;
\ No newline at end of file
diff --git a/test/parser/accept/optional-nospace.ptk b/test/parser/accept/optional-nospace.ptk
new file mode 100644
index 0000000..c72723f
--- /dev/null
+++ b/test/parser/accept/optional-nospace.ptk
@@ -0,0 +1 @@
+rule group=("word")?;
\ No newline at end of file
diff --git a/test/parser/accept/optional-space.ptk b/test/parser/accept/optional-space.ptk
new file mode 100644
index 0000000..b95fdab
--- /dev/null
+++ b/test/parser/accept/optional-space.ptk
@@ -0,0 +1 @@
+rule group = ( "word" ) ? ;
\ No newline at end of file
diff --git a/test/parser/accept/rep_one-nospace.ptk b/test/parser/accept/rep_one-nospace.ptk
new file mode 100644
index 0000000..9a8646d
--- /dev/null
+++ b/test/parser/accept/rep_one-nospace.ptk
@@ -0,0 +1 @@
+rule group=("word")+;
\ No newline at end of file
diff --git a/test/parser/accept/rep_one-space.ptk b/test/parser/accept/rep_one-space.ptk
new file mode 100644
index 0000000..c624039
--- /dev/null
+++ b/test/parser/accept/rep_one-space.ptk
@@ -0,0 +1 @@
+rule group = ( "word" ) + ;
\ No newline at end of file
diff --git a/test/parser/accept/rep_zero-nospace.ptk b/test/parser/accept/rep_zero-nospace.ptk
new file mode 100644
index 0000000..3bfb157
--- /dev/null
+++ b/test/parser/accept/rep_zero-nospace.ptk
@@ -0,0 +1 @@
+rule group=("word")*;
\ No newline at end of file
diff --git a/test/parser/accept/rep_zero-space.ptk b/test/parser/accept/rep_zero-space.ptk
new file mode 100644
index 0000000..3696d95
--- /dev/null
+++ b/test/parser/accept/rep_zero-space.ptk
@@ -0,0 +1 @@
+rule group = ( "word" ) * ;
\ No newline at end of file
diff --git a/test/parser/accept/rule-primitive-sequence.ptk b/test/parser/accept/rule-primitive-sequence.ptk
new file mode 100644
index 0000000..0067313
--- /dev/null
+++ b/test/parser/accept/rule-primitive-sequence.ptk
@@ -0,0 +1 @@
+rule sequence = "literal" $terminal "literal" $terminal ;
\ No newline at end of file
diff --git a/test/parser/accept/rule-typespec-custom.ptk b/test/parser/accept/rule-typespec-custom.ptk
new file mode 100644
index 0000000..3df8de4
--- /dev/null
+++ b/test/parser/accept/rule-typespec-custom.ptk
@@ -0,0 +1 @@
+rule r : @Point = "";
\ No newline at end of file
diff --git a/test/parser/accept/rule-typespec-literal.ptk b/test/parser/accept/rule-typespec-literal.ptk
new file mode 100644
index 0000000..7a700d7
--- /dev/null
+++ b/test/parser/accept/rule-typespec-literal.ptk
@@ -0,0 +1 @@
+rule r : `bool` = "";
\ No newline at end of file
diff --git a/test/parser/accept/rule-typespec-ref.ptk b/test/parser/accept/rule-typespec-ref.ptk
new file mode 100644
index 0000000..1af0072
--- /dev/null
+++ b/test/parser/accept/rule-typespec-ref.ptk
@@ -0,0 +1 @@
+rule r : !farpointer = "";
\ No newline at end of file
diff --git a/test/parser/reject/bad-mapping-invalid-token.ptk b/test/parser/reject/bad-mapping-invalid-token.ptk
new file mode 100644
index 0000000..aada416
--- /dev/null
+++ b/test/parser/reject/bad-mapping-invalid-token.ptk
@@ -0,0 +1,2 @@
+# expected: E1111
+rule group = "value" => "bad" ;
\ No newline at end of file
diff --git a/test/parser/reject/bad-mapping-too-long.ptk b/test/parser/reject/bad-mapping-too-long.ptk
new file mode 100644
index 0000000..057dcd5
--- /dev/null
+++ b/test/parser/reject/bad-mapping-too-long.ptk
@@ -0,0 +1,2 @@
+# expected: E1112
+rule group = "value" => $0 whatever ;
\ No newline at end of file
diff --git a/test/parser/reject/empty-group.ptk b/test/parser/reject/empty-group.ptk
new file mode 100644
index 0000000..2860712
--- /dev/null
+++ b/test/parser/reject/empty-group.ptk
@@ -0,0 +1,2 @@
+# expected: E1200
+rule group = ( );
\ No newline at end of file
diff --git a/test/parser/reject/empty-mapping.ptk b/test/parser/reject/empty-mapping.ptk
new file mode 100644
index 0000000..6479ae9
--- /dev/null
+++ b/test/parser/reject/empty-mapping.ptk
@@ -0,0 +1,2 @@
+# expected: E1201
+rule group = "value" => ;
\ No newline at end of file
diff --git a/test/parser/reject/empty-optional.ptk b/test/parser/reject/empty-optional.ptk
new file mode 100644
index 0000000..82ac677
--- /dev/null
+++ b/test/parser/reject/empty-optional.ptk
@@ -0,0 +1,2 @@
+# expected: E1200
+rule group = ( )?;
\ No newline at end of file
diff --git a/test/parser/reject/empty-rep_one.ptk b/test/parser/reject/empty-rep_one.ptk
new file mode 100644
index 0000000..82ac677
--- /dev/null
+++ b/test/parser/reject/empty-rep_one.ptk
@@ -0,0 +1,2 @@
+# expected: E1200
+rule group = ( )?;
\ No newline at end of file
diff --git a/test/parser/reject/empty-rep_zero.ptk b/test/parser/reject/empty-rep_zero.ptk
new file mode 100644
index 0000000..82ac677
--- /dev/null
+++ b/test/parser/reject/empty-rep_zero.ptk
@@ -0,0 +1,2 @@
+# expected: E1200
+rule group = ( )?;
\ No newline at end of file
diff --git a/test/parser/reject/empty-rule.ptk b/test/parser/reject/empty-rule.ptk
new file mode 100644
index 0000000..8d32fe9
--- /dev/null
+++ b/test/parser/reject/empty-rule.ptk
@@ -0,0 +1,2 @@
+# expected: E1200
+rule group = ;
\ No newline at end of file
diff --git a/test/parser/reject/node-no-type.ptk b/test/parser/reject/node-no-type.ptk
new file mode 100644
index 0000000..9a6b774
--- /dev/null
+++ b/test/parser/reject/node-no-type.ptk
@@ -0,0 +1,2 @@
+# expected: E1203
+node foo = ;
\ No newline at end of file
diff --git a/test/parser/reject/pattern-unexpected-token.ptk b/test/parser/reject/pattern-unexpected-token.ptk
new file mode 100644
index 0000000..158522d
--- /dev/null
+++ b/test/parser/reject/pattern-unexpected-token.ptk
@@ -0,0 +1,4 @@
+# expected: E1114
+
+pattern a_word = `illegal`;
+
diff --git a/test/parser/reject/rule-bad-prod.ptk b/test/parser/reject/rule-bad-prod.ptk
new file mode 100644
index 0000000..f5bf832
--- /dev/null
+++ b/test/parser/reject/rule-bad-prod.ptk
@@ -0,0 +1,2 @@
+# expected: E1113
+rule foo = `illegal here`;
\ No newline at end of file
diff --git a/test/parser/reject/rule-no-type-no-prod.ptk b/test/parser/reject/rule-no-type-no-prod.ptk
new file mode 100644
index 0000000..bbd4401
--- /dev/null
+++ b/test/parser/reject/rule-no-type-no-prod.ptk
@@ -0,0 +1,2 @@
+# expected: E1203, E1200
+rule foo : = ;
\ No newline at end of file
diff --git a/test/parser/reject/rule-no-type.ptk b/test/parser/reject/rule-no-type.ptk
new file mode 100644
index 0000000..6ab328d
--- /dev/null
+++ b/test/parser/reject/rule-no-type.ptk
@@ -0,0 +1,2 @@
+# expected: E1203
+rule foo : = "code";
\ No newline at end of file
diff --git a/test/parser/reject/unexpected-token-string.ptk b/test/parser/reject/unexpected-token-string.ptk
new file mode 100644
index 0000000..4848c41
--- /dev/null
+++ b/test/parser/reject/unexpected-token-string.ptk
@@ -0,0 +1,2 @@
+# expected: E1108
+"bad toplevel token!"
\ No newline at end of file