diff --git a/UnrealAngelscriptParser/Grammar/UnrealAngelscriptLexer.g4 b/UnrealAngelscriptParser/Grammar/UnrealAngelscriptLexer.g4 index 9e9c355..4a6357a 100644 --- a/UnrealAngelscriptParser/Grammar/UnrealAngelscriptLexer.g4 +++ b/UnrealAngelscriptParser/Grammar/UnrealAngelscriptLexer.g4 @@ -1,7 +1,7 @@ /* - Adapted to Unreal Angelscript by Embark Studios AB (Fredrik Lindh [Temaran]). - Based on the C++ grammar made by Camilo Sanchez (Camiloasc1) and Martin Mirchev (Marti2203). See the parser file. - */ + Adapted to Unreal Angelscript by Embark Studios AB (originally Fredrik Lindh [Temaran]). + Based on: https://github.com/antlr/grammars-v4/blob/master/cpp/CPP14Lexer.g4 +*/ lexer grammar UnrealAngelscriptLexer; @@ -17,12 +17,12 @@ FloatingLiteral: Fractionalconstant Exponentpart? Floatingsuffix? | Digitsequence Exponentpart Floatingsuffix?; -StringLiteral: - '"""' .*? '"""' - | ('n' | 'f')? '"' ( - ~["\\\u0085\u2028\u2029] - | Escapesequence - )* '"'; +// UnrealAngelscript string literals +// https://angelscript.hazelight.se/scripting/fname-literals/ +// https://angelscript.hazelight.se/scripting/format-strings/ +fragment Angelscriptstringprefix: 'n' | 'f'; + +StringLiteral: (Encodingprefix | Angelscriptstringprefix)? (Rawstring | '"' Schar* '"'); BooleanLiteral: False | True; @@ -32,54 +32,69 @@ UserDefinedLiteral: | UserDefinedStringLiteral | UserDefinedCharacterLiteral; -/*Angelscript*/ - -Cast: 'Cast'; - -UClass: 'UCLASS'; - -UStruct: 'USTRUCT'; - -UProperty: 'UPROPERTY'; - -UFunction: 'UFUNCTION'; - -UEnum: 'UENUM'; +/* + Angelscript reserved keywords + https://www.angelcode.com/angelscript/sdk/docs/manual/doc_reserved_keywords.html +*/ -UMeta: 'UMETA'; +Cast: 'cast'; Import: 'import'; -From: 'from'; - -Out: 'out'; +Int: 'int'; -Property: 'property'; +Int8: 'int8'; -Ensure: 'ensure'; +Int16: 'int16'; -EnsureAlways: 'ensureAlways'; +Int32: 'int32'; -Check: 'check'; +Int64: 'int64'; Mixin: 'mixin'; -Int: 'int'; -Int8: 'int8'; -Int16: 'int16'; -Int32: 'int32'; -Int64: 'int64'; +Property: 'property'; + UInt: 'uint'; + UInt8: 'uint8'; + UInt16: 'uint16'; + UInt32: 'uint32'; + UInt64: 'uint64'; + Float: 'float'; + Float32: 'float32'; + Float64: 'float64'; + Double: 'double'; + Bool: 'bool'; +/* UnrealAngelscript */ + +UClass: 'UCLASS'; + +UStruct: 'USTRUCT'; + +UProperty: 'UPROPERTY'; + +UFunction: 'UFUNCTION'; + +UEnum: 'UENUM'; + +UMeta: 'UMETA'; + +Ensure: 'ensure'; + +EnsureAlways: 'ensureAlways'; + +Check: 'check'; + /*Keywords*/ Auto: 'auto'; @@ -156,6 +171,8 @@ This: 'this'; True: 'true'; +Typedef: 'typedef'; + Virtual: 'virtual'; Void: 'void'; @@ -250,6 +267,10 @@ Semi: ';'; Dot: '.'; +fragment Hexquad: HEXADECIMALDIGIT HEXADECIMALDIGIT HEXADECIMALDIGIT HEXADECIMALDIGIT; + +fragment Universalcharactername: '\\u' Hexquad | '\\U' Hexquad Hexquad; + Identifier: /* Identifiernondigit | Identifier Identifiernondigit | Identifier DIGIT @@ -266,9 +287,7 @@ DecimalLiteral: NONZERODIGIT ('\''? DIGIT)*; OctalLiteral: '0' ('\''? OCTALDIGIT)*; -HexadecimalLiteral: ('0x' | '0X') HEXADECIMALDIGIT ( - '\''? HEXADECIMALDIGIT - )*; +HexadecimalLiteral: ('0x' | '0X') HEXADECIMALDIGIT ( '\''? HEXADECIMALDIGIT)*; BinaryLiteral: ('0b' | '0B') BINARYDIGIT ('\''? BINARYDIGIT)*; @@ -292,12 +311,9 @@ fragment Longsuffix: [lL]; fragment Longlongsuffix: 'll' | 'LL'; -fragment Cchar: ~ ['\\\r\n] | Escapesequence; +fragment Cchar: ~ ['\\\r\n] | Escapesequence | Universalcharactername; -fragment Escapesequence: - Simpleescapesequence - | Octalescapesequence - | Hexadecimalescapesequence; +fragment Escapesequence: Simpleescapesequence | Octalescapesequence | Hexadecimalescapesequence; fragment Simpleescapesequence: '\\\'' @@ -336,6 +352,10 @@ fragment Floatingsuffix: [flFL]; fragment Encodingprefix: 'u8' | 'u' | 'U' | 'L'; +fragment Schar: ~ ["\\\r\n] | Escapesequence | Universalcharactername; + +fragment Rawstring: 'R"' ( '\\' ["()] | ~[\r\n (])*? '(' ~[)]*? ')' ( '\\' ["()] | ~[\r\n "])*? '"'; + UserDefinedIntegerLiteral: DecimalLiteral Udsuffix | OctalLiteral Udsuffix @@ -355,7 +375,11 @@ fragment Udsuffix: Identifier; Whitespace: [ \t]+ -> skip; Newline: ('\r' '\n'? | '\n') -> skip; + BlockComment: '/*' .*? '*/' -> skip; + LineComment: '//' ~ [\r\n]* -> skip; + PreprocessorBranchRemoval: '#else' .*? '#endif' -> skip; -Preprocessor: ('#if' | '#ifdef' | '#else' | '#endif') ~ [\r\n]* -> skip; \ No newline at end of file + +Preprocessor: ('#if' | '#ifdef' | '#else' | '#endif') ~ [\r\n]* -> skip; diff --git a/UnrealAngelscriptParser/Grammar/UnrealAngelscriptParser.g4 b/UnrealAngelscriptParser/Grammar/UnrealAngelscriptParser.g4 index fddf422..d0ddc42 100644 --- a/UnrealAngelscriptParser/Grammar/UnrealAngelscriptParser.g4 +++ b/UnrealAngelscriptParser/Grammar/UnrealAngelscriptParser.g4 @@ -21,9 +21,9 @@ */ /* - Adapted to Unreal Angelscript by Embark Studios AB (Fredrik Lindh [Temaran]). - Based on the C++ grammar made by Camilo Sanchez (Camiloasc1) and Martin Mirchev (Marti2203). See the parser file. - */ + Adapted to Unreal Angelscript by Embark Studios AB (originally Fredrik Lindh [Temaran]). + Based on: https://github.com/antlr/grammars-v4/blob/master/cpp/CPP14Parser.g4 +*/ parser grammar UnrealAngelscriptParser; options {