diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..0a4e851 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,3 @@ +## Unit tests + +Get quick feedback on correctness by running your platform's tests. E.g. `gradle macosX64Test` diff --git a/README.md b/README.md index 9b28fd5..011f22f 100644 --- a/README.md +++ b/README.md @@ -261,6 +261,11 @@ val term by A `Grammar` implementation can override the `tokenizer` property to provide a custom implementation of `Tokenizer`. +If nothing in `Grammar` needs to be overridden, the `grammar` function can also be used: +```kotlin +val matches = grammar(zeroOrMore(regexToken("foo") use { text })).parseToEnd(input) +``` + ## Syntax trees A `Parser` can be converted to another `Parser>`, where a `SyntaxTree`, along with the parsed `T` diff --git a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/AndCombinator.kt b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/AndCombinator.kt index 8efb266..3163cae 100644 --- a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/AndCombinator.kt +++ b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/AndCombinator.kt @@ -1,5 +1,6 @@ package com.github.h0tk3y.betterParse.combinators +import com.github.h0tk3y.betterParse.lexer.Token import com.github.h0tk3y.betterParse.lexer.TokenMatchesSequence import com.github.h0tk3y.betterParse.parser.* import com.github.h0tk3y.betterParse.utils.Tuple2 @@ -25,10 +26,13 @@ public inline operator fun AndCombinator.times(other: this and other public class AndCombinator @PublishedApi internal constructor( - @PublishedApi internal val consumersImpl: List, + @PublishedApi internal val consumersImpl: List, internal val transform: (List) -> R ) : Parser { + public override val tokens: List + get() = consumersImpl.flatMap { it.tokens } + @Deprecated("Use parsers or skipParsers instead to get the type-safe results.") public val consumers: List get() = consumersImpl diff --git a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/MapCombinator.kt b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/MapCombinator.kt index 36e424f..993ce95 100644 --- a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/MapCombinator.kt +++ b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/MapCombinator.kt @@ -9,7 +9,8 @@ import com.github.h0tk3y.betterParse.parser.* public class MapCombinator( public val innerParser: Parser, public val transform: (T) -> R -) : Parser { +) : Parser, TokenProvider by innerParser { + override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult { val innerResult = innerParser.tryParse(tokens, fromPosition) return when (innerResult) { diff --git a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/OptionalCombinator.kt b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/OptionalCombinator.kt index eb5b7cd..6dba02b 100644 --- a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/OptionalCombinator.kt +++ b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/OptionalCombinator.kt @@ -5,7 +5,7 @@ import com.github.h0tk3y.betterParse.parser.* /** Tries to parse the sequence with [parser], and if that fails, returns [Parsed] of null instead. */ public class OptionalCombinator(public val parser: Parser) : - Parser { + Parser, TokenProvider by parser { override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult { val result = parser.tryParse(tokens, fromPosition) return when (result) { diff --git a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/OrCombinator.kt b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/OrCombinator.kt index 5f6660a..397ca51 100644 --- a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/OrCombinator.kt +++ b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/OrCombinator.kt @@ -7,6 +7,10 @@ import com.github.h0tk3y.betterParse.parser.* * If none succeeds, returns the [AlternativesFailure] with all the [ErrorResult]s. */ public class OrCombinator(public val parsers: List>) : Parser { + + public override val tokens: List + get() = parsers.flatMap { it.tokens } + override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult { var failures: ArrayList? = null for (index in 0 until parsers.size) { diff --git a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/RepeatCombinator.kt b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/RepeatCombinator.kt index 2581e76..0a66e83 100644 --- a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/RepeatCombinator.kt +++ b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/RepeatCombinator.kt @@ -10,7 +10,7 @@ public class RepeatCombinator internal constructor( public val parser: Parser, public val atLeast: Int = 0, public val atMost: Int = -1 -) : Parser> { +) : Parser>, TokenProvider by parser { init { require(atLeast >= 0) { "atLeast = $atLeast, expected non-negative" } diff --git a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/Separated.kt b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/Separated.kt index 5671781..390dec9 100644 --- a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/Separated.kt +++ b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/Separated.kt @@ -1,5 +1,6 @@ package com.github.h0tk3y.betterParse.combinators +import com.github.h0tk3y.betterParse.lexer.Token import com.github.h0tk3y.betterParse.lexer.TokenMatchesSequence import com.github.h0tk3y.betterParse.parser.* @@ -9,6 +10,10 @@ public class SeparatedCombinator( public val separatorParser: Parser, public val acceptZero: Boolean ) : Parser> { + + public override val tokens: List + get() = termParser.tokens + separatorParser.tokens + override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult> { val termMatches = mutableListOf() val separatorMatches = mutableListOf() diff --git a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/SkipParser.kt b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/SkipParser.kt index 0dfabd1..8535f7d 100644 --- a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/SkipParser.kt +++ b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/combinators/SkipParser.kt @@ -2,11 +2,12 @@ package com.github.h0tk3y.betterParse.combinators import com.github.h0tk3y.betterParse.parser.Parsed import com.github.h0tk3y.betterParse.parser.Parser +import com.github.h0tk3y.betterParse.parser.TokenProvider import com.github.h0tk3y.betterParse.utils.Tuple import com.github.h0tk3y.betterParse.utils.Tuple1 /** Wraps a [Parser] to distinguish it from other parsers when it is used in [and] functions. */ -public class SkipParser(public val innerParser: Parser<*>) +public class SkipParser(public val innerParser: Parser<*>): TokenProvider by innerParser /** Wraps a [Parser] to distinguish it from other parsers when it is used in [and] functions. */ public fun skip(parser: Parser): SkipParser = SkipParser(parser) diff --git a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/grammar/Grammar.kt b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/grammar/Grammar.kt index b157d62..795e53c 100644 --- a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/grammar/Grammar.kt +++ b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/grammar/Grammar.kt @@ -4,10 +4,7 @@ import com.github.h0tk3y.betterParse.lexer.DefaultTokenizer import com.github.h0tk3y.betterParse.lexer.Token import com.github.h0tk3y.betterParse.lexer.TokenMatchesSequence import com.github.h0tk3y.betterParse.lexer.Tokenizer -import com.github.h0tk3y.betterParse.parser.ParseResult -import com.github.h0tk3y.betterParse.parser.Parser -import com.github.h0tk3y.betterParse.parser.parseToEnd -import com.github.h0tk3y.betterParse.parser.tryParseToEnd +import com.github.h0tk3y.betterParse.parser.* import kotlin.reflect.KProperty /** @@ -16,16 +13,18 @@ import kotlin.reflect.KProperty */ public abstract class Grammar : Parser { + /** Tokens declared in concrete object are added here during instance construction. */ private val _tokens = arrayListOf() private val _parsers = linkedSetOf>() - /** List of tokens that is by default used for tokenizing a sequence before parsing this language. The tokens are - * added to this list during an instance construction. */ - public open val tokens: List get(): List = _tokens.distinctBy { it.name ?: it } + /** + * List of tokens that is by default used for tokenizing a sequence before parsing. + */ + public override val tokens: List by lazy { (_tokens + rootParser.tokens).distinctBy { it.name ?: it } } /** Set of the tokens and parsers that were declared by delegation to the parser instances (`val p by someParser`), and [rootParser] */ - public open val declaredParsers: Set> get() = (_parsers + _tokens + rootParser).toSet() + public open val declaredParsers: Set> by lazy { (_parsers + tokens + rootParser).toSet() } /** A [Tokenizer] that is built with the [Token]s defined within this [Grammar], in their order of declaration */ public open val tokenizer: Tokenizer by lazy { DefaultTokenizer(tokens) } @@ -49,8 +48,6 @@ public abstract class Grammar : Parser { } _tokens.add(it) } - - protected operator fun Token.getValue(thisRef: Grammar<*>, property: KProperty<*>): Token = this } /** A convenience function to use for referencing a parser that is not initialized up to this moment. */ @@ -59,6 +56,8 @@ public fun parser(block: () -> Parser): Parser = ParserReference(block public class ParserReference internal constructor(parserProvider: () -> Parser) : Parser { public val parser: Parser by lazy(parserProvider) + public override val tokens: List = listOf() // a parser reference defines no tokens itself + override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult = parser.tryParse(tokens, fromPosition) } @@ -67,4 +66,10 @@ public fun Grammar.tryParseToEnd(input: String): ParseResult = rootParser.tryParseToEnd(tokenizer.tokenize(input), 0) public fun Grammar.parseToEnd(input: String): T = - rootParser.parseToEnd(tokenizer.tokenize(input)) \ No newline at end of file + rootParser.parseToEnd(tokenizer.tokenize(input)) + +public fun grammar(rootParser: Parser): Grammar { + return object : Grammar() { + override val rootParser: Parser = rootParser + } +} \ No newline at end of file diff --git a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/lexer/Token.kt b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/lexer/Token.kt index dad7462..3b3e389 100644 --- a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/lexer/Token.kt +++ b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/lexer/Token.kt @@ -1,6 +1,8 @@ package com.github.h0tk3y.betterParse.lexer +import com.github.h0tk3y.betterParse.grammar.Grammar import com.github.h0tk3y.betterParse.parser.* +import kotlin.reflect.KProperty @OptionalExpectation public expect annotation class Language(val value: String, val prefix: String, val suffix: String) @@ -12,6 +14,8 @@ public expect annotation class Language(val value: String, val prefix: String, v */ public abstract class Token(public var name: String? = null, public val ignored: Boolean) : Parser { + public override val tokens: List = listOf(this) + public abstract fun match(input: CharSequence, fromIndex: Int): Int override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult = @@ -28,6 +32,8 @@ public abstract class Token(public var name: String? = null, public val ignored: } } +public operator fun Token.getValue(thisRef: Any?, property: KProperty<*>): Token = this + /** Token type indicating that there was no [Token] found to be matched by a [Tokenizer]. */ public val noneMatched: Token = object : Token("no token matched", false) { override fun match(input: CharSequence, fromIndex: Int): Int = 0 diff --git a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/parser/Parser.kt b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/parser/Parser.kt index cfd6e93..6cbad5c 100644 --- a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/parser/Parser.kt +++ b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/parser/Parser.kt @@ -6,11 +6,12 @@ import com.github.h0tk3y.betterParse.lexer.TokenMatchesSequence /** A common interface for parsers that can try to consume a part or the whole [TokenMatch] sequence and return one of * possible [ParseResult], either [Parsed] or [ErrorResult] */ -public interface Parser { +public interface Parser: TokenProvider { public fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult } public object EmptyParser : Parser { + override val tokens: List = listOf() override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult = ParsedValue(Unit, fromPosition) } diff --git a/src/commonMain/kotlin/com/github/h0tk3y/betterParse/parser/TokenProvider.kt b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/parser/TokenProvider.kt new file mode 100644 index 0000000..fe30de5 --- /dev/null +++ b/src/commonMain/kotlin/com/github/h0tk3y/betterParse/parser/TokenProvider.kt @@ -0,0 +1,11 @@ +package com.github.h0tk3y.betterParse.parser + +import com.github.h0tk3y.betterParse.lexer.Token + +/** + * In theory this could be a property or function of Parser, however SkipParser must not implement + * Parser in order to disambiguate infix functions. + */ +public interface TokenProvider { + public val tokens: List +} \ No newline at end of file diff --git a/src/commonTest/kotlin/AndTest.kt b/src/commonTest/kotlin/AndTest.kt index 6a5b00b..c8bea9d 100644 --- a/src/commonTest/kotlin/AndTest.kt +++ b/src/commonTest/kotlin/AndTest.kt @@ -1,57 +1,55 @@ import com.github.h0tk3y.betterParse.combinators.* -import com.github.h0tk3y.betterParse.grammar.Grammar -import com.github.h0tk3y.betterParse.lexer.regexToken -import com.github.h0tk3y.betterParse.parser.Parser -import com.github.h0tk3y.betterParse.parser.parseToEnd +import com.github.h0tk3y.betterParse.grammar.* +import com.github.h0tk3y.betterParse.lexer.* import com.github.h0tk3y.betterParse.utils.components import kotlin.test.Test import kotlin.test.assertEquals -class AndTest : Grammar() { - override val rootParser: Parser get() = throw NoSuchElementException() +class AndTest { val a by regexToken("a") val b by regexToken("b") @Test fun simpleAnd() { - val tokens = tokenizer.tokenize("aba") - val parser = a and b and a use { components.map { it.type } } - val result = parser.parseToEnd(tokens) + assertEquals( + listOf(a, b, a), + grammar(a and b and a use { components.map { it.type } }).parseToEnd("aba")) + } - assertEquals(listOf(a, b, a), result) + @Test fun simpleAndInline() { + assertEquals( + listOf("a", "b"), + grammar(regexToken("a") and regexToken("b") use { components.map { it.text } }) + .parseToEnd("ab")) } @Test fun skip() { - val tokens = tokenizer.tokenize("abab") - val parserA = a and skip(b) and a and skip(b) use { components.map { it.type } } - val parserB = skip(a) and b and skip(a) and b use { components.map { it.type } } - - assertEquals(listOf(a, a), parserA.parseToEnd(tokens)) - assertEquals(listOf(b, b), parserB.parseToEnd(tokens)) + assertEquals( + listOf(a, a), + grammar(a and skip(b) and a and skip(b) use { components.map { it.type } }) + .parseToEnd("abab")) + + assertEquals( + listOf(b, b), + grammar(skip(a) and b and skip(a) and b use { components.map { it.type } }) + .parseToEnd("abab")) } @Test fun leftmostSeveralSkips() { - val tokens = tokenizer.tokenize("ababab") - val parser = -a * -b * a * -b * -a * b use { t1.type to t2.type } - val result = parser.parseToEnd(tokens) - - assertEquals(a to b, result) + assertEquals( + a to b, + grammar(-a * -b * a * -b * -a * b use { t1.type to t2.type }).parseToEnd("ababab")) } @Test fun singleParserInSkipChain() { - val tokens = tokenizer.tokenize("ababa") - val parser = -a * -b * a * -b * -a use { offset } - val result = parser.parseToEnd(tokens) - - assertEquals(2, result) + assertEquals(2, grammar(-a * -b * a * -b * -a use { offset }).parseToEnd("ababa")) } @Test fun longAndOperatorChain() { - val tokens = tokenizer.tokenize("aaabbb") - val parser = a * a * a * b * b * b use { listOf(t6, t5, t4, t3, t2, t1).map { it.type } } - val result = parser.parseToEnd(tokens) - - assertEquals(listOf(b, b, b, a, a, a), result) + assertEquals( + listOf(b, b, b, a, a, a), + grammar(a * a * a * b * b * b use { listOf(t6, t5, t4, t3, t2, t1).map { it.type } }) + .parseToEnd("aaabbb")) } } \ No newline at end of file diff --git a/src/commonTest/kotlin/OrTest.kt b/src/commonTest/kotlin/OrTest.kt index 2ac4ed2..e09d5bf 100644 --- a/src/commonTest/kotlin/OrTest.kt +++ b/src/commonTest/kotlin/OrTest.kt @@ -4,26 +4,49 @@ import com.github.h0tk3y.betterParse.lexer.* import com.github.h0tk3y.betterParse.parser.* import kotlin.test.* -class OrTest : Grammar() { - override val rootParser: Parser get() = throw NoSuchElementException() +class OrTest { val a by regexToken("a") + val aa by regexToken("aa") val b by regexToken("b") @Test fun aOrB() { - val tokens = tokenizer.tokenize("abababa") - val abOrA = zeroOrMore((a and b use { t2 }) or a) use { map { it.type } } - val result = abOrA.parseToEnd(tokens) - - assertEquals(listOf(b, b, b, a), result) + assertEquals( + listOf(b, b, b, a), + grammar(zeroOrMore((a and b use { t2 }) or a) use { map { it.type } }) + .parseToEnd("abababa")) } @Test fun alternativesError() { - val tokens = tokenizer.tokenize("ab") - val parser = (a and a) or (a and b and a) - val result = parser.tryParse(tokens,0) as AlternativesFailure + val result = grammar((a and a) or (a and b and a)).tryParseToEnd("ab") as AlternativesFailure assertTrue(result.errors[0] is MismatchedToken) assertTrue(result.errors[1] is UnexpectedEof) } + + @Test fun aOrAa() { + // aa defined first + assertEquals( + listOf("aa"), + grammar(oneOrMore(aa or a) use { map { it.text} }).parseToEnd("aa") + ) + + // a defined first + assertEquals( + listOf("a", "a"), + grammar(oneOrMore(a or aa) use { map { it.text} }).parseToEnd("aa") + ) + + /* + * a is first in the declared tokens, but aa first in the parser. For backwards + * compatibility, the order of declared tokens is honored. + */ + val grammar = object : Grammar>() { + val a by regexToken("a") + val aa by regexToken("aa") + override val rootParser: Parser> by oneOrMore(aa or a) use { map { it.text } } + } + + assertEquals(listOf("a", "a"), grammar.parseToEnd("aa")) + } } diff --git a/src/commonTest/kotlin/TestLiftToAst.kt b/src/commonTest/kotlin/TestLiftToAst.kt index 91b1237..21016bf 100644 --- a/src/commonTest/kotlin/TestLiftToAst.kt +++ b/src/commonTest/kotlin/TestLiftToAst.kt @@ -3,6 +3,7 @@ import com.github.h0tk3y.betterParse.combinators.* import com.github.h0tk3y.betterParse.grammar.Grammar import com.github.h0tk3y.betterParse.grammar.parseToEnd import com.github.h0tk3y.betterParse.grammar.parser +import com.github.h0tk3y.betterParse.lexer.Token import com.github.h0tk3y.betterParse.lexer.TokenMatch import com.github.h0tk3y.betterParse.lexer.TokenMatchesSequence import com.github.h0tk3y.betterParse.lexer.regexToken @@ -148,6 +149,9 @@ internal class TestLiftToAst { fun testCustomTransformer() { class ForcedDuplicate(val alternatives: List>) : Parser> { + + override val tokens: List = alternatives.flatMap { it.tokens } + override fun tryParse( tokens: TokenMatchesSequence, fromPosition: Int @@ -175,6 +179,7 @@ internal class TestLiftToAst { if (parser is ForcedDuplicate<*>) return object : Parser> { val parsers = parser.alternatives.map { default.transform(it) } + override val tokens: List = parsers.flatMap { it.tokens } override fun tryParse( tokens: TokenMatchesSequence,