Add anonymous token support. #42

Draft · wants to merge 1 commit into base: master

3 changes: 3 additions & 0 deletions CONTRIBUTING.md
@@ -0,0 +1,3 @@
## Unit tests

Get quick feedback on correctness by running your platform's tests, e.g. `gradle macosX64Test`.
5 changes: 5 additions & 0 deletions README.md
@@ -261,6 +261,11 @@ val term by

A `Grammar` implementation can override the `tokenizer` property to provide a custom implementation of `Tokenizer`.
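
As an editorial illustration (not part of this diff), here is a minimal sketch of such an override. It assumes the grammar simply rebuilds a `DefaultTokenizer` over its declared tokens; a real use case would substitute its own `Tokenizer` implementation on the right-hand side:
```kotlin
import com.github.h0tk3y.betterParse.combinators.*
import com.github.h0tk3y.betterParse.grammar.Grammar
import com.github.h0tk3y.betterParse.lexer.*
import com.github.h0tk3y.betterParse.parser.Parser

object WordGrammar : Grammar<String>() {
    val word by regexToken("\\w+")

    // The default implementation already builds a DefaultTokenizer from `tokens`;
    // replace the right-hand side with a custom Tokenizer to change tokenization.
    override val tokenizer: Tokenizer by lazy { DefaultTokenizer(tokens) }

    override val rootParser: Parser<String> by word use { text }
}
```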

If nothing in `Grammar` needs to be overridden, the `grammar` function can also be used:
```kotlin
val matches = grammar(zeroOrMore(regexToken("foo") use { text })).parseToEnd(input)
```

## Syntax trees

A `Parser<T>` can be converted to another `Parser<SyntaxTree<T>>`, where a `SyntaxTree<T>`, along with the parsed `T`
@@ -1,5 +1,6 @@
package com.github.h0tk3y.betterParse.combinators

import com.github.h0tk3y.betterParse.lexer.Token
import com.github.h0tk3y.betterParse.lexer.TokenMatchesSequence
import com.github.h0tk3y.betterParse.parser.*
import com.github.h0tk3y.betterParse.utils.Tuple2
@@ -25,10 +26,13 @@ public inline operator fun <reified A, reified B> AndCombinator<A>.times(other:
this and other

public class AndCombinator<out R> @PublishedApi internal constructor(
@PublishedApi internal val consumersImpl: List<Any>,
@PublishedApi internal val consumersImpl: List<TokenProvider>,
internal val transform: (List<Any?>) -> R
) : Parser<R> {

public override val tokens: List<Token>
get() = consumersImpl.flatMap { it.tokens }

@Deprecated("Use parsers or skipParsers instead to get the type-safe results.")
public val consumers: List<Any>
get() = consumersImpl
@@ -9,7 +9,8 @@ import com.github.h0tk3y.betterParse.parser.*
public class MapCombinator<T, R>(
public val innerParser: Parser<T>,
public val transform: (T) -> R
) : Parser<R> {
) : Parser<R>, TokenProvider by innerParser {

override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<R> {
val innerResult = innerParser.tryParse(tokens, fromPosition)
return when (innerResult) {
@@ -5,7 +5,7 @@ import com.github.h0tk3y.betterParse.parser.*

/** Tries to parse the sequence with [parser], and if that fails, returns [Parsed] of null instead. */
public class OptionalCombinator<T>(public val parser: Parser<T>) :
Parser<T?> {
Parser<T?>, TokenProvider by parser {
override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<T?> {
val result = parser.tryParse(tokens, fromPosition)
return when (result) {
@@ -7,6 +7,10 @@ import com.github.h0tk3y.betterParse.parser.*
* If none succeeds, returns the [AlternativesFailure] with all the [ErrorResult]s. */
public class OrCombinator<T>(public val parsers: List<Parser<T>>) :
Parser<T> {

public override val tokens: List<Token>
get() = parsers.flatMap { it.tokens }

override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<T> {
var failures: ArrayList<ErrorResult>? = null
for (index in 0 until parsers.size) {
@@ -10,7 +10,7 @@ public class RepeatCombinator<T> internal constructor(
public val parser: Parser<T>,
public val atLeast: Int = 0,
public val atMost: Int = -1
) : Parser<List<T>> {
) : Parser<List<T>>, TokenProvider by parser {

init {
require(atLeast >= 0) { "atLeast = $atLeast, expected non-negative" }
@@ -1,5 +1,6 @@
package com.github.h0tk3y.betterParse.combinators

import com.github.h0tk3y.betterParse.lexer.Token
import com.github.h0tk3y.betterParse.lexer.TokenMatchesSequence
import com.github.h0tk3y.betterParse.parser.*

@@ -9,6 +10,10 @@ public class SeparatedCombinator<T, S>(
public val separatorParser: Parser<S>,
public val acceptZero: Boolean
) : Parser<Separated<T, S>> {

public override val tokens: List<Token>
get() = termParser.tokens + separatorParser.tokens

override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<Separated<T, S>> {
val termMatches = mutableListOf<T>()
val separatorMatches = mutableListOf<S>()
@@ -2,11 +2,12 @@ package com.github.h0tk3y.betterParse.combinators

import com.github.h0tk3y.betterParse.parser.Parsed
import com.github.h0tk3y.betterParse.parser.Parser
import com.github.h0tk3y.betterParse.parser.TokenProvider
import com.github.h0tk3y.betterParse.utils.Tuple
import com.github.h0tk3y.betterParse.utils.Tuple1

/** Wraps a [Parser] to distinguish it from other parsers when it is used in [and] functions. */
public class SkipParser(public val innerParser: Parser<*>)
public class SkipParser(public val innerParser: Parser<*>) : TokenProvider by innerParser

/** Wraps a [Parser] to distinguish it from other parsers when it is used in [and] functions. */
public fun <T> skip(parser: Parser<T>): SkipParser = SkipParser(parser)
@@ -4,10 +4,7 @@ import com.github.h0tk3y.betterParse.lexer.DefaultTokenizer
import com.github.h0tk3y.betterParse.lexer.Token
import com.github.h0tk3y.betterParse.lexer.TokenMatchesSequence
import com.github.h0tk3y.betterParse.lexer.Tokenizer
import com.github.h0tk3y.betterParse.parser.ParseResult
import com.github.h0tk3y.betterParse.parser.Parser
import com.github.h0tk3y.betterParse.parser.parseToEnd
import com.github.h0tk3y.betterParse.parser.tryParseToEnd
import com.github.h0tk3y.betterParse.parser.*
import kotlin.reflect.KProperty

/**
@@ -16,16 +13,18 @@ import kotlin.reflect.KProperty
*/
public abstract class Grammar<out T> : Parser<T> {

/** Tokens declared in the concrete object are added here during instance construction. */
private val _tokens = arrayListOf<Token>()

private val _parsers = linkedSetOf<Parser<*>>()

/** List of tokens that is by default used for tokenizing a sequence before parsing this language. The tokens are
* added to this list during an instance construction. */
public open val tokens: List<Token> get(): List<Token> = _tokens.distinctBy { it.name ?: it }
/**
* List of tokens that is by default used for tokenizing a sequence before parsing.
*/
public override val tokens: List<Token> by lazy { (_tokens + rootParser.tokens).distinctBy { it.name ?: it } }

/** Set of the tokens and parsers that were declared by delegation to the parser instances (`val p by someParser`), and [rootParser] */
public open val declaredParsers: Set<Parser<*>> get() = (_parsers + _tokens + rootParser).toSet()
public open val declaredParsers: Set<Parser<*>> by lazy { (_parsers + tokens + rootParser).toSet() }

/** A [Tokenizer] that is built with the [Token]s defined within this [Grammar], in their order of declaration */
public open val tokenizer: Tokenizer by lazy { DefaultTokenizer(tokens) }
@@ -49,8 +48,6 @@ public abstract class Grammar<out T> : Parser<T> {
}
_tokens.add(it)
}

protected operator fun Token.getValue(thisRef: Grammar<*>, property: KProperty<*>): Token = this
}

/** A convenience function to use for referencing a parser that is not initialized up to this moment. */
@@ -59,6 +56,8 @@ public fun <T> parser(block: () -> Parser<T>): Parser<T> = ParserReference(block)
public class ParserReference<out T> internal constructor(parserProvider: () -> Parser<T>) : Parser<T> {
public val parser: Parser<T> by lazy(parserProvider)

public override val tokens: List<Token> = listOf() // a parser reference defines no tokens itself

override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<T> =
parser.tryParse(tokens, fromPosition)
}
@@ -67,4 +66,10 @@ public fun <T> Grammar<T>.tryParseToEnd(input: String): ParseResult<T> =
rootParser.tryParseToEnd(tokenizer.tokenize(input), 0)

public fun <T> Grammar<T>.parseToEnd(input: String): T =
rootParser.parseToEnd(tokenizer.tokenize(input))
rootParser.parseToEnd(tokenizer.tokenize(input))

public fun <T> grammar(rootParser: Parser<T>): Grammar<T> {
return object : Grammar<T>() {
override val rootParser: Parser<T> = rootParser
}
}
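
As a brief editorial aside (mirroring the README and test changes elsewhere in this PR), the new `grammar` helper builds an anonymous `Grammar` directly from a root parser, so tokens can be declared inline without a `Grammar` subclass. A minimal usage sketch:
```kotlin
import com.github.h0tk3y.betterParse.combinators.*
import com.github.h0tk3y.betterParse.grammar.*
import com.github.h0tk3y.betterParse.lexer.regexToken

fun main() {
    // The token is created inline ("anonymously"); grammar() collects it through the
    // new `tokens` property exposed by the root parser.
    val fooParser = grammar(zeroOrMore(regexToken("foo") use { text }))
    println(fooParser.parseToEnd("foofoo")) // [foo, foo]
}
```
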
@@ -1,6 +1,8 @@
package com.github.h0tk3y.betterParse.lexer

import com.github.h0tk3y.betterParse.grammar.Grammar
import com.github.h0tk3y.betterParse.parser.*
import kotlin.reflect.KProperty

@OptionalExpectation
public expect annotation class Language(val value: String, val prefix: String, val suffix: String)
@@ -12,6 +14,8 @@ public expect annotation class Language(val value: String, val prefix: String, val suffix: String)
*/
public abstract class Token(public var name: String? = null, public val ignored: Boolean) : Parser<TokenMatch> {

public override val tokens: List<Token> = listOf(this)

public abstract fun match(input: CharSequence, fromIndex: Int): Int

override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<TokenMatch> =
@@ -28,6 +32,8 @@ public abstract class Token(public var name: String? = null, public val ignored: Boolean) : Parser<TokenMatch> {
}
}

public operator fun Token.getValue(thisRef: Any?, property: KProperty<*>): Token = this

/** Token type indicating that there was no [Token] found to be matched by a [Tokenizer]. */
public val noneMatched: Token = object : Token("no token matched", false) {
override fun match(input: CharSequence, fromIndex: Int): Int = 0
@@ -6,11 +6,12 @@ import com.github.h0tk3y.betterParse.lexer.TokenMatchesSequence

/** A common interface for parsers that can try to consume a part or the whole [TokenMatch] sequence and return one of
* possible [ParseResult], either [Parsed] or [ErrorResult] */
public interface Parser<out T> {
public interface Parser<out T> : TokenProvider {
public fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<T>
}

public object EmptyParser : Parser<Unit> {
override val tokens: List<Token> = listOf()
override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<Unit> = ParsedValue(Unit, fromPosition)
}

@@ -0,0 +1,11 @@
package com.github.h0tk3y.betterParse.parser

import com.github.h0tk3y.betterParse.lexer.Token

/**
* In theory this could be a property or function of Parser; however, SkipParser must not implement
* Parser in order to disambiguate infix functions.
*/
public interface TokenProvider {
public val tokens: List<Token>
}
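
As an editorial note on the design above: a wrapper that is deliberately not a `Parser` (like `SkipParser`) can still contribute its tokens by implementing `TokenProvider`. A hypothetical sketch, for illustration only (the `ParserGroup` type is not part of the library or this PR):
```kotlin
import com.github.h0tk3y.betterParse.lexer.Token
import com.github.h0tk3y.betterParse.parser.Parser
import com.github.h0tk3y.betterParse.parser.TokenProvider

// Hypothetical wrapper: it is not a Parser (so it cannot clash with the infix
// combinator overloads), yet it still exposes the tokens of the parsers it wraps
// so that an enclosing grammar can collect them.
class ParserGroup(val parsers: List<Parser<*>>) : TokenProvider {
    override val tokens: List<Token>
        get() = parsers.flatMap { it.tokens }
}
```
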
60 changes: 29 additions & 31 deletions src/commonTest/kotlin/AndTest.kt
@@ -1,57 +1,55 @@

import com.github.h0tk3y.betterParse.combinators.*
import com.github.h0tk3y.betterParse.grammar.Grammar
import com.github.h0tk3y.betterParse.lexer.regexToken
import com.github.h0tk3y.betterParse.parser.Parser
import com.github.h0tk3y.betterParse.parser.parseToEnd
import com.github.h0tk3y.betterParse.grammar.*
import com.github.h0tk3y.betterParse.lexer.*
import com.github.h0tk3y.betterParse.utils.components
import kotlin.test.Test
import kotlin.test.assertEquals

class AndTest : Grammar<Nothing>() {
override val rootParser: Parser<Nothing> get() = throw NoSuchElementException()
class AndTest {

val a by regexToken("a")
val b by regexToken("b")

@Test fun simpleAnd() {
val tokens = tokenizer.tokenize("aba")
val parser = a and b and a use { components.map { it.type } }
val result = parser.parseToEnd(tokens)
assertEquals(
listOf(a, b, a),
grammar(a and b and a use { components.map { it.type } }).parseToEnd("aba"))
}

assertEquals(listOf(a, b, a), result)
@Test fun simpleAndInline() {
assertEquals(
listOf("a", "b"),
grammar(regexToken("a") and regexToken("b") use { components.map { it.text } })
.parseToEnd("ab"))
}

@Test fun skip() {
val tokens = tokenizer.tokenize("abab")
val parserA = a and skip(b) and a and skip(b) use { components.map { it.type } }
val parserB = skip(a) and b and skip(a) and b use { components.map { it.type } }

assertEquals(listOf(a, a), parserA.parseToEnd(tokens))
assertEquals(listOf(b, b), parserB.parseToEnd(tokens))
assertEquals(
listOf(a, a),
grammar(a and skip(b) and a and skip(b) use { components.map { it.type } })
.parseToEnd("abab"))

assertEquals(
listOf(b, b),
grammar(skip(a) and b and skip(a) and b use { components.map { it.type } })
.parseToEnd("abab"))
}

@Test fun leftmostSeveralSkips() {
val tokens = tokenizer.tokenize("ababab")
val parser = -a * -b * a * -b * -a * b use { t1.type to t2.type }
val result = parser.parseToEnd(tokens)

assertEquals(a to b, result)
assertEquals(
a to b,
grammar(-a * -b * a * -b * -a * b use { t1.type to t2.type }).parseToEnd("ababab"))
}

@Test fun singleParserInSkipChain() {
val tokens = tokenizer.tokenize("ababa")
val parser = -a * -b * a * -b * -a use { offset }
val result = parser.parseToEnd(tokens)

assertEquals(2, result)
assertEquals(2, grammar(-a * -b * a * -b * -a use { offset }).parseToEnd("ababa"))
}

@Test fun longAndOperatorChain() {
val tokens = tokenizer.tokenize("aaabbb")
val parser = a * a * a * b * b * b use { listOf(t6, t5, t4, t3, t2, t1).map { it.type } }
val result = parser.parseToEnd(tokens)

assertEquals(listOf(b, b, b, a, a, a), result)
assertEquals(
listOf(b, b, b, a, a, a),
grammar(a * a * a * b * b * b use { listOf(t6, t5, t4, t3, t2, t1).map { it.type } })
.parseToEnd("aaabbb"))
}
}
43 changes: 33 additions & 10 deletions src/commonTest/kotlin/OrTest.kt
@@ -4,26 +4,49 @@ import com.github.h0tk3y.betterParse.lexer.*
import com.github.h0tk3y.betterParse.parser.*
import kotlin.test.*

class OrTest : Grammar<Nothing>() {
override val rootParser: Parser<Nothing> get() = throw NoSuchElementException()
class OrTest {

val a by regexToken("a")
val aa by regexToken("aa")
val b by regexToken("b")

@Test fun aOrB() {
val tokens = tokenizer.tokenize("abababa")
val abOrA = zeroOrMore((a and b use { t2 }) or a) use { map { it.type } }
val result = abOrA.parseToEnd(tokens)

assertEquals(listOf(b, b, b, a), result)
assertEquals(
listOf(b, b, b, a),
grammar(zeroOrMore((a and b use { t2 }) or a) use { map { it.type } })
.parseToEnd("abababa"))
}

@Test fun alternativesError() {
val tokens = tokenizer.tokenize("ab")
val parser = (a and a) or (a and b and a)
val result = parser.tryParse(tokens,0) as AlternativesFailure
val result = grammar((a and a) or (a and b and a)).tryParseToEnd("ab") as AlternativesFailure

assertTrue(result.errors[0] is MismatchedToken)
assertTrue(result.errors[1] is UnexpectedEof)
}

@Test fun aOrAa() {
// aa defined first
assertEquals(
listOf("aa"),
grammar(oneOrMore(aa or a) use { map { it.text } }).parseToEnd("aa")
)

// a defined first
assertEquals(
listOf("a", "a"),
grammar(oneOrMore(a or aa) use { map { it.text } }).parseToEnd("aa")
)

/*
* a is first in the declared tokens, but aa is first in the parser. For backwards
* compatibility, the order of declared tokens is honored.
*/
val grammar = object : Grammar<List<String>>() {
val a by regexToken("a")
val aa by regexToken("aa")
override val rootParser: Parser<List<String>> by oneOrMore(aa or a) use { map { it.text } }
}

assertEquals(listOf("a", "a"), grammar.parseToEnd("aa"))
}
}