Add anonymous token support. #42

Draft · wants to merge 1 commit into base: master

3 changes: 3 additions & 0 deletions CONTRIBUTING.md
@@ -0,0 +1,3 @@
## Unit tests

Get quick feedback on correctness by running your platform's tests, e.g. `gradle macosX64Test`.
5 changes: 5 additions & 0 deletions README.md
@@ -261,6 +261,11 @@ val term by

A `Grammar` implementation can override the `tokenizer` property to provide a custom implementation of `Tokenizer`.
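
As an editorial illustration (not part of this diff), here is a minimal sketch of such an override. It assumes the grammar simply rebuilds a `DefaultTokenizer` over its declared tokens; a real use case would substitute its own `Tokenizer` implementation on the right-hand side:
```kotlin
import com.github.h0tk3y.betterParse.combinators.*
import com.github.h0tk3y.betterParse.grammar.Grammar
import com.github.h0tk3y.betterParse.lexer.*
import com.github.h0tk3y.betterParse.parser.Parser

object WordGrammar : Grammar<String>() {
    val word by regexToken("\\w+")

    // The default implementation already builds a DefaultTokenizer from `tokens`;
    // replace the right-hand side with a custom Tokenizer to change tokenization.
    override val tokenizer: Tokenizer by lazy { DefaultTokenizer(tokens) }

    override val rootParser: Parser<String> by word use { text }
}
```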

If nothing in `Grammar` needs to be overridden, the `grammar` function can also be used:
```kotlin
val matches = grammar(zeroOrMore(regexToken("foo") use { text })).parseToEnd(input)
```

## Syntax trees

A `Parser<T>` can be converted to another `Parser<SyntaxTree<T>>`, where a `SyntaxTree<T>`, along with the parsed `T`
@@ -1,5 +1,6 @@
package com.github.h0tk3y.betterParse.combinators

import com.github.h0tk3y.betterParse.lexer.Token
import com.github.h0tk3y.betterParse.lexer.TokenMatchesSequence
import com.github.h0tk3y.betterParse.parser.*
import com.github.h0tk3y.betterParse.utils.Tuple2
@@ -25,10 +26,13 @@ public inline operator fun <reified A, reified B> AndCombinator<A>.times(other:
this and other

public class AndCombinator<out R> @PublishedApi internal constructor(
@PublishedApi internal val consumersImpl: List<Any>,
@PublishedApi internal val consumersImpl: List<TokenProvider>,
internal val transform: (List<Any?>) -> R
) : Parser<R> {

public override val tokens: List<Token>
get() = consumersImpl.flatMap { it.tokens }

@Deprecated("Use parsers or skipParsers instead to get the type-safe results.")
public val consumers: List<Any>
get() = consumersImpl
@@ -9,7 +9,8 @@ import com.github.h0tk3y.betterParse.parser.*
public class MapCombinator<T, R>(
public val innerParser: Parser<T>,
public val transform: (T) -> R
) : Parser<R> {
) : Parser<R>, TokenProvider by innerParser {

override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<R> {
val innerResult = innerParser.tryParse(tokens, fromPosition)
return when (innerResult) {
@@ -5,7 +5,7 @@ import com.github.h0tk3y.betterParse.parser.*

/** Tries to parse the sequence with [parser], and if that fails, returns [Parsed] of null instead. */
public class OptionalCombinator<T>(public val parser: Parser<T>) :
Parser<T?> {
Parser<T?>, TokenProvider by parser {
override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<T?> {
val result = parser.tryParse(tokens, fromPosition)
return when (result) {
@@ -7,6 +7,10 @@ import com.github.h0tk3y.betterParse.parser.*
* If none succeeds, returns the [AlternativesFailure] with all the [ErrorResult]s. */
public class OrCombinator<T>(public val parsers: List<Parser<T>>) :
Parser<T> {

public override val tokens: List<Token>
get() = parsers.flatMap { it.tokens }

override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<T> {
var failures: ArrayList<ErrorResult>? = null
for (index in 0 until parsers.size) {
@@ -10,7 +10,7 @@ public class RepeatCombinator<T> internal constructor(
public val parser: Parser<T>,
public val atLeast: Int = 0,
public val atMost: Int = -1
) : Parser<List<T>> {
) : Parser<List<T>>, TokenProvider by parser {

init {
require(atLeast >= 0) { "atLeast = $atLeast, expected non-negative" }
@@ -1,5 +1,6 @@
package com.github.h0tk3y.betterParse.combinators

import com.github.h0tk3y.betterParse.lexer.Token
import com.github.h0tk3y.betterParse.lexer.TokenMatchesSequence
import com.github.h0tk3y.betterParse.parser.*

@@ -9,6 +10,10 @@ public class SeparatedCombinator<T, S>(
public val separatorParser: Parser<S>,
public val acceptZero: Boolean
) : Parser<Separated<T, S>> {

public override val tokens: List<Token>
get() = termParser.tokens + separatorParser.tokens

override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<Separated<T, S>> {
val termMatches = mutableListOf<T>()
val separatorMatches = mutableListOf<S>()
@@ -2,11 +2,12 @@ package com.github.h0tk3y.betterParse.combinators

import com.github.h0tk3y.betterParse.parser.Parsed
import com.github.h0tk3y.betterParse.parser.Parser
import com.github.h0tk3y.betterParse.parser.TokenProvider
import com.github.h0tk3y.betterParse.utils.Tuple
import com.github.h0tk3y.betterParse.utils.Tuple1

/** Wraps a [Parser] to distinguish it from other parsers when it is used in [and] functions. */
public class SkipParser(public val innerParser: Parser<*>)
public class SkipParser(public val innerParser: Parser<*>) : TokenProvider by innerParser

/** Wraps a [Parser] to distinguish it from other parsers when it is used in [and] functions. */
public fun <T> skip(parser: Parser<T>): SkipParser = SkipParser(parser)
@@ -4,10 +4,7 @@ import com.github.h0tk3y.betterParse.lexer.DefaultTokenizer
import com.github.h0tk3y.betterParse.lexer.Token
import com.github.h0tk3y.betterParse.lexer.TokenMatchesSequence
import com.github.h0tk3y.betterParse.lexer.Tokenizer
import com.github.h0tk3y.betterParse.parser.ParseResult
import com.github.h0tk3y.betterParse.parser.Parser
import com.github.h0tk3y.betterParse.parser.parseToEnd
import com.github.h0tk3y.betterParse.parser.tryParseToEnd
import com.github.h0tk3y.betterParse.parser.*
import kotlin.reflect.KProperty

/**
@@ -16,16 +13,18 @@ import kotlin.reflect.KProperty
*/
public abstract class Grammar<out T> : Parser<T> {

/** Tokens declared in the concrete object are added here during instance construction. */
private val _tokens = arrayListOf<Token>()

private val _parsers = linkedSetOf<Parser<*>>()

/** List of tokens that is by default used for tokenizing a sequence before parsing this language. The tokens are
* added to this list during an instance construction. */
public open val tokens: List<Token> get(): List<Token> = _tokens.distinctBy { it.name ?: it }
/**
* List of tokens that is by default used for tokenizing a sequence before parsing.
*/
public override val tokens: List<Token> by lazy { (_tokens + rootParser.tokens).distinctBy { it.name ?: it } }

/** Set of the tokens and parsers that were declared by delegation to the parser instances (`val p by someParser`), and [rootParser] */
public open val declaredParsers: Set<Parser<*>> get() = (_parsers + _tokens + rootParser).toSet()
public open val declaredParsers: Set<Parser<*>> by lazy { (_parsers + tokens + rootParser).toSet() }

/** A [Tokenizer] that is built with the [Token]s defined within this [Grammar], in their order of declaration */
public open val tokenizer: Tokenizer by lazy { DefaultTokenizer(tokens) }
@@ -49,8 +48,6 @@ public abstract class Grammar<out T> : Parser<T> {
}
_tokens.add(it)
}

protected operator fun Token.getValue(thisRef: Grammar<*>, property: KProperty<*>): Token = this
}

/** A convenience function to use for referencing a parser that is not initialized up to this moment. */
@@ -59,6 +56,8 @@ public fun <T> parser(block: () -> Parser<T>): Parser<T> = ParserReference(block)
public class ParserReference<out T> internal constructor(parserProvider: () -> Parser<T>) : Parser<T> {
public val parser: Parser<T> by lazy(parserProvider)

public override val tokens: List<Token> = listOf() // a parser reference defines no tokens itself

override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<T> =
parser.tryParse(tokens, fromPosition)
}
@@ -67,4 +66,10 @@ public fun <T> Grammar<T>.tryParseToEnd(input: String): ParseResult<T> =
rootParser.tryParseToEnd(tokenizer.tokenize(input), 0)

public fun <T> Grammar<T>.parseToEnd(input: String): T =
rootParser.parseToEnd(tokenizer.tokenize(input))
rootParser.parseToEnd(tokenizer.tokenize(input))

public fun <T> grammar(rootParser: Parser<T>): Grammar<T> {
return object : Grammar<T>() {
override val rootParser: Parser<T> = rootParser
}
}
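
As a brief editorial aside (mirroring the README and test changes elsewhere in this PR), the new `grammar` helper builds an anonymous `Grammar` directly from a root parser, so tokens can be declared inline without a `Grammar` subclass. A minimal usage sketch:
```kotlin
import com.github.h0tk3y.betterParse.combinators.*
import com.github.h0tk3y.betterParse.grammar.*
import com.github.h0tk3y.betterParse.lexer.regexToken

fun main() {
    // The token is created inline ("anonymously"); grammar() collects it through the
    // new `tokens` property exposed by the root parser.
    val fooParser = grammar(zeroOrMore(regexToken("foo") use { text }))
    println(fooParser.parseToEnd("foofoo")) // [foo, foo]
}
```
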
@@ -1,6 +1,8 @@
package com.github.h0tk3y.betterParse.lexer

import com.github.h0tk3y.betterParse.grammar.Grammar
import com.github.h0tk3y.betterParse.parser.*
import kotlin.reflect.KProperty

@OptionalExpectation
public expect annotation class Language(val value: String, val prefix: String, val suffix: String)
@@ -12,6 +14,8 @@ public expect annotation class Language(val value: String, val prefix: String, val suffix: String)
*/
public abstract class Token(public var name: String? = null, public val ignored: Boolean) : Parser<TokenMatch> {

public override val tokens: List<Token> = listOf(this)

public abstract fun match(input: CharSequence, fromIndex: Int): Int

override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<TokenMatch> =
@@ -28,6 +32,8 @@ public abstract class Token(public var name: String? = null, public val ignored: Boolean) : Parser<TokenMatch> {
}
}

public operator fun Token.getValue(thisRef: Any?, property: KProperty<*>): Token = this

/** Token type indicating that there was no [Token] found to be matched by a [Tokenizer]. */
public val noneMatched: Token = object : Token("no token matched", false) {
override fun match(input: CharSequence, fromIndex: Int): Int = 0
@@ -6,11 +6,12 @@ import com.github.h0tk3y.betterParse.lexer.TokenMatchesSequence

/** A common interface for parsers that can try to consume a part or the whole [TokenMatch] sequence and return one of
* possible [ParseResult], either [Parsed] or [ErrorResult] */
public interface Parser<out T> {
public interface Parser<out T> : TokenProvider {
public fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<T>
}

public object EmptyParser : Parser<Unit> {
override val tokens: List<Token> = listOf()
override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<Unit> = ParsedValue(Unit, fromPosition)
}

@@ -0,0 +1,11 @@
package com.github.h0tk3y.betterParse.parser

import com.github.h0tk3y.betterParse.lexer.Token

/**
* In theory this could be a property or function of Parser; however, SkipParser must not implement
* Parser in order to disambiguate infix functions.
*/
public interface TokenProvider {
public val tokens: List<Token>
}
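
As an editorial note on the design above: a wrapper that is deliberately not a `Parser` (like `SkipParser`) can still contribute its tokens by implementing `TokenProvider`. A hypothetical sketch, for illustration only (the `ParserGroup` type is not part of the library or this PR):
```kotlin
import com.github.h0tk3y.betterParse.lexer.Token
import com.github.h0tk3y.betterParse.parser.Parser
import com.github.h0tk3y.betterParse.parser.TokenProvider

// Hypothetical wrapper: it is not a Parser (so it cannot clash with the infix
// combinator overloads), yet it still exposes the tokens of the parsers it wraps
// so that an enclosing grammar can collect them.
class ParserGroup(val parsers: List<Parser<*>>) : TokenProvider {
    override val tokens: List<Token>
        get() = parsers.flatMap { it.tokens }
}
```
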
60 changes: 29 additions & 31 deletions src/commonTest/kotlin/AndTest.kt
@@ -1,57 +1,55 @@

import com.github.h0tk3y.betterParse.combinators.*
import com.github.h0tk3y.betterParse.grammar.Grammar
import com.github.h0tk3y.betterParse.lexer.regexToken
import com.github.h0tk3y.betterParse.parser.Parser
import com.github.h0tk3y.betterParse.parser.parseToEnd
import com.github.h0tk3y.betterParse.grammar.*
import com.github.h0tk3y.betterParse.lexer.*
import com.github.h0tk3y.betterParse.utils.components
import kotlin.test.Test
import kotlin.test.assertEquals

class AndTest : Grammar<Nothing>() {
override val rootParser: Parser<Nothing> get() = throw NoSuchElementException()
class AndTest {

val a by regexToken("a")
val b by regexToken("b")

@Test fun simpleAnd() {
val tokens = tokenizer.tokenize("aba")
val parser = a and b and a use { components.map { it.type } }
val result = parser.parseToEnd(tokens)
assertEquals(
listOf(a, b, a),
grammar(a and b and a use { components.map { it.type } }).parseToEnd("aba"))
}

assertEquals(listOf(a, b, a), result)
@Test fun simpleAndInline() {
assertEquals(
listOf("a", "b"),
grammar(regexToken("a") and regexToken("b") use { components.map { it.text } })
.parseToEnd("ab"))
}

@Test fun skip() {
val tokens = tokenizer.tokenize("abab")
val parserA = a and skip(b) and a and skip(b) use { components.map { it.type } }
val parserB = skip(a) and b and skip(a) and b use { components.map { it.type } }

assertEquals(listOf(a, a), parserA.parseToEnd(tokens))
assertEquals(listOf(b, b), parserB.parseToEnd(tokens))
assertEquals(
listOf(a, a),
grammar(a and skip(b) and a and skip(b) use { components.map { it.type } })
.parseToEnd("abab"))

assertEquals(
listOf(b, b),
grammar(skip(a) and b and skip(a) and b use { components.map { it.type } })
.parseToEnd("abab"))
}

@Test fun leftmostSeveralSkips() {
val tokens = tokenizer.tokenize("ababab")
val parser = -a * -b * a * -b * -a * b use { t1.type to t2.type }
val result = parser.parseToEnd(tokens)

assertEquals(a to b, result)
assertEquals(
a to b,
grammar(-a * -b * a * -b * -a * b use { t1.type to t2.type }).parseToEnd("ababab"))
}

@Test fun singleParserInSkipChain() {
val tokens = tokenizer.tokenize("ababa")
val parser = -a * -b * a * -b * -a use { offset }
val result = parser.parseToEnd(tokens)

assertEquals(2, result)
assertEquals(2, grammar(-a * -b * a * -b * -a use { offset }).parseToEnd("ababa"))
}

@Test fun longAndOperatorChain() {
val tokens = tokenizer.tokenize("aaabbb")
val parser = a * a * a * b * b * b use { listOf(t6, t5, t4, t3, t2, t1).map { it.type } }
val result = parser.parseToEnd(tokens)

assertEquals(listOf(b, b, b, a, a, a), result)
assertEquals(
listOf(b, b, b, a, a, a),
grammar(a * a * a * b * b * b use { listOf(t6, t5, t4, t3, t2, t1).map { it.type } })
.parseToEnd("aaabbb"))
}
}
43 changes: 33 additions & 10 deletions src/commonTest/kotlin/OrTest.kt
@@ -4,26 +4,49 @@ import com.github.h0tk3y.betterParse.lexer.*
import com.github.h0tk3y.betterParse.parser.*
import kotlin.test.*

class OrTest : Grammar<Nothing>() {
override val rootParser: Parser<Nothing> get() = throw NoSuchElementException()
class OrTest {

val a by regexToken("a")
val aa by regexToken("aa")
val b by regexToken("b")

@Test fun aOrB() {
val tokens = tokenizer.tokenize("abababa")
val abOrA = zeroOrMore((a and b use { t2 }) or a) use { map { it.type } }
val result = abOrA.parseToEnd(tokens)

assertEquals(listOf(b, b, b, a), result)
assertEquals(
listOf(b, b, b, a),
grammar(zeroOrMore((a and b use { t2 }) or a) use { map { it.type } })
.parseToEnd("abababa"))
}

@Test fun alternativesError() {
val tokens = tokenizer.tokenize("ab")
val parser = (a and a) or (a and b and a)
val result = parser.tryParse(tokens,0) as AlternativesFailure
val result = grammar((a and a) or (a and b and a)).tryParseToEnd("ab") as AlternativesFailure

assertTrue(result.errors[0] is MismatchedToken)
assertTrue(result.errors[1] is UnexpectedEof)
}

@Test fun aOrAa() {
// aa defined first
assertEquals(
listOf("aa"),
grammar(oneOrMore(aa or a) use { map { it.text } }).parseToEnd("aa")
)

// a defined first
assertEquals(
listOf("a", "a"),
grammar(oneOrMore(a or aa) use { map { it.text } }).parseToEnd("aa")
)

/*
* a is first in the declared tokens, but aa is first in the parser. For backwards
* compatibility, the order of declared tokens is honored.
*/
val grammar = object : Grammar<List<String>>() {
val a by regexToken("a")
val aa by regexToken("aa")
override val rootParser: Parser<List<String>> by oneOrMore(aa or a) use { map { it.text } }
}

assertEquals(listOf("a", "a"), grammar.parseToEnd("aa"))
}
}