diff --git a/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt b/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt index 94f7052cd..d2bc0290f 100644 --- a/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt +++ b/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt @@ -6,7 +6,6 @@ package kotlinx.serialization.json import kotlinx.serialization.* import kotlinx.serialization.builtins.* -import kotlinx.serialization.json.internal.* import kotlinx.serialization.test.* import kotlin.test.* @@ -48,7 +47,10 @@ class JsonParserTest : JsonTestBase() { StringData("https://t.co/M1uhwigsMT"), default.decodeFromString(StringData.serializer(), """{"data":"https:\/\/t.co\/M1uhwigsMT"}""") ) - assertEquals(StringData("\"test\""), default.decodeFromString(StringData.serializer(), """{"data": "\"test\""}""")) + assertEquals( + StringData("\"test\""), + default.decodeFromString(StringData.serializer(), """{"data": "\"test\""}""") + ) assertEquals(StringData("\u00c9"), default.decodeFromString(StringData.serializer(), """{"data": "\u00c9"}""")) assertEquals(StringData("""\\"""), default.decodeFromString(StringData.serializer(), """{"data": "\\\\"}""")) } @@ -79,11 +81,11 @@ class JsonParserTest : JsonTestBase() { fun testTrailingComma() { testTrailingComma("{\"id\":0,}") testTrailingComma("{\"id\":0 ,}") - testTrailingComma("{\"id\":0 , ,}") + testTrailingComma("{\"id\":0 , ,}", message = "Multiple consecutive commas are not allowed in JSON") } - private fun testTrailingComma(content: String) { - assertFailsWithSerialMessage("JsonDecodingException", "Trailing comma before the end of JSON object") { Json.parseToJsonElement(content) } + private fun testTrailingComma(content: String, message: String = "Trailing comma before the end of JSON object") { + assertFailsWithSerialMessage("JsonDecodingException", message) { Json.parseToJsonElement(content) } } @Test diff --git a/formats/json-tests/commonTest/src/kotlinx/serialization/json/MissingCommaTest.kt b/formats/json-tests/commonTest/src/kotlinx/serialization/json/MissingCommaTest.kt index e337c5a66..a3a894dee 100644 --- a/formats/json-tests/commonTest/src/kotlinx/serialization/json/MissingCommaTest.kt +++ b/formats/json-tests/commonTest/src/kotlinx/serialization/json/MissingCommaTest.kt @@ -26,7 +26,7 @@ class MissingCommaTest : JsonTestBase() { @Test fun missingCommaBetweenFieldsAfterPrimitive() { val message = - "Unexpected JSON token at offset 8: Expected comma after the key-value pair at path: \$.i" + "Unexpected JSON token at offset 8: Expected end of the object or comma at path: \$" val json = """{"i":42 "c":{"i":"string"}}""" assertFailsWithSerialMessage("JsonDecodingException", message) { @@ -37,7 +37,7 @@ class MissingCommaTest : JsonTestBase() { @Test fun missingCommaBetweenFieldsAfterObject() { val message = - "Unexpected JSON token at offset 19: Expected comma after the key-value pair at path: \$.c" + "Unexpected JSON token at offset 19: Expected end of the object or comma at path: \$" val json = """{"c":{"i":"string"}"i":42}""" assertFailsWithSerialMessage("JsonDecodingException", message) { @@ -48,7 +48,7 @@ class MissingCommaTest : JsonTestBase() { @Test fun allowTrailingCommaDoesNotApplyToCommaBetweenFields() { val message = - "Unexpected JSON token at offset 8: Expected comma after the key-value pair at path: \$.i" + "Unexpected JSON token at offset 8: Expected end of the object or comma at path: \$" val json = """{"i":42 "c":{"i":"string"}}""" assertFailsWithSerialMessage("JsonDecodingException", message) { @@ -59,7 +59,7 @@ class MissingCommaTest : JsonTestBase() { @Test fun lenientSerializeDoesNotAllowToSkipCommaBetweenFields() { val message = - "Unexpected JSON token at offset 8: Expected comma after the key-value pair at path: \$.i" + "Unexpected JSON token at offset 8: Expected end of the object or comma at path: \$" val json = """{"i":42 "c":{"i":"string"}}""" assertFailsWithSerialMessage("JsonDecodingException", message) { @@ -69,7 +69,7 @@ class MissingCommaTest : JsonTestBase() { @Test fun missingCommaInDynamicMap() { - val m = "Unexpected JSON token at offset 9: Expected end of the object or comma at path: \$" + val m = "Unexpected JSON token at offset 8: Expected end of the object or comma at path: \$" val json = """{"i":42 "c":{"i":"string"}}""" assertFailsWithSerialMessage("JsonDecodingException", m) { default.parseToJsonElement(json) @@ -78,7 +78,7 @@ class MissingCommaTest : JsonTestBase() { @Test fun missingCommaInArray() { - val m = "Unexpected JSON token at offset 3: Expected end of the array or comma at path: \$[0]" + val m = "Unexpected JSON token at offset 3: Expected end of the array or comma at path: \$" val json = """[1 2 3 4]""" assertFailsWithSerialMessage("JsonDecodingException", m) { @@ -88,11 +88,21 @@ class MissingCommaTest : JsonTestBase() { @Test fun missingCommaInStringMap() { - val m = "Unexpected JSON token at offset 9: Expected comma after the key-value pair at path: \$['a']" + val m = "Unexpected JSON token at offset 9: Expected end of the object or comma at path: \$" val json = """{"a":"1" "b":"2"}""" assertFailsWithSerialMessage("JsonDecodingException", m) { default.decodeFromString>(json) } } + + @Test + fun missingCommaInUnknownKeys() { + val m = "Unexpected JSON token at offset 17: Expected end of the object or comma at path: \$" + val json = """{"i":"1","b":"2" "c":"2"}""" + + assertFailsWithSerialMessage("JsonDecodingException", m) { + lenient.decodeFromString(json) + } + } } \ No newline at end of file diff --git a/formats/json-tests/commonTest/src/kotlinx/serialization/json/TrailingCommaTest.kt b/formats/json-tests/commonTest/src/kotlinx/serialization/json/TrailingCommaTest.kt index 0916de579..d729349d2 100644 --- a/formats/json-tests/commonTest/src/kotlinx/serialization/json/TrailingCommaTest.kt +++ b/formats/json-tests/commonTest/src/kotlinx/serialization/json/TrailingCommaTest.kt @@ -125,4 +125,45 @@ class TrailingCommaTest : JsonTestBase() { "wl":{"l":[1, 2, 3,],},}""" assertEquals(Mixed(multipleFields, withMap, withList), tj.decodeFromString(input, mode)) } + + @Test + fun testCommaReportedPosition() = parametrizedTest { mode -> + val sd = """{"a":"1", }""" + + checkSerializationException({ + default.decodeFromString>(sd, mode) + }, { message -> + assertContains( + message, + """Unexpected JSON token at offset 8: Trailing comma before the end of JSON object""" + ) + }) + } + + @Test + fun testMultipleTrailingCommasAreNotAllowedEvenWhenTrailingIsEnabled() = parametrizedTest { mode -> + val sd = """{"a":"1",,}""" + checkSerializationException({ + tj.decodeFromString>(sd, mode) + }, { message -> + assertContains( + message, + """Unexpected JSON token at offset 9: Multiple consecutive commas are not allowed in JSON""" + ) + }) + } + + @Test + fun testMultipleTrailingCommasError() = parametrizedTest { mode -> + val sd = """{"a":"1",,,}""" + + checkSerializationException({ + default.decodeFromString>(sd, mode) + }, { message -> + assertContains( + message, + """Unexpected JSON token at offset 9: Multiple consecutive commas are not allowed in JSON""" + ) + }) + } } diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonExceptions.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonExceptions.kt index c885c808f..0f1c22915 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonExceptions.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonExceptions.kt @@ -46,13 +46,6 @@ internal fun AbstractJsonLexer.throwInvalidFloatingPointDecoded(result: Number): hint = specialFlowingValuesHint) } -internal fun AbstractJsonLexer.invalidTrailingComma(entity: String = "object"): Nothing { - fail("Trailing comma before the end of JSON $entity", - position = currentPosition - 1, - hint = "Trailing commas are non-complaint JSON and not allowed by default. Use 'allowTrailingComma = true' in 'Json {}' builder to support them." - ) -} - @OptIn(ExperimentalSerializationApi::class) internal fun InvalidKeyKindException(keyDescriptor: SerialDescriptor) = JsonEncodingException( "Value of type '${keyDescriptor.serialName}' can't be used in JSON as a key in the map. " + diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonPath.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonPath.kt index 14e70a425..2934e9d0b 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonPath.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonPath.kt @@ -29,7 +29,7 @@ internal class JsonPath { /* * Serial descriptor, map key or the tombstone for map key */ - private var currentObjectPath = arrayOfNulls(8) + private var currentObjectPath = arrayOfNulls(8) /* * Index is a small state-machine used to determine the state of the path: * >=0 -> index of the element being decoded with the outer class currentObjectPath[currentDepth] diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt index 9cb9bb3c5..bcad4cdbb 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt @@ -4,7 +4,7 @@ package kotlinx.serialization.json.internal -import kotlinx.serialization.ExperimentalSerializationApi +import kotlinx.serialization.* import kotlinx.serialization.json.* @OptIn(ExperimentalSerializationApi::class) @@ -24,53 +24,52 @@ internal class JsonTreeReader( readObjectImpl { callRecursive(Unit) } private inline fun readObjectImpl(reader: () -> JsonElement): JsonObject { - var lastToken = lexer.consumeNextToken(TC_BEGIN_OBJ) + lexer.consumeNextToken(TC_BEGIN_OBJ) + lexer.path.pushDescriptor(JsonObjectSerializer.descriptor) + if (lexer.peekNextToken() == TC_COMMA) lexer.fail("Unexpected leading comma") val result = linkedMapOf() while (lexer.canConsumeValue()) { + lexer.path.resetCurrentMapKey() // Read key and value val key = if (isLenient) lexer.consumeStringLenient() else lexer.consumeString() lexer.consumeNextToken(TC_COLON) + + lexer.path.updateCurrentMapKey(key) val element = reader() result[key] = element // Verify the next token - lastToken = lexer.consumeNextToken() - when (lastToken) { - TC_COMMA -> Unit // no-op, can continue with `canConsumeValue` that verifies the token after comma - TC_END_OBJ -> break // `canConsumeValue` can return incorrect result, since it checks token _after_ TC_END_OBJ - else -> lexer.fail("Expected end of the object or comma") - } + lexer.consumeCommaOrPeekEnd( + allowTrailing = trailingCommaAllowed, + expectedEnd = '}', + ) } - // Check for the correct ending - if (lastToken == TC_BEGIN_OBJ) { // Case of empty object - lexer.consumeNextToken(TC_END_OBJ) - } else if (lastToken == TC_COMMA) { // Trailing comma - if (!trailingCommaAllowed) lexer.invalidTrailingComma() - lexer.consumeNextToken(TC_END_OBJ) - } // else unexpected token? + + lexer.consumeNextToken(TC_END_OBJ) + lexer.path.popDescriptor() + return JsonObject(result) } private fun readArray(): JsonElement { - var lastToken = lexer.consumeNextToken() + lexer.consumeNextToken(TC_BEGIN_LIST) + lexer.path.pushDescriptor(JsonArraySerializer.descriptor) // Prohibit leading comma if (lexer.peekNextToken() == TC_COMMA) lexer.fail("Unexpected leading comma") val result = arrayListOf() while (lexer.canConsumeValue()) { + lexer.path.updateDescriptorIndex(result.size) val element = read() result.add(element) - lastToken = lexer.consumeNextToken() - if (lastToken != TC_COMMA) { - lexer.require(lastToken == TC_END_LIST) { "Expected end of the array or comma" } - } - } - // Check for the correct ending - if (lastToken == TC_BEGIN_LIST) { // Case of empty object - lexer.consumeNextToken(TC_END_LIST) - } else if (lastToken == TC_COMMA) { // Trailing comma - if (!trailingCommaAllowed) lexer.invalidTrailingComma("array") - lexer.consumeNextToken(TC_END_LIST) + lexer.consumeCommaOrPeekEnd( + allowTrailing = trailingCommaAllowed, + expectedEnd = ']', + entity = "array" + ) } + lexer.consumeNextToken(TC_END_LIST) + lexer.path.popDescriptor() + return JsonArray(result) } @@ -103,6 +102,7 @@ internal class JsonTreeReader( --stackDepth result } + TC_BEGIN_LIST -> readArray() else -> lexer.fail("Cannot read Json element because of unexpected ${tokenDescription(token)}") } diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt index 2e68da734..807e2c397 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt @@ -123,7 +123,6 @@ internal open class StreamingJsonDecoder( if (descriptor.elementsCount == 0 && descriptor.ignoreUnknownKeys(json)) { skipLeftoverElements(descriptor) } - if (lexer.tryConsumeComma() && !json.configuration.allowTrailingComma) lexer.invalidTrailingComma("") // First consume the object so we know it's correct lexer.consumeNextToken(mode.end) // Then cleanup the path @@ -185,24 +184,21 @@ internal open class StreamingJsonDecoder( } private fun decodeMapIndex(): Int { - var hasComma = false val decodingKey = currentIndex % 2 != 0 if (decodingKey) { if (currentIndex != -1) { - hasComma = lexer.tryConsumeComma() + lexer.consumeCommaOrPeekEnd( + allowTrailing = json.configuration.allowTrailingComma, + expectedEnd = mode.end + ) } } else { lexer.consumeNextToken(COLON) } return if (lexer.canConsumeValue()) { - if (decodingKey) { - if (currentIndex == -1) lexer.require(!hasComma) { "Unexpected leading comma" } - else lexer.require(hasComma) { "Expected comma after the key-value pair" } - } ++currentIndex } else { - if (hasComma && !json.configuration.allowTrailingComma) lexer.invalidTrailingComma() CompositeDecoder.DECODE_DONE } } @@ -220,19 +216,10 @@ internal open class StreamingJsonDecoder( private fun decodeObjectIndex(descriptor: SerialDescriptor): Int { while (true) { if (currentIndex != -1) { - val next = lexer.peekNextToken() - if (next == TC_END_OBJ) { - currentIndex = CompositeDecoder.DECODE_DONE - return elementMarker?.nextUnmarkedIndex() ?: CompositeDecoder.DECODE_DONE - } - - lexer.require(next == TC_COMMA) { "Expected comma after the key-value pair" } - val commaPosition = lexer.currentPosition - lexer.consumeNextToken() - lexer.require( - configuration.allowTrailingComma || lexer.peekNextToken() != TC_END_OBJ, - position = commaPosition - ) { "Trailing comma before the end of JSON object" } + lexer.consumeCommaOrPeekEnd( + allowTrailing = json.configuration.allowTrailingComma, + expectedEnd = mode.end + ) } if (!lexer.canConsumeValue()) break @@ -270,13 +257,17 @@ internal open class StreamingJsonDecoder( } private fun decodeListIndex(): Int { - // Prohibit leading comma - val hasComma = lexer.tryConsumeComma() + if (currentIndex != -1) { + lexer.consumeCommaOrPeekEnd( + allowTrailing = json.configuration.allowTrailingComma, + expectedEnd = mode.end, + entity = "array" + ) + } + return if (lexer.canConsumeValue()) { - if (currentIndex != -1 && !hasComma) lexer.fail("Expected end of the array or comma") ++currentIndex } else { - if (hasComma && !json.configuration.allowTrailingComma) lexer.invalidTrailingComma("array") CompositeDecoder.DECODE_DONE } } diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt index 5f570a95e..3f67b4e2a 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt @@ -11,10 +11,12 @@ import kotlin.jvm.* import kotlin.math.* internal const val lenientHint = "Use 'isLenient = true' in 'Json {}' builder to accept non-compliant JSON." -internal const val coerceInputValuesHint = "Use 'coerceInputValues = true' in 'Json {}' builder to coerce nulls if property has a default value." +internal const val coerceInputValuesHint = + "Use 'coerceInputValues = true' in 'Json {}' builder to coerce nulls if property has a default value." internal const val specialFlowingValuesHint = "It is possible to deserialize them using 'JsonBuilder.allowSpecialFloatingPointValues = true'" -internal const val ignoreUnknownKeysHint = "Use 'ignoreUnknownKeys = true' in 'Json {}' builder or '@JsonIgnoreUnknownKeys' annotation to ignore unknown keys." +internal const val ignoreUnknownKeysHint = + "Use 'ignoreUnknownKeys = true' in 'Json {}' builder or '@JsonIgnoreUnknownKeys' annotation to ignore unknown keys." internal const val allowStructuredMapKeysHint = "Use 'allowStructuredMapKeys = true' in 'Json {}' builder to convert such maps to [key1, value1, key2, value2,...] arrays." @@ -168,17 +170,6 @@ internal abstract class AbstractJsonLexer { abstract fun consumeNextToken(): Byte - fun tryConsumeComma(): Boolean { - val current = skipWhitespaces() - val source = source - if (current >= source.length || current == -1) return false - if (source[current] == ',') { - ++currentPosition - return true - } - return false - } - protected fun isValidValueStart(c: Char): Boolean { return when (c) { '}', ']', ':', ',' -> false @@ -315,7 +306,10 @@ internal abstract class AbstractJsonLexer { char != STRING } - open fun consumeStringChunked(isLenient: Boolean, consumeChunk: (stringChunk: String) -> Unit) { // open to allow simpler implementations (i.e. StringJsonLexer) + open fun consumeStringChunked( + isLenient: Boolean, + consumeChunk: (stringChunk: String) -> Unit + ) { // open to allow simpler implementations (i.e. StringJsonLexer) val nextToken = peekNextToken() if (isLenient && nextToken != TC_OTHER) return // noting to consume @@ -352,7 +346,12 @@ internal abstract class AbstractJsonLexer { } } - private fun writeRange(fromIndex: Int, toIndex: Int, currentChunkHasEscape: Boolean, consumeChunk: (stringChunk: String) -> Unit) { + private fun writeRange( + fromIndex: Int, + toIndex: Int, + currentChunkHasEscape: Boolean, + consumeChunk: (stringChunk: String) -> Unit + ) { if (currentChunkHasEscape) { consumeChunk(decodedString(fromIndex, toIndex)) } else { @@ -507,9 +506,9 @@ internal abstract class AbstractJsonLexer { } escapedString.append( ((fromHexChar(source, startPos) shl 12) + - (fromHexChar(source, startPos + 1) shl 8) + - (fromHexChar(source, startPos + 2) shl 4) + - fromHexChar(source, startPos + 3)).toChar() + (fromHexChar(source, startPos + 1) shl 8) + + (fromHexChar(source, startPos + 2) shl 4) + + fromHexChar(source, startPos + 3)).toChar() ) return startPos + 4 } @@ -544,6 +543,7 @@ internal abstract class AbstractJsonLexer { TC_BEGIN_LIST, TC_BEGIN_OBJ -> { tokenStack.add(lastToken) } + TC_END_LIST -> { if (tokenStack.last() != TC_BEGIN_LIST) throw JsonDecodingException( currentPosition, @@ -552,6 +552,7 @@ internal abstract class AbstractJsonLexer { ) tokenStack.removeLast() } + TC_END_OBJ -> { if (tokenStack.last() != TC_BEGIN_OBJ) throw JsonDecodingException( currentPosition, @@ -560,6 +561,7 @@ internal abstract class AbstractJsonLexer { ) tokenStack.removeLast() } + TC_EOF -> fail("Unexpected end of input due to malformed JSON during ignoring unknown keys") } consumeNextToken() @@ -659,13 +661,14 @@ internal abstract class AbstractJsonLexer { } currentPosition = current - fun calculateExponent(exponentAccumulator: Long, isExponentPositive: Boolean): Double = when (isExponentPositive) { - false -> 10.0.pow(-exponentAccumulator.toDouble()) - true -> 10.0.pow(exponentAccumulator.toDouble()) - } + fun calculateExponent(exponentAccumulator: Long, isExponentPositive: Boolean): Double = + when (isExponentPositive) { + false -> 10.0.pow(-exponentAccumulator.toDouble()) + true -> 10.0.pow(exponentAccumulator.toDouble()) + } if (hasExponent) { - val doubleAccumulator = accumulator.toDouble() * calculateExponent(exponentAccumulator, isExponentPositive) + val doubleAccumulator = accumulator.toDouble() * calculateExponent(exponentAccumulator, isExponentPositive) if (doubleAccumulator > Long.MAX_VALUE || doubleAccumulator < Long.MIN_VALUE) fail("Numeric value overflow") if (floor(doubleAccumulator) != doubleAccumulator) fail("Can't convert $doubleAccumulator to Long") accumulator = doubleAccumulator.toLong() @@ -725,10 +728,12 @@ internal abstract class AbstractJsonLexer { consumeBooleanLiteral("rue", current) true } + 'f'.code -> { consumeBooleanLiteral("alse", current) false } + else -> { fail("Expected valid boolean literal prefix, but had '${consumeStringLenient()}'") } @@ -759,4 +764,37 @@ internal abstract class AbstractJsonLexer { currentPosition = snapshot } } + + fun consumeCommaOrPeekEnd( + allowTrailing: Boolean, + expectedEnd: Char, + entity: String = "object", + ) { + val nextToken = peekNextToken() + if (nextToken == charToTokenClass(expectedEnd)) return + if (nextToken != TC_COMMA) { + path.popDescriptor() + fail("Expected end of the $entity or comma") + } + val commaPosition = currentPosition++ // consumes peeked comma + val peeked = peekNextToken() + + if (peeked == charToTokenClass(expectedEnd) && !allowTrailing) { + path.popDescriptor() + + fail( + "Trailing comma before the end of JSON $entity", + position = commaPosition, + hint = "Trailing commas are non-complaint JSON and not allowed by default. Use 'allowTrailingComma = true' in 'Json {}' builder to support them." + ) + } + if (peeked == TC_COMMA) { + path.popDescriptor() + fail( + "Multiple consecutive commas are not allowed in JSON", + position = currentPosition, + hint = "Trailing commas are non-complaint JSON and not allowed by default. Use 'allowTrailingComma = true' in 'Json {}' builder to support them." + ) + } + } }