Skip to content

Commit 199baac

Browse files
Fix failure when reading deep or shallow cloned Delta Lake tables.
1 parent 03eedce commit 199baac

24 files changed

+189
-1
lines changed

plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/CheckpointMetadataEntry.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ public record CheckpointMetadataEntry(long version, Optional<Map<String, String>
3333

3434
// Compact constructor of the record: validates `version`, rejects a null `tags`,
// and replaces a present tags map with an immutable copy.
public CheckpointMetadataEntry
3535
{
// Diff view: the "-" line below is the removed check (it rejected version 0);
// the "+" line is its replacement, which accepts 0 and rejects only negative versions.
// The tests added in this commit read cloned tables whose checkpoint version is zero.
36-
checkArgument(version > 0, "version is not positive: %s", version);
36+
checkArgument(version >= 0, "version is negative: %s", version);
3737
requireNonNull(tags, "tags is null");
// When tags are present, copy them into an ImmutableMap so the record does not
// keep a reference to the caller's (possibly mutable) map.
3838
tags = tags.map(ImmutableMap::copyOf);
3939
}

plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2766,6 +2766,29 @@ private static MetadataEntry loadMetadataEntry(long entryNumber, Path tableLocat
27662766
return transactionLog.getMetaData();
27672767
}
27682768

2769+
/**
 * Runs the cloned-table read check against two bundled resource directories,
 * {@code checkpoint_v1/cloned_table} and {@code checkpoint_v2/cloned_table}.
 * NOTE(review): the directory names suggest one fixture per checkpoint format
 * (classic vs. V2) - confirm against the fixture contents.
 */
@Test
2770+
public void testClonedTableWithCheckpointVersionZero()
2771+
throws Exception
2772+
{
2773+
testClonedTableWithCheckpointVersionZero("databricks154/clone_checkpoint_version_zero/checkpoint_v1/cloned_table");
2774+
testClonedTableWithCheckpointVersionZero("databricks154/clone_checkpoint_version_zero/checkpoint_v2/cloned_table");
2775+
}
2776+
2777+
/**
 * Copies the given classpath resource directory to a fresh location under
 * {@code catalogDir}, registers it as a table through {@code system.register_table},
 * asserts that a full scan returns the three expected rows, and drops the table.
 *
 * @param resourceName classpath-relative path of the table directory to copy
 * @throws Exception if resolving or copying the resource fails
 */
private void testClonedTableWithCheckpointVersionZero(String resourceName)
2778+
throws Exception
2779+
{
// Random suffix keeps the table name (and its directory) unique per invocation.
2780+
String tableName = "test_cloned_table" + randomNameSuffix();
2781+
Path tableLocation = catalogDir.resolve(tableName);
2782+
copyDirectoryContents(new File(Resources.getResource(resourceName).toURI()).toPath(), tableLocation);
2783+
assertUpdate("CALL system.register_table(CURRENT_SCHEMA, '%s', '%s')".formatted(tableName, tableLocation.toUri()));
2784+
// NOTE(review): the query uses ORDER BY but no .ordered() is chained on the assertion,
// so row order may not actually be verified here - confirm against matches(...) semantics.
2785+
assertThat(query("SELECT * FROM " + tableName + " ORDER BY id")).matches("VALUES " +
2786+
"(1, VARCHAR 'Alice', 25), " +
2787+
"(2, VARCHAR 'Bob', 30), " +
2788+
"(3, VARCHAR 'Charlie', 28)");
2789+
assertUpdate("DROP TABLE " + tableName);
2790+
}
2791+
27692792
private static ProtocolEntry loadProtocolEntry(long entryNumber, Path tableLocation)
27702793
throws IOException
27712794
{
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package io.trino.plugin.deltalake.transactionlog;
15+
16+
import com.google.common.collect.ImmutableMap;
17+
import io.airlift.json.JsonCodec;
18+
import org.intellij.lang.annotations.Language;
19+
import org.junit.jupiter.api.Test;
20+
21+
import java.util.Optional;
22+
23+
import static org.assertj.core.api.Assertions.assertThat;
24+
import static org.assertj.core.api.Assertions.assertThatThrownBy;
25+
26+
/**
 * JSON round-trip tests for {@link CheckpointMetadataEntry}: deserialization of
 * valid payloads (including version 0), rejection of invalid payloads, and the
 * serialized form produced by the codec.
 */
class TestCheckpointMetadataEntry
27+
{
28+
private final JsonCodec<CheckpointMetadataEntry> codec = JsonCodec.jsonCodec(CheckpointMetadataEntry.class);
29+
30+
// Deserializes a payload with a positive version and one with version 0 and
// compares each against an equivalently constructed record.
@Test
31+
void testCheckpointMetadataEntry()
32+
{
33+
@Language("JSON")
34+
String json = "{\"version\":5,\"tags\":{\"sidecarNumActions\":\"1\",\"sidecarSizeInBytes\":\"20965\",\"numOfAddFiles\":\"1\",\"sidecarFileSchema\":\"\"}}";
35+
assertThat(codec.fromJson(json)).isEqualTo(new CheckpointMetadataEntry(
36+
5,
37+
Optional.of(ImmutableMap.of(
38+
"sidecarNumActions", "1",
39+
"sidecarSizeInBytes", "20965",
40+
"numOfAddFiles", "1",
41+
"sidecarFileSchema", ""))));
42+
// Version 0 must deserialize successfully; same tags as above, only the version differs.
43+
@Language("JSON")
44+
String jsonWithVersionZero = "{\"version\":0,\"tags\":{\"sidecarNumActions\":\"1\",\"sidecarSizeInBytes\":\"20965\",\"numOfAddFiles\":\"1\",\"sidecarFileSchema\":\"\"}}";
45+
assertThat(codec.fromJson(jsonWithVersionZero)).isEqualTo(new CheckpointMetadataEntry(
46+
0,
47+
Optional.of(ImmutableMap.of(
48+
"sidecarNumActions", "1",
49+
"sidecarSizeInBytes", "20965",
50+
"numOfAddFiles", "1",
51+
"sidecarFileSchema", ""))));
52+
}
53+
54+
// Both payloads below are expected to fail with an IllegalArgumentException whose
// message contains "Invalid JSON string for".
@Test
55+
void testInvalidCheckpointMetadataEntry()
56+
{
57+
@Language("JSON")
58+
String jsonWithNegativeVersion = "{\"version\":-1,\"tags\":{\"sidecarNumActions\":\"1\",\"sidecarSizeInBytes\":\"20965\",\"numOfAddFiles\":\"1\",\"sidecarFileSchema\":\"\"}}";
59+
assertThatThrownBy(() -> codec.fromJson(jsonWithNegativeVersion))
60+
.isInstanceOf(IllegalArgumentException.class)
61+
.hasMessageContaining("Invalid JSON string for");
62+
// NOTE(review): this payload omits "tags" but ALSO carries version -1, so the failure
// asserted below may come from the version check rather than from the missing tags.
// It does not by itself show that an absent "tags" property is rejected - confirm intent.
63+
@Language("JSON")
64+
String jsonWithoutTags = "{\"version\":-1}";
65+
assertThatThrownBy(() -> codec.fromJson(jsonWithoutTags))
66+
.isInstanceOf(IllegalArgumentException.class)
67+
.hasMessageContaining("Invalid JSON string for");
68+
}
69+
70+
// Serializes a record and compares it with the exact multi-line JSON text expected
// from the codec (field order: version, then tags in insertion order).
@Test
71+
void testCheckpointMetadataEntryToJson()
72+
{
73+
assertThat(codec.toJson(new CheckpointMetadataEntry(
74+
100,
75+
Optional.of(ImmutableMap.of(
76+
"sidecarNumActions", "1",
77+
"sidecarSizeInBytes", "20965",
78+
"numOfAddFiles", "1",
79+
"sidecarFileSchema", "")))))
80+
.isEqualTo("{\n" +
81+
" \"version\" : 100,\n" +
82+
" \"tags\" : {\n" +
83+
" \"sidecarNumActions\" : \"1\",\n" +
84+
" \"sidecarSizeInBytes\" : \"20965\",\n" +
85+
" \"numOfAddFiles\" : \"1\",\n" +
86+
" \"sidecarFileSchema\" : \"\"\n" +
87+
" }\n" +
88+
"}");
89+
}
90+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{"commitInfo":{"timestamp":1761426293237,"userId":"user1","userName":"user1","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"true","properties":"{\"delta.checkpointPolicy\":\"classic\",\"delta.enableDeletionVectors\":\"true\"}","statsOnLoad":false},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"8f67a83b-4eaa-4aa2-a2e4-8e3f6cf7f8d8"}}
2+
{"metaData":{"id":"546b1b7a-5b0e-4731-95cd-67d96c5d8cd6","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"classic","delta.enableDeletionVectors":"true"},"createdTime":1761426292944}}
3+
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors"]}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
{"commitInfo":{"timestamp":1761426300334,"userId":"user1","userName":"user1","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"3","numOutputBytes":"1112"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"1c95d330-c90c-485e-8771-f7e3836cf8cc"}}
2+
{"add":{"path":"part-00000-e521ec45-b9b9-4343-bc81-130f0c5c65b5-c000.snappy.parquet","partitionValues":{},"size":1112,"modificationTime":1761426301000,"dataChange":true,"stats":"{\"numRecords\":3,\"minValues\":{\"id\":1,\"name\":\"Alice\",\"age\":25},\"maxValues\":{\"id\":3,\"name\":\"Charlie\",\"age\":30},\"nullCount\":{\"id\":0,\"name\":0,\"age\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1761426301000000","MIN_INSERTION_TIME":"1761426301000000","MAX_INSERTION_TIME":"1761426301000000","OPTIMIZE_TARGET_SIZE":"67108864"}}}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
Data generated using Databricks 15.4:
2+
3+
```sql
4+
CREATE TABLE source_table (
5+
id INT,
6+
name STRING,
7+
age INT
8+
)
9+
USING DELTA
10+
TBLPROPERTIES (
11+
'delta.checkpointPolicy' = 'classic'
12+
);
13+
14+
INSERT INTO source_table VALUES
15+
(1, 'Alice', 25),
16+
(2, 'Bob', 30),
17+
(3, 'Charlie', 28);
18+
```
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{"commitInfo":{"timestamp":1761426293237,"userId":"user1","userName":"user1","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"true","properties":"{\"delta.checkpointPolicy\":\"classic\",\"delta.enableDeletionVectors\":\"true\"}","statsOnLoad":false},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"8f67a83b-4eaa-4aa2-a2e4-8e3f6cf7f8d8"}}
2+
{"metaData":{"id":"546b1b7a-5b0e-4731-95cd-67d96c5d8cd6","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"classic","delta.enableDeletionVectors":"true"},"createdTime":1761426292944}}
3+
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors"]}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
{"commitInfo":{"timestamp":1761426300334,"userId":"user1","userName":"user1","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"3","numOutputBytes":"1112"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"1c95d330-c90c-485e-8771-f7e3836cf8cc"}}
2+
{"add":{"path":"part-00000-e521ec45-b9b9-4343-bc81-130f0c5c65b5-c000.snappy.parquet","partitionValues":{},"size":1112,"modificationTime":1761426301000,"dataChange":true,"stats":"{\"numRecords\":3,\"minValues\":{\"id\":1,\"name\":\"Alice\",\"age\":25},\"maxValues\":{\"id\":3,\"name\":\"Charlie\",\"age\":30},\"nullCount\":{\"id\":0,\"name\":0,\"age\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1761426301000000","MIN_INSERTION_TIME":"1761426301000000","MAX_INSERTION_TIME":"1761426301000000","OPTIMIZE_TARGET_SIZE":"67108864"}}}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Data generated using Databricks 15.4:
2+
3+
```sql
4+
CREATE TABLE cloned_table DEEP CLONE source_table;
5+
```

0 commit comments

Comments
 (0)