Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public record CheckpointMetadataEntry(long version, Optional<Map<String, String>

/**
 * Validates the record components and takes an immutable snapshot of the tags.
 *
 * @throws IllegalArgumentException if {@code version} is negative
 * @throws NullPointerException if {@code tags} is null
 */
public CheckpointMetadataEntry
{
    // Version 0 must be accepted: the cloned-table checkpoint fixture carries
    // "version":0, so only negative versions are rejected. The former
    // "version > 0" check made this one unreachable and refused such tables.
    checkArgument(version >= 0, "version is negative: %s", version);
    requireNonNull(tags, "tags is null");
    // Defensive copy so later mutation of the caller's map cannot leak into the record.
    tags = tags.map(ImmutableMap::copyOf);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2766,6 +2766,24 @@ private static MetadataEntry loadMetadataEntry(long entryNumber, Path tableLocat
return transactionLog.getMetaData();
}

/**
 * Copies the Databricks 15.4 cloned-table fixture into the catalog directory,
 * registers it through {@code system.register_table}, and checks that its three
 * rows can be read back before dropping the table again.
 */
@Test
public void testClonedTableWithCheckpointVersionZero()
        throws Exception
{
    String clonedTable = "test_cloned_table" + randomNameSuffix();
    Path registeredLocation = catalogDir.resolve(clonedTable);

    // Materialize the bundled fixture at the location the table will be registered from.
    Path fixtureLocation = new File(Resources.getResource("databricks154/clone_checkpoint_version_zero/checkpoint_v2/cloned_table").toURI()).toPath();
    copyDirectoryContents(fixtureLocation, registeredLocation);

    String registerCall = "CALL system.register_table(CURRENT_SCHEMA, '%s', '%s')".formatted(clonedTable, registeredLocation.toUri());
    assertUpdate(registerCall);

    String expectedRows = "VALUES " +
            "(1, VARCHAR 'Alice', 25), " +
            "(2, VARCHAR 'Bob', 30), " +
            "(3, VARCHAR 'Charlie', 28)";
    assertThat(query("SELECT * FROM " + clonedTable + " ORDER BY id")).matches(expectedRows);

    assertUpdate("DROP TABLE " + clonedTable);
}

private static ProtocolEntry loadProtocolEntry(long entryNumber, Path tableLocation)
throws IOException
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
Copy link
Member

@ebyhr ebyhr Oct 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We prefer query based tests to unit tests in this repository.
Please update existing integration tests or product tests instead.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, integration tests updated

* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.deltalake.transactionlog;

import com.google.common.collect.ImmutableMap;
import io.airlift.json.JsonCodec;
import org.intellij.lang.annotations.Language;
import org.junit.jupiter.api.Test;

import java.util.Optional;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

/**
 * JSON serialization and deserialization tests for {@link CheckpointMetadataEntry}.
 */
class TestCheckpointMetadataEntry
{
    private final JsonCodec<CheckpointMetadataEntry> entryCodec = JsonCodec.jsonCodec(CheckpointMetadataEntry.class);

    /**
     * Deserializing a payload with a positive version, and one with version zero,
     * yields entries equal to ones built directly with the same version and tags.
     */
    @Test
    void testCheckpointMetadataEntry()
    {
        @Language("JSON")
        String positiveVersionJson = "{\"version\":5,\"tags\":{\"sidecarNumActions\":\"1\",\"sidecarSizeInBytes\":\"20965\",\"numOfAddFiles\":\"1\",\"sidecarFileSchema\":\"\"}}";
        CheckpointMetadataEntry positiveVersionEntry = entryCodec.fromJson(positiveVersionJson);
        assertThat(positiveVersionEntry).isEqualTo(entryWithSidecarTags(5));

        @Language("JSON")
        String zeroVersionJson = "{\"version\":0,\"tags\":{\"sidecarNumActions\":\"1\",\"sidecarSizeInBytes\":\"20965\",\"numOfAddFiles\":\"1\",\"sidecarFileSchema\":\"\"}}";
        CheckpointMetadataEntry zeroVersionEntry = entryCodec.fromJson(zeroVersionJson);
        assertThat(zeroVersionEntry).isEqualTo(entryWithSidecarTags(0));
    }

    /**
     * Payloads with a negative version are rejected by the codec with an
     * {@link IllegalArgumentException} mentioning "Invalid JSON string for".
     */
    @Test
    void testInvalidCheckpointMetadataEntry()
    {
        @Language("JSON")
        String negativeVersionJson = "{\"version\":-1,\"tags\":{\"sidecarNumActions\":\"1\",\"sidecarSizeInBytes\":\"20965\",\"numOfAddFiles\":\"1\",\"sidecarFileSchema\":\"\"}}";
        assertThatThrownBy(() -> entryCodec.fromJson(negativeVersionJson))
                .isInstanceOf(IllegalArgumentException.class)
                .hasMessageContaining("Invalid JSON string for");

        // NOTE(review): this payload omits "tags" but also has a negative version, so it
        // does not isolate the missing-tags case -- confirm which condition is intended.
        @Language("JSON")
        String negativeVersionWithoutTagsJson = "{\"version\":-1}";
        assertThatThrownBy(() -> entryCodec.fromJson(negativeVersionWithoutTagsJson))
                .isInstanceOf(IllegalArgumentException.class)
                .hasMessageContaining("Invalid JSON string for");
    }

    /**
     * Serializing an entry produces the expected pretty-printed JSON document.
     */
    @Test
    void testCheckpointMetadataEntryToJson()
    {
        String serialized = entryCodec.toJson(entryWithSidecarTags(100));

        String expectedJson = "{\n" +
                " \"version\" : 100,\n" +
                " \"tags\" : {\n" +
                " \"sidecarNumActions\" : \"1\",\n" +
                " \"sidecarSizeInBytes\" : \"20965\",\n" +
                " \"numOfAddFiles\" : \"1\",\n" +
                " \"sidecarFileSchema\" : \"\"\n" +
                " }\n" +
                "}";
        assertThat(serialized).isEqualTo(expectedJson);
    }

    // Builds the entry every test compares against: the given version plus the four sidecar tags.
    private static CheckpointMetadataEntry entryWithSidecarTags(long version)
    {
        return new CheckpointMetadataEntry(
                version,
                Optional.of(ImmutableMap.of(
                        "sidecarNumActions", "1",
                        "sidecarSizeInBytes", "20965",
                        "numOfAddFiles", "1",
                        "sidecarFileSchema", "")));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
Data generated using Databricks 15.4:

```sql
CREATE TABLE source_table (
id INT,
name STRING,
age INT
)
USING DELTA
TBLPROPERTIES (
'delta.checkpointPolicy' = 'v2'
);

INSERT INTO source_table VALUES
(1, 'Alice', 25),
(2, 'Bob', 30),
(3, 'Charlie', 28);
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"commitInfo":{"timestamp":1761436420945,"userId":"user1","userName":"user1","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"true","properties":"{\"delta.checkpointPolicy\":\"v2\",\"delta.checkpointInterval\":\"2\",\"delta.enableDeletionVectors\":\"true\"}","statsOnLoad":false},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"70e732d3-dd95-4615-b187-d3862aa1c181"}}
{"metaData":{"id":"b916c720-895c-4ccd-8b74-5a52754d3e26","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"2","delta.enableDeletionVectors":"true"},"createdTime":1761436420654}}
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors","v2Checkpoint"],"writerFeatures":["deletionVectors","v2Checkpoint"]}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"commitInfo":{"timestamp":1761436433491,"userId":"user1","userName":"user1","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"3","numOutputBytes":"1112"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"482c7b06-a45c-4d4e-be3b-cf4314f4afc5"}}
{"add":{"path":"part-00000-d47cc824-9a87-40c6-9e6b-528d933a30f9-c000.snappy.parquet","partitionValues":{},"size":1112,"modificationTime":1761436433000,"dataChange":true,"stats":"{\"numRecords\":3,\"minValues\":{\"id\":1,\"name\":\"Alice\",\"age\":25},\"maxValues\":{\"id\":3,\"name\":\"Charlie\",\"age\":30},\"nullCount\":{\"id\":0,\"name\":0,\"age\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1761436433000000","MIN_INSERTION_TIME":"1761436433000000","MAX_INSERTION_TIME":"1761436433000000","OPTIMIZE_TARGET_SIZE":"67108864"}}}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Data generated using Databricks 15.4:

```sql
CREATE TABLE cloned_table DEEP CLONE source_table;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we also add a case for shallow clone?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe that isn't needed, since the only difference is whether the isShallow flag is true or false in the transaction log.

```
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"checkpointMetadata":{"version":0,"tags":{"sidecarNumActions":"1","sidecarSizeInBytes":"13505","numOfAddFiles":"1","sidecarFileSchema":"{\"type\":\"struct\",\"fields\":[{\"name\":\"add\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"path\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"partitionValues\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"size\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"modificationTime\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"dataChange\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"tags\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionVector\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"storageType\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"pathOrInlineDv\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"offset\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sizeInBytes\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"cardinality\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"maxRowIndex\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"baseRowId\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"defaultRowCommitVersion\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clusteringProvider\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"stats\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"stats_parsed\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"numRecords\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"minValues\",\"type\":{\"type\":\"struct\",\"fields\":[
{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"maxValues\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"nullCount\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"tightBounds\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"remove\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"path\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionTimestamp\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"dataChange\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"extendedFileMetadata\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"partitionValues\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"size\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionVector\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"storageType\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"pathOrInlineDv\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"offset\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}
,{\"name\":\"sizeInBytes\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"cardinality\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"maxRowIndex\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"baseRowId\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"defaultRowCommitVersion\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]}"}}}
{"sidecar":{"path":"00000000000000000000.checkpoint.0000000001.0000000001.7be61843-e74d-45d2-8db5-4b4e56714412.parquet","sizeInBytes":13505,"modificationTime":1761436515000}}
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors","v2Checkpoint"],"writerFeatures":["deletionVectors","v2Checkpoint"]}}
{"metaData":{"id":"45c721d6-56cd-47b6-bcb6-ccdeef56ce80","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"2","delta.enableDeletionVectors":"true"},"createdTime":1761436420654}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"commitInfo":{"timestamp":1761436510659,"userId":"user1","userName":"user1","operation":"CLONE","operationParameters":{"source":"source_table","sourceVersion":1,"isShallow":false},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":-1,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"removedFilesSize":"0","numRemovedFiles":"0","sourceTableSize":"1112","numCopiedFiles":"1","copiedFilesSize":"1112","sourceNumOfFiles":"1"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"a073c2c4-75c9-4ff8-bb73-fbf49115cf88"}}
{"metaData":{"id":"45c721d6-56cd-47b6-bcb6-ccdeef56ce80","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"2","delta.enableDeletionVectors":"true"},"createdTime":1761436420654}}
{"add":{"path":"part-00000-d47cc824-9a87-40c6-9e6b-528d933a30f9-c000.snappy.parquet","partitionValues":{},"size":1112,"modificationTime":1761436433000,"dataChange":true,"stats":"{\"numRecords\":3,\"minValues\":{\"id\":1,\"name\":\"Alice\",\"age\":25},\"maxValues\":{\"id\":3,\"name\":\"Charlie\",\"age\":30},\"nullCount\":{\"id\":0,\"name\":0,\"age\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1761436433000000","MIN_INSERTION_TIME":"1761436433000000","MAX_INSERTION_TIME":"1761436433000000","OPTIMIZE_TARGET_SIZE":"67108864"}}}
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors","v2Checkpoint"],"writerFeatures":["deletionVectors","v2Checkpoint"]}}
Loading