Skip to content

Commit 2ee6b73

Browse files
authored
Merge pull request #442 from Tmonster/no_inserts_to_v3_tables
Disable Inserts into V3 tables
2 parents d87c4bb + 4859c7e commit 2ee6b73

File tree

8 files changed

+105
-4
lines changed

8 files changed

+105
-4
lines changed

duckdb

Submodule duckdb updated 773 files

extension_config.cmake

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@ duckdb_extension_load(icu)
1212
duckdb_extension_load(ducklake
1313
LOAD_TESTS
1414
GIT_URL https://github.com/duckdb/ducklake
15-
GIT_TAG d2392c36f33151cf5cdd7d006375b0b669bd44ac
16-
APPLY_PATCHES
15+
GIT_TAG c1ebd032eb4c763910551c08f4b61bdb8168f209
1716
)
1817

1918
duckdb_extension_load(avro

scripts/data_generators/tests/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def generate(self, con: IcebergConnection):
6565
intermediate_data_path = os.path.join(intermediate_dir, snapshot_name, 'data.parquet')
6666
df.write.mode("overwrite").parquet(intermediate_data_path)
6767

68-
if self.write_intermediates:
68+
if self.write_intermediates and last_file:
6969
### Finally, copy the latest results to a "last" dir for easy test writing
7070
shutil.copytree(
7171
os.path.join(intermediate_dir, last_file, 'data.parquet'),
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from scripts.data_generators.tests.base import IcebergTest
2+
import pathlib
3+
import tempfile
4+
import duckdb
5+
6+
7+
@IcebergTest.register()
8+
class Test(IcebergTest):
9+
def __init__(self):
10+
path = pathlib.PurePath(__file__)
11+
super().__init__(path.parent.name)
12+
13+
# Create a temporary directory
14+
self.tempdir = pathlib.Path(tempfile.mkdtemp())
15+
self.parquet_file = self.tempdir / "tmp.parquet"
16+
17+
duckdb_con = duckdb.connect()
18+
duckdb_con.execute("call dbgen(sf=1)")
19+
duckdb_con.execute(f"copy customer to '{self.parquet_file}' (FORMAT PARQUET)")
20+
21+
def setup(self, con):
22+
con.con.read.parquet(self.parquet_file.as_posix()).createOrReplaceTempView('parquet_file_view')
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CREATE or REPLACE TABLE default.simple_v3_table
2+
TBLPROPERTIES (
3+
'format-version' = '3',
4+
'write.delete.mode' = 'merge-on-read',
5+
'write.delete.format' = 'puffin',
6+
'write.update.mode' = 'merge-on-read'
7+
)
8+
AS SELECT * FROM parquet_file_view;
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
delete from default.simple_v3_table where c_custkey % 2 = 0;

src/storage/iceberg_insert.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,12 @@ PhysicalOperator &IRCatalog::PlanInsert(ClientContext &context, PhysicalPlanGene
376376
}
377377

378378
auto &table_entry = op.table.Cast<ICTableEntry>();
379+
// FIXME: Inserts into V3 tables are not yet supported since
380+
// we need to keep track of row lineage, which we do not support
381+
// https://iceberg.apache.org/spec/#row-lineage
382+
if (table_entry.table_info.table_metadata.iceberg_version == 3) {
383+
throw NotImplementedException("Insert into Iceberg V3 tables");
384+
}
379385
table_entry.PrepareIcebergScanFromEntry(context);
380386
auto &table_info = table_entry.table_info;
381387
auto &schema = table_info.table_metadata.GetLatestSchema();
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# name: test/sql/local/irc/insert/test_insert_to_v3_tables.test
2+
# group: [insert]
3+
4+
require-env DUCKDB_ICEBERG_HAVE_GENERATED_DATA
5+
6+
require avro
7+
8+
require parquet
9+
10+
require iceberg
11+
12+
require httpfs
13+
14+
statement ok
15+
CREATE SECRET (
16+
TYPE S3,
17+
KEY_ID 'admin',
18+
SECRET 'password',
19+
ENDPOINT '127.0.0.1:9000',
20+
URL_STYLE 'path',
21+
USE_SSL 0
22+
);
23+
24+
25+
statement ok
26+
ATTACH '' AS my_datalake (
27+
TYPE ICEBERG,
28+
CLIENT_ID 'admin',
29+
CLIENT_SECRET 'password',
30+
ENDPOINT 'http://127.0.0.1:8181'
31+
);
32+
33+
statement ok
34+
CALL enable_logging('Iceberg');
35+
36+
query I
37+
select count(*) from my_datalake.default.simple_v3_table;
38+
----
39+
75000
40+
41+
query I
42+
select count(*) from my_datalake.default.simple_v3_table where c_custkey > 75_000;
43+
----
44+
37500
45+
46+
query I
47+
select count(*) from my_datalake.default.simple_v3_table where c_custkey > 50_000;
48+
----
49+
50_000
50+
51+
query I
52+
select count(*) from my_datalake.default.simple_v3_table where c_custkey < 75_000;
53+
----
54+
37500
55+
56+
query I
57+
select count(*) from my_datalake.default.simple_v3_table where c_custkey > 25_000 and c_custkey < 75_000;
58+
----
59+
25000
60+
61+
statement error
62+
insert into my_datalake.default.simple_v3_table values (100000000, 'Customer#1000000000', 'jsdjsdffi', 4, 06-122-922-9374, 9281.24, 'FURNITURE', 'ducks are human beings');
63+
----
64+
<REGEX>:.*Not implemented Error: Insert into Iceberg V3 tables.*
65+

0 commit comments

Comments
 (0)