Commit 0914d90

Merge pull request #446 from Tmonster/disable_null_inserts_into_null_columns
Disable inserting NULL into columns marked NOT NULL
2 parents 0522efe + 36ae91b commit 0914d90

File tree: 7 files changed, +215 −4 lines changed
Lines changed: 8 additions & 0 deletions

@@ -0,0 +1,8 @@
+from scripts.data_generators.tests.base import IcebergTest
+import pathlib
+
+@IcebergTest.register()
+class Test(IcebergTest):
+    def __init__(self):
+        path = pathlib.PurePath(__file__)
+        super().__init__(path.parent.name)

Lines changed: 11 additions & 0 deletions

@@ -0,0 +1,11 @@
+CREATE OR REPLACE TABLE default.test_not_null (
+    id INT,
+    name STRING NOT NULL,
+    address STRUCT<
+        street: STRING NOT NULL,
+        city: STRING NOT NULL,
+        zip: STRING NOT NULL
+    >,
+    phone_numbers ARRAY<STRING>,
+    metadata MAP<STRING, STRING>
+);

Lines changed: 7 additions & 0 deletions

@@ -0,0 +1,7 @@
+INSERT INTO default.test_not_null VALUES (
+    1,
+    'Alice',
+    NAMED_STRUCT('street', '123 Main St', 'city', 'Metropolis', 'zip', '12345'),
+    ARRAY('123-456-7890', '987-654-3210'),
+    MAP('age', '30', 'membership', 'gold')
+);

src/storage/create_table/iceberg_create_table_request.cpp

Lines changed: 15 additions & 0 deletions

@@ -2,6 +2,7 @@
 #include "storage/table_create/iceberg_create_table_request.hpp"
 #include "storage/irc_table_set.hpp"
 #include "storage/iceberg_table_information.hpp"
+#include "duckdb/parser/constraints/not_null_constraint.hpp"
 #include "utils/iceberg_type.hpp"
 #include "duckdb/common/enums/catalog_type.hpp"
 #include "duckdb/catalog/catalog_entry/copy_function_catalog_entry.hpp"
@@ -105,9 +106,23 @@ shared_ptr<IcebergTableSchema> IcebergCreateTableRequest::CreateIcebergSchema(co
         return field_id++;
     };

+    auto &constraints = table_entry->GetConstraints();
     for (auto column = column_iterator.begin(); column != column_iterator.end(); ++column) {
         auto name = (*column).Name();
+        // check if there is a not null constraint
         bool required = false;
+        if (!constraints.empty()) {
+            for (auto &constraint : constraints) {
+                if (constraint->type != ConstraintType::NOT_NULL) {
+                    continue;
+                }
+                auto &not_null_constraint = constraint->Cast<NotNullConstraint>();
+                if (not_null_constraint.index.IsValid() && not_null_constraint.index.index == column.pos) {
+                    required = true;
+                }
+            }
+        }
+
         auto logical_type = (*column).GetType();
         idx_t first_id = next_field_id();
         rest_api_objects::Type type;
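
For context, a minimal standalone sketch of the idea this hunk implements: DuckDB records a NOT NULL constraint as the index of the constrained column, so producing the Iceberg schema is a matter of marking the field at that position as required. The Column and NotNull types below are simplified stand-ins for illustration, not the actual DuckDB or extension classes.

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Simplified stand-ins for DuckDB's column list and NOT NULL constraint.
struct Column {
    std::string name;
    bool required = false; // becomes "required": true in the Iceberg schema field
};

struct NotNull {
    std::size_t column_index; // index of the constrained column, as stored by the parser
};

// Mark every column referenced by a NOT NULL constraint as a required Iceberg field.
static void ApplyNotNullConstraints(std::vector<Column> &columns, const std::vector<NotNull> &constraints) {
    for (const auto &constraint : constraints) {
        if (constraint.column_index < columns.size()) {
            columns[constraint.column_index].required = true;
        }
    }
}

int main() {
    // Mirrors the test table below: a VARCHAR, b INT NOT NULL
    std::vector<Column> columns {{"a"}, {"b"}};
    ApplyNotNullConstraints(columns, {{1}});
    for (const auto &col : columns) {
        std::cout << col.name << (col.required ? " required" : " optional") << "\n";
    }
}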

src/storage/iceberg_insert.cpp

Lines changed: 39 additions & 4 deletions

@@ -3,6 +3,7 @@
 #include "storage/irc_transaction.hpp"
 #include "storage/irc_table_entry.hpp"
 #include "storage/iceberg_table_information.hpp"
+#include "metadata/iceberg_column_definition.hpp"

 #include "iceberg_multi_file_list.hpp"

@@ -149,7 +150,26 @@ static IcebergColumnStats ParseColumnStats(const vector<Value> col_stats) {
     return column_stats;
 }

-static void AddWrittenFiles(IcebergInsertGlobalState &global_state, DataChunk &chunk, optional_idx partition_id) {
+static void AddToColDefMap(case_insensitive_map_t<optional_ptr<IcebergColumnDefinition>> &name_to_coldef,
+                           string col_name_prefix, optional_ptr<IcebergColumnDefinition> column_def) {
+    string column_name = column_def->name;
+    if (!col_name_prefix.empty()) {
+        column_name = col_name_prefix + "." + column_def->name;
+    }
+    if (column_def->IsIcebergPrimitiveType()) {
+        name_to_coldef.emplace(column_name, column_def.get());
+    } else {
+        for (auto &child : column_def->children) {
+            AddToColDefMap(name_to_coldef, column_name, child.get());
+        }
+    }
+}
+
+static void AddWrittenFiles(IcebergInsertGlobalState &global_state, DataChunk &chunk,
+                            optional_ptr<TableCatalogEntry> table) {
+    D_ASSERT(table);
+    auto &ic_table = table->Cast<ICTableEntry>();
+    auto partition_id = ic_table.table_info.table_metadata.default_spec_id;
     for (idx_t r = 0; r < chunk.size(); r++) {
         IcebergManifestEntry data_file;
         data_file.file_path = chunk.GetValue(0, r).GetValue<string>();
@@ -159,8 +179,8 @@ static void AddWrittenFiles(IcebergInsertGlobalState &global_state, DataChunk &c
         data_file.status = IcebergManifestEntryStatusType::ADDED;
         data_file.file_format = "parquet";

-        if (partition_id.IsValid()) {
-            data_file.partition_spec_id = static_cast<int32_t>(partition_id.GetIndex());
+        if (partition_id) {
+            data_file.partition_spec_id = static_cast<int32_t>(partition_id);
         } else {
             data_file.partition_spec_id = 0;
         }
@@ -171,12 +191,27 @@ static void AddWrittenFiles(IcebergInsertGlobalState &global_state, DataChunk &c

         global_state.insert_count += data_file.record_count;

+        auto table_current_schema_id = ic_table.table_info.table_metadata.current_schema_id;
+        auto ic_schema = ic_table.table_info.table_metadata.schemas[table_current_schema_id];
+
+        case_insensitive_map_t<optional_ptr<IcebergColumnDefinition>> column_info;
+        for (auto &column : ic_schema->columns) {
+            AddToColDefMap(column_info, "", column.get());
+        }
+
         for (idx_t col_idx = 0; col_idx < map_children.size(); col_idx++) {
             auto &struct_children = StructValue::GetChildren(map_children[col_idx]);
             auto &col_name = StringValue::Get(struct_children[0]);
             auto &col_stats = MapValue::GetChildren(struct_children[1]);
             auto column_names = ParseQuotedList(col_name, '.');
             auto stats = ParseColumnStats(col_stats);
+            auto normalized_col_name = StringUtil::Join(column_names, ".");
+
+            auto ic_column_info = column_info.find(normalized_col_name);
+            D_ASSERT(ic_column_info != column_info.end());
+            if (ic_column_info->second->required && stats.has_null_count && stats.null_count > 0) {
+                throw ConstraintException("NOT NULL constraint failed: %s.%s", table->name, normalized_col_name);
+            }

             //! TODO: convert 'stats' into 'data_file.lower_bounds', upper_bounds, value_counts, null_value_counts,
             //! nan_value_counts ...
@@ -205,7 +240,7 @@ SinkResultType IcebergInsert::Sink(ExecutionContext &context, DataChunk &chunk,
     auto &global_state = input.global_state.Cast<IcebergInsertGlobalState>();

     // TODO: pass through the partition id?
-    AddWrittenFiles(global_state, chunk, {});
+    AddWrittenFiles(global_state, chunk, table);

     return SinkResultType::NEED_MORE_INPUT;
 }
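
The enforcement path above relies on two things: nested Iceberg columns are flattened into dotted leaf names (the struct field street under address is tracked as address.street), and the written file's per-column statistics report how many NULLs each leaf column received. Below is a minimal standalone sketch of that flatten-and-check logic, using plain standard-library types as stand-ins for the extension's IcebergColumnDefinition, case_insensitive_map_t, and stats classes.

#include <iostream>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

// Simplified stand-in for a (possibly nested) Iceberg column definition.
struct ColumnDef {
    std::string name;
    bool required = false;
    std::vector<std::unique_ptr<ColumnDef>> children; // empty for primitive columns
};

// Flatten nested columns into dotted leaf names, e.g. "address.street".
static void FlattenColumns(std::map<std::string, const ColumnDef *> &out, const std::string &prefix,
                           const ColumnDef &col) {
    auto full_name = prefix.empty() ? col.name : prefix + "." + col.name;
    if (col.children.empty()) {
        out.emplace(full_name, &col);
        return;
    }
    for (const auto &child : col.children) {
        FlattenColumns(out, full_name, *child);
    }
}

// Reject a written file if any required leaf column reports a non-zero null count.
static void CheckNullCounts(const std::map<std::string, const ColumnDef *> &columns,
                            const std::map<std::string, long> &null_counts, const std::string &table_name) {
    for (const auto &entry : null_counts) {
        auto it = columns.find(entry.first);
        if (it != columns.end() && it->second->required && entry.second > 0) {
            throw std::runtime_error("NOT NULL constraint failed: " + table_name + "." + entry.first);
        }
    }
}

int main() {
    // Mirror part of the test schema: "name" is required; "address.street" is a required nested field.
    ColumnDef name{"name", true, {}};
    ColumnDef address{"address", false, {}};
    address.children.push_back(std::unique_ptr<ColumnDef>(new ColumnDef{"street", true, {}}));

    std::map<std::string, const ColumnDef *> leaves;
    FlattenColumns(leaves, "", name);
    FlattenColumns(leaves, "", address);

    // A written file whose stats report one NULL under address.street violates the constraint.
    try {
        CheckNullCounts(leaves, {{"address.street", 1L}}, "test_not_null");
    } catch (const std::exception &e) {
        std::cout << e.what() << "\n"; // NOT NULL constraint failed: test_not_null.address.street
    }
}

With the test table defined earlier, a NULL written to the optional phone_numbers column would pass this check, while a NULL in name or address.street would not, which is exactly what the two test files below exercise.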

test/sql/local/irc/create/test_create_table_not_null.test

Lines changed: 61 additions & 0 deletions

@@ -0,0 +1,61 @@
+# name: test/sql/local/irc/create/test_create_table_not_null.test
+# description: test create table
+# group: [create]
+
+require-env ICEBERG_SERVER_AVAILABLE
+
+require avro
+
+require parquet
+
+require iceberg
+
+require httpfs
+
+# Do not ignore 'HTTP' error messages!
+set ignore_error_messages
+
+statement ok
+CALL enable_logging('HTTP');
+
+statement ok
+set logging_level='debug'
+
+statement ok
+CREATE SECRET (
+    TYPE S3,
+    KEY_ID 'admin',
+    SECRET 'password',
+    ENDPOINT '127.0.0.1:9000',
+    URL_STYLE 'path',
+    USE_SSL 0
+);
+
+
+statement ok
+ATTACH '' AS my_datalake (
+    TYPE ICEBERG,
+    CLIENT_ID 'admin',
+    CLIENT_SECRET 'password',
+    ENDPOINT 'http://127.0.0.1:8181'
+);
+
+statement ok
+use my_datalake.default;
+
+statement ok
+drop table if exists table_with_null_reqs;
+
+statement ok
+create table table_with_null_reqs (
+    a varchar,
+    b int NOT NULL
+);
+
+statement error
+insert into table_with_null_reqs values ('value', NULL);
+----
+<REGEX>:.*NOT NULL constraint failed.*table_with_null_reqs.b.*
+
+statement ok
+insert into table_with_null_reqs values ('value', 5);

test/sql/local/irc/insert/test_insert_into_null_columns.test

Lines changed: 74 additions & 0 deletions

@@ -0,0 +1,74 @@
+# name: test/sql/local/irc/insert/test_insert_into_null_columns.test
+# group: [insert]
+
+require-env ICEBERG_SERVER_AVAILABLE
+
+require avro
+
+require parquet
+
+require iceberg
+
+require httpfs
+
+# Do not ignore 'HTTP' error messages!
+set ignore_error_messages
+
+statement ok
+CALL enable_logging('HTTP');
+
+statement ok
+set logging_level='debug'
+
+statement ok
+CREATE SECRET (
+    TYPE S3,
+    KEY_ID 'admin',
+    SECRET 'password',
+    ENDPOINT '127.0.0.1:9000',
+    URL_STYLE 'path',
+    USE_SSL 0
+);
+
+
+statement ok
+ATTACH '' AS my_datalake (
+    TYPE ICEBERG,
+    CLIENT_ID 'admin',
+    CLIENT_SECRET 'password',
+    ENDPOINT 'http://127.0.0.1:8181'
+);
+
+# first test that we can insert nested types
+statement ok
+insert into my_datalake.default.test_not_null VALUES (
+    1,
+    'NULL nested type',
+    {'street': 'duck street', 'city': 'Metropolis', 'zip': '12345'},
+    ['123-456-7890', '987-654-3210']::VARCHAR[],
+    MAP {'age': '30', 'membership': 'gold'}
+);
+
+# cannot insert null into a nested type that is required
+statement error
+insert into my_datalake.default.test_not_null VALUES (
+    1,
+    'NULL nested type',
+    {'street': NULL, 'city': 'Metropolis', 'zip': '12345'},
+    ['123-456-7890', '987-654-3210']::VARCHAR[],
+    MAP {'age': '30', 'membership': 'gold'}
+);
+----
+<REGEX>:.*Constraint Error.*NOT NULL.*
+
+# cannot insert NULL into primitive type marked NOT NULL
+statement error
+insert into my_datalake.default.test_not_null VALUES (
+    1,
+    NULL,
+    {'street': 'duck street', 'city': 'swan city', 'zip': '12345'},
+    ['123-456-7890', '987-654-3210']::VARCHAR[],
+    MAP {'age': '30', 'membership': 'gold'}
+);
+----
+<REGEX>:.*Constraint Error.*NOT NULL.*
