Skip to content

Commit 874c0dd

Browse files
authored
Merge pull request #121 from carlopi/bumpv111
Bump to DuckDB v1.1.1
2 parents b3bf03b + 4d2b736 commit 874c0dd

File tree

146 files changed

+2272
-1800
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

146 files changed

+2272
-1800
lines changed

binding.gyp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,8 @@
384384
"DUCKDB_EXTENSION_ICU_LINKED",
385385
"DUCKDB_EXTENSION_JSON_LINKED",
386386
"DUCKDB_EXTENSION_AUTOLOAD_DEFAULT=1",
387-
"DUCKDB_EXTENSION_AUTOINSTALL_DEFAULT=1"
387+
"DUCKDB_EXTENSION_AUTOINSTALL_DEFAULT=1",
388+
"NDEBUG"
388389
],
389390
"cflags_cc": [
390391
"-frtti",

src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

src/duckdb/extension/json/include/json_common.hpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -241,12 +241,16 @@ struct JSONCommon {
241241
};
242242

243243
//! Get JSON value using JSON path query (safe, checks the path query)
244-
static inline yyjson_val *Get(yyjson_val *val, const string_t &path_str) {
244+
static inline yyjson_val *Get(yyjson_val *val, const string_t &path_str, bool integral_argument) {
245245
auto ptr = path_str.GetData();
246246
auto len = path_str.GetSize();
247247
if (len == 0) {
248248
return GetUnsafe(val, ptr, len);
249249
}
250+
if (integral_argument) {
251+
auto str = "$[" + path_str.GetString() + "]";
252+
return GetUnsafe(val, str.c_str(), str.length());
253+
}
250254
switch (*ptr) {
251255
case '/': {
252256
// '/' notation must be '\0'-terminated
@@ -260,9 +264,15 @@ struct JSONCommon {
260264
}
261265
return GetUnsafe(val, ptr, len);
262266
}
263-
default:
264-
auto str = "/" + string(ptr, len);
265-
return GetUnsafe(val, str.c_str(), len + 1);
267+
default: {
268+
string path;
269+
if (memchr(ptr, '"', len)) {
270+
path = "/" + string(ptr, len);
271+
} else {
272+
path = "$.\"" + path_str.GetString() + "\"";
273+
}
274+
return GetUnsafe(val, path.c_str(), path.length());
275+
}
266276
}
267277
}
268278

src/duckdb/extension/json/include/json_executors.hpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#pragma once
1010

11+
#include "duckdb/common/vector_operations/vector_operations.hpp"
1112
#include "duckdb/execution/expression_executor.hpp"
1213
#include "json_functions.hpp"
1314

@@ -88,11 +89,18 @@ struct JSONExecutors {
8889
}
8990
} else { // Columnref path
9091
D_ASSERT(info.path_type == JSONCommon::JSONPathType::REGULAR);
91-
auto &paths = args.data[1];
92+
unique_ptr<Vector> casted_paths;
93+
if (args.data[1].GetType().id() == LogicalTypeId::VARCHAR) {
94+
casted_paths = make_uniq<Vector>(args.data[1]);
95+
} else {
96+
casted_paths = make_uniq<Vector>(LogicalTypeId::VARCHAR);
97+
VectorOperations::DefaultCast(args.data[1], *casted_paths, args.size(), true);
98+
}
9299
BinaryExecutor::ExecuteWithNulls<string_t, string_t, T>(
93-
inputs, paths, result, args.size(), [&](string_t input, string_t path, ValidityMask &mask, idx_t idx) {
100+
inputs, *casted_paths, result, args.size(),
101+
[&](string_t input, string_t path, ValidityMask &mask, idx_t idx) {
94102
auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG, lstate.json_allocator.GetYYAlc());
95-
auto val = JSONCommon::Get(doc->root, path);
103+
auto val = JSONCommon::Get(doc->root, path, args.data[1].GetType().IsIntegral());
96104
if (SET_NULL_IF_NOT_FOUND && !val) {
97105
mask.SetInvalid(idx);
98106
return T {};

src/duckdb/extension/json/json_extension.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ static DefaultMacro json_macros[] = {
2727
"json_group_structure",
2828
{"x", nullptr},
2929
{{nullptr, nullptr}},
30-
"json_structure(json_group_array(x))->'0'"},
30+
"json_structure(json_group_array(x))->0"},
3131
{DEFAULT_SCHEMA, "json", {"x", nullptr}, {{nullptr, nullptr}}, "json_extract(x, '$')"},
3232
{nullptr, nullptr, {nullptr}, {{nullptr, nullptr}}, nullptr}};
3333

src/duckdb/extension/json/json_functions.cpp

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,21 +21,25 @@ static JSONPathType CheckPath(const Value &path_val, string &path, size_t &len)
2121
const auto path_str_val = path_val.DefaultCastAs(LogicalType::VARCHAR);
2222
auto path_str = path_str_val.GetValueUnsafe<string_t>();
2323
len = path_str.GetSize();
24-
auto ptr = path_str.GetData();
24+
const auto ptr = path_str.GetData();
2525
// Empty strings and invalid $ paths yield an error
2626
if (len == 0) {
2727
throw BinderException("Empty JSON path");
2828
}
2929
JSONPathType path_type = JSONPathType::REGULAR;
30-
if (*ptr == '$') {
31-
path_type = JSONCommon::ValidatePath(ptr, len, true);
32-
}
3330
// Copy over string to the bind data
3431
if (*ptr == '/' || *ptr == '$') {
3532
path = string(ptr, len);
36-
} else {
33+
} else if (path_val.type().IsIntegral()) {
34+
path = "$[" + string(ptr, len) + "]";
35+
} else if (memchr(ptr, '"', len)) {
3736
path = "/" + string(ptr, len);
38-
len++;
37+
} else {
38+
path = "$.\"" + string(ptr, len) + "\"";
39+
}
40+
len = path.length();
41+
if (*path.c_str() == '$') {
42+
path_type = JSONCommon::ValidatePath(path.c_str(), len, true);
3943
}
4044
return path_type;
4145
}
@@ -67,7 +71,11 @@ unique_ptr<FunctionData> JSONReadFunctionData::Bind(ClientContext &context, Scal
6771
path_type = CheckPath(path_val, path, len);
6872
}
6973
}
70-
bound_function.arguments[1] = LogicalType::VARCHAR;
74+
if (arguments[1]->return_type.IsIntegral()) {
75+
bound_function.arguments[1] = LogicalType::BIGINT;
76+
} else {
77+
bound_function.arguments[1] = LogicalType::VARCHAR;
78+
}
7179
if (path_type == JSONCommon::JSONPathType::WILDCARD) {
7280
bound_function.return_type = LogicalType::LIST(bound_function.return_type);
7381
}
@@ -117,6 +125,7 @@ unique_ptr<FunctionData> JSONReadManyFunctionData::Bind(ClientContext &context,
117125

118126
JSONFunctionLocalState::JSONFunctionLocalState(Allocator &allocator) : json_allocator(allocator) {
119127
}
128+
120129
JSONFunctionLocalState::JSONFunctionLocalState(ClientContext &context)
121130
: JSONFunctionLocalState(BufferAllocator::Get(context)) {
122131
}

src/duckdb/extension/json/json_functions/json_extract.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,17 @@ static inline string_t ExtractFromVal(yyjson_val *val, yyjson_alc *alc, Vector &
66
return JSONCommon::WriteVal<yyjson_val>(val, alc);
77
}
88

9-
static inline string_t ExtractStringFromVal(yyjson_val *val, yyjson_alc *alc, Vector &, ValidityMask &, idx_t) {
10-
return yyjson_is_str(val) ? string_t(unsafe_yyjson_get_str(val), unsafe_yyjson_get_len(val))
11-
: JSONCommon::WriteVal<yyjson_val>(val, alc);
9+
static inline string_t ExtractStringFromVal(yyjson_val *val, yyjson_alc *alc, Vector &, ValidityMask &mask, idx_t idx) {
10+
switch (yyjson_get_tag(val)) {
11+
case YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE:
12+
mask.SetInvalid(idx);
13+
return string_t {};
14+
case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NOESC:
15+
case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE:
16+
return string_t(unsafe_yyjson_get_str(val), unsafe_yyjson_get_len(val));
17+
default:
18+
return JSONCommon::WriteVal<yyjson_val>(val, alc);
19+
}
1220
}
1321

1422
static void ExtractFunction(DataChunk &args, ExpressionState &state, Vector &result) {

src/duckdb/extension/json/json_functions/json_value.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ namespace duckdb {
44

55
static inline string_t ValueFromVal(yyjson_val *val, yyjson_alc *alc, Vector &, ValidityMask &mask, idx_t idx) {
66
switch (yyjson_get_tag(val)) {
7+
case YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE:
78
case YYJSON_TYPE_ARR | YYJSON_SUBTYPE_NONE:
89
case YYJSON_TYPE_OBJ | YYJSON_SUBTYPE_NONE:
910
mask.SetInvalid(idx);
@@ -22,12 +23,12 @@ static void ValueManyFunction(DataChunk &args, ExpressionState &state, Vector &r
2223
}
2324

2425
static void GetValueFunctionsInternal(ScalarFunctionSet &set, const LogicalType &input_type) {
25-
set.AddFunction(ScalarFunction({input_type, LogicalType::BIGINT}, LogicalType::JSON(), ValueFunction,
26+
set.AddFunction(ScalarFunction({input_type, LogicalType::BIGINT}, LogicalType::VARCHAR, ValueFunction,
2627
JSONReadFunctionData::Bind, nullptr, nullptr, JSONFunctionLocalState::Init));
27-
set.AddFunction(ScalarFunction({input_type, LogicalType::VARCHAR}, LogicalType::JSON(), ValueFunction,
28+
set.AddFunction(ScalarFunction({input_type, LogicalType::VARCHAR}, LogicalType::VARCHAR, ValueFunction,
2829
JSONReadFunctionData::Bind, nullptr, nullptr, JSONFunctionLocalState::Init));
2930
set.AddFunction(ScalarFunction({input_type, LogicalType::LIST(LogicalType::VARCHAR)},
30-
LogicalType::LIST(LogicalType::JSON()), ValueManyFunction,
31+
LogicalType::LIST(LogicalType::VARCHAR), ValueManyFunction,
3132
JSONReadManyFunctionData::Bind, nullptr, nullptr, JSONFunctionLocalState::Init));
3233
}
3334

src/duckdb/extension/parquet/column_reader.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,9 @@ void ColumnReader::PrepareRead(parquet_filter_t &filter) {
259259
break;
260260
case PageType::DICTIONARY_PAGE:
261261
PreparePage(page_hdr);
262+
if (page_hdr.dictionary_page_header.num_values < 0) {
263+
throw std::runtime_error("Invalid dictionary page header (num_values < 0)");
264+
}
262265
Dictionary(std::move(block), page_hdr.dictionary_page_header.num_values);
263266
break;
264267
default:

src/duckdb/extension/parquet/column_writer.cpp

Lines changed: 54 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1209,47 +1209,6 @@ class IntervalColumnWriter : public BasicColumnWriter {
12091209
}
12101210
};
12111211

1212-
//===--------------------------------------------------------------------===//
1213-
// Geometry Column Writer
1214-
//===--------------------------------------------------------------------===//
1215-
// This class just wraps another column writer, but also calculates the extent
1216-
// of the geometry column by updating the geodata object with every written
1217-
// vector.
1218-
template <class WRITER_IMPL>
1219-
class GeometryColumnWriter : public WRITER_IMPL {
1220-
GeoParquetColumnMetadata geo_data;
1221-
GeoParquetColumnMetadataWriter geo_data_writer;
1222-
string column_name;
1223-
1224-
public:
1225-
void Write(ColumnWriterState &state, Vector &vector, idx_t count) override {
1226-
// Just write normally
1227-
WRITER_IMPL::Write(state, vector, count);
1228-
1229-
// And update the geodata object
1230-
geo_data_writer.Update(geo_data, vector, count);
1231-
}
1232-
void FinalizeWrite(ColumnWriterState &state) override {
1233-
WRITER_IMPL::FinalizeWrite(state);
1234-
1235-
// Add the geodata object to the writer
1236-
this->writer.GetGeoParquetData().geometry_columns[column_name] = geo_data;
1237-
}
1238-
1239-
public:
1240-
GeometryColumnWriter(ClientContext &context, ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p,
1241-
idx_t max_repeat, idx_t max_define, bool can_have_nulls, string name)
1242-
: WRITER_IMPL(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls),
1243-
geo_data_writer(context), column_name(std::move(name)) {
1244-
1245-
auto &geo_data = writer.GetGeoParquetData();
1246-
if (geo_data.primary_geometry_column.empty()) {
1247-
// Set the first column to the primary column
1248-
geo_data.primary_geometry_column = column_name;
1249-
}
1250-
}
1251-
};
1252-
12531212
//===--------------------------------------------------------------------===//
12541213
// String Column Writer
12551214
//===--------------------------------------------------------------------===//
@@ -1563,6 +1522,58 @@ class StringColumnWriter : public BasicColumnWriter {
15631522
}
15641523
};
15651524

1525+
//===--------------------------------------------------------------------===//
1526+
// WKB Column Writer
1527+
//===--------------------------------------------------------------------===//
1528+
// Used to store the metadata for a WKB-encoded geometry column when writing
1529+
// GeoParquet files.
1530+
class WKBColumnWriterState final : public StringColumnWriterState {
1531+
public:
1532+
WKBColumnWriterState(ClientContext &context, duckdb_parquet::format::RowGroup &row_group, idx_t col_idx)
1533+
: StringColumnWriterState(row_group, col_idx), geo_data(), geo_data_writer(context) {
1534+
}
1535+
1536+
GeoParquetColumnMetadata geo_data;
1537+
GeoParquetColumnMetadataWriter geo_data_writer;
1538+
};
1539+
1540+
class WKBColumnWriter final : public StringColumnWriter {
1541+
public:
1542+
WKBColumnWriter(ClientContext &context_p, ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p,
1543+
idx_t max_repeat, idx_t max_define, bool can_have_nulls, string name)
1544+
: StringColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls),
1545+
column_name(std::move(name)), context(context_p) {
1546+
1547+
this->writer.GetGeoParquetData().RegisterGeometryColumn(column_name);
1548+
}
1549+
1550+
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override {
1551+
auto result = make_uniq<WKBColumnWriterState>(context, row_group, row_group.columns.size());
1552+
RegisterToRowGroup(row_group);
1553+
return std::move(result);
1554+
}
1555+
void Write(ColumnWriterState &state, Vector &vector, idx_t count) override {
1556+
StringColumnWriter::Write(state, vector, count);
1557+
1558+
auto &geo_state = state.Cast<WKBColumnWriterState>();
1559+
geo_state.geo_data_writer.Update(geo_state.geo_data, vector, count);
1560+
}
1561+
1562+
void FinalizeWrite(ColumnWriterState &state) override {
1563+
StringColumnWriter::FinalizeWrite(state);
1564+
1565+
// Add the geodata object to the writer
1566+
const auto &geo_state = state.Cast<WKBColumnWriterState>();
1567+
1568+
// Merge this state's geo column data with the writer's geo column data
1569+
writer.GetGeoParquetData().FlushColumnMeta(column_name, geo_state.geo_data);
1570+
}
1571+
1572+
private:
1573+
string column_name;
1574+
ClientContext &context;
1575+
};
1576+
15661577
//===--------------------------------------------------------------------===//
15671578
// Enum Column Writer
15681579
//===--------------------------------------------------------------------===//
@@ -2234,8 +2245,8 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
22342245
schema_path.push_back(name);
22352246

22362247
if (type.id() == LogicalTypeId::BLOB && type.GetAlias() == "WKB_BLOB") {
2237-
return make_uniq<GeometryColumnWriter<StringColumnWriter>>(context, writer, schema_idx, std::move(schema_path),
2238-
max_repeat, max_define, can_have_nulls, name);
2248+
return make_uniq<WKBColumnWriter>(context, writer, schema_idx, std::move(schema_path), max_repeat, max_define,
2249+
can_have_nulls, name);
22392250
}
22402251

22412252
switch (type.id()) {

0 commit comments

Comments
 (0)