Skip to content

Commit

Permalink
vendor duckdb 1.2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
hannes committed Feb 10, 2025
1 parent 47f242f commit cdd22fc
Show file tree
Hide file tree
Showing 218 changed files with 3,129 additions and 1,641 deletions.
1 change: 1 addition & 0 deletions binding.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@
"src/duckdb/extension/icu/./icu-makedate.cpp",
"src/duckdb/extension/icu/./icu-list-range.cpp",
"src/duckdb/extension/icu/./icu-timebucket.cpp",
"src/duckdb/extension/icu/./icu-current.cpp",
"src/duckdb/extension/icu/./icu-timezone.cpp",
"src/duckdb/extension/icu/./icu-dateadd.cpp",
"src/duckdb/extension/icu/./icu-datetrunc.cpp",
Expand Down
1 change: 1 addition & 0 deletions src/duckdb/extension/core_functions/function_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ static const StaticFunctionDefinition core_functions[] = {
DUCKDB_SCALAR_FUNCTION(MapConcatFun),
DUCKDB_SCALAR_FUNCTION(MapEntriesFun),
DUCKDB_SCALAR_FUNCTION(MapExtractFun),
DUCKDB_SCALAR_FUNCTION(MapExtractValueFun),
DUCKDB_SCALAR_FUNCTION(MapFromEntriesFun),
DUCKDB_SCALAR_FUNCTION(MapKeysFun),
DUCKDB_SCALAR_FUNCTION(MapValuesFun),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,15 @@ struct ElementAtFun {
static constexpr const char *Name = "element_at";
};

//! Catalog metadata for the `map_extract_value` scalar function.
//! The string members hold the SQL-visible name, the parameter list, a description and a usage example;
//! GetFunction() is only declared here and returns the ScalarFunction itself.
struct MapExtractValueFun {
//! SQL name of the function
static constexpr const char *Name = "map_extract_value";
//! Comma-separated parameter names: the map to search, then the key to look up
static constexpr const char *Parameters = "map,key";
//! Description text: yields the value stored under the key, or NULL when the key is absent
static constexpr const char *Description = "Returns the value for a given key or NULL if the key is not contained in the map. The type of the key provided in the second parameter must match the type of the map’s keys else an error is returned";
//! Example SQL call
static constexpr const char *Example = "map_extract_value(map(['key'], ['val']), 'key')";

//! Returns the ScalarFunction for this entry (defined out of line)
static ScalarFunction GetFunction();
};

struct MapFromEntriesFun {
static constexpr const char *Name = "map_from_entries";
static constexpr const char *Parameters = "map";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "duckdb/main/client_context.hpp"
#include "duckdb/planner/expression/bound_function_expression.hpp"
#include "duckdb/transaction/meta_transaction.hpp"
#include "duckdb/planner/expression/bound_cast_expression.hpp"

namespace duckdb {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ static void CanCastImplicitlyFunction(DataChunk &args, ExpressionState &state, V
}

unique_ptr<Expression> BindCanCastImplicitlyExpression(FunctionBindExpressionInput &input) {
auto &source_type = input.function.children[0]->return_type;
auto &target_type = input.function.children[1]->return_type;
auto &source_type = input.children[0]->return_type;
auto &target_type = input.children[1]->return_type;
if (source_type.id() == LogicalTypeId::UNKNOWN || source_type.id() == LogicalTypeId::SQLNULL ||
target_type.id() == LogicalTypeId::UNKNOWN || target_type.id() == LogicalTypeId::SQLNULL) {
// parameter - unknown return type
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ static void TypeOfFunction(DataChunk &args, ExpressionState &state, Vector &resu
}

unique_ptr<Expression> BindTypeOfFunctionExpression(FunctionBindExpressionInput &input) {
auto &return_type = input.function.children[0]->return_type;
auto &return_type = input.children[0]->return_type;
if (return_type.id() == LogicalTypeId::UNKNOWN || return_type.id() == LogicalTypeId::SQLNULL) {
// parameter - unknown return type
return nullptr;
Expand Down
152 changes: 91 additions & 61 deletions src/duckdb/extension/core_functions/scalar/list/flatten.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,102 +7,132 @@

namespace duckdb {

void ListFlattenFunction(DataChunk &args, ExpressionState &state, Vector &result) {
D_ASSERT(args.ColumnCount() == 1);
static void ListFlattenFunction(DataChunk &args, ExpressionState &, Vector &result) {

Vector &input = args.data[0];
if (input.GetType().id() == LogicalTypeId::SQLNULL) {
result.Reference(input);
const auto flat_list_data = FlatVector::GetData<list_entry_t>(result);
auto &flat_list_mask = FlatVector::Validity(result);

UnifiedVectorFormat outer_format;
UnifiedVectorFormat inner_format;
UnifiedVectorFormat items_format;

// Setup outer vec;
auto &outer_vec = args.data[0];
const auto outer_count = args.size();
outer_vec.ToUnifiedFormat(outer_count, outer_format);

// Special case: outer list is all-null
if (outer_vec.GetType().id() == LogicalTypeId::SQLNULL) {
result.Reference(outer_vec);
return;
}

idx_t count = args.size();

// Prepare the result vector
result.SetVectorType(VectorType::FLAT_VECTOR);
// This holds the new offsets and lengths
auto result_entries = FlatVector::GetData<list_entry_t>(result);
auto &result_validity = FlatVector::Validity(result);

// The outermost list in each row
UnifiedVectorFormat row_data;
input.ToUnifiedFormat(count, row_data);
auto row_entries = UnifiedVectorFormat::GetData<list_entry_t>(row_data);

// The list elements in each row: [HERE, ...]
auto &row_lists = ListVector::GetEntry(input);
UnifiedVectorFormat row_lists_data;
idx_t total_row_lists = ListVector::GetListSize(input);
row_lists.ToUnifiedFormat(total_row_lists, row_lists_data);
auto row_lists_entries = UnifiedVectorFormat::GetData<list_entry_t>(row_lists_data);

if (row_lists.GetType().id() == LogicalTypeId::SQLNULL) {
for (idx_t row_cnt = 0; row_cnt < count; row_cnt++) {
auto row_idx = row_data.sel->get_index(row_cnt);
if (!row_data.validity.RowIsValid(row_idx)) {
result_validity.SetInvalid(row_cnt);
// Setup inner vec
auto &inner_vec = ListVector::GetEntry(outer_vec);
const auto inner_count = ListVector::GetListSize(outer_vec);
inner_vec.ToUnifiedFormat(inner_count, inner_format);

// Special case: inner list is all-null
if (inner_vec.GetType().id() == LogicalTypeId::SQLNULL) {
for (idx_t outer_raw_idx = 0; outer_raw_idx < outer_count; outer_raw_idx++) {
const auto outer_idx = outer_format.sel->get_index(outer_raw_idx);
if (!outer_format.validity.RowIsValid(outer_idx)) {
flat_list_mask.SetInvalid(outer_raw_idx);
continue;
}
result_entries[row_cnt].offset = 0;
result_entries[row_cnt].length = 0;
flat_list_data[outer_raw_idx].offset = 0;
flat_list_data[outer_raw_idx].length = 0;
}
if (args.AllConstant()) {
result.SetVectorType(VectorType::CONSTANT_VECTOR);
}
return;
}

// The actual elements inside each row list: [[HERE, ...], []]
// This one becomes the child vector of the result.
auto &elem_vector = ListVector::GetEntry(row_lists);
// Setup items vec
auto &items_vec = ListVector::GetEntry(inner_vec);
const auto items_count = ListVector::GetListSize(inner_vec);
items_vec.ToUnifiedFormat(items_count, items_format);

// First pass: Figure out the total amount of items.
// This can be more than items_count if the inner lists reference the same item(s) multiple times.

idx_t total_items = 0;

const auto outer_data = UnifiedVectorFormat::GetData<list_entry_t>(outer_format);
const auto inner_data = UnifiedVectorFormat::GetData<list_entry_t>(inner_format);

for (idx_t outer_raw_idx = 0; outer_raw_idx < outer_count; outer_raw_idx++) {
const auto outer_idx = outer_format.sel->get_index(outer_raw_idx);

if (!outer_format.validity.RowIsValid(outer_idx)) {
continue;
}

const auto &outer_entry = outer_data[outer_idx];

for (idx_t inner_raw_idx = outer_entry.offset; inner_raw_idx < outer_entry.offset + outer_entry.length;
inner_raw_idx++) {
const auto inner_idx = inner_format.sel->get_index(inner_raw_idx);

// We'll use this selection vector to slice the elem_vector.
idx_t child_elem_cnt = ListVector::GetListSize(row_lists);
SelectionVector sel(child_elem_cnt);
if (!inner_format.validity.RowIsValid(inner_idx)) {
continue;
}

const auto &inner_entry = inner_data[inner_idx];

total_items += inner_entry.length;
}
}

// Now we know the total amount of items, we can create our selection vector.
SelectionVector sel(total_items);
idx_t sel_idx = 0;

// HERE, [[]], ...
for (idx_t row_cnt = 0; row_cnt < count; row_cnt++) {
auto row_idx = row_data.sel->get_index(row_cnt);
// Second pass: Fill the selection vector (and the result list entries)

for (idx_t outer_raw_idx = 0; outer_raw_idx < outer_count; outer_raw_idx++) {
const auto outer_idx = outer_format.sel->get_index(outer_raw_idx);

if (!row_data.validity.RowIsValid(row_idx)) {
result_validity.SetInvalid(row_cnt);
if (!outer_format.validity.RowIsValid(outer_idx)) {
flat_list_mask.SetInvalid(outer_raw_idx);
continue;
}

idx_t list_offset = sel_idx;
idx_t list_length = 0;
const auto &outer_entry = outer_data[outer_idx];

list_entry_t list_entry = {sel_idx, 0};

// [HERE, [...], ...]
auto row_entry = row_entries[row_idx];
for (idx_t row_lists_cnt = 0; row_lists_cnt < row_entry.length; row_lists_cnt++) {
auto row_lists_idx = row_lists_data.sel->get_index(row_entry.offset + row_lists_cnt);
for (idx_t inner_raw_idx = outer_entry.offset; inner_raw_idx < outer_entry.offset + outer_entry.length;
inner_raw_idx++) {
const auto inner_idx = inner_format.sel->get_index(inner_raw_idx);

// Skip invalid lists
if (!row_lists_data.validity.RowIsValid(row_lists_idx)) {
if (!inner_format.validity.RowIsValid(inner_idx)) {
continue;
}

// [[HERE, ...], [.., ...]]
auto list_entry = row_lists_entries[row_lists_idx];
list_length += list_entry.length;
const auto &inner_entry = inner_data[inner_idx];

list_entry.length += inner_entry.length;

for (idx_t elem_raw_idx = inner_entry.offset; elem_raw_idx < inner_entry.offset + inner_entry.length;
elem_raw_idx++) {
const auto elem_idx = items_format.sel->get_index(elem_raw_idx);

for (idx_t elem_cnt = 0; elem_cnt < list_entry.length; elem_cnt++) {
// offset of the element in the elem_vector.
idx_t offset = list_entry.offset + elem_cnt;
sel.set_index(sel_idx, offset);
sel.set_index(sel_idx, elem_idx);
sel_idx++;
}
}

result_entries[row_cnt].offset = list_offset;
result_entries[row_cnt].length = list_length;
// Assign the result list entry
flat_list_data[outer_raw_idx] = list_entry;
}

// Now assign the result
ListVector::SetListSize(result, sel_idx);

auto &result_child_vector = ListVector::GetEntry(result);
result_child_vector.Slice(elem_vector, sel, sel_idx);
result_child_vector.Slice(items_vec, sel, sel_idx);
result_child_vector.Flatten(sel_idx);

if (args.AllConstant()) {
Expand Down
97 changes: 89 additions & 8 deletions src/duckdb/extension/core_functions/scalar/map/map_extract.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,35 +6,36 @@

namespace duckdb {

//! Bind callback shared by the map extraction functions.
//!
//! EXTRACT_VALUE selects the shape of the bound return type:
//!   - true:  the map's value type itself
//!   - false: a LIST of the map's value type
//!
//! \param bound_function the function being bound; its return_type (and possibly arguments[1]) is updated
//! \param arguments      the bound argument expressions; exactly two are required (map, key)
//! \return bind data carrying the resolved return type
//! \throws BinderException if the argument count is not two, or if the first argument is neither a MAP
//!         nor an untyped NULL
template <bool EXTRACT_VALUE>
static unique_ptr<FunctionData> MapExtractBind(ClientContext &, ScalarFunction &bound_function,
                                               vector<unique_ptr<Expression>> &arguments) {
	if (arguments.size() != 2) {
		throw BinderException("MAP_EXTRACT must have exactly two arguments");
	}

	const auto &map_type = arguments[0]->return_type;
	const auto &input_type = arguments[1]->return_type;

	// An untyped NULL map has no value type to inspect: bind to NULL (or a list of NULL) directly
	if (map_type.id() == LogicalTypeId::SQLNULL) {
		bound_function.return_type = EXTRACT_VALUE ? LogicalTypeId::SQLNULL : LogicalType::LIST(LogicalTypeId::SQLNULL);
		return make_uniq<VariableReturnBindData>(bound_function.return_type);
	}

	if (map_type.id() != LogicalTypeId::MAP) {
		throw BinderException("'%s' can only operate on MAPs", bound_function.name);
	}
	auto &value_type = MapType::ValueType(map_type);

	//! Here we construct the type that will be returned: the bare value type, or a list of it
	bound_function.return_type = EXTRACT_VALUE ? value_type : LogicalType::LIST(value_type);

	// Retype the key argument to the map's key type unless either side is an untyped NULL
	// (presumably so the lookup key is cast to the key type before execution - confirm against the binder)
	const auto &key_type = MapType::KeyType(map_type);
	if (key_type.id() != LogicalTypeId::SQLNULL && input_type.id() != LogicalTypeId::SQLNULL) {
		bound_function.arguments[1] = key_type;
	}
	return make_uniq<VariableReturnBindData>(bound_function.return_type);
}

static void MapExtractFunc(DataChunk &args, ExpressionState &state, Vector &result) {
static void MapExtractValueFunc(DataChunk &args, ExpressionState &state, Vector &result) {
const auto count = args.size();

auto &map_vec = args.data[0];
Expand Down Expand Up @@ -94,8 +95,88 @@ static void MapExtractFunc(DataChunk &args, ExpressionState &state, Vector &resu
result.Verify(count);
}

//! Executes the list-returning map extraction: for every row, looks up args.data[1] among the keys of the
//! map in args.data[0] and writes a list with the matching value (length 1) or an empty list (no match).
//! Rows whose map is NULL produce a NULL result row.
static void MapExtractListFunc(DataChunk &args, ExpressionState &state, Vector &result) {
	const auto row_count = args.size();

	auto &map_vec = args.data[0];
	auto &arg_vec = args.data[1];

	// If either input is typed as SQLNULL, the whole result is a single constant empty list
	const bool untyped_map = map_vec.GetType().id() == LogicalTypeId::SQLNULL;
	const bool untyped_arg = arg_vec.GetType().id() == LogicalTypeId::SQLNULL;
	if (untyped_map || untyped_arg) {
		ListVector::SetListSize(result, 0);
		result.SetVectorType(VectorType::CONSTANT_VECTOR);
		ConstantVector::GetData<list_entry_t>(result)[0] = {0, 0};
		result.Verify(row_count);
		return;
	}

	auto &key_vec = MapVector::GetKeys(map_vec);
	auto &val_vec = MapVector::GetValues(map_vec);

	// Search the keys of each map for the requested key; the per-row match position lands in pos_vec
	Vector pos_vec(LogicalType::INTEGER, row_count);
	ListSearchOp<true>(map_vec, key_vec, arg_vec, pos_vec, row_count);

	// Bring the inputs into unified format (same order as before; value_format is not read afterwards)
	UnifiedVectorFormat value_format;
	UnifiedVectorFormat position_format;
	UnifiedVectorFormat map_format;

	val_vec.ToUnifiedFormat(ListVector::GetListSize(map_vec), value_format);
	pos_vec.ToUnifiedFormat(row_count, position_format);
	map_vec.ToUnifiedFormat(row_count, map_format);

	const auto positions = UnifiedVectorFormat::GetData<int32_t>(position_format);
	const auto map_entries = ListVector::GetData(map_vec);
	const auto result_entries = ListVector::GetData(result);

	// Running offset into the result's child vector; grows by one for every value that gets appended
	idx_t next_offset = 0;
	for (idx_t row = 0; row < row_count; row++) {
		const auto map_idx = map_format.sel->get_index(row);
		if (!map_format.validity.RowIsValid(map_idx)) {
			// NULL map -> NULL result row
			FlatVector::SetNull(result, row, true);
			continue;
		}

		auto &source_entry = map_entries[map_idx];
		auto &target_entry = result_entries[row];
		target_entry.offset = next_offset;

		const auto position_idx = position_format.sel->get_index(row);
		if (!position_format.validity.RowIsValid(position_idx)) {
			// The key was not found in the map, so return an empty list
			target_entry.length = 0;
			continue;
		}

		// Translate the match position (shifted down by one) into an index into the map's value vector
		const auto value_idx = source_entry.offset + UnsafeNumericCast<idx_t>(positions[position_idx] - 1);
		target_entry.length = 1;
		// Append exactly the one element at value_idx to the result's child vector
		ListVector::Append(result, val_vec, value_idx + 1, value_idx);
		next_offset++;
	}

	if (row_count == 1) {
		result.SetVectorType(VectorType::CONSTANT_VECTOR);
	}

	result.Verify(row_count);
}

//! Builds the scalar function behind MapExtractValueFun: two ANY arguments and an ANY return type,
//! executed by MapExtractValueFunc and bound through MapExtractBind<true>.
ScalarFunction MapExtractValueFun::GetFunction() {
	ScalarFunction extract_value_fun({LogicalType::ANY, LogicalType::ANY}, LogicalType::ANY, MapExtractValueFunc,
	                                 MapExtractBind<true>);
	// The function handles NULL inputs itself instead of relying on default NULL handling
	extract_value_fun.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
	extract_value_fun.varargs = LogicalType::ANY;
	return extract_value_fun;
}

ScalarFunction MapExtractFun::GetFunction() {
ScalarFunction fun({LogicalType::ANY, LogicalType::ANY}, LogicalType::ANY, MapExtractFunc, MapExtractBind);
ScalarFunction fun({LogicalType::ANY, LogicalType::ANY}, LogicalType::ANY, MapExtractListFunc,
MapExtractBind<false>);
fun.varargs = LogicalType::ANY;
fun.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
return fun;
Expand Down
Loading

0 comments on commit cdd22fc

Please sign in to comment.