Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add PostgresViewEntry #183

Draft
wants to merge 15 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion create-postgres-tables.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
#!/bin/bash

# Set default value for the build type
BUILD_TYPE="release"
# If an argument is provided, use that as the build type instead
if [ $# -eq 1 ]; then
BUILD_TYPE=$1
fi

echo "
CREATE SCHEMA tpch;
CREATE SCHEMA tpcds;
CALL dbgen(sf=0.01, schema='tpch');
CALL dsdgen(sf=0.01, schema='tpcds');
EXPORT DATABASE '/tmp/postgresscannertmp';
" | \
./build/release/duckdb
./build/$BUILD_TYPE/duckdb

dropdb --if-exists postgresscanner
createdb postgresscanner
Expand Down
9 changes: 5 additions & 4 deletions src/include/postgres_binary_reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,9 +263,10 @@ struct PostgresBinaryReader {
return (config.is_negative ? -base_res : base_res);
}

void ReadGeometry(const LogicalType &type, const PostgresType &postgres_type, Vector &out_vec, idx_t output_offset) {
void ReadGeometry(const LogicalType &type, const PostgresType &postgres_type, Vector &out_vec,
idx_t output_offset) {
idx_t element_count = 0;
switch(postgres_type.info) {
switch (postgres_type.info) {
case PostgresTypeAnnotation::GEOM_LINE:
case PostgresTypeAnnotation::GEOM_CIRCLE:
element_count = 3;
Expand Down Expand Up @@ -294,7 +295,7 @@ struct PostgresBinaryReader {
list_entries[output_offset].length = element_count;
auto &child_vector = ListVector::GetEntry(out_vec);
auto child_data = FlatVector::GetData<double>(child_vector);
for(idx_t i = 0; i < element_count; i++) {
for (idx_t i = 0; i < element_count; i++) {
child_data[child_offset + i] = ReadDouble();
}
ListVector::SetListSize(out_vec, child_offset + element_count);
Expand Down Expand Up @@ -488,7 +489,7 @@ struct PostgresBinaryReader {
list_entry.length = 0;
break;
}
switch(postgres_type.info) {
switch (postgres_type.info) {
case PostgresTypeAnnotation::GEOM_LINE:
case PostgresTypeAnnotation::GEOM_LINE_SEGMENT:
case PostgresTypeAnnotation::GEOM_BOX:
Expand Down
49 changes: 49 additions & 0 deletions src/include/storage/postgres_create_info.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// storage/postgres_create_info.hpp
//
//
//===----------------------------------------------------------------------===//

#pragma once

#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
#include "duckdb/parser/parsed_data/create_table_info.hpp"
#include "postgres_utils.hpp"

namespace duckdb {

struct PostgresCreateInfo {
public:
PostgresCreateInfo() {
}
virtual ~PostgresCreateInfo() {
}

public:
virtual CreateInfo &GetCreateInfo() = 0;
virtual const string &GetName() const = 0;
virtual void AddColumn(ColumnDefinition def, PostgresType pg_type, const string &pg_name) = 0;
virtual void GetColumnNamesAndTypes(vector<string> &names, vector<LogicalType> &types) = 0;

public:
template <class TARGET>
TARGET &Cast() {
D_ASSERT(dynamic_cast<TARGET *>(this));
return reinterpret_cast<TARGET &>(*this);
}

template <class TARGET>
const TARGET &Cast() const {
D_ASSERT(dynamic_cast<const TARGET *>(this));
return reinterpret_cast<const TARGET &>(*this);
}

public:
idx_t approx_num_pages = 0;
vector<PostgresType> postgres_types;
vector<string> postgres_names;
};

} // namespace duckdb
2 changes: 1 addition & 1 deletion src/include/storage/postgres_schema_entry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class PostgresSchemaEntry : public SchemaCatalogEntry {
void DropEntry(ClientContext &context, DropInfo &info) override;
optional_ptr<CatalogEntry> GetEntry(CatalogTransaction transaction, CatalogType type, const string &name) override;

static bool SchemaIsInternal(const string &name);
static bool SchemaIsInternal(const string &name);

private:
void AlterTable(PostgresTransaction &transaction, RenameTableInfo &info);
Expand Down
33 changes: 27 additions & 6 deletions src/include/storage/postgres_table_entry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@

#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
#include "duckdb/parser/parsed_data/create_table_info.hpp"
#include "storage/postgres_create_info.hpp"
#include "postgres_utils.hpp"

namespace duckdb {

struct PostgresTableInfo {
struct PostgresTableInfo : public PostgresCreateInfo {
public:
PostgresTableInfo() {
create_info = make_uniq<CreateTableInfo>();
create_info->columns.SetAllowDuplicates(true);
Expand All @@ -27,21 +29,40 @@ struct PostgresTableInfo {
create_info = make_uniq<CreateTableInfo>((SchemaCatalogEntry &)schema, table);
create_info->columns.SetAllowDuplicates(true);
}
~PostgresTableInfo() override {
}

public:
CreateInfo &GetCreateInfo() override {
return *create_info;
}

const string &GetTableName() const {
const string &GetName() const override {
return create_info->table;
}

void AddColumn(ColumnDefinition def, PostgresType pg_type, const string &pg_name) override {
postgres_types.push_back(std::move(pg_type));
D_ASSERT(!pg_name.empty());
postgres_names.push_back(pg_name);
create_info->columns.AddColumn(std::move(def));
}
void GetColumnNamesAndTypes(vector<string> &names, vector<LogicalType> &types) override {
for (auto &col : create_info->columns.Logical()) {
names.push_back(col.GetName());
types.push_back(col.GetType());
}
}

public:
unique_ptr<CreateTableInfo> create_info;
vector<PostgresType> postgres_types;
vector<string> postgres_names;
idx_t approx_num_pages = 0;
};

class PostgresTableEntry : public TableCatalogEntry {
public:
PostgresTableEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateTableInfo &info);
PostgresTableEntry(Catalog &catalog, SchemaCatalogEntry &schema, PostgresTableInfo &info);
~PostgresTableEntry() override;

public:
unique_ptr<BaseStatistics> GetStatistics(ClientContext &context, column_t column_id) override;
Expand All @@ -61,7 +82,7 @@ class PostgresTableEntry : public TableCatalogEntry {
vector<PostgresType> postgres_types;
//! Column names as they are within Postgres
//! We track these separately because of case sensitivity - Postgres allows e.g. the columns "ID" and "id" together
//! We would in this case remap them to "ID" and "id:1", while postgres_names store the original names
//! We would in this case remap them to "ID" and "id_1", while postgres_names store the original names
vector<string> postgres_names;
//! The approximate number of pages a table consumes in Postgres
idx_t approx_num_pages;
Expand Down
11 changes: 6 additions & 5 deletions src/include/storage/postgres_table_set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ class PostgresTableSet : public PostgresInSchemaSet {
public:
optional_ptr<CatalogEntry> CreateTable(ClientContext &context, BoundCreateTableInfo &info);

static unique_ptr<PostgresTableInfo> GetTableInfo(PostgresTransaction &transaction, PostgresSchemaEntry &schema,
const string &table_name);
static unique_ptr<PostgresTableInfo> GetTableInfo(PostgresConnection &connection, const string &schema_name,
const string &table_name);
static unique_ptr<PostgresCreateInfo> GetTableInfo(PostgresTransaction &transaction, PostgresSchemaEntry &schema,
const string &table_name);
static unique_ptr<PostgresCreateInfo> GetTableInfo(PostgresConnection &connection, const string &schema_name,
const string &table_name);
optional_ptr<CatalogEntry> ReloadEntry(ClientContext &context, const string &table_name) override;

void AlterTable(ClientContext &context, AlterTableInfo &info);
Expand All @@ -46,7 +46,8 @@ class PostgresTableSet : public PostgresInSchemaSet {
void AlterTable(ClientContext &context, RemoveColumnInfo &info);

static void AddColumn(optional_ptr<PostgresTransaction> transaction, optional_ptr<PostgresSchemaEntry> schema,
PostgresResult &result, idx_t row, PostgresTableInfo &table_info, idx_t column_offset = 0);
PostgresResult &result, idx_t row, PostgresCreateInfo &pg_create_info,
idx_t column_offset = 0);

void CreateEntries(PostgresTransaction &transaction, PostgresResult &result, idx_t start, idx_t end);

Expand Down
80 changes: 80 additions & 0 deletions src/include/storage/postgres_view_entry.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// storage/postgres_view_entry.hpp
//
//
//===----------------------------------------------------------------------===//

#pragma once

#include "duckdb/catalog/catalog_entry/view_catalog_entry.hpp"
#include "duckdb/parser/parsed_data/create_view_info.hpp"
#include "storage/postgres_create_info.hpp"

namespace duckdb {

struct PostgresViewInfo : public PostgresCreateInfo {
public:
PostgresViewInfo(const string &select_stmt) {
create_info = make_uniq<CreateViewInfo>();
create_info->query = CreateViewInfo::ParseSelect(select_stmt);
// create_info->columns.SetAllowDuplicates(true);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure about this, the Table has a ColumnsList which provides this option, but no such option exists for views because we don't have a ColumnsList, we instead just store the types+names separately

}
PostgresViewInfo(const string &schema, const string &view, const string &select_stmt) {
create_info = make_uniq<CreateViewInfo>(string(), schema, view);
create_info->query = CreateViewInfo::ParseSelect(select_stmt);
// create_info->columns.SetAllowDuplicates(true);
}
PostgresViewInfo(const SchemaCatalogEntry &schema, const string &view, const string &select_stmt) {
create_info = make_uniq<CreateViewInfo>((SchemaCatalogEntry &)schema, view);
create_info->query = CreateViewInfo::ParseSelect(select_stmt);
// create_info->columns.SetAllowDuplicates(true);
}
~PostgresViewInfo() override {
}

public:
const string &GetName() const override {
return create_info->view_name;
}

CreateInfo &GetCreateInfo() override {
return *create_info;
}

void AddColumn(ColumnDefinition def, PostgresType pg_type, const string &pg_name) override {
postgres_types.push_back(std::move(pg_type));
D_ASSERT(!pg_name.empty());
postgres_names.push_back(pg_name);
create_info->types.push_back(def.Type());
create_info->names.push_back(def.Name());
}

void GetColumnNamesAndTypes(vector<string> &names, vector<LogicalType> &types) override {
names = create_info->names;
types = create_info->types;
}

public:
unique_ptr<CreateViewInfo> create_info;
};

class PostgresViewEntry : public ViewCatalogEntry {
public:
PostgresViewEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateViewInfo &info);
PostgresViewEntry(Catalog &catalog, SchemaCatalogEntry &schema, PostgresViewInfo &info);
~PostgresViewEntry() override;

public:
//! Postgres type annotations
vector<PostgresType> postgres_types;
//! Column names as they are within Postgres
//! We track these separately because of case sensitivity - Postgres allows e.g. the columns "ID" and "id" together
//! We would in this case remap them to "ID" and "id_1", while postgres_names store the original names
vector<string> postgres_names;
//! The approximate number of pages a table consumes in Postgres
idx_t approx_num_pages;
};

} // namespace duckdb
12 changes: 5 additions & 7 deletions src/postgres_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,30 +22,30 @@

class PostgresExtensionState : public ClientContextState {
public:
bool CanRequestRebind() override {

Check failure on line 25 in src/postgres_extension.cpp

View workflow job for this annotation

GitHub Actions / Run tests on Linux (<submodule_version>, linux_amd64_gcc4, 2023.04.15)

‘bool PostgresExtensionState::CanRequestRebind()’ marked ‘override’, but does not override

Check failure on line 25 in src/postgres_extension.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / MacOS (osx_arm64, arm64, arm64-osx)

only virtual member functions can be marked 'override'

Check failure on line 25 in src/postgres_extension.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / Linux (linux_amd64_gcc4, quay.io/pypa/manylinux2014_x86_64, x64-linux)

‘bool PostgresExtensionState::CanRequestRebind()’ marked ‘override’, but does not override
return true;
}
RebindQueryInfo OnPlanningError(ClientContext &context, SQLStatement &statement, ErrorData &error) override {

Check failure on line 28 in src/postgres_extension.cpp

View workflow job for this annotation

GitHub Actions / Run tests on Linux (<submodule_version>, linux_amd64_gcc4, 2023.04.15)

‘RebindQueryInfo’ does not name a type

Check failure on line 28 in src/postgres_extension.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / MacOS (osx_arm64, arm64, arm64-osx)

unknown type name 'RebindQueryInfo'

Check failure on line 28 in src/postgres_extension.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / Linux (linux_amd64_gcc4, quay.io/pypa/manylinux2014_x86_64, x64-linux)

‘RebindQueryInfo’ does not name a type
if (error.Type() != ExceptionType::BINDER) {
return RebindQueryInfo::DO_NOT_REBIND;

Check failure on line 30 in src/postgres_extension.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / MacOS (osx_arm64, arm64, arm64-osx)

use of undeclared identifier 'RebindQueryInfo'
}
auto &extra_info = error.ExtraInfo();
auto entry = extra_info.find("error_subtype");
if (entry == extra_info.end()) {
return RebindQueryInfo::DO_NOT_REBIND;

Check failure on line 35 in src/postgres_extension.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / MacOS (osx_arm64, arm64, arm64-osx)

use of undeclared identifier 'RebindQueryInfo'
}
if (entry->second != "COLUMN_NOT_FOUND") {
return RebindQueryInfo::DO_NOT_REBIND;

Check failure on line 38 in src/postgres_extension.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / MacOS (osx_arm64, arm64, arm64-osx)

use of undeclared identifier 'RebindQueryInfo'
}
// clear caches and rebind
PostgresClearCacheFunction::ClearPostgresCaches(context);
return RebindQueryInfo::ATTEMPT_TO_REBIND;

Check failure on line 42 in src/postgres_extension.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / MacOS (osx_arm64, arm64, arm64-osx)

use of undeclared identifier 'RebindQueryInfo'
}
};

class PostgresExtensionCallback : public ExtensionCallback {
public:
void OnConnectionOpened(ClientContext &context) override {

Check failure on line 48 in src/postgres_extension.cpp

View workflow job for this annotation

GitHub Actions / Run tests on Linux (<submodule_version>, linux_amd64_gcc4, 2023.04.15)

‘void PostgresExtensionCallback::OnConnectionOpened(duckdb::ClientContext&)’ marked ‘override’, but does not override

Check failure on line 48 in src/postgres_extension.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / MacOS (osx_arm64, arm64, arm64-osx)

only virtual member functions can be marked 'override'

Check failure on line 48 in src/postgres_extension.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / Linux (linux_amd64_gcc4, quay.io/pypa/manylinux2014_x86_64, x64-linux)

‘void PostgresExtensionCallback::OnConnectionOpened(duckdb::ClientContext&)’ marked ‘override’, but does not override
context.registered_state.insert(make_pair("postgres_extension", make_shared<PostgresExtensionState>()));
}
};
Expand Down Expand Up @@ -97,19 +97,17 @@
config.AddExtensionOption("pg_connection_limit", "The maximum amount of concurrent Postgres connections",
LogicalType::UBIGINT, Value::UBIGINT(PostgresConnectionPool::DEFAULT_MAX_CONNECTIONS),
SetPostgresConnectionLimit);
config.AddExtensionOption("pg_array_as_varchar",
"Read Postgres arrays as varchar - enables reading mixed dimensional arrays",
LogicalType::BOOLEAN, Value::BOOLEAN(false), PostgresClearCacheFunction::ClearCacheOnSetting);
config.AddExtensionOption("pg_connection_cache",
"Whether or not to use the connection cache", LogicalType::BOOLEAN,
Value::BOOLEAN(true), PostgresConnectionPool::PostgresSetConnectionCache);
config.AddExtensionOption(
"pg_array_as_varchar", "Read Postgres arrays as varchar - enables reading mixed dimensional arrays",
LogicalType::BOOLEAN, Value::BOOLEAN(false), PostgresClearCacheFunction::ClearCacheOnSetting);
config.AddExtensionOption("pg_connection_cache", "Whether or not to use the connection cache", LogicalType::BOOLEAN,
Value::BOOLEAN(true), PostgresConnectionPool::PostgresSetConnectionCache);
config.AddExtensionOption("pg_experimental_filter_pushdown",
"Whether or not to use filter pushdown (currently experimental)", LogicalType::BOOLEAN,
Value::BOOLEAN(false));
config.AddExtensionOption("pg_debug_show_queries", "DEBUG SETTING: print all queries sent to Postgres to stdout",
LogicalType::BOOLEAN, Value::BOOLEAN(false), SetPostgresDebugQueryPrint);


OptimizerExtension postgres_optimizer;
postgres_optimizer.optimize_function = PostgresOptimizer::Optimize;
config.optimizer_extensions.push_back(std::move(postgres_optimizer));
Expand Down
10 changes: 4 additions & 6 deletions src/postgres_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,7 @@ static unique_ptr<FunctionData> PostgresBind(ClientContext &context, TableFuncti
auto info = PostgresTableSet::GetTableInfo(con, bind_data->schema_name, bind_data->table_name);

bind_data->postgres_types = info->postgres_types;
for (auto &col : info->create_info->columns.Logical()) {
names.push_back(col.GetName());
return_types.push_back(col.GetType());
}
info->GetColumnNamesAndTypes(names, return_types);
bind_data->names = info->postgres_names;
bind_data->types = return_types;
bind_data->can_use_main_thread = true;
Expand Down Expand Up @@ -481,12 +478,13 @@ unique_ptr<NodeStatistics> PostgresScanCardinality(ClientContext &context, const
}

double PostgresScanProgress(ClientContext &context, const FunctionData *bind_data_p,
const GlobalTableFunctionState *global_state) {
const GlobalTableFunctionState *global_state) {
auto &bind_data = bind_data_p->Cast<PostgresBindData>();
auto &gstate = global_state->Cast<PostgresGlobalState>();

lock_guard<mutex> parallel_lock(gstate.lock);
double progress = 100 * double(gstate.page_idx) / double(bind_data.pages_approx);;
double progress = 100 * double(gstate.page_idx) / double(bind_data.pages_approx);
;
return MinValue<double>(100, progress);
}

Expand Down
4 changes: 3 additions & 1 deletion src/storage/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ add_library(
postgres_transaction_manager.cpp
postgres_type_entry.cpp
postgres_type_set.cpp
postgres_update.cpp)
postgres_update.cpp
postgres_view_entry.cpp
)
set(ALL_OBJECT_FILES
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:postgres_ext_storage>
PARENT_SCOPE)
4 changes: 2 additions & 2 deletions src/storage/postgres_catalog_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ void PostgresCatalogSet::ClearEntries() {
is_loaded = false;
}

PostgresInSchemaSet::PostgresInSchemaSet(PostgresSchemaEntry &schema, bool is_loaded) :
PostgresCatalogSet(schema.ParentCatalog(), is_loaded), schema(schema) {
PostgresInSchemaSet::PostgresInSchemaSet(PostgresSchemaEntry &schema, bool is_loaded)
: PostgresCatalogSet(schema.ParentCatalog(), is_loaded), schema(schema) {
}

optional_ptr<CatalogEntry> PostgresInSchemaSet::CreateEntry(unique_ptr<CatalogEntry> entry) {
Expand Down
Loading
Loading