Skip to content

Commit ec1c378

Browse files
authored
Merge pull request #475 from carlopi/v1.4-andium-wasm
Also implement HEAD requests, and merge with origin/main
2 parents 594a221 + 4a02d93 commit ec1c378

16 files changed

+301
-32
lines changed
705 Bytes
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"format-version":2,"table-uuid":"d6293bed-4757-4504-9342-f69a447b7759","location":"data/persistent/expression_filter","last-sequence-number":0,"last-updated-ms":1757676428493,"last-column-id":2,"current-schema-id":0,"schemas":[{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","required":true,"type":"long"},{"id":2,"name":"value","required":false,"type":"string"}]}],"default-spec-id":0,"partition-specs":[{"spec-id":0,"fields":[]}],"last-partition-id":999,"default-sort-order-id":0,"sort-orders":[{"order-id":0,"fields":[]}],"properties":{"write.parquet.compression-codec":"zstd"},"current-snapshot-id":-1,"refs":{},"snapshots":[],"statistics":[],"partition-statistics":[],"snapshot-log":[],"metadata-log":[]}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"format-version":2,"table-uuid":"d6293bed-4757-4504-9342-f69a447b7759","location":"data/persistent/expression_filter","last-sequence-number":1,"last-updated-ms":1757676429141,"last-column-id":2,"current-schema-id":0,"schemas":[{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","required":true,"type":"long"},{"id":2,"name":"value","required":false,"type":"string"}]}],"default-spec-id":0,"partition-specs":[{"spec-id":0,"fields":[]}],"last-partition-id":999,"default-sort-order-id":0,"sort-orders":[{"order-id":0,"fields":[]}],"properties":{"write.parquet.compression-codec":"zstd"},"current-snapshot-id":8096310958539014181,"refs":{"main":{"snapshot-id":8096310958539014181,"type":"branch"}},"snapshots":[{"sequence-number":1,"snapshot-id":8096310958539014181,"timestamp-ms":1757676429141,"summary":{"operation":"append","added-data-files":"1","added-records":"3","added-files-size":"705","changed-partition-count":"1","total-records":"3","total-files-size":"705","total-data-files":"1","total-delete-files":"0","total-position-deletes":"0","total-equality-deletes":"0","iceberg-version":"Apache Iceberg 1.9.2 (commit 071d5606bc6199a0be9b3f274ec7fbf111d88821)"},"manifest-list":"data/persistent/expression_filter/metadata/snap-8096310958539014181-1-8d30f58e-7333-4451-983d-eaf657a21a11.avro","schema-id":0}],"statistics":[],"partition-statistics":[],"snapshot-log":[{"timestamp-ms":1757676429141,"snapshot-id":8096310958539014181}],"metadata-log":[{"timestamp-ms":1757676428493,"metadata-file":"data/persistent/expression_filter/metadata/00000-acdf842e-3a9d-4b9b-ad87-daf78583a550.metadata.json"}]}
6.86 KB
Binary file not shown.
4.34 KB
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
00001-19739cda-f528-4429-84cc-377ffdd24c75

src/aws.cpp

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,18 @@ std::shared_ptr<Aws::Http::HttpRequest> AWSInput::CreateSignedRequest(Aws::Http:
122122
// return request;
123123
}
124124

125+
//! Hashes a request payload: runs sha256 over `buffer_len` bytes of `buffer` and renders the digest via hex256.
//! Returns an empty string when `buffer_len` is not greater than zero.
static string GetPayloadHash(const char *buffer, idx_t buffer_len) {
	if (!(buffer_len > 0)) {
		return "";
	}

	hash_bytes digest;
	hash_str digest_hex;
	sha256(buffer, buffer_len, digest);
	hex256(digest, digest_hex);

	//! Copy exactly sizeof(hash_str) bytes, so the result does not depend on a terminator being present
	return string(reinterpret_cast<const char *>(digest_hex), sizeof(digest_hex));
}
136+
125137
unique_ptr<HTTPResponse> AWSInput::ExecuteRequest(ClientContext &context, Aws::Http::HttpMethod method,
126138
const string body, string content_type) {
127139

@@ -140,6 +152,11 @@ unique_ptr<HTTPResponse> AWSInput::ExecuteRequest(ClientContext &context, Aws::H
140152
// If access key is not set, we don't set the headers at all to allow accessing public files through s3 urls
141153

142154
string payload_hash = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"; // Empty payload hash
155+
156+
if (!body.empty()) {
157+
payload_hash = GetPayloadHash(body.c_str(), body.size());
158+
}
159+
143160
// key_id, secret, session_token
144161
// we can pass date/time but this is mostly useful in testing. normally we just get the current datetime
145162
// here.
@@ -161,6 +178,9 @@ unique_ptr<HTTPResponse> AWSInput::ExecuteRequest(ClientContext &context, Aws::H
161178
hash_str canonical_request_hash_str;
162179
if (content_type.length() > 0) {
163180
signed_headers += "content-type;";
181+
#ifdef EMSCRIPTEN
182+
res["content-type"] = content_type;
183+
#endif
164184
}
165185
signed_headers += "host;x-amz-content-sha256;x-amz-date";
166186
if (session_token.length() > 0) {
@@ -244,19 +264,14 @@ unique_ptr<HTTPResponse> AWSInput::ExecuteRequest(ClientContext &context, Aws::H
244264

245265
params = http_util.InitializeParameters(context, request_url);
246266

247-
if (!body.empty()) {
248-
throw NotImplementedException("CreateSignedRequest with non-empty body is not supported at this time");
249-
/*
250-
auto bodyStream = Aws::MakeShared<Aws::StringStream>("");
251-
*bodyStream << body;
252-
request->AddContentBody(bodyStream);
253-
request->SetContentLength(std::to_string(body.size()));
254-
if (!content_type.empty()) {
255-
request->SetHeaderValue("Content-Type", content_type);
256-
}
257-
*/
267+
if (method == Aws::Http::HttpMethod::HTTP_HEAD) {
268+
HeadRequestInfo head_request(request_url, res, *params);
269+
return http_util.Request(head_request);
270+
}
271+
if (method == Aws::Http::HttpMethod::HTTP_DELETE) {
272+
DeleteRequestInfo delete_request(request_url, res, *params);
273+
return http_util.Request(delete_request);
258274
}
259-
260275
if (method == Aws::Http::HttpMethod::HTTP_GET) {
261276
GetRequestInfo get_request(request_url, res, *params, nullptr, nullptr);
262277
return http_util.Request(get_request);

src/iceberg_predicate.cpp

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "iceberg_predicate.hpp"
2+
#include "duckdb/planner/expression/bound_operator_expression.hpp"
23
#include "duckdb/planner/filter/constant_filter.hpp"
34
#include "duckdb/planner/filter/conjunction_filter.hpp"
45
#include "duckdb/planner/filter/null_filter.hpp"
@@ -80,16 +81,34 @@ bool MatchBoundsTemplated(const TableFilter &filter, const IcebergPredicateStats
8081
return MatchBoundsIsNotNullFilter<TRANSFORM>(stats, transform);
8182
}
8283
case TableFilterType::EXPRESSION_FILTER: {
84+
//! Expressions can be arbitrarily complex, and we currently only support IS NULL/IS NOT NULL checks against the
85+
//! column itself, i.e. where the expression is a BOUND_OPERATOR with type OPERATOR_IS_NULL/_IS_NOT_NULL with a
86+
//! single child expression of type BOUND_REF.
87+
//!
88+
//! See duckdb/duckdb-iceberg#464
8389
auto &expression_filter = filter.Cast<ExpressionFilter>();
8490
auto &expr = *expression_filter.expr;
91+
92+
if (expr.type != ExpressionType::OPERATOR_IS_NULL && expr.type != ExpressionType::OPERATOR_IS_NOT_NULL) {
93+
return true;
94+
}
95+
96+
D_ASSERT(expr.GetExpressionClass() == ExpressionClass::BOUND_OPERATOR);
97+
auto &bound_operator_expr = expr.Cast<BoundOperatorExpression>();
98+
99+
D_ASSERT(bound_operator_expr.children.size() == 1);
100+
auto &child_expr = bound_operator_expr.children[0];
101+
if (child_expr->type != ExpressionType::BOUND_REF) {
102+
//! We can't evaluate expressions that aren't direct column references
103+
return true;
104+
}
105+
85106
if (expr.type == ExpressionType::OPERATOR_IS_NULL) {
86107
return MatchBoundsIsNullFilter<TRANSFORM>(stats, transform);
87-
}
88-
if (expr.type == ExpressionType::OPERATOR_IS_NOT_NULL) {
108+
} else {
109+
D_ASSERT(expr.type == ExpressionType::OPERATOR_IS_NOT_NULL);
89110
return MatchBoundsIsNotNullFilter<TRANSFORM>(stats, transform);
90111
}
91-
//! Any other expression can not be filtered
92-
return true;
93112
}
94113
default:
95114
//! Conservative approach: we don't know what this is, just say it doesn't filter anything

src/include/storage/iceberg_table_information.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,12 @@ struct IcebergTableInformation {
4545
IRCSchemaEntry &schema;
4646
string name;
4747
string table_id;
48-
// bool deleted;
4948

5049
rest_api_objects::LoadTableResult load_table_result;
5150
IcebergTableMetadata table_metadata;
5251
unordered_map<int32_t, unique_ptr<ICTableEntry>> schema_versions;
52+
// Dummy entry that records the existence of a table for which there are no schema versions
53+
unique_ptr<ICTableEntry> dummy_entry;
5354

5455
public:
5556
unique_ptr<IcebergTransactionData> transaction_data;

src/include/storage/irc_catalog.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,17 @@ class IRCatalog : public Catalog {
4848
bool SetCachedValue(const string &url, const string &value, const rest_api_objects::LoadTableResult &result);
4949
static void SetAWSCatalogOptions(IcebergAttachOptions &attach_options,
5050
case_insensitive_set_t &set_by_attach_options);
51+
//! Whether or not this catalog should search a specific type with the standard priority
52+
CatalogLookupBehavior CatalogTypeLookupRule(CatalogType type) const override {
53+
switch (type) {
54+
case CatalogType::TABLE_FUNCTION_ENTRY:
55+
case CatalogType::SCALAR_FUNCTION_ENTRY:
56+
case CatalogType::AGGREGATE_FUNCTION_ENTRY:
57+
return CatalogLookupBehavior::NEVER_LOOKUP;
58+
default:
59+
return CatalogLookupBehavior::STANDARD;
60+
}
61+
}
5162

5263
public:
5364
static unique_ptr<Catalog> Attach(optional_ptr<StorageExtensionInfo> storage_info, ClientContext &context,

0 commit comments

Comments
 (0)