-
Notifications
You must be signed in to change notification settings - Fork 30
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Release query result after materialization & transformation #1027
base: main
Are you sure you want to change the base?
Changes from all commits
a900623
7279de3
091179b
a8e3f8f
c8eaf0a
1cbea2d
37b1b9f
cb86095
c8256a7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,13 +17,22 @@ struct AltrepRelationWrapper { | |
|
||
duckdb::unique_ptr<QueryResult> Materialize(); | ||
|
||
void MarkColumnAsTransformed(); | ||
|
||
const bool allow_materialization; | ||
const size_t n_rows; | ||
const size_t n_cells; | ||
|
||
rel_extptr_t rel_eptr; | ||
duckdb::shared_ptr<Relation> rel; | ||
duckdb::unique_ptr<QueryResult> res; | ||
|
||
R_xlen_t rowcount; | ||
bool rowcount_retrieved; | ||
|
||
size_t ncols; | ||
size_t cols_transformed; | ||
Comment on lines
+33
to
+34
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. By the same token, use a single |
||
|
||
}; | ||
|
||
} | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -101,13 +101,24 @@ AltrepRelationWrapper *AltrepRelationWrapper::Get(SEXP x) { | |||||
} | ||||||
|
||||||
AltrepRelationWrapper::AltrepRelationWrapper(rel_extptr_t rel_, bool allow_materialization_, size_t n_rows_, size_t n_cells_) | ||||||
: allow_materialization(allow_materialization_), n_rows(n_rows_), n_cells(n_cells_), rel_eptr(rel_), rel(rel_->rel) { | ||||||
: allow_materialization(allow_materialization_), n_rows(n_rows_), n_cells(n_cells_), rel_eptr(rel_), rel(rel_->rel), rowcount(0), rowcount_retrieved(false), ncols(0), cols_transformed(0) { | ||||||
} | ||||||
|
||||||
bool AltrepRelationWrapper::HasQueryResult() const { | ||||||
return (bool)res; | ||||||
} | ||||||
|
||||||
void AltrepRelationWrapper::MarkColumnAsTransformed() { | ||||||
// AltrepRelationWrapper keeps tabs on how many of the columns have been transformed | ||||||
// to their R-representation | ||||||
cols_transformed++; | ||||||
// If all of the columns have been transformed, we can reset | ||||||
// the query-result pointer and free the memory | ||||||
if (cols_transformed == ncols) { | ||||||
res.reset(); | ||||||
} | ||||||
} | ||||||
|
||||||
MaterializedQueryResult *AltrepRelationWrapper::GetQueryResult() { | ||||||
if (!res) { | ||||||
if (!allow_materialization || n_cells == 0) { | ||||||
|
@@ -149,6 +160,10 @@ MaterializedQueryResult *AltrepRelationWrapper::GetQueryResult() { | |||||
cpp11::stop("Query execution was interrupted"); | ||||||
} | ||||||
|
||||||
|
||||||
rowcount = ((MaterializedQueryResult *)res.get())->RowCount(); | ||||||
rowcount_retrieved = true; | ||||||
|
||||||
signal_handler.Disable(); | ||||||
} | ||||||
D_ASSERT(res); | ||||||
|
@@ -196,7 +211,7 @@ duckdb::unique_ptr<QueryResult> AltrepRelationWrapper::Materialize() { | |||||
|
||||||
struct AltrepRownamesWrapper { | ||||||
|
||||||
AltrepRownamesWrapper(duckdb::shared_ptr<AltrepRelationWrapper> rel_p) : rel(rel_p) { | ||||||
AltrepRownamesWrapper(duckdb::shared_ptr<AltrepRelationWrapper> rel_p) : rel(rel_p), rowlen_data_retrieved(false) { | ||||||
rowlen_data[0] = NA_INTEGER; | ||||||
} | ||||||
|
||||||
|
@@ -206,6 +221,7 @@ struct AltrepRownamesWrapper { | |||||
|
||||||
int32_t rowlen_data[2]; | ||||||
duckdb::shared_ptr<AltrepRelationWrapper> rel; | ||||||
bool rowlen_data_retrieved; | ||||||
}; | ||||||
|
||||||
struct AltrepVectorWrapper { | ||||||
|
@@ -228,6 +244,8 @@ struct AltrepVectorWrapper { | |||||
duckdb_r_transform(chunk.data[column_index], dest, dest_offset, chunk.size(), false); | ||||||
dest_offset += chunk.size(); | ||||||
} | ||||||
|
||||||
rel->MarkColumnAsTransformed(); | ||||||
} | ||||||
return DATAPTR(transformed_vector); | ||||||
} | ||||||
|
@@ -306,16 +324,27 @@ const void *RelToAltrep::RownamesDataptrOrNull(SEXP x) { | |||||
|
||||||
void *RelToAltrep::DoRownamesDataptrGet(SEXP x) { | ||||||
auto rownames_wrapper = AltrepRownamesWrapper::Get(x); | ||||||
|
||||||
// the query has been materialized, return the rowcount | ||||||
// (and void recomputing the query if it's been reset) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
if (rownames_wrapper->rowlen_data_retrieved) { | ||||||
return rownames_wrapper->rowlen_data; | ||||||
} | ||||||
|
||||||
auto row_count = rownames_wrapper->rel->GetQueryResult()->RowCount(); | ||||||
if (row_count > (idx_t)NumericLimits<int32_t>::Maximum()) { | ||||||
cpp11::stop("Integer overflow for row.names attribute"); | ||||||
} | ||||||
rownames_wrapper->rowlen_data[1] = -row_count; | ||||||
rownames_wrapper->rowlen_data_retrieved = true; | ||||||
return rownames_wrapper->rowlen_data; | ||||||
} | ||||||
|
||||||
R_xlen_t RelToAltrep::VectorLength(SEXP x) { | ||||||
BEGIN_CPP11 | ||||||
if (AltrepVectorWrapper::Get(x)->rel->rowcount_retrieved) { | ||||||
return AltrepVectorWrapper::Get(x)->rel->rowcount; | ||||||
} | ||||||
return AltrepVectorWrapper::Get(x)->rel->GetQueryResult()->RowCount(); | ||||||
END_CPP11_EX(0) | ||||||
} | ||||||
|
@@ -404,6 +433,8 @@ size_t DoubleToSize(double d) { | |||||
auto relation_wrapper = make_shared_ptr<AltrepRelationWrapper>(rel, allow_materialization, DoubleToSize(n_rows), | ||||||
DoubleToSize(n_cells)); | ||||||
|
||||||
relation_wrapper->ncols = drel->Columns().size(); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we really need to update twice (here and below)? |
||||||
|
||||||
cpp11::writable::list data_frame; | ||||||
data_frame.reserve(ncols); | ||||||
|
||||||
|
@@ -427,6 +458,8 @@ size_t DoubleToSize(double d) { | |||||
} | ||||||
SET_NAMES(data_frame, StringsToSexp(names)); | ||||||
|
||||||
relation_wrapper->ncols = drel->Columns().size(); | ||||||
|
||||||
// Row names | ||||||
cpp11::external_pointer<AltrepRownamesWrapper> ptr(new AltrepRownamesWrapper(relation_wrapper)); | ||||||
R_SetExternalPtrTag(ptr, RStrings::get().duckdb_row_names_sym); | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it worth to initialize with
-1
to avoidrowcount_retrieved
? I honestly don't know.