Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release query result after materialization & transformation #1027

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
9 changes: 9 additions & 0 deletions src/include/reltoaltrep.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,22 @@ struct AltrepRelationWrapper {

duckdb::unique_ptr<QueryResult> Materialize();

void MarkColumnAsTransformed();

const bool allow_materialization;
const size_t n_rows;
const size_t n_cells;

rel_extptr_t rel_eptr;
duckdb::shared_ptr<Relation> rel;
duckdb::unique_ptr<QueryResult> res;

R_xlen_t rowcount;
bool rowcount_retrieved;
Comment on lines +30 to +31
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth to initialize with -1 to avoid rowcount_retrieved ? I honestly don't know.


size_t ncols;
size_t cols_transformed;
Comment on lines +33 to +34
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By the same token, use a single n_cols_to_retrieve that counts down to zero?


};

}
Expand Down
37 changes: 35 additions & 2 deletions src/reltoaltrep.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,24 @@ AltrepRelationWrapper *AltrepRelationWrapper::Get(SEXP x) {
}

AltrepRelationWrapper::AltrepRelationWrapper(rel_extptr_t rel_, bool allow_materialization_, size_t n_rows_, size_t n_cells_)
: allow_materialization(allow_materialization_), n_rows(n_rows_), n_cells(n_cells_), rel_eptr(rel_), rel(rel_->rel) {
: allow_materialization(allow_materialization_), n_rows(n_rows_), n_cells(n_cells_), rel_eptr(rel_), rel(rel_->rel), rowcount(0), rowcount_retrieved(false), ncols(0), cols_transformed(0) {
}

bool AltrepRelationWrapper::HasQueryResult() const {
return (bool)res;
}

void AltrepRelationWrapper::MarkColumnAsTransformed() {
// AltrepRelationWrapper keeps tabs on how many of the columns have been transformed
// to their R-representation
cols_transformed++;
// If all of the columns have been transformed, we can reset
// the query-result pointer and free the memory
if (cols_transformed == ncols) {
res.reset();
}
}

MaterializedQueryResult *AltrepRelationWrapper::GetQueryResult() {
if (!res) {
if (!allow_materialization || n_cells == 0) {
Expand Down Expand Up @@ -149,6 +160,10 @@ MaterializedQueryResult *AltrepRelationWrapper::GetQueryResult() {
cpp11::stop("Query execution was interrupted");
}


rowcount = ((MaterializedQueryResult *)res.get())->RowCount();
rowcount_retrieved = true;

signal_handler.Disable();
}
D_ASSERT(res);
Expand Down Expand Up @@ -196,7 +211,7 @@ duckdb::unique_ptr<QueryResult> AltrepRelationWrapper::Materialize() {

struct AltrepRownamesWrapper {

AltrepRownamesWrapper(duckdb::shared_ptr<AltrepRelationWrapper> rel_p) : rel(rel_p) {
AltrepRownamesWrapper(duckdb::shared_ptr<AltrepRelationWrapper> rel_p) : rel(rel_p), rowlen_data_retrieved(false) {
rowlen_data[0] = NA_INTEGER;
}

Expand All @@ -206,6 +221,7 @@ struct AltrepRownamesWrapper {

int32_t rowlen_data[2];
duckdb::shared_ptr<AltrepRelationWrapper> rel;
bool rowlen_data_retrieved;
};

struct AltrepVectorWrapper {
Expand All @@ -228,6 +244,8 @@ struct AltrepVectorWrapper {
duckdb_r_transform(chunk.data[column_index], dest, dest_offset, chunk.size(), false);
dest_offset += chunk.size();
}

rel->MarkColumnAsTransformed();
}
return DATAPTR(transformed_vector);
}
Expand Down Expand Up @@ -306,16 +324,27 @@ const void *RelToAltrep::RownamesDataptrOrNull(SEXP x) {

void *RelToAltrep::DoRownamesDataptrGet(SEXP x) {
auto rownames_wrapper = AltrepRownamesWrapper::Get(x);

// the query has been materialized, return the rowcount
// (and void recomputing the query if it's been reset)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// (and void recomputing the query if it's been reset)
// (and avoid recomputing the query if it's been reset)

if (rownames_wrapper->rowlen_data_retrieved) {
return rownames_wrapper->rowlen_data;
}

auto row_count = rownames_wrapper->rel->GetQueryResult()->RowCount();
if (row_count > (idx_t)NumericLimits<int32_t>::Maximum()) {
cpp11::stop("Integer overflow for row.names attribute");
}
rownames_wrapper->rowlen_data[1] = -row_count;
rownames_wrapper->rowlen_data_retrieved = true;
return rownames_wrapper->rowlen_data;
}

R_xlen_t RelToAltrep::VectorLength(SEXP x) {
BEGIN_CPP11
if (AltrepVectorWrapper::Get(x)->rel->rowcount_retrieved) {
return AltrepVectorWrapper::Get(x)->rel->rowcount;
}
return AltrepVectorWrapper::Get(x)->rel->GetQueryResult()->RowCount();
END_CPP11_EX(0)
}
Expand Down Expand Up @@ -404,6 +433,8 @@ size_t DoubleToSize(double d) {
auto relation_wrapper = make_shared_ptr<AltrepRelationWrapper>(rel, allow_materialization, DoubleToSize(n_rows),
DoubleToSize(n_cells));

relation_wrapper->ncols = drel->Columns().size();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need to update twice (here and below)?


cpp11::writable::list data_frame;
data_frame.reserve(ncols);

Expand All @@ -427,6 +458,8 @@ size_t DoubleToSize(double d) {
}
SET_NAMES(data_frame, StringsToSexp(names));

relation_wrapper->ncols = drel->Columns().size();

// Row names
cpp11::external_pointer<AltrepRownamesWrapper> ptr(new AltrepRownamesWrapper(relation_wrapper));
R_SetExternalPtrTag(ptr, RStrings::get().duckdb_row_names_sym);
Expand Down
Loading