Skip to content

Commit

Permalink
Continue this work on Monday morning, we really have to start cleanin…
Browse files Browse the repository at this point in the history
…g up this mess.
  • Loading branch information
joka921 committed Jul 12, 2024
1 parent 65e569b commit ee0848f
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 100 deletions.
59 changes: 23 additions & 36 deletions src/engine/IndexScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <absl/strings/str_join.h>

#include <boost/optional.hpp>
#include <sstream>
#include <string>

Expand Down Expand Up @@ -130,20 +131,9 @@ Result IndexScan::computeResult([[maybe_unused]] bool requestLaziness) {
using enum Permutation::Enum;
idTable.setNumColumns(numVariables_);
const auto& index = _executionContext->getIndex();
const auto permutedTriple = getPermutedTriple();
if (numVariables_ == 2) {
idTable =
index.scan(*permutedTriple[0], std::nullopt, std::nullopt, permutation_,
additionalColumns(), cancellationHandle_, getLimit());
} else if (numVariables_ == 1) {
idTable = index.scan(*permutedTriple[0], *permutedTriple[1], std::nullopt,
permutation_, additionalColumns(), cancellationHandle_,
getLimit());
} else if (numVariables_ == 0) {
idTable = index.scan(*permutedTriple[0], *permutedTriple[1],
*permutedTriple[2], permutation_, additionalColumns(),
cancellationHandle_, getLimit());

if (numVariables_ < 3) {
idTable = index.scan(getPermutedTripleNoVariables(), permutation_,
additionalColumns(), cancellationHandle_, getLimit());
} else {
AD_CORRECTNESS_CHECK(numVariables_ == 3);
computeFullScan(&idTable, permutation_);
Expand All @@ -158,31 +148,21 @@ Result IndexScan::computeResult([[maybe_unused]] bool requestLaziness) {
// _____________________________________________________________________________
size_t IndexScan::computeSizeEstimate() const {
if (_executionContext) {
// Should always be in this branch. Else is only for test cases.

// We have to do a simple scan anyway so might as well do it now
if (numVariables_ == 0) {
return getIndex().getResultSizeOfScan(
*getPermutedTriple()[0], *getPermutedTriple().at(1),
*getPermutedTriple()[2], permutation_);
} else if (numVariables_ == 1) {
if (numVariables_ == 1) {
// TODO<C++23> Use the monadic operation `std::optional::or_else`.
// Note: we cannot use `optional::value_or()` here, because the else
// case is expensive to compute, and we need it lazily evaluated.
if (auto size = getExecutionContext()->getQueryTreeCache().getPinnedSize(
getCacheKey());
size.has_value()) {
return size.value();
} else {
// This call explicitly has to read two blocks of triples from memory to
// obtain an exact size estimate.
return getIndex().getResultSizeOfScan(*getPermutedTriple()[0],
*getPermutedTriple().at(1),
std::nullopt, permutation_);
}
} else if (numVariables_ == 2) {
const TripleComponent& firstKey = *getPermutedTriple().at(0);
return getIndex().getCardinality(firstKey, permutation_);
}

// We have to do a simple scan anyway so might as well do it now
if (numVariables_ < 3) {
return getIndex().getResultSizeOfScan(getPermutedTripleNoVariables(),
permutation_);
} else {
// The triple consists of three variables.
// TODO<joka921> As soon as all implementations of a full index scan
Expand Down Expand Up @@ -292,6 +272,13 @@ std::array<const TripleComponent* const, 3> IndexScan::getPermutedTriple()
triple[permutation[2]]};
}

// ___________________________________________________________________________
// Build a `ScanSpecificationAsTripleComponent` from the three entries of
// `getPermutedTriple()`, passed in the order of the permutation.
ScanSpecificationAsTripleComponent IndexScan::getPermutedTripleNoVariables()
    const {
  const auto triple = getPermutedTriple();
  const TripleComponent& first = *triple[0];
  const TripleComponent& second = *triple[1];
  const TripleComponent& third = *triple[2];
  return {first, second, third};
}

// ___________________________________________________________________________
Permutation::IdTableGenerator IndexScan::getLazyScan(
const IndexScan& s, std::vector<CompressedBlockMetadata> blocks) {
Expand All @@ -304,11 +291,11 @@ Permutation::IdTableGenerator IndexScan::getLazyScan(
if (s.numVariables_ < 2) {
col1Id = s.getPermutedTriple()[1]->toValueId(index.getVocab()).value();
}
std::optional<Id> col2Id;
if (s.numVariables_ < 1) {
col2Id = s.getPermutedTriple()[2]->toValueId(index.getVocab()).value();
}

// This function is currently only called by the `getLazyScanForJoin...`
// functions. In these cases we always have at least one variable in each of
// the scans, because otherwise there would be no join column.
AD_CORRECTNESS_CHECK(s.numVariables_ >= 1);
// If there is a LIMIT or OFFSET clause that constrains the scan
// (which can happen with an explicit subquery), we cannot use the prefiltered
// blocks, as we currently have no mechanism to include limits and offsets
Expand All @@ -318,7 +305,7 @@ Permutation::IdTableGenerator IndexScan::getLazyScan(
: std::nullopt;

return index.getPermutation(s.permutation())
.lazyScan({col0Id, col1Id, col2Id}, std::move(actualBlocks),
.lazyScan({col0Id, col1Id, std::nullopt}, std::move(actualBlocks),
s.additionalColumns(), s.cancellationHandle_, s.getLimit());
};

Expand Down
2 changes: 2 additions & 0 deletions src/engine/IndexScan.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// Author: Björn Buchhold ([email protected])
#pragma once

#include <boost/optional.hpp>
#include <string>

#include "./Operation.h"
Expand Down Expand Up @@ -100,6 +101,7 @@ class IndexScan final : public Operation {
// `permutation_`. For example if `permutation_ == PSO` then the result is
// {&predicate_, &subject_, &object_}
std::array<const TripleComponent* const, 3> getPermutedTriple() const;
ScanSpecificationAsTripleComponent getPermutedTripleNoVariables() const;

private:
Result computeResult([[maybe_unused]] bool requestLaziness) override;
Expand Down
32 changes: 22 additions & 10 deletions src/index/Index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,31 +276,43 @@ vector<float> Index::getMultiplicities(const TripleComponent& key,

// ____________________________________________________________________________
IdTable Index::scan(
const TripleComponent& col0String,
std::optional<std::reference_wrapper<const TripleComponent>> col1String,
std::optional<std::reference_wrapper<const TripleComponent>> col2String,
const ScanSpecificationAsTripleComponent& scanSpecification,
Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns,
const ad_utility::SharedCancellationHandle& cancellationHandle,
const LimitOffsetClause& limitOffset) const {
return pimpl_->scan(col0String, col1String, col2String, p, additionalColumns,
return pimpl_->scan(scanSpecification, p, additionalColumns,
cancellationHandle, limitOffset);
}

// ____________________________________________________________________________
IdTable Index::scan(
Id col0Id, std::optional<Id> col1Id, std::optional<Id> col2Id,
const Permutation::ScanSpecification& scanSpecification,
Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns,
const ad_utility::SharedCancellationHandle& cancellationHandle,
const LimitOffsetClause& limitOffset) const {
return pimpl_->scan({col0Id, col1Id, col2Id}, p, additionalColumns,
return pimpl_->scan(scanSpecification, p, additionalColumns,
cancellationHandle, limitOffset);
}

// ____________________________________________________________________________
size_t Index::getResultSizeOfScan(
const TripleComponent& col0String, const TripleComponent& col1String,
std::optional<std::reference_wrapper<const TripleComponent>> col2String,
const ScanSpecificationAsTripleComponent& scanSpecification,
const Permutation::Enum& permutation) const {
return pimpl_->getResultSizeOfScan(col0String, col1String, col2String,
permutation);
return pimpl_->getResultSizeOfScan(scanSpecification, permutation);
}
// ____________________________________________________________________________
std::optional<Permutation::ScanSpecification>
ScanSpecificationAsTripleComponent::toScanSpecification(
const IndexImpl& index) const {
std::optional<Id> col0Id = col0_.toValueId(index.getVocab());
std::optional<Id> col1Id = col1_.has_value()
? col1_.value().toValueId(index.getVocab())
: std::nullopt;
std::optional<Id> col2Id = col2_.has_value()
? col2_.value().toValueId(index.getVocab())
: std::nullopt;
if (!col0Id.has_value() || (col1_.has_value() && !col1Id.has_value()) ||
(col2_.has_value() && !col2Id.has_value())) {
return std::nullopt;
}
}
32 changes: 22 additions & 10 deletions src/index/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,21 @@ class IdTable;
class TextBlockMetaData;
class IndexImpl;

// Specification of an index scan in terms of `TripleComponent`s. It can be
// converted to the `Id`-based `Permutation::ScanSpecification` via
// `toScanSpecification`. `col0_` is always set, `col1_` and `col2_` are
// optional.
// TODO<joka921> Extend this comment once the interface is final.
struct ScanSpecificationAsTripleComponent {
TripleComponent col0_;
std::optional<TripleComponent> col1_;
std::optional<TripleComponent> col2_;
// NOTE(review): only declared here. Presumably `col1` and `col2` are stored
// as `std::nullopt` when they are variables -- confirm against the definition.
ScanSpecificationAsTripleComponent(TripleComponent col0,
const TripleComponent& col1,
const TripleComponent& col2);
// Translate the stored `TripleComponent`s to a `ScanSpecification` using
// `index`; the result is an optional and may be `std::nullopt`.
std::optional<Permutation::ScanSpecification> toScanSpecification(
const IndexImpl& index) const;
// Return 2 minus the number of optional columns (`col1_`, `col2_`) that are
// set, i.e. a value between 0 and 2.
size_t numColumns() const {
return 2 - col1_.has_value() - col2_.has_value();
}
};

class Index {
private:
// Pimpl to reduce compile times.
Expand Down Expand Up @@ -238,16 +253,14 @@ class Index {
* @param p The Permutation::Enum to use (in particular POS(), SOP,...
* members of Index class).
*/
IdTable scan(
const TripleComponent& col0String,
std::optional<std::reference_wrapper<const TripleComponent>> col1String,
std::optional<std::reference_wrapper<const TripleComponent>> col2String,
Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns,
const ad_utility::SharedCancellationHandle& cancellationHandle,
const LimitOffsetClause& limitOffset = {}) const;
IdTable scan(const ScanSpecificationAsTripleComponent& scanSpecification,
Permutation::Enum p,
Permutation::ColumnIndicesRef additionalColumns,
const ad_utility::SharedCancellationHandle& cancellationHandle,
const LimitOffsetClause& limitOffset = {}) const;

// Similar to the overload of `scan` above, but the keys are specified as IDs.
IdTable scan(Id col0Id, std::optional<Id> col1Id, std::optional<Id> col2Id,
IdTable scan(const Permutation::ScanSpecification& scanSpecification,
Permutation::Enum p,
Permutation::ColumnIndicesRef additionalColumns,
const ad_utility::SharedCancellationHandle& cancellationHandle,
Expand All @@ -256,8 +269,7 @@ class Index {
// Similar to the previous overload of `scan`, but only get the exact size of
// the scan result.
size_t getResultSizeOfScan(
const TripleComponent& col0String, const TripleComponent& col1String,
std::optional<std::reference_wrapper<const TripleComponent>> col2String,
const ScanSpecificationAsTripleComponent& scanSpecification,
const Permutation::Enum& permutation) const;

// Get access to the implementation. This should be used rarely as it
Expand Down
46 changes: 12 additions & 34 deletions src/index/IndexImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1367,35 +1367,19 @@ vector<float> IndexImpl::getMultiplicities(

// _____________________________________________________________________________
IdTable IndexImpl::scan(
const TripleComponent& col0String,
std::optional<std::reference_wrapper<const TripleComponent>> col1String,
std::optional<std::reference_wrapper<const TripleComponent>> col2String,
const ScanSpecificationAsTripleComponent& scanSpecificationAsTc,
const Permutation::Enum& permutation,
Permutation::ColumnIndicesRef additionalColumns,
const ad_utility::SharedCancellationHandle& cancellationHandle,
const LimitOffsetClause& limitOffset) const {
AD_CORRECTNESS_CHECK(!col2String.has_value() || col1String.has_value());
std::optional<Id> col0Id = col0String.toValueId(getVocab());
// TODO<C++23> Use the monadic operations for std::optional.
bool elementNotFound = !col0Id.has_value();
size_t numColumns = 2;
auto handleOptional = [&elementNotFound, &numColumns,
this](const auto& comp) -> std::optional<Id> {
if (!comp.has_value()) {
return std::nullopt;
}
--numColumns;
auto result = comp.value().get().toValueId(getVocab());
elementNotFound = elementNotFound || !result.has_value();
return result;
};
std::optional<Id> col1Id = handleOptional(col1String);
std::optional<Id> col2Id = handleOptional(col2String);
if (elementNotFound) {
auto scanSpecification = scanSpecificationAsTc.toScanSpecification(*this);
if (!scanSpecification.has_value()) {
cancellationHandle->throwIfCancelled();
return IdTable{numColumns + additionalColumns.size(), allocator_};
return IdTable{
scanSpecificationAsTc.numColumns() + additionalColumns.size(),
allocator_};
}
return scan({col0Id.value(), col1Id, col2Id}, permutation, additionalColumns,
return scan(scanSpecification.value(), permutation, additionalColumns,
cancellationHandle, limitOffset);
}
// _____________________________________________________________________________
Expand All @@ -1410,20 +1394,14 @@ IdTable IndexImpl::scan(

// _____________________________________________________________________________
size_t IndexImpl::getResultSizeOfScan(
const TripleComponent& col0, const TripleComponent& col1,
std::optional<std::reference_wrapper<const TripleComponent>> col2,
const ScanSpecificationAsTripleComponent& scanSpecificationAsTc,
const Permutation::Enum& permutation) const {
std::optional<Id> col0Id = col0.toValueId(getVocab());
std::optional<Id> col1Id = col1.toValueId(getVocab());
std::optional<Id> col2Id = col2.has_value()
? col2.value().get().toValueId(getVocab())
: std::nullopt;
if (!col0Id.has_value() || !col1Id.has_value() ||
(col2.has_value() && !col2Id.has_value())) {
const Permutation& p = getPermutation(permutation);
auto scanSpecification = scanSpecificationAsTc.toScanSpecification(*this);
if (!scanSpecification.has_value()) {
return 0;
}
const Permutation& p = getPermutation(permutation);
return p.getResultSizeOfScan({col0Id.value(), col1Id.value(), col2Id});
return p.getResultSizeOfScan(scanSpecification.value());
}

// _____________________________________________________________________________
Expand Down
16 changes: 6 additions & 10 deletions src/index/IndexImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -397,14 +397,11 @@ class IndexImpl {
vector<float> getMultiplicities(Permutation::Enum permutation) const;

// _____________________________________________________________________________
IdTable scan(
const TripleComponent& col0String,
std::optional<std::reference_wrapper<const TripleComponent>> col1String,
std::optional<std::reference_wrapper<const TripleComponent>> col2String,
const Permutation::Enum& permutation,
Permutation::ColumnIndicesRef additionalColumns,
const ad_utility::SharedCancellationHandle& cancellationHandle,
const LimitOffsetClause& limitOffset = {}) const;
IdTable scan(const ScanSpecificationAsTripleComponent& scanSpecification,
const Permutation::Enum& permutation,
Permutation::ColumnIndicesRef additionalColumns,
const ad_utility::SharedCancellationHandle& cancellationHandle,
const LimitOffsetClause& limitOffset = {}) const;

// _____________________________________________________________________________
IdTable scan(const Permutation::ScanSpecification& scanSpecification,
Expand All @@ -415,8 +412,7 @@ class IndexImpl {

// _____________________________________________________________________________
size_t getResultSizeOfScan(
const TripleComponent& col0, const TripleComponent& col1,
std::optional<std::reference_wrapper<const TripleComponent>> col2,
const ScanSpecificationAsTripleComponent& scanSpecification,
const Permutation::Enum& permutation) const;

private:
Expand Down
14 changes: 14 additions & 0 deletions test/QueryPlannerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,20 @@ TEST(QueryPlanner, testBFSLeaveOut) {
}
}

// Check the query plans for triples that contain no variables at all.
TEST(QueryPlanner, indexScanZeroVariables) {
  using enum Permutation::Enum;
  auto scan = h::IndexScanFromStrings;
  // Factories for the two expected scan matchers; each call yields a fresh
  // matcher.
  auto constantTriple = [&scan] { return scan("<x>", "<y>", "<z>"); };
  auto tripleWithObjectVariable = [&scan] {
    return scan("<x>", "<y>", "?z");
  };

  // A single triple without variables is expected to become one index scan.
  h::expect(
      "SELECT * \n "
      "WHERE \t {<x> <y> <z>}",
      constantTriple());

  // Together with a second triple, the expected plan is a Cartesian product
  // join of the two scans.
  h::expect(
      "SELECT * \n "
      "WHERE \t {<x> <y> <z> . <x> <y> ?z}",
      h::CartesianProductJoin(constantTriple(), tripleWithObjectVariable()));
}

TEST(QueryPlanner, indexScanOneVariable) {
auto scan = h::IndexScanFromStrings;
using enum Permutation::Enum;
Expand Down

0 comments on commit ee0848f

Please sign in to comment.