Skip to content

Commit

Permalink
The unit tests pass again.
Browse files Browse the repository at this point in the history
Check for code coverage/self-review/etc.

Signed-off-by: Johannes Kalmbach <[email protected]>
  • Loading branch information
joka921 committed Oct 31, 2024
1 parent 1435b1c commit f0caa80
Show file tree
Hide file tree
Showing 9 changed files with 207 additions and 66 deletions.
15 changes: 13 additions & 2 deletions src/engine/IndexScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ IndexScan::IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation,
additionalColumns_.push_back(idx);
additionalVariables_.push_back(variable);
}
sizeEstimate_ = computeSizeEstimate();
std::tie(sizeEstimateIsExact_, sizeEstimate_) = computeSizeEstimate();

// Check the following invariant: All the variables must be at the end of the
// permuted triple. For example in the PSO permutation, either only the O, or
Expand Down Expand Up @@ -171,7 +171,18 @@ ProtoResult IndexScan::computeResult(bool requestLaziness) {
}

// _____________________________________________________________________________
size_t IndexScan::computeSizeEstimate() const {
std::pair<bool, size_t> IndexScan::computeSizeEstimate() const {
AD_CORRECTNESS_CHECK(_executionContext);
auto [lower, upper] =
getIndex()
.getImpl()
.getPermutation(permutation())
.getSizeEstimateForScan(getScanSpecification(), deltaTriples());
return {lower == upper, std::midpoint(lower, upper)};
}

// _____________________________________________________________________________
size_t IndexScan::getExactSize() const {
AD_CORRECTNESS_CHECK(_executionContext);
return getIndex().getResultSizeOfScan(getScanSpecification(), permutation_,
deltaTriples());
Expand Down
13 changes: 8 additions & 5 deletions src/engine/IndexScan.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class IndexScan final : public Operation {
Graphs graphsToFilter_;
size_t numVariables_;
size_t sizeEstimate_;
bool sizeEstimateIsExact_;
vector<float> multiplicity_;

// Additional columns (e.g. patterns) that are being retrieved in addition to
Expand Down Expand Up @@ -59,7 +60,7 @@ class IndexScan final : public Operation {

// Return the exact result size of the index scan. It is computed on demand
// via `getResultSizeOfScan` and takes the delta triples into account.
size_t getExactSize() const { return sizeEstimate_; }
size_t getExactSize() const;

// Return two generators that lazily yield the results of `s1` and `s2` in
// blocks, but only the blocks that can theoretically contain matching rows
Expand All @@ -78,7 +79,7 @@ class IndexScan final : public Operation {
private:
// TODO<joka921> Make the `getSizeEstimateBeforeLimit()` function `const` for
// ALL the `Operations`.
uint64_t getSizeEstimateBeforeLimit() override { return getExactSize(); }
uint64_t getSizeEstimateBeforeLimit() override { return sizeEstimate_; }

public:
size_t getCostEstimate() override;
Expand All @@ -93,7 +94,9 @@ class IndexScan final : public Operation {
return multiplicity_[col];
}

bool knownEmptyResult() override { return getExactSize() == 0; }
// The result is known to be empty only if the stored size is exact and zero.
// An inexact estimate of zero does not prove that the result is empty.
bool knownEmptyResult() override {
  if (!sizeEstimateIsExact_) {
    return false;
  }
  return sizeEstimate_ == 0;
}

bool isIndexScanWithNumVariables(size_t target) const override {
return numVariables() == target;
Expand All @@ -103,7 +106,7 @@ class IndexScan final : public Operation {
// size of wikidata, so we don't even need to try and waste performance.
bool unlikelyToFitInCache(
ad_utility::MemorySize maxCacheableSize) const override {
return ad_utility::MemorySize::bytes(getExactSize() * getResultWidth() *
return ad_utility::MemorySize::bytes(sizeEstimate_ * getResultWidth() *
sizeof(Id)) > maxCacheableSize;
}

Expand All @@ -124,7 +127,7 @@ class IndexScan final : public Operation {

vector<QueryExecutionTree*> getChildren() override { return {}; }

size_t computeSizeEstimate() const;
// Compute the size estimate for this scan. Returns `(isExact, estimate)`:
// the `bool` states whether the estimate is the exact result size, the
// `size_t` is the estimate itself. The constructor stores the two values in
// `sizeEstimateIsExact_` and `sizeEstimate_`.
std::pair<bool, size_t> computeSizeEstimate() const;

std::string getCacheKeyImpl() const override;

Expand Down
51 changes: 27 additions & 24 deletions src/index/CompressedRelation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -635,8 +635,6 @@ DecompressedBlock CompressedRelationReader::readPossiblyIncompleteBlock(
return result;
};

/*
// ____________________________________________________________________________
template <bool exactSize>
std::pair<size_t, size_t> CompressedRelationReader::getResultSizeImpl(
const ScanSpecification& scanSpec,
Expand All @@ -647,17 +645,16 @@ std::pair<size_t, size_t> CompressedRelationReader::getResultSizeImpl(
// col1Id
auto relevantBlocks = getRelevantBlocks(scanSpec, blocks);
auto [beginBlock, endBlock] = getBeginAndEnd(relevantBlocks);
// TODO<joka921> The exact size is also wrong as soon as we have GRAPHS
// involved.
std::array<ColumnIndex, 1> columnIndices{0u};

auto config = getScanConfig(scanSpec, {}, locatedTriplesPerBlock);

// The first and the last block might be incomplete (that is, only
// a part of these blocks is actually part of the result,
// set up a lambda which allows us to read these blocks, and returns
// the size of the result.
auto readSizeOfPossiblyIncompleteBlock = [&](const auto& block) {
return readPossiblyIncompleteBlock(scanSpec, block, std::nullopt,
columnIndices, locatedTriplesPerBlock)
return readPossiblyIncompleteBlock(scanSpec, config, block, std::nullopt,
locatedTriplesPerBlock)
.numRows();
};

Expand All @@ -674,47 +671,52 @@ std::pair<size_t, size_t> CompressedRelationReader::getResultSizeImpl(
}

if (beginBlock == endBlock) {
return { numResults, numResults };
return {numResults, numResults};
}

// TODO<joka921> There are a lot of bugs here when it comes to graph columns.
// In particular the prepareColumnIndices should directly add the graph column
// if necessary etc.
auto allColumns = prepareColumnIndices(scanSpec, {});
allColumns.push_back(ADDITIONAL_COLUMN_GRAPH_ID);
auto locatedTriples = prepareLocatedTriples(allColumns,
locatedTriplesPerBlock);
// Determine the total size of the result.
// First accumulate the complete blocks in the "middle"
// TODO<joka921> If the block contains added or deleted triples, we
// actually have to read/materialize them to get the correct size.
std::size_t fromIndex;
std::size_t inserted;
std::size_t deleted;
std::size_t inserted = 0;
std::size_t deleted = 0;
std::ranges::for_each(
std::ranges::subrange{beginBlock, endBlock}, [&](const auto& block) {
const auto [ins, del] =
locatedTriplesPerBlock.numTriples(block.blockIndex_);
if (!exactSize || (ins == 0 && del == 0)) {
inserted += ins;
deleted += del;
fromIndex += block.numRows_;
numResults += block.numRows_;
} else {
fromIndex += readAndDecompressBlock(block, allColumns,
locatedTriples).numRows();
// TODO<joka921> We could cache the exact size as soon as we have
// merged the block once since the last update.
auto b = readAndDecompressBlock(block, config);
numResults += b.has_value() ? b.value().numRows() : 0u;
}
});
return {fromIndex - std::max(deleted, fromIndex), fromIndex + inserted};
return {numResults - std::min(deleted, numResults), numResults + inserted};
}

// ____________________________________________________________________________
// Return a `(lower bound, upper bound)` pair for the result size of the scan.
// This delegates to `getResultSizeImpl` with `exactSize == false`.
std::pair<size_t, size_t> CompressedRelationReader::getSizeEstimateForScan(
    const ScanSpecification& scanSpec,
    const vector<CompressedBlockMetadata>& blocks,
    const LocatedTriplesPerBlock& locatedTriplesPerBlock) const {
  constexpr bool computeExactSize = false;
  return getResultSizeImpl<computeExactSize>(scanSpec, blocks,
                                             locatedTriplesPerBlock);
}
*/

// ____________________________________________________________________________
size_t CompressedRelationReader::getResultSizeOfScan(
const ScanSpecification& scanSpec,
const vector<CompressedBlockMetadata>& blocks,
[[maybe_unused]] const LocatedTriplesPerBlock& locatedTriplesPerBlock)
const {
auto [lower, upper] =
getResultSizeImpl<true>(scanSpec, blocks, locatedTriplesPerBlock);
AD_CORRECTNESS_CHECK(lower == upper);
return lower;
/*
// Get all the blocks that possibly might contain our pair of col0Id and
// col1Id
auto relevantBlocks = getRelevantBlocks(scanSpec, blocks);
Expand Down Expand Up @@ -752,6 +754,7 @@ size_t CompressedRelationReader::getResultSizeOfScan(
return count + block.numRows_;
});
return numResults;
*/
}

// ____________________________________________________________________________
Expand Down
13 changes: 13 additions & 0 deletions src/index/CompressedRelation.h
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,20 @@ class CompressedRelationReader {
const ScanSpecification& scanSpec,
const vector<CompressedBlockMetadata>& blocks,
const LocatedTriplesPerBlock& locatedTriplesPerBlock) const;
// Return a pair `(lower bound, upper bound)` for the number of rows in the
// result of the scan specified by `scanSpec` over the given `blocks`. The
// numbers of inserted and deleted triples from `locatedTriplesPerBlock` widen
// the interval: the deleted ones lower the lower bound, the inserted ones
// raise the upper bound.
std::pair<size_t, size_t> getSizeEstimateForScan(
const ScanSpecification& scanSpec,
const vector<CompressedBlockMetadata>& blocks,
const LocatedTriplesPerBlock& locatedTriplesPerBlock) const;

private:
// Common implementation of `getResultSizeOfScan` (`exactSize == true`) and
// `getSizeEstimateForScan` (`exactSize == false`). Returns a pair
// `(lower bound, upper bound)` for the number of result rows. With
// `exactSize == true`, `getResultSizeOfScan` checks that both bounds are equal.
// NOTE(review): the implementation appears to use `locatedTriplesPerBlock`, so
// the `[[maybe_unused]]` attribute looks stale -- confirm and remove.
template <bool exactSize>
std::pair<size_t, size_t> getResultSizeImpl(
const ScanSpecification& scanSpec,
const vector<CompressedBlockMetadata>& blocks,
[[maybe_unused]] const LocatedTriplesPerBlock& locatedTriplesPerBlock)
const;

public:
// For a given relation, determine the `col1Id`s and their counts. This is
// used for `computeGroupByObjectWithCount`.
IdTable getDistinctCol1IdsAndCounts(
Expand Down
21 changes: 21 additions & 0 deletions src/index/LocatedTriples.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,27 @@ void LocatedTriplesPerBlock::updateAugmentedMetadata() {
}
blockIndex++;
}
// Also account for the additional block after the last regular block. It holds
// the located triples that are larger than all triples in the regular blocks.
if (hasUpdates(blockIndex)) {
const auto& blockUpdates = map_.at(blockIndex);
auto firstTriple = blockUpdates.begin()->triple_.toPermutedTriple();
auto lastTriple = blockUpdates.rbegin()->triple_.toPermutedTriple();

using O = CompressedBlockMetadata::OffsetAndCompressedSize;
O emptyBlock{0, 0};

// TODO<joka921> We need the appropriate number of columns here, or we need
// to make the reading code work regardless of the number of columns.
CompressedBlockMetadataNoBlockIndex lastBlockN{
std::vector<O>(4, emptyBlock),
0,
firstTriple,
lastTriple,
std::nullopt,
true};
augmentedMetadata_->emplace_back(lastBlockN, blockIndex);
}
}

// ____________________________________________________________________________
Expand Down
2 changes: 2 additions & 0 deletions src/index/LocatedTriples.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ class LocatedTriplesPerBlock {
// the updated triples.
std::optional<std::vector<CompressedBlockMetadata>> augmentedMetadata_;
std::vector<CompressedBlockMetadata> originalMetadata_;

public:
void updateAugmentedMetadata();

public:
Expand Down
8 changes: 8 additions & 0 deletions src/index/Permutation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,14 @@ size_t Permutation::getResultSizeOfScan(
locatedTriples(deltaTriples));
}

// _____________________________________________________________________
std::pair<size_t, size_t> Permutation::getSizeEstimateForScan(
const ScanSpecification& scanSpec, const DeltaTriples& deltaTriples) const {
const auto& p = getActualPermutation(scanSpec);
return p.reader().getSizeEstimateForScan(scanSpec, p.meta_.blockData(),
locatedTriples(deltaTriples));
}

// ____________________________________________________________________________
IdTable Permutation::getDistinctCol1IdsAndCounts(
Id col0Id, const CancellationHandle& cancellationHandle,
Expand Down
3 changes: 3 additions & 0 deletions src/index/Permutation.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ class Permutation {
/// result
size_t getResultSizeOfScan(const ScanSpecification& scanSpec,
const DeltaTriples& deltaTriples) const;
// Return a pair `(lower bound, upper bound)` for the result size of the scan
// specified by `scanSpec`, taking the located triples from `deltaTriples` into
// account. In contrast to `getResultSizeOfScan`, the two bounds may differ.
std::pair<size_t, size_t> getSizeEstimateForScan(
const ScanSpecification& scanSpec,
const DeltaTriples& deltaTriples) const;

// _______________________________________________________
void setKbName(const string& name) { meta_.setName(name); }
Expand Down
Loading

0 comments on commit f0caa80

Please sign in to comment.