diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp index 8f8147f5c..c73361e00 100644 --- a/src/engine/CartesianProductJoin.cpp +++ b/src/engine/CartesianProductJoin.cpp @@ -154,10 +154,19 @@ ProtoResult CartesianProductJoin::computeResult( child.setLimit(limitIfPresent.value()); } subResults.push_back(child.getResult()); + + const auto& table = subResults.back()->idTable(); // Early stopping: If one of the results is empty, we can stop early. - if (subResults.back()->idTable().size() == 0) { + if (table.empty()) { break; } + + // If one of the children is the neutral element (because of a triple with + // zero variables), we can simply ignore it here. + if (table.numRows() == 1 && table.numColumns() == 0) { + subResults.pop_back(); + continue; + } // Example for the following calculation: If we have a LIMIT of 1000 and // the first child already has a result of size 100, then the second child // needs to evaluate only its first 10 results. The +1 is because integer @@ -169,6 +178,10 @@ ProtoResult CartesianProductJoin::computeResult( } } + // TODO Find a solution to cheaply handle the case, that only a + // single result is left. This can probably be done by using the + // `ProtoResult`. + auto sizesView = std::views::transform( subResults, [](const auto& child) { return child->idTable().size(); }); auto totalResultSize = std::accumulate(sizesView.begin(), sizesView.end(), diff --git a/src/engine/CartesianProductJoin.h b/src/engine/CartesianProductJoin.h index 96047d62f..779adf1db 100644 --- a/src/engine/CartesianProductJoin.h +++ b/src/engine/CartesianProductJoin.h @@ -2,9 +2,7 @@ // Chair of Algorithms and Data Structures. 
// Author: Johannes Kalmbach -#ifndef QLEVER_CARTESIANPRODUCTJOIN_H -#define QLEVER_CARTESIANPRODUCTJOIN_H - +#pragma once #include "engine/Operation.h" #include "engine/QueryExecutionTree.h" @@ -92,5 +90,3 @@ class CartesianProductJoin : public Operation { std::span inputColumn, size_t groupSize, size_t offset); }; - -#endif // QLEVER_CARTESIANPRODUCTJOIN_H diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp index 46e88be5a..4a78b1ed0 100644 --- a/src/engine/IndexScan.cpp +++ b/src/engine/IndexScan.cpp @@ -6,6 +6,7 @@ #include +#include #include #include @@ -26,17 +27,19 @@ IndexScan::IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation, numVariables_(static_cast(subject_.isVariable()) + static_cast(predicate_.isVariable()) + static_cast(object_.isVariable())) { + // We previously had `nullptr`s here in unit tests. This is no longer + // necessary nor allowed. + AD_CONTRACT_CHECK(qec != nullptr); for (auto& [idx, variable] : triple.additionalScanColumns_) { additionalColumns_.push_back(idx); additionalVariables_.push_back(variable); } sizeEstimate_ = computeSizeEstimate(); - // Check the following invariant: The permuted input triple must contain at - // least one variable, and all the variables must be at the end of the + // Check the following invariant: All the variables must be at the end of the // permuted triple. For example in the PSO permutation, either only the O, or - // the S and O, or all three of P, S, O can be variables, all other - // combinations are not supported. + // the S and O, or all three of P, S, O, or none of them can be variables, all + // other combinations are not supported. 
auto permutedTriple = getPermutedTriple(); for (size_t i = 0; i < 3 - numVariables_; ++i) { AD_CONTRACT_CHECK(!permutedTriple.at(i)->isVariable()); @@ -57,7 +60,7 @@ string IndexScan::getCacheKeyImpl() const { auto permutationString = Permutation::toString(permutation_); if (numVariables_ == 3) { - os << "SCAN FOR FULL INDEX " << permutationString << " (DUMMY OPERATION)"; + os << "SCAN FOR FULL INDEX " << permutationString; } else { os << "SCAN " << permutationString << " with "; @@ -66,10 +69,9 @@ string IndexScan::getCacheKeyImpl() const { const auto& key = getPermutedTriple().at(idx)->toRdfLiteral(); os << keyString << " = \"" << key << "\""; }; - addKey(0); - if (numVariables_ == 1) { + for (size_t i = 0; i < 3 - numVariables_; ++i) { + addKey(i); os << ", "; - addKey(1); } } if (!additionalColumns_.empty()) { @@ -92,16 +94,8 @@ size_t IndexScan::getResultWidth() const { // _____________________________________________________________________________ vector IndexScan::resultSortedOn() const { - switch (numVariables_) { - case 1: - return {ColumnIndex{0}}; - case 2: - return {ColumnIndex{0}, ColumnIndex{1}}; - case 3: - return {ColumnIndex{0}, ColumnIndex{1}, ColumnIndex{2}}; - default: - AD_FAIL(); - } + auto resAsView = ad_utility::integerRange(ColumnIndex{numVariables_}); + return std::vector{resAsView.begin(), resAsView.end()}; } // _____________________________________________________________________________ @@ -149,12 +143,8 @@ ProtoResult IndexScan::computeResult(bool requestLaziness) { using enum Permutation::Enum; idTable.setNumColumns(numVariables_); const auto& index = _executionContext->getIndex(); - const auto permutedTriple = getPermutedTriple(); - if (numVariables_ == 2) { - idTable = index.scan(*permutedTriple[0], std::nullopt, permutation_, - additionalColumns(), cancellationHandle_, getLimit()); - } else if (numVariables_ == 1) { - idTable = index.scan(*permutedTriple[0], *permutedTriple[1], permutation_, + if (numVariables_ < 3) { + idTable = 
index.scan(getScanSpecification(), permutation_, additionalColumns(), cancellationHandle_, getLimit()); } else { AD_CORRECTNESS_CHECK(numVariables_ == 3); @@ -169,44 +159,19 @@ ProtoResult IndexScan::computeResult(bool requestLaziness) { // _____________________________________________________________________________ size_t IndexScan::computeSizeEstimate() const { - if (_executionContext) { - // Should always be in this branch. Else is only for test cases. - - // We have to do a simple scan anyway so might as well do it now - if (numVariables_ == 1) { - // TODO Use the monadic operation `std::optional::or_else`. - // Note: we cannot use `optional::value_or()` here, because the else - // case is expensive to compute, and we need it lazily evaluated. - if (auto size = getExecutionContext()->getQueryTreeCache().getPinnedSize( - getCacheKey()); - size.has_value()) { - return size.value(); - } else { - // This call explicitly has to read two blocks of triples from memory to - // obtain an exact size estimate. - return getIndex().getResultSizeOfScan( - *getPermutedTriple()[0], *getPermutedTriple().at(1), permutation_); - } - } else if (numVariables_ == 2) { - const TripleComponent& firstKey = *getPermutedTriple().at(0); - return getIndex().getCardinality(firstKey, permutation_); - } else { - // The triple consists of three variables. - // TODO As soon as all implementations of a full index scan - // (Including the "dummy joins" in Join.cpp) consistently exclude the - // internal triples, this estimate should be changed to only return - // the number of triples in the actual knowledge graph (excluding the - // internal triples). 
- AD_CORRECTNESS_CHECK(numVariables_ == 3); - return getIndex().numTriples().normalAndInternal_(); - } + AD_CORRECTNESS_CHECK(_executionContext); + // We have to do a simple scan anyway so might as well do it now + if (numVariables_ < 3) { + return getIndex().getResultSizeOfScan(getScanSpecification(), permutation_); } else { - // Only for test cases. The handling of the objects is to make the - // strange query planner tests pass. - auto strLen = [](const auto& el) { - return (el.isString() ? el.getString() : el.toString()).size(); - }; - return 1000 + strLen(subject_) + strLen(object_) + strLen(predicate_); + // The triple consists of three variables. + // TODO As soon as all implementations of a full index scan + // (Including the "dummy joins" in Join.cpp) consistently exclude the + // internal triples, this estimate should be changed to only return + // the number of triples in the actual knowledge graph (excluding the + // internal triples). + AD_CORRECTNESS_CHECK(numVariables_ == 3); + return getIndex().numTriples().normalAndInternal_(); } } @@ -219,29 +184,20 @@ size_t IndexScan::getCostEstimate() { // _____________________________________________________________________________ void IndexScan::determineMultiplicities() { - multiplicity_.clear(); - if (_executionContext) { + multiplicity_ = [this]() -> std::vector { const auto& idx = getIndex(); - if (numVariables_ == 1) { + if (numVariables_ == 0) { + return {}; + } else if (numVariables_ == 1) { // There are no duplicate triples in RDF and two elements are fixed. 
- multiplicity_.emplace_back(1); + return {1.0f}; } else if (numVariables_ == 2) { - const auto permutedTriple = getPermutedTriple(); - multiplicity_ = idx.getMultiplicities(*permutedTriple[0], permutation_); + return idx.getMultiplicities(*getPermutedTriple()[0], permutation_); } else { AD_CORRECTNESS_CHECK(numVariables_ == 3); - multiplicity_ = idx.getMultiplicities(permutation_); - } - } else { - // This branch is only used in certain unit tests. - multiplicity_.emplace_back(1); - if (numVariables_ == 2) { - multiplicity_.emplace_back(1); - } - if (numVariables_ == 3) { - multiplicity_.emplace_back(1); + return idx.getMultiplicities(permutation_); } - } + }(); for ([[maybe_unused]] size_t i : std::views::iota(multiplicity_.size(), getResultWidth())) { multiplicity_.emplace_back(1); @@ -296,6 +252,12 @@ std::array IndexScan::getPermutedTriple() triple[permutation[2]]}; } +// ___________________________________________________________________________ +ScanSpecificationAsTripleComponent IndexScan::getScanSpecification() const { + auto permutedTriple = getPermutedTriple(); + return {*permutedTriple[0], *permutedTriple[1], *permutedTriple[2]}; +} + // ___________________________________________________________________________ Permutation::IdTableGenerator IndexScan::getLazyScan( const IndexScan& s, std::vector blocks) { @@ -309,6 +271,10 @@ Permutation::IdTableGenerator IndexScan::getLazyScan( col1Id = s.getPermutedTriple()[1]->toValueId(index.getVocab()).value(); } + // This function is currently only called by the `getLazyScanForJoin...` + // functions. In these cases we always have at least one variable in each of + // the scans, because otherwise there would be no join column. 
+ AD_CORRECTNESS_CHECK(s.numVariables_ >= 1); // If there is a LIMIT or OFFSET clause that constrains the scan // (which can happen with an explicit subquery), we cannot use the prefiltered // blocks, as we currently have no mechanism to include limits and offsets @@ -325,28 +291,20 @@ Permutation::IdTableGenerator IndexScan::getLazyScan( // ________________________________________________________________ std::optional IndexScan::getMetadataForScan( const IndexScan& s) { - auto permutedTriple = s.getPermutedTriple(); - const IndexImpl& index = s.getIndex().getImpl(); - auto numVars = s.numVariables_; - std::optional col0Id = - numVars == 3 ? std::nullopt - : permutedTriple[0]->toValueId(index.getVocab()); - std::optional col1Id = - numVars >= 2 ? std::nullopt - : permutedTriple[1]->toValueId(index.getVocab()); - if ((!col0Id.has_value() && numVars < 3) || - (!col1Id.has_value() && numVars < 2)) { + const auto& index = s.getExecutionContext()->getIndex().getImpl(); + auto scanSpec = s.getScanSpecification().toScanSpecification(index); + if (!scanSpec.has_value()) { return std::nullopt; } - return index.getPermutation(s.permutation()) - .getMetadataAndBlocks({col0Id, col1Id, std::nullopt}); + .getMetadataAndBlocks(scanSpec.value()); }; // ________________________________________________________________ std::array IndexScan::lazyScanForJoinOfTwoScans(const IndexScan& s1, const IndexScan& s2) { AD_CONTRACT_CHECK(s1.numVariables_ <= 3 && s2.numVariables_ <= 3); + AD_CONTRACT_CHECK(s1.numVariables_ >= 1 && s2.numVariables_ >= 1); // This function only works for single column joins. 
This means that the first // variable of both scans must be equal, but all other variables of the scans @@ -395,7 +353,7 @@ IndexScan::lazyScanForJoinOfTwoScans(const IndexScan& s1, const IndexScan& s2) { Permutation::IdTableGenerator IndexScan::lazyScanForJoinOfColumnWithScan( std::span joinColumn, const IndexScan& s) { AD_EXPENSIVE_CHECK(std::ranges::is_sorted(joinColumn)); - AD_CORRECTNESS_CHECK(s.numVariables_ <= 3); + AD_CORRECTNESS_CHECK(s.numVariables_ <= 3 && s.numVariables_ > 0); auto metaBlocks1 = getMetadataForScan(s); diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index 04615c77d..8609bf83a 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -100,6 +100,7 @@ class IndexScan final : public Operation { // `permutation_`. For example if `permutation_ == PSO` then the result is // {&predicate_, &subject_, &object_} std::array getPermutedTriple() const; + ScanSpecificationAsTripleComponent getScanSpecification() const; private: ProtoResult computeResult(bool requestLaziness) override; diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index a47dd5a8c..c8dab08d9 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -199,25 +199,6 @@ std::shared_ptr Operation::getResult( const bool pinResult = _executionContext->_pinSubtrees || pinFinalResultButNotSubtrees; - // When we pin the final result but no subtrees, we need to remember the sizes - // of all involved index scans that have only one free variable. Note that - // these index scans are executed already during query planning because they - // have to be executed anyway, for any query plan. If we don't remember these - // sizes here, future queries that take the result from the cache would redo - // these index scans. Note that we do not need to remember the multiplicity - // (and distinctness) because the multiplicity for an index scan with a single - // free variable is always 1. 
- if (pinFinalResultButNotSubtrees) { - auto lock = - getExecutionContext()->getQueryTreeCache().pinnedSizes().wlock(); - forAllDescendants([&lock](QueryExecutionTree* child) { - if (child->getRootOperation()->isIndexScanWithNumVariables(1)) { - (*lock)[child->getRootOperation()->getCacheKey()] = - child->getSizeEstimate(); - } - }); - } - try { // In case of an exception, create the correct runtime info, no matter which // exception handler is called. @@ -355,7 +336,7 @@ void Operation::updateRuntimeInformationOnSuccess( // ____________________________________________________________________________________________________________________ void Operation::updateRuntimeInformationOnSuccess( - const ConcurrentLruCache::ResultAndCacheStatus& resultAndCacheStatus, + const QueryResultCache::ResultAndCacheStatus& resultAndCacheStatus, Milliseconds duration) { const auto& result = resultAndCacheStatus._resultPointer->resultTable(); updateRuntimeInformationOnSuccess( diff --git a/src/engine/Operation.h b/src/engine/Operation.h index cd2907bd3..242355831 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -274,7 +274,7 @@ class Operation { // Create and store the complete runtime information for this operation after // it has either been successfully computed or read from the cache. virtual void updateRuntimeInformationOnSuccess( - const ConcurrentLruCache::ResultAndCacheStatus& resultAndCacheStatus, + const QueryResultCache::ResultAndCacheStatus& resultAndCacheStatus, Milliseconds duration) final; // Similar to the function above, but the components are specified manually. diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index a8a5255d1..b7b54af6b 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -74,37 +74,8 @@ class CacheValue { // Threadsafe LRU cache for (partial) query results, that // checks on insertion, if the result is currently being computed // by another query. 
-using ConcurrentLruCache = ad_utility::ConcurrentCache< - ad_utility::LRUCache>; -using PinnedSizes = - ad_utility::Synchronized, - std::shared_mutex>; -class QueryResultCache : public ConcurrentLruCache { - private: - PinnedSizes _pinnedSizes; - - public: - virtual ~QueryResultCache() = default; - void clearAll() override { - // The _pinnedSizes are not part of the (otherwise threadsafe) _cache - // and thus have to be manually locked. - auto lock = _pinnedSizes.wlock(); - ConcurrentLruCache::clearAll(); - lock->clear(); - } - // Inherit the constructor. - using ConcurrentLruCache::ConcurrentLruCache; - const PinnedSizes& pinnedSizes() const { return _pinnedSizes; } - PinnedSizes& pinnedSizes() { return _pinnedSizes; } - std::optional getPinnedSize(const std::string& key) { - auto rlock = _pinnedSizes.rlock(); - if (rlock->contains(key)) { - return rlock->at(key); - } else { - return std::nullopt; - } - } -}; +using QueryResultCache = ad_utility::ConcurrentCache< + ad_utility::LRUCache>; // Execution context for queries. // Holds references to index and engine, implements caching. diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 745b27edd..26f607727 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -629,7 +629,10 @@ void QueryPlanner::seedFromOrdinaryTriple( const size_t numVars = static_cast(isVariable(triple.s_)) + static_cast(isVariable(triple.p_)) + static_cast(isVariable(triple.o_)); - if (numVars == 1) { + if (numVars == 0) { + // We could read this from any of the permutations. + addIndexScan(Permutation::Enum::PSO); + } else if (numVars == 1) { indexScanSingleVarCase(triple, addIndexScan); } else if (numVars == 2) { indexScanTwoVarsCase(triple, addIndexScan, addFilter); @@ -673,10 +676,6 @@ auto QueryPlanner::seedWithScansAndText( seeds.push_back(getTextLeafPlan(node, textLimits)); continue; } - if (node._variables.empty()) { - AD_THROW("Triples should have at least one variable. 
Not the case in: " + - node.triple_.asString()); - } // Property paths must have been handled previously. AD_CORRECTNESS_CHECK(node.triple_.p_._operation == diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp index 689d58c7c..245f3cb94 100644 --- a/src/engine/Server.cpp +++ b/src/engine/Server.cpp @@ -469,7 +469,6 @@ nlohmann::json Server::composeCacheStatsJson() const { // converter. result["non-pinned-size"] = cache_.nonPinnedSize().getBytes(); result["pinned-size"] = cache_.pinnedSize().getBytes(); - result["num-pinned-index-scan-sizes"] = cache_.pinnedSizes().rlock()->size(); return result; } diff --git a/src/engine/TransitivePathBase.cpp b/src/engine/TransitivePathBase.cpp index b0e11eec0..85eaa3236 100644 --- a/src/engine/TransitivePathBase.cpp +++ b/src/engine/TransitivePathBase.cpp @@ -378,8 +378,8 @@ std::shared_ptr TransitivePathBase::bindLeftOrRightSide( AD_CORRECTNESS_CHECK(!p->variableColumns_.contains(variable)); p->variableColumns_[variable] = columnIndexWithType; - p->resultWidth_++; } + p->resultWidth_ += leftOrRightOp->getResultWidth() - 1; return std::move(p); } @@ -397,6 +397,8 @@ void TransitivePathBase::copyColumns(const IdTableView& inputTable, size_t skipCol) const { size_t inCol = 0; size_t outCol = 2; + AD_CORRECTNESS_CHECK(skipCol < inputTable.numColumns()); + AD_CORRECTNESS_CHECK(inputTable.numColumns() + 1 == outputTable.numColumns()); while (inCol < inputTable.numColumns() && outCol < outputTable.numColumns()) { if (skipCol == inCol) { inCol++; diff --git a/src/engine/idTable/IdTable.h b/src/engine/idTable/IdTable.h index 4377b0244..dd9895810 100644 --- a/src/engine/idTable/IdTable.h +++ b/src/engine/idTable/IdTable.h @@ -1,5 +1,5 @@ -// Copyright 2021, University of Freiburg, Chair of Algorithms and Data -// Structures. Author: Johannes Kalmbach +// Copyright 2021, University of Freiburg, Chair of Algorithms and Data +// Structures. 
Author: Johannes Kalmbach #pragma once diff --git a/src/index/CMakeLists.txt b/src/index/CMakeLists.txt index c8020551f..e41fd2471 100644 --- a/src/index/CMakeLists.txt +++ b/src/index/CMakeLists.txt @@ -5,5 +5,5 @@ add_library(index LocatedTriples.cpp Permutation.cpp TextMetaData.cpp DocsDB.cpp FTSAlgorithms.cpp PrefixHeuristic.cpp CompressedRelation.cpp - PatternCreator.cpp) + PatternCreator.cpp ScanSpecification.cpp) qlever_target_link_libraries(index util parser vocabulary compilationInfo ${STXXL_LIBRARIES}) diff --git a/src/index/CompressedRelation.h b/src/index/CompressedRelation.h index 6a6b76169..af8725ec9 100644 --- a/src/index/CompressedRelation.h +++ b/src/index/CompressedRelation.h @@ -11,6 +11,7 @@ #include "engine/idTable/IdTable.h" #include "global/Id.h" +#include "index/ScanSpecification.h" #include "parser/data/LimitOffsetClause.h" #include "util/Cache.h" #include "util/CancellationHandle.h" @@ -350,48 +351,6 @@ class CompressedRelationReader { using ColumnIndices = std::vector; using CancellationHandle = ad_utility::SharedCancellationHandle; - // The specification of a scan operation for a given permutation. - // Can either be a full scan (all three elements are `std::nullopt`), - // a scan for a fixed `col0Id`, a scan for a fixed `col0Id` and `col1Id`, - // or even a scan for a single triple to check whether it is contained in - // the knowledge graph at all. The values which are `nullopt` become variables - // and are returned as columns in the result of the scan. 
- class ScanSpecification { - private: - using T = std::optional; - - T col0Id_; - T col1Id_; - T col2Id_; - - void validate() const { - bool c0 = col0Id_.has_value(); - bool c1 = col1Id_.has_value(); - bool c2 = col2Id_.has_value(); - if (!c0) { - AD_CORRECTNESS_CHECK(!c1 && !c2); - } - if (!c1) { - AD_CORRECTNESS_CHECK(!c2); - } - } - - public: - ScanSpecification(T col0Id, T col1Id, T col2Id) - : col0Id_{col0Id}, col1Id_{col1Id}, col2Id_{col2Id} { - validate(); - } - const T& col0Id() const { return col0Id_; } - const T& col1Id() const { return col1Id_; } - const T& col2Id() const { return col2Id_; } - - // Only used in tests. - void setCol1Id(T col1Id) { - col1Id_ = col1Id; - validate(); - } - }; - // The metadata of a single relation together with a subset of its // blocks and possibly a `col1Id` for additional filtering. This is used as // the input to several functions below that take such an input. diff --git a/src/index/Index.cpp b/src/index/Index.cpp index 468a17a3d..b23b6629a 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -276,28 +276,27 @@ vector Index::getMultiplicities(const TripleComponent& key, // ____________________________________________________________________________ IdTable Index::scan( - const TripleComponent& col0String, - std::optional> col1String, + const ScanSpecificationAsTripleComponent& scanSpecification, Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns, const ad_utility::SharedCancellationHandle& cancellationHandle, const LimitOffsetClause& limitOffset) const { - return pimpl_->scan(col0String, col1String, p, additionalColumns, + return pimpl_->scan(scanSpecification, p, additionalColumns, cancellationHandle, limitOffset); } // ____________________________________________________________________________ IdTable Index::scan( - Id col0Id, std::optional col1Id, Permutation::Enum p, + const ScanSpecification& scanSpecification, Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns, const 
ad_utility::SharedCancellationHandle& cancellationHandle, const LimitOffsetClause& limitOffset) const { - return pimpl_->scan(col0Id, col1Id, p, additionalColumns, cancellationHandle, - limitOffset); + return pimpl_->scan(scanSpecification, p, additionalColumns, + cancellationHandle, limitOffset); } // ____________________________________________________________________________ -size_t Index::getResultSizeOfScan(const TripleComponent& col0String, - const TripleComponent& col1String, - const Permutation::Enum& permutation) const { - return pimpl_->getResultSizeOfScan(col0String, col1String, permutation); +size_t Index::getResultSizeOfScan( + const ScanSpecificationAsTripleComponent& scanSpecification, + const Permutation::Enum& permutation) const { + return pimpl_->getResultSizeOfScan(scanSpecification, permutation); } diff --git a/src/index/Index.h b/src/index/Index.h index 6ab952e8f..82534a612 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -236,24 +236,23 @@ class Index { * @param p The Permutation::Enum to use (in particularly POS(), SOP,... * members of Index class). */ - IdTable scan( - const TripleComponent& col0String, - std::optional> col1String, - Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns, - const ad_utility::SharedCancellationHandle& cancellationHandle, - const LimitOffsetClause& limitOffset = {}) const; + IdTable scan(const ScanSpecificationAsTripleComponent& scanSpecification, + Permutation::Enum p, + Permutation::ColumnIndicesRef additionalColumns, + const ad_utility::SharedCancellationHandle& cancellationHandle, + const LimitOffsetClause& limitOffset = {}) const; // Similar to the overload of `scan` above, but the keys are specified as IDs. 
- IdTable scan(Id col0Id, std::optional col1Id, Permutation::Enum p, + IdTable scan(const ScanSpecification& scanSpecification, Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns, const ad_utility::SharedCancellationHandle& cancellationHandle, const LimitOffsetClause& limitOffset = {}) const; // Similar to the previous overload of `scan`, but only get the exact size of // the scan result. - size_t getResultSizeOfScan(const TripleComponent& col0String, - const TripleComponent& col1String, - const Permutation::Enum& permutation) const; + size_t getResultSizeOfScan( + const ScanSpecificationAsTripleComponent& scanSpecification, + const Permutation::Enum& permutation) const; // Get access to the implementation. This should be used rarely as it // requires including the rather expensive `IndexImpl.h` header diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 30622cbcf..c466e7548 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -1358,46 +1358,41 @@ vector IndexImpl::getMultiplicities( // _____________________________________________________________________________ IdTable IndexImpl::scan( - const TripleComponent& col0String, - std::optional> col1String, + const ScanSpecificationAsTripleComponent& scanSpecificationAsTc, const Permutation::Enum& permutation, Permutation::ColumnIndicesRef additionalColumns, const ad_utility::SharedCancellationHandle& cancellationHandle, const LimitOffsetClause& limitOffset) const { - std::optional col0Id = col0String.toValueId(getVocab()); - std::optional col1Id = - col1String.has_value() ? col1String.value().get().toValueId(getVocab()) - : std::nullopt; - if (!col0Id.has_value() || (col1String.has_value() && !col1Id.has_value())) { - size_t numColumns = col1String.has_value() ? 
1 : 2; + auto scanSpecification = scanSpecificationAsTc.toScanSpecification(*this); + if (!scanSpecification.has_value()) { cancellationHandle->throwIfCancelled(); - return IdTable{numColumns + additionalColumns.size(), allocator_}; + return IdTable{ + scanSpecificationAsTc.numColumns() + additionalColumns.size(), + allocator_}; } - return scan(col0Id.value(), col1Id, permutation, additionalColumns, + return scan(scanSpecification.value(), permutation, additionalColumns, cancellationHandle, limitOffset); } // _____________________________________________________________________________ IdTable IndexImpl::scan( - Id col0Id, std::optional col1Id, Permutation::Enum p, + const ScanSpecification& scanSpecification, Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns, const ad_utility::SharedCancellationHandle& cancellationHandle, const LimitOffsetClause& limitOffset) const { - return getPermutation(p).scan({col0Id, col1Id, std::nullopt}, - additionalColumns, cancellationHandle, - limitOffset); + return getPermutation(p).scan(scanSpecification, additionalColumns, + cancellationHandle, limitOffset); } // _____________________________________________________________________________ size_t IndexImpl::getResultSizeOfScan( - const TripleComponent& col0, const TripleComponent& col1, + const ScanSpecificationAsTripleComponent& scanSpecificationAsTc, const Permutation::Enum& permutation) const { - std::optional col0Id = col0.toValueId(getVocab()); - std::optional col1Id = col1.toValueId(getVocab()); - if (!col0Id.has_value() || !col1Id.has_value()) { + const Permutation& p = getPermutation(permutation); + auto scanSpecification = scanSpecificationAsTc.toScanSpecification(*this); + if (!scanSpecification.has_value()) { return 0; } - const Permutation& p = getPermutation(permutation); - return p.getResultSizeOfScan({col0Id.value(), col1Id.value(), std::nullopt}); + return p.getResultSizeOfScan(scanSpecification.value()); } // 
_____________________________________________________________________________ diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index b39fa7e91..43bccb4ef 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -418,24 +418,22 @@ class IndexImpl { vector getMultiplicities(Permutation::Enum permutation) const; // _____________________________________________________________________________ - IdTable scan( - const TripleComponent& col0String, - std::optional> col1String, - const Permutation::Enum& permutation, - Permutation::ColumnIndicesRef additionalColumns, - const ad_utility::SharedCancellationHandle& cancellationHandle, - const LimitOffsetClause& limitOffset = {}) const; + IdTable scan(const ScanSpecificationAsTripleComponent& scanSpecification, + const Permutation::Enum& permutation, + Permutation::ColumnIndicesRef additionalColumns, + const ad_utility::SharedCancellationHandle& cancellationHandle, + const LimitOffsetClause& limitOffset = {}) const; // _____________________________________________________________________________ - IdTable scan(Id col0Id, std::optional col1Id, Permutation::Enum p, + IdTable scan(const ScanSpecification& scanSpecification, Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns, const ad_utility::SharedCancellationHandle& cancellationHandle, const LimitOffsetClause& limitOffset = {}) const; // _____________________________________________________________________________ - size_t getResultSizeOfScan(const TripleComponent& col0, - const TripleComponent& col1, - const Permutation::Enum& permutation) const; + size_t getResultSizeOfScan( + const ScanSpecificationAsTripleComponent& scanSpecification, + const Permutation::Enum& permutation) const; private: // Private member functions diff --git a/src/index/Permutation.h b/src/index/Permutation.h index 2defb4c58..db4b36635 100644 --- a/src/index/Permutation.h +++ b/src/index/Permutation.h @@ -38,8 +38,6 @@ class Permutation { using ColumnIndices = 
CompressedRelationReader::ColumnIndices; using CancellationHandle = ad_utility::SharedCancellationHandle; - using ScanSpecification = CompressedRelationReader::ScanSpecification; - // Convert a permutation to the corresponding string, etc. `PSO` is converted // to "PSO". static std::string_view toString(Enum permutation); diff --git a/src/index/ScanSpecification.cpp b/src/index/ScanSpecification.cpp new file mode 100644 index 000000000..505dc1e05 --- /dev/null +++ b/src/index/ScanSpecification.cpp @@ -0,0 +1,89 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Kalmbach + +#include "index/ScanSpecification.h" + +#include "index/Index.h" +#include "index/IndexImpl.h" + +// ____________________________________________________________________________ +std::optional +ScanSpecificationAsTripleComponent::toScanSpecification( + const Index& index) const { + return toScanSpecification(index.getImpl()); +} + +// ____________________________________________________________________________ +std::optional +ScanSpecificationAsTripleComponent::toScanSpecification( + const IndexImpl& index) const { + // TODO Use `std::optional::transform`. + // TODO: We can also have LocalVocab entries is the + // ScanSpecification. 
+ bool nonexistingVocabEntryFound = false; + auto getId = + [&index, &nonexistingVocabEntryFound]( + const std::optional& tc) -> std::optional { + if (!tc.has_value()) { + return std::nullopt; + } + auto id = tc.value().toValueId(index.getVocab()); + if (!id.has_value()) { + nonexistingVocabEntryFound = true; + } + return id; + }; + std::optional col0Id = getId(col0_); + std::optional col1Id = getId(col1_); + std::optional col2Id = getId(col2_); + + if (nonexistingVocabEntryFound) { + return std::nullopt; + } + return ScanSpecification{col0Id, col1Id, col2Id}; +} + +// ____________________________________________________________________________ +ScanSpecificationAsTripleComponent::ScanSpecificationAsTripleComponent(T col0, + T col1, + T col2) { + auto toNulloptIfVariable = [](T& tc) -> std::optional { + if (tc.has_value() && tc.value().isVariable()) { + return std::nullopt; + } else { + return std::move(tc); + } + }; + col0_ = toNulloptIfVariable(col0); + col1_ = toNulloptIfVariable(col1); + col2_ = toNulloptIfVariable(col2); + + if (!col0_.has_value()) { + AD_CONTRACT_CHECK(!col1_.has_value()); + } + if (!col1_.has_value()) { + AD_CONTRACT_CHECK(!col2_.has_value()); + } +} + +// ____________________________________________________________________________ +size_t ScanSpecificationAsTripleComponent::numColumns() const { + auto i = [](const auto& x) -> size_t { + return static_cast(x.has_value()); + }; + return 3 - i(col0_) - i(col1_) - i(col2_); +} + +// _____________________________________________________________________________ +void ScanSpecification::validate() const { + bool c0 = col0Id_.has_value(); + bool c1 = col1Id_.has_value(); + bool c2 = col2Id_.has_value(); + if (!c0) { + AD_CORRECTNESS_CHECK(!c1 && !c2); + } + if (!c1) { + AD_CORRECTNESS_CHECK(!c2); + } +} diff --git a/src/index/ScanSpecification.h b/src/index/ScanSpecification.h new file mode 100644 index 000000000..de8e915ce --- /dev/null +++ b/src/index/ScanSpecification.h @@ -0,0 +1,81 @@ +// 
Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Kalmbach + +#pragma once +#include + +#include "engine/LocalVocab.h" +#include "global/Id.h" +#include "parser/TripleComponent.h" + +// Forward declaration +class IndexImpl; +class Index; + +// The specification of a scan operation for a given permutation. +// Can either be a full scan (all three elements are `std::nullopt`), +// a scan for a fixed `col0Id`, a scan for a fixed `col0Id` and `col1Id`, +// or even a scan for a single triple to check whether it is contained in +// the knowledge graph at all. The values which are `nullopt` become variables +// and are returned as columns in the result of the scan. +class ScanSpecification { + private: + using T = std::optional; + + T col0Id_; + T col1Id_; + T col2Id_; + friend class ScanSpecificationAsTripleComponent; + + void validate() const; + + public: + ScanSpecification(T col0Id, T col1Id, T col2Id) + : col0Id_{col0Id}, col1Id_{col1Id}, col2Id_{col2Id} { + validate(); + } + const T& col0Id() const { return col0Id_; } + const T& col1Id() const { return col1Id_; } + const T& col2Id() const { return col2Id_; } + + bool operator==(const ScanSpecification&) const = default; + + // Only used in tests. + void setCol1Id(T col1Id) { + col1Id_ = col1Id; + validate(); + } +}; + +// Same as `ScanSpecification` (see above), but stores `TripleComponent`s +// instead of `Id`s. +class ScanSpecificationAsTripleComponent { + using T = std::optional; + + private: + std::optional col0_; + std::optional col1_; + std::optional col2_; + + public: + // Construct from three optional `TripleComponent`s. If any of the three + // entries is unbound (`nullopt` or of type `Variable`), then all subsequent + // entries also have to be unbound. For example if `col0` is bound, but `col1` + // isn't, then `col2` also has to be unbound. + ScanSpecificationAsTripleComponent(T col0, T col1, T col2); + + // Convert to a `ScanSpecification`. 
The `index` is used to convert the + // `TripleComponent` to `Id`s by looking them up in the vocabulary. Return + // `nullopt` if and only if one of the vocab lookups fails (then the result of + // the corresponding scan will be empty). + // TODO Once we implement SPARQL UPDATE, we possibly also have to use the + // `LocalVocab` of the UPDATE triples here. + std::optional toScanSpecification( + const IndexImpl& index) const; + std::optional toScanSpecification( + const Index& index) const; + + // The number of columns that the corresponding index scan will have. + size_t numColumns() const; +}; diff --git a/src/parser/CMakeLists.txt b/src/parser/CMakeLists.txt index 29ca836c7..8b1efdcb1 100644 --- a/src/parser/CMakeLists.txt +++ b/src/parser/CMakeLists.txt @@ -25,5 +25,5 @@ add_library(parser Iri.cpp Literal.cpp LiteralOrIri.cpp) -qlever_target_link_libraries(parser sparqlParser parserData sparqlExpressions rdfEscaping re2::re2 util engine) +qlever_target_link_libraries(parser sparqlParser parserData sparqlExpressions rdfEscaping re2::re2 util engine index) diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index 56ac53fe7..88837063a 100644 --- a/src/util/ConcurrentCache.h +++ b/src/util/ConcurrentCache.h @@ -213,7 +213,7 @@ class ConcurrentCache { } /// Clear the cache, including the pinned entries. 
- virtual void clearAll() { _cacheAndInProgressMap.wlock()->_cache.clearAll(); } + void clearAll() { _cacheAndInProgressMap.wlock()->_cache.clearAll(); } /// Delete elements from the unpinned part of the cache of total size /// at least `size`; diff --git a/test/CompressedRelationsTest.cpp b/test/CompressedRelationsTest.cpp index c2aad9d7c..24faf16c2 100644 --- a/test/CompressedRelationsTest.cpp +++ b/test/CompressedRelationsTest.cpp @@ -174,8 +174,7 @@ void testCompressedRelations(const auto& inputs, std::string testCaseName, ASSERT_FLOAT_EQ(m.numRows_ / static_cast(i + 1), m.multiplicityCol1_); // Scan for all distinct `col0` and check that we get the expected result. - CompressedRelationReader::ScanSpecification scanSpec{ - metaData[i].col0Id_, std::nullopt, std::nullopt}; + ScanSpecification scanSpec{metaData[i].col0Id_, std::nullopt, std::nullopt}; IdTable table = reader.scan(scanSpec, blocks, additionalColumns, cancellationHandle); const auto& col1And2 = inputs[i].col1And2_; @@ -207,8 +206,8 @@ void testCompressedRelations(const auto& inputs, std::string testCaseName, std::vector> col3; auto scanAndCheck = [&]() { - CompressedRelationReader::ScanSpecification scanSpec{ - metaData[i].col0Id_, V(lastCol1Id), std::nullopt}; + ScanSpecification scanSpec{metaData[i].col0Id_, V(lastCol1Id), + std::nullopt}; auto size = reader.getResultSizeOfScan(scanSpec, blocks); IdTable tableWidthOne = reader.scan(scanSpec, blocks, Permutation::ColumnIndicesRef{}, diff --git a/test/IndexTest.cpp b/test/IndexTest.cpp index df7421dbf..685edfc88 100644 --- a/test/IndexTest.cpp +++ b/test/IndexTest.cpp @@ -37,7 +37,7 @@ auto makeTestScanWidthOne = [](const IndexImpl& index) { ad_utility::source_location::current()) { auto t = generateLocationTrace(l); IdTable result = - index.scan(c0, std::cref(c1), permutation, additionalColumns, + index.scan({c0, c1, std::nullopt}, permutation, additionalColumns, std::make_shared>()); ASSERT_EQ(result.numColumns(), 1 + additionalColumns.size()); 
ASSERT_EQ(result, makeIdTableFromVector(expected)); @@ -53,9 +53,10 @@ auto makeTestScanWidthTwo = [](const IndexImpl& index) { ad_utility::source_location l = ad_utility::source_location::current()) { auto t = generateLocationTrace(l); - IdTable wol = index.scan( - c0, std::nullopt, permutation, Permutation::ColumnIndicesRef{}, - std::make_shared>()); + IdTable wol = + index.scan({c0, std::nullopt, std::nullopt}, permutation, + Permutation::ColumnIndicesRef{}, + std::make_shared>()); ASSERT_EQ(wol, makeIdTableFromVector(expected)); }; }; diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 42f98286f..de3f4f3d4 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -17,7 +17,7 @@ constexpr auto iri = ad_utility::testing::iri; using ::testing::HasSubstr; QueryPlanner makeQueryPlanner() { - return QueryPlanner{nullptr, + return QueryPlanner{ad_utility::testing::getQec(), std::make_shared>()}; } @@ -204,6 +204,20 @@ TEST(QueryPlanner, testBFSLeaveOut) { } } +TEST(QueryPlanner, indexScanZeroVariables) { + auto scan = h::IndexScanFromStrings; + using enum Permutation::Enum; + h::expect( + "SELECT * \n " + "WHERE \t { }", + scan("", "", "")); + h::expect( + "SELECT * \n " + "WHERE \t { . ?z}", + h::CartesianProductJoin(scan("", "", ""), + scan("", "", "?z"))); +} + TEST(QueryPlanner, indexScanOneVariable) { auto scan = h::IndexScanFromStrings; using enum Permutation::Enum; @@ -287,31 +301,28 @@ TEST(QueryPlanner, testStarTwoFree) { } TEST(QueryPlanner, testFilterAfterSeed) { - ParsedQuery pq = SparqlParser::parseQuery( + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec( + " , , . , , ."); + h::expect( "SELECT ?x ?y ?z WHERE {" "?x ?y . ?y ?z . 
" - "FILTER(?x != ?y) }"); - QueryPlanner qp = makeQueryPlanner(); - QueryExecutionTree qet = qp.createExecutionTree(pq); - ASSERT_EQ(qet.getCacheKey(), - "FILTER JOIN\nSCAN POS with P = \"\" join-column: " - "[0]\n|X|\nSCAN PSO with P = \"\" join-column: [0] with " - "N16sparqlExpression10relational20RelationalExpressionILN18valueIdC" - "omparators10ComparisonE3EEE#column_1##column_0#"); + "FILTER(?x != ?y) }", + h::Filter("?x != ?y", + h::Join(scan("?x", "", "?y"), scan("?y", "", "?z"))), + qec); } TEST(QueryPlanner, testFilterAfterJoin) { - ParsedQuery pq = SparqlParser::parseQuery( + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec(" "); + h::expect( "SELECT ?x ?y ?z WHERE {" "?x ?y . ?y ?z . " - "FILTER(?x != ?z) }"); - QueryPlanner qp = makeQueryPlanner(); - QueryExecutionTree qet = qp.createExecutionTree(pq); - ASSERT_EQ(qet.getCacheKey(), - "FILTER JOIN\nSCAN POS with P = \"\" join-column: " - "[0]\n|X|\nSCAN PSO with P = \"\" join-column: [0] with " - "N16sparqlExpression10relational20RelationalExpressionILN18valueIdC" - "omparators10ComparisonE3EEE#column_1##column_2#"); + "FILTER(?x != ?z) }", + h::Filter("?x != ?z", + h::Join(scan("?x", "", "?y"), scan("?y", "", "?z"))), + qec); } TEST(QueryPlanner, threeVarTriples) { @@ -565,25 +576,18 @@ TEST(QueryExecutionTreeTest, testFormerSegfaultTriFilter) { } TEST(QueryPlanner, testSimpleOptional) { - QueryPlanner qp = makeQueryPlanner(); - - ParsedQuery pq = SparqlParser::parseQuery( + auto scan = h::IndexScanFromStrings; + h::expect( "SELECT ?a ?b \n " - "WHERE {?a ?b . OPTIONAL { ?a ?c }}"); - QueryExecutionTree qet = qp.createExecutionTree(pq); - ASSERT_EQ(qet.getCacheKey(), - "OPTIONAL_JOIN\nSCAN PSO with P = \"\" join-columns: " - "[0]\n|X|\nSCAN PSO with P = \"\" join-columns: [0]"); - - ParsedQuery pq2 = SparqlParser::parseQuery( + "WHERE {?a ?b . 
OPTIONAL { ?a ?c }}", + h::OptionalJoin(scan("?a", "", "?b"), scan("?a", "", "?c"))); + h::expect( "SELECT ?a ?b \n " "WHERE {?a ?b . " - "OPTIONAL { ?a ?c }} ORDER BY ?b"); - QueryExecutionTree qet2 = qp.createExecutionTree(pq2); - ASSERT_EQ(qet2.getCacheKey(), - "ORDER BY on columns:asc(1) \nOPTIONAL_JOIN\nSCAN PSO with P = " - "\"\" join-columns: [0]\n|X|\nSCAN PSO with P = \"\" " - "join-columns: [0]"); + "OPTIONAL { ?a ?c }} ORDER BY ?b", + h::OrderBy({{Variable{"?b"}, ::OrderBy::AscOrDesc::Asc}}, + h::OptionalJoin(scan("?a", "", "?b"), + scan("?a", "", "?c")))); } TEST(QueryPlanner, SimpleTripleOneVariable) { diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index c33f9a2fa..37c48f312 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -16,6 +16,7 @@ #include "engine/Join.h" #include "engine/MultiColumnJoin.h" #include "engine/NeutralElementOperation.h" +#include "engine/OptionalJoin.h" #include "engine/OrderBy.h" #include "engine/QueryExecutionTree.h" #include "engine/QueryPlanner.h" @@ -204,6 +205,8 @@ inline auto IndexScanFromStrings = inline auto MultiColumnJoin = MatchTypeAndUnorderedChildren<::MultiColumnJoin>; inline auto Join = MatchTypeAndUnorderedChildren<::Join>; +constexpr auto OptionalJoin = MatchTypeAndOrderedChildren<::OptionalJoin>; + // Return a matcher that matches a query execution tree that consists of // multiple JOIN operations that join the `children`. The `INTERNAL SORT BY` // operations required for the joins are also ignored by this matcher. 
diff --git a/test/TransitivePathTest.cpp b/test/TransitivePathTest.cpp index 651faa944..e616ad2e2 100644 --- a/test/TransitivePathTest.cpp +++ b/test/TransitivePathTest.cpp @@ -247,14 +247,27 @@ TEST_P(TransitivePathTest, idToLeftBound) { TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0); TransitivePathSide right(std::nullopt, 1, V(4), 1); - auto T = makePathLeftBound( - std::move(sub), {Variable{"?start"}, Variable{"?target"}}, - std::move(leftOpTable), 1, {Variable{"?x"}, Variable{"?start"}}, - std::move(left), std::move(right), 0, std::numeric_limits::max()); - - auto resultTable = T->computeResultOnlyForTesting(); - ASSERT_THAT(resultTable.idTable(), - ::testing::UnorderedElementsAreArray(expected)); + { + auto T = makePathLeftBound( + sub.clone(), {Variable{"?start"}, Variable{"?target"}}, + leftOpTable.clone(), 1, {Variable{"?x"}, Variable{"?start"}}, left, + right, 0, std::numeric_limits::max()); + + auto resultTable = T->computeResultOnlyForTesting(); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); + } + { + auto T = makePathLeftBound( + std::move(sub), {Variable{"?start"}, Variable{"?target"}}, + std::move(leftOpTable), 1, {std::nullopt, Variable{"?start"}}, + std::move(left), std::move(right), 0, + std::numeric_limits::max()); + + auto resultTable = T->computeResultOnlyForTesting(); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); + } } TEST_P(TransitivePathTest, idToRightBound) { @@ -280,14 +293,27 @@ TEST_P(TransitivePathTest, idToRightBound) { TransitivePathSide left(std::nullopt, 0, V(0), 0); TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1); - auto T = makePathRightBound( - std::move(sub), {Variable{"?start"}, Variable{"?target"}}, - std::move(rightOpTable), 0, {Variable{"?target"}, Variable{"?x"}}, - std::move(left), std::move(right), 0, std::numeric_limits::max()); - - auto resultTable = T->computeResultOnlyForTesting(); - 
ASSERT_THAT(resultTable.idTable(), - ::testing::UnorderedElementsAreArray(expected)); + { + auto T = makePathRightBound( + sub.clone(), {Variable{"?start"}, Variable{"?target"}}, + rightOpTable.clone(), 0, {Variable{"?target"}, Variable{"?x"}}, left, + right, 0, std::numeric_limits::max()); + + auto resultTable = T->computeResultOnlyForTesting(); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); + } + { + auto T = makePathRightBound( + std::move(sub), {Variable{"?start"}, Variable{"?target"}}, + std::move(rightOpTable), 0, {Variable{"?target"}, std::nullopt}, + std::move(left), std::move(right), 0, + std::numeric_limits::max()); + + auto resultTable = T->computeResultOnlyForTesting(); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); + } } TEST_P(TransitivePathTest, leftBoundToVar) { @@ -318,14 +344,17 @@ TEST_P(TransitivePathTest, leftBoundToVar) { TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0); TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1); - auto T = makePathLeftBound( - std::move(sub), {Variable{"?start"}, Variable{"?target"}}, - std::move(leftOpTable), 1, {Variable{"?x"}, Variable{"?start"}}, - std::move(left), std::move(right), 0, std::numeric_limits::max()); - - auto resultTable = T->computeResultOnlyForTesting(); - ASSERT_THAT(resultTable.idTable(), - ::testing::UnorderedElementsAreArray(expected)); + { + auto T = makePathLeftBound( + std::move(sub), {Variable{"?start"}, Variable{"?target"}}, + std::move(leftOpTable), 1, {Variable{"?x"}, Variable{"?start"}}, + std::move(left), std::move(right), 0, + std::numeric_limits::max()); + + auto resultTable = T->computeResultOnlyForTesting(); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); + } } TEST_P(TransitivePathTest, rightBoundToVar) { diff --git a/test/TriplesViewTest.cpp b/test/TriplesViewTest.cpp index 4c9fd5382..956100991 100644 --- 
a/test/TriplesViewTest.cpp +++ b/test/TriplesViewTest.cpp @@ -30,7 +30,7 @@ struct DummyPermutation { } cppcoro::generator lazyScan( - CompressedRelationReader::ScanSpecification scanSpec, + ScanSpecification scanSpec, std::optional> blocks, std::span, const auto&) const { AD_CORRECTNESS_CHECK(!blocks.has_value()); diff --git a/test/ValuesForTestingTest.cpp b/test/ValuesForTestingTest.cpp index 95ce76ce4..8c4b86d01 100644 --- a/test/ValuesForTestingTest.cpp +++ b/test/ValuesForTestingTest.cpp @@ -25,9 +25,9 @@ TEST(ValuesForTesting, valuesForTesting) { ASSERT_EQ(v.getMultiplicity(0), 42.0); ASSERT_EQ(v.getMultiplicity(1), 84.0); - ASSERT_THAT( - v.getCacheKey(), - ::testing::StartsWith("Values for testing with 2 columns. V:3 V:12")); + ASSERT_THAT(v.getCacheKey(), + ::testing::StartsWith( + "Values for testing with 2 columns and 3 rows. V:3 V:12")); ASSERT_THAT(v.getCacheKey(), ::testing::EndsWith("Supports limit: 0")); ASSERT_EQ(v.getDescriptor(), "explicit values for testing"); ASSERT_TRUE(v.resultSortedOn().empty()); @@ -36,3 +36,13 @@ TEST(ValuesForTesting, valuesForTesting) { auto result = v.getResult(); ASSERT_EQ(result->idTable(), table); } + +// ____________________________________________________________________________ +TEST(ValuesForTesting, cornerCasesCacheKey) { + auto empty = makeIdTableFromVector({}); + auto neutral = makeIdTableFromVector({{}}); + + ValuesForTesting vEmpty{getQec(), empty.clone(), {}}; + ValuesForTesting vNeutral{getQec(), neutral.clone(), {}}; + EXPECT_NE(vEmpty.getCacheKey(), vNeutral.getCacheKey()); +} diff --git a/test/engine/CartesianProductJoinTest.cpp b/test/engine/CartesianProductJoinTest.cpp index f07bb1121..cd05e31b9 100644 --- a/test/engine/CartesianProductJoinTest.cpp +++ b/test/engine/CartesianProductJoinTest.cpp @@ -90,9 +90,21 @@ void testCartesianProduct(VectorTable expected, std::vector inputs, TEST(CartesianProductJoin, computeResult) { // Simple base cases. 
VectorTable v{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}; + VectorTable empty{}; testCartesianProduct(v, {v}); - testCartesianProduct({}, {{}, v, {}}); - testCartesianProduct({}, {{}, {}}); + testCartesianProduct(empty, {empty, v, empty}); + testCartesianProduct(empty, {empty, empty}); + + // Test cases where some or all of the inputs are Neutral elements (1 row, + // zero columns) that are automatically filtered out by the + // `CartesianProductJoin`. + VectorTable neutral{{}}; + testCartesianProduct(neutral, {neutral}); + testCartesianProduct(v, {v, neutral}); + testCartesianProduct(v, {neutral, v, neutral}); + testCartesianProduct(neutral, {neutral, neutral, neutral}); + testCartesianProduct(empty, {neutral, empty, neutral}); + testCartesianProduct(empty, {neutral, empty, v}); // Fails because of an empty input. EXPECT_ANY_THROW(makeJoin({})); diff --git a/test/engine/IndexScanTest.cpp b/test/engine/IndexScanTest.cpp index 2920a2cd8..abc5babd3 100644 --- a/test/engine/IndexScanTest.cpp +++ b/test/engine/IndexScanTest.cpp @@ -380,3 +380,67 @@ TEST(IndexScan, additionalColumn) { {{getId(""), getId(""), I(0), I(NO_PATTERN)}}); EXPECT_THAT(res.idTable(), ::testing::ElementsAreArray(exp)); } + +TEST(IndexScan, getResultSizeOfScan) { + auto qec = getQec("

, . ."); + auto getId = makeGetId(qec->getIndex()); + [[maybe_unused]] auto x = getId(""); + [[maybe_unused]] auto p = getId("

"); + [[maybe_unused]] auto s1 = getId(""); + [[maybe_unused]] auto s2 = getId(""); + [[maybe_unused]] auto p2 = getId(""); + using V = Variable; + using I = TripleComponent::Iri; + + { + SparqlTripleSimple scanTriple{V{"?x"}, V("?y"), V{"?z"}}; + IndexScan scan{qec, Permutation::Enum::PSO, scanTriple}; + // Note: this currently also contains the (internal) triple for the + // `ql:has-pattern` relation of ``. + EXPECT_EQ(scan.getSizeEstimate(), 4); + } + { + SparqlTripleSimple scanTriple{V{"?x"}, I::fromIriref("

"), V{"?y"}}; + IndexScan scan{qec, Permutation::Enum::PSO, scanTriple}; + EXPECT_EQ(scan.getSizeEstimate(), 2); + } + { + SparqlTripleSimple scanTriple{I::fromIriref(""), I::fromIriref("

"), + V{"?y"}}; + IndexScan scan{qec, Permutation::Enum::PSO, scanTriple}; + EXPECT_EQ(scan.getSizeEstimate(), 2); + } + { + SparqlTripleSimple scanTriple{V("?x"), I::fromIriref("

"), + I::fromIriref("")}; + IndexScan scan{qec, Permutation::Enum::POS, scanTriple}; + EXPECT_EQ(scan.getSizeEstimate(), 1); + } + // 0 variables + { + SparqlTripleSimple scanTriple{I::fromIriref(""), I::fromIriref("

"), + I::fromIriref("")}; + IndexScan scan{qec, Permutation::Enum::POS, scanTriple}; + EXPECT_EQ(scan.getSizeEstimate(), 1); + EXPECT_ANY_THROW(scan.getMultiplicity(0)); + auto res = scan.computeResultOnlyForTesting(); + ASSERT_EQ(res.idTable().numRows(), 1); + ASSERT_EQ(res.idTable().numColumns(), 0); + } + { + SparqlTripleSimple scanTriple{I::fromIriref(""), I::fromIriref("

"), + I::fromIriref("")}; + IndexScan scan{qec, Permutation::Enum::POS, scanTriple}; + EXPECT_EQ(scan.getSizeEstimate(), 0); + } + { + SparqlTripleSimple scanTriple{I::fromIriref(""), I::fromIriref("

"), + I::fromIriref("

")}; + IndexScan scan{qec, Permutation::Enum::POS, scanTriple}; + EXPECT_EQ(scan.getSizeEstimate(), 0); + EXPECT_ANY_THROW(scan.getMultiplicity(0)); + auto res = scan.computeResultOnlyForTesting(); + ASSERT_EQ(res.idTable().numRows(), 0); + ASSERT_EQ(res.idTable().numColumns(), 0); + } +} diff --git a/test/engine/ValuesForTesting.h b/test/engine/ValuesForTesting.h index dcbf130da..ac7e363a9 100644 --- a/test/engine/ValuesForTesting.h +++ b/test/engine/ValuesForTesting.h @@ -65,7 +65,8 @@ class ValuesForTesting : public Operation { // ___________________________________________________________________________ string getCacheKeyImpl() const override { std::stringstream str; - str << "Values for testing with " << table_.numColumns() << " columns. "; + str << "Values for testing with " << table_.numColumns() << " columns and " + << table_.numRows() << " rows. "; if (table_.numRows() > 1000) { str << ad_utility::FastRandomIntGenerator{}(); } else { diff --git a/test/index/CMakeLists.txt b/test/index/CMakeLists.txt index aa0d0950e..3651259d1 100644 --- a/test/index/CMakeLists.txt +++ b/test/index/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(vocabulary) addLinkAndDiscoverTest(PatternCreatorTest index) +addLinkAndDiscoverTestSerial(ScanSpecificationTest index) diff --git a/test/index/ScanSpecificationTest.cpp b/test/index/ScanSpecificationTest.cpp new file mode 100644 index 000000000..3c7df1375 --- /dev/null +++ b/test/index/ScanSpecificationTest.cpp @@ -0,0 +1,81 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Johannes Kalmbach + +#include + +#include "../util/GTestHelpers.h" +#include "../util/IndexTestHelpers.h" +#include "index/ScanSpecification.h" + +// _____________________________________________________________________________ +TEST(ScanSpecification, validate) { + Id i = Id::makeFromInt(42); + auto n = std::nullopt; + using S = ScanSpecification; + EXPECT_NO_THROW(S(i, i, i)); + EXPECT_NO_THROW(S(i, i, n)); + EXPECT_NO_THROW(S(i, n, n)); + EXPECT_NO_THROW(S(n, n, n)); + + EXPECT_ANY_THROW(S(n, i, i)); + EXPECT_ANY_THROW(S(n, n, i)); + EXPECT_ANY_THROW(S(n, i, n)); + EXPECT_ANY_THROW(S(i, n, i)); +} + +// _____________________________________________________________________________ +TEST(ScanSpecification, ScanSpecificationAsTripleComponent) { + Id i = Id::makeFromInt(42); + TripleComponent iTc{42}; + auto n = std::nullopt; + using S = ScanSpecification; + using STc = ScanSpecificationAsTripleComponent; + + EXPECT_ANY_THROW(STc(n, iTc, iTc)); + EXPECT_ANY_THROW(STc(n, n, iTc)); + EXPECT_ANY_THROW(STc(n, iTc, n)); + EXPECT_ANY_THROW(STc(iTc, n, iTc)); + + const auto& index = ad_utility::testing::getQec()->getIndex(); + auto toScanSpec = [&index](const STc& s) { + return s.toScanSpecification(index); + }; + + // Match that a `ScanSpecificationAsTripleComponent` has the expected number + // of columns, and yields the expected `ScanSpecification` when + // `toScanSpecification` is called on it. 
+ auto matchScanSpec = + [&toScanSpec](const std::optional spec, + size_t numColumns = 0) -> ::testing::Matcher { + auto innerMatcher = [&toScanSpec, &spec] { + return ::testing::ResultOf(toScanSpec, ::testing::Eq(spec)); + }; + if (!spec.has_value()) { + return innerMatcher(); + } else { + return ::testing::AllOf( + innerMatcher(), + AD_PROPERTY(STc, numColumns, ::testing::Eq(numColumns))); + } + }; + EXPECT_THAT(STc(iTc, iTc, iTc), matchScanSpec(S(i, i, i), 0)); + EXPECT_THAT(STc(iTc, iTc, n), matchScanSpec(S(i, i, n), 1)); + EXPECT_THAT(STc(iTc, n, n), matchScanSpec(S(i, n, n), 2)); + EXPECT_THAT(STc(n, n, n), matchScanSpec(S(n, n, n), 3)); + + // Test the resolution of vocab entries. + auto getId = ad_utility::testing::makeGetId(index); + auto x = getId(""); + TripleComponent xIri = TripleComponent::Iri::fromIriref(""); + + EXPECT_THAT(STc(xIri, xIri, xIri), matchScanSpec(S(x, x, x), 0)); + + // For an entry that is not in the vocabulary, the complete result of + // `toScanSpecification` is `nullopt`. 
+ TripleComponent notInVocab = + TripleComponent::Iri::fromIriref(""); + EXPECT_THAT(STc(notInVocab, xIri, xIri), matchScanSpec(std::nullopt)); + EXPECT_THAT(STc(xIri, notInVocab, xIri), matchScanSpec(std::nullopt)); + EXPECT_THAT(STc(xIri, xIri, notInVocab), matchScanSpec(std::nullopt)); +} diff --git a/test/util/IndexTestHelpers.cpp b/test/util/IndexTestHelpers.cpp index db4c89c22..9d208f70c 100644 --- a/test/util/IndexTestHelpers.cpp +++ b/test/util/IndexTestHelpers.cpp @@ -60,8 +60,9 @@ void checkConsistencyBetweenPatternPredicateAndAdditionalColumn( auto hasPatternId = qlever::specialIds.at(HAS_PATTERN_PREDICATE); auto checkSingleElement = [&cancellationDummy, &hasPatternId]( const Index& index, size_t patternIdx, Id id) { - auto scanResultHasPattern = index.scan( - hasPatternId, id, Permutation::Enum::PSO, {}, cancellationDummy); + auto scanResultHasPattern = + index.scan({hasPatternId, id, std::nullopt}, Permutation::Enum::PSO, {}, + cancellationDummy); // Each ID has at most one pattern, it can have none if it doesn't // appear as a subject in the knowledge graph. AD_CORRECTNESS_CHECK(scanResultHasPattern.numRows() <= 1); @@ -79,7 +80,7 @@ void checkConsistencyBetweenPatternPredicateAndAdditionalColumn( auto cancellationDummy = std::make_shared>(); auto scanResult = index.scan( - col0Id, std::nullopt, permutation, + {col0Id, std::nullopt, std::nullopt}, permutation, std::array{ColumnIndex{ADDITIONAL_COLUMN_INDEX_SUBJECT_PATTERN}, ColumnIndex{ADDITIONAL_COLUMN_INDEX_OBJECT_PATTERN}}, cancellationDummy);