From 39e63b44de44f5bddd600cb3910cbc63648f565e Mon Sep 17 00:00:00 2001 From: Hannes Baumann <116301375+realHannes@users.noreply.github.com> Date: Mon, 28 Oct 2024 09:07:28 +0100 Subject: [PATCH 1/5] Prefiltering on `CompressedBlockMetadata` using binary search (#1503) Implement efficient relational filters (`<`, `<=`, `==`, `!=`, `>=`, `>`) on a sorted vector of `CompressedBlockMetadata`. These can be used to efficiently filter out blocks for which we can guarantee that they don't contain a single element for which the relational filter returns true. In the future, these prefilters will be applied when such a filter is applied directly on an `IndexScan`. For example (simplified; in reality blocks contain triples), if a block contains elements in the range `[3, 7]` (which can be deduced from the metadata) and the filter condition is `<= 2`, we can discard the whole block by looking only at its metadata. On the other hand, if (considering the same block) the filter condition is `== 4`, then the block cannot be filtered out, because we cannot know in advance whether the full block contains the element `4`. 
--- src/global/ValueIdComparators.h | 30 +- src/index/CMakeLists.txt | 1 + src/index/CompressedBlockPrefiltering.cpp | 291 +++++++++++ src/index/CompressedBlockPrefiltering.h | 163 +++++++ src/index/CompressedRelation.h | 10 + test/CMakeLists.txt | 2 + test/CompressedBlockPrefilteringTest.cpp | 567 ++++++++++++++++++++++ test/SparqlExpressionTestHelpers.h | 39 +- test/util/IdTestHelpers.h | 4 + 9 files changed, 1078 insertions(+), 29 deletions(-) create mode 100644 src/index/CompressedBlockPrefiltering.cpp create mode 100644 src/index/CompressedBlockPrefiltering.h create mode 100644 test/CompressedBlockPrefilteringTest.cpp diff --git a/src/global/ValueIdComparators.h b/src/global/ValueIdComparators.h index f5ae8dea9a..0b23621c77 100644 --- a/src/global/ValueIdComparators.h +++ b/src/global/ValueIdComparators.h @@ -349,9 +349,12 @@ inline std::vector> getRangesForIndexTypes( // Helper function: Sort the non-overlapping ranges in `input` by the first // element, remove the empty ranges, and merge directly adjacent ranges inline auto simplifyRanges = - [](std::vector> input) { - // Eliminate empty ranges - std::erase_if(input, [](const auto& p) { return p.first == p.second; }); + [](std::vector> input, + bool removeEmptyRanges = true) { + if (removeEmptyRanges) { + // Eliminate empty ranges + std::erase_if(input, [](const auto& p) { return p.first == p.second; }); + } std::sort(input.begin(), input.end()); if (input.empty()) { return input; @@ -378,9 +381,13 @@ inline auto simplifyRanges = // 2. The condition x `comparison` value is fulfilled, where value is the value // of `valueId`. // 3. The datatype of x and `valueId` are compatible. +// +// When setting the flag argument `removeEmptyRanges` to false, empty ranges +// [`begin`, `end`] where `begin` is equal to `end` will not be discarded. 
template inline std::vector> getRangesForId( - RandomIt begin, RandomIt end, ValueId valueId, Comparison comparison) { + RandomIt begin, RandomIt end, ValueId valueId, Comparison comparison, + bool removeEmptyRanges = true) { // For the evaluation of FILTERs, comparisons that involve undefined values // are always false. if (valueId.getDatatype() == Datatype::Undefined) { @@ -389,11 +396,15 @@ inline std::vector> getRangesForId( // This lambda enforces the invariants `non-empty` and `sorted`. switch (valueId.getDatatype()) { case Datatype::Double: - return detail::simplifyRanges(detail::getRangesForIntsAndDoubles( - begin, end, valueId.getDouble(), comparison)); + return detail::simplifyRanges( + detail::getRangesForIntsAndDoubles(begin, end, valueId.getDouble(), + comparison), + removeEmptyRanges); case Datatype::Int: - return detail::simplifyRanges(detail::getRangesForIntsAndDoubles( - begin, end, valueId.getInt(), comparison)); + return detail::simplifyRanges( + detail::getRangesForIntsAndDoubles(begin, end, valueId.getInt(), + comparison), + removeEmptyRanges); case Datatype::Undefined: case Datatype::VocabIndex: case Datatype::LocalVocabIndex: @@ -405,7 +416,8 @@ inline std::vector> getRangesForId( case Datatype::BlankNodeIndex: // For `Date` the trivial comparison via bits is also correct. 
return detail::simplifyRanges( - detail::getRangesForIndexTypes(begin, end, valueId, comparison)); + detail::getRangesForIndexTypes(begin, end, valueId, comparison), + removeEmptyRanges); } AD_FAIL(); } diff --git a/src/index/CMakeLists.txt b/src/index/CMakeLists.txt index 1fc8773721..9b46467bc6 100644 --- a/src/index/CMakeLists.txt +++ b/src/index/CMakeLists.txt @@ -6,5 +6,6 @@ add_library(index DocsDB.cpp FTSAlgorithms.cpp PrefixHeuristic.cpp CompressedRelation.cpp PatternCreator.cpp ScanSpecification.cpp + CompressedBlockPrefiltering.cpp DeltaTriples.cpp LocalVocabEntry.cpp) qlever_target_link_libraries(index util parser vocabulary ${STXXL_LIBRARIES}) diff --git a/src/index/CompressedBlockPrefiltering.cpp b/src/index/CompressedBlockPrefiltering.cpp new file mode 100644 index 0000000000..a2af112c77 --- /dev/null +++ b/src/index/CompressedBlockPrefiltering.cpp @@ -0,0 +1,291 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures +// Author: Hannes Baumann + +#include "index/CompressedBlockPrefiltering.h" + +#include "global/ValueIdComparators.h" + +namespace prefilterExpressions { + +// HELPER FUNCTIONS +//______________________________________________________________________________ +// Given a PermutedTriple retrieve the suitable Id w.r.t. a column (index). +static Id getIdFromColumnIndex(const BlockMetadata::PermutedTriple& triple, + size_t columnIndex) { + switch (columnIndex) { + case 0: + return triple.col0Id_; + case 1: + return triple.col1Id_; + case 2: + return triple.col2Id_; + default: + // columnIndex out of bounds + AD_FAIL(); + } +}; + +//______________________________________________________________________________ +// Extract the Ids from the given `PermutedTriple` in a tuple w.r.t. the +// position (column index) defined by `ignoreIndex`. The ignored positions are +// filled with Ids `Id::min()`. `Id::min()` is guaranteed +// to be smaller than Ids of all other types. 
+static auto getMaskedTriple(const BlockMetadata::PermutedTriple& triple, + size_t ignoreIndex = 3) { + const Id& undefined = Id::min(); + switch (ignoreIndex) { + case 3: + return std::make_tuple(triple.col0Id_, triple.col1Id_, triple.col2Id_); + case 2: + return std::make_tuple(triple.col0Id_, triple.col1Id_, undefined); + case 1: + return std::make_tuple(triple.col0Id_, undefined, undefined); + case 0: + return std::make_tuple(undefined, undefined, undefined); + default: + // ignoreIndex out of bounds + AD_FAIL(); + } +}; + +//______________________________________________________________________________ +// Check required conditions. +static void checkEvalRequirements(const std::vector& input, + size_t evaluationColumn) { + const auto throwRuntimeError = [](const std::string& errorMessage) { + throw std::runtime_error(errorMessage); + }; + // Check for duplicates. + if (auto it = std::ranges::adjacent_find(input); it != input.end()) { + throwRuntimeError("The provided data blocks must be unique."); + } + // Helper to check for fully sorted blocks. Return `true` if `b1 < b2` is + // satisfied. + const auto checkOrder = [](const BlockMetadata& b1, const BlockMetadata& b2) { + if (b1.blockIndex_ < b2.blockIndex_) { + AD_CORRECTNESS_CHECK(getMaskedTriple(b1.lastTriple_) <= + getMaskedTriple(b2.lastTriple_)); + return true; + } + if (b1.blockIndex_ == b2.blockIndex_) { + // Given the previous check detects duplicates in the input, the + // correctness check here will never evaluate to true. + // => blockIndex_ assignment issue. + AD_CORRECTNESS_CHECK(b1 == b2); + } else { + AD_CORRECTNESS_CHECK(getMaskedTriple(b1.lastTriple_) > + getMaskedTriple(b2.firstTriple_)); + } + return false; + }; + if (!std::ranges::is_sorted(input, checkOrder)) { + throwRuntimeError("The blocks must be provided in sorted order."); + } + // Helper to check for column consistency. Returns `true` if the columns for + // `b1` and `b2` up to the evaluation are inconsistent. 
+ const auto checkColumnConsistency = + [evaluationColumn](const BlockMetadata& b1, const BlockMetadata& b2) { + const auto& b1Last = getMaskedTriple(b1.lastTriple_, evaluationColumn); + const auto& b2First = + getMaskedTriple(b2.firstTriple_, evaluationColumn); + return getMaskedTriple(b1.firstTriple_, evaluationColumn) != b1Last || + b1Last != b2First || + b2First != getMaskedTriple(b2.lastTriple_, evaluationColumn); + }; + if (auto it = std::ranges::adjacent_find(input, checkColumnConsistency); + it != input.end()) { + throwRuntimeError( + "The values in the columns up to the evaluation column must be " + "consistent."); + } +}; + +//______________________________________________________________________________ +// Given two sorted `vector`s containing `BlockMetadata`, this function +// returns their merged `BlockMetadata` content in a `vector` which is free of +// duplicates and ordered. +static auto getSetUnion(const std::vector& blocks1, + const std::vector& blocks2) { + std::vector mergedVectors; + mergedVectors.reserve(blocks1.size() + blocks2.size()); + const auto blockLessThanBlock = [](const BlockMetadata& b1, + const BlockMetadata& b2) { + return b1.blockIndex_ < b2.blockIndex_; + }; + // Given that we have vectors with sorted (BlockMedata) values, we can + // use std::ranges::set_union. Thus the complexity is O(n + m). 
+ std::ranges::set_union(blocks1, blocks2, std::back_inserter(mergedVectors), + blockLessThanBlock); + mergedVectors.shrink_to_fit(); + return mergedVectors; +} + +// SECTION PREFILTER EXPRESSION (BASE CLASS) +//______________________________________________________________________________ +std::vector PrefilterExpression::evaluate( + const std::vector& input, size_t evaluationColumn) const { + checkEvalRequirements(input, evaluationColumn); + const auto& relevantBlocks = evaluateImpl(input, evaluationColumn); + checkEvalRequirements(relevantBlocks, evaluationColumn); + return relevantBlocks; +}; + +// SECTION RELATIONAL OPERATIONS +//______________________________________________________________________________ +template +std::unique_ptr +RelationalExpression::logicalComplement() const { + using enum CompOp; + switch (Comparison) { + case LT: + // Complement X < Y: X >= Y + return std::make_unique(referenceId_); + case LE: + // Complement X <= Y: X > Y + return std::make_unique(referenceId_); + case EQ: + // Complement X == Y: X != Y + return std::make_unique(referenceId_); + case NE: + // Complement X != Y: X == Y + return std::make_unique(referenceId_); + case GE: + // Complement X >= Y: X < Y + return std::make_unique(referenceId_); + case GT: + // Complement X > Y: X <= Y + return std::make_unique(referenceId_); + default: + AD_FAIL(); + } +}; + +//______________________________________________________________________________ +template +std::vector RelationalExpression::evaluateImpl( + const std::vector& input, size_t evaluationColumn) const { + using namespace valueIdComparators; + std::vector valueIdsInput; + // For each BlockMetadata value in vector input, we have a respective Id for + // firstTriple and lastTriple + valueIdsInput.reserve(2 * input.size()); + std::vector mixedDatatypeBlocks; + + for (const auto& block : input) { + const auto firstId = + getIdFromColumnIndex(block.firstTriple_, evaluationColumn); + const auto secondId = + 
getIdFromColumnIndex(block.lastTriple_, evaluationColumn); + valueIdsInput.push_back(firstId); + valueIdsInput.push_back(secondId); + + if (firstId.getDatatype() != secondId.getDatatype()) { + mixedDatatypeBlocks.push_back(block); + } + } + + // Use getRangesForId (from valueIdComparators) to extract the ranges + // containing the relevant ValueIds. + // For pre-filtering with CompOp::EQ, we have to consider empty ranges. + // Reason: The referenceId_ could be contained within the bounds formed by + // the IDs of firstTriple_ and lastTriple_ (set false flag to keep + // empty ranges). + auto relevantIdRanges = + Comparison != CompOp::EQ + ? getRangesForId(valueIdsInput.begin(), valueIdsInput.end(), + referenceId_, Comparison) + : getRangesForId(valueIdsInput.begin(), valueIdsInput.end(), + referenceId_, Comparison, false); + + // The vector for relevant BlockMetadata values which contain ValueIds + // defined as relevant by relevantIdRanges. + std::vector relevantBlocks; + // Reserve memory, input.size() is upper bound. + relevantBlocks.reserve(input.size()); + + // Given the relevant Id ranges, retrieve the corresponding relevant + // BlockMetadata values from vector input and add them to the relevantBlocks + // vector. + auto endValueIdsInput = valueIdsInput.end(); + for (const auto& [firstId, secondId] : relevantIdRanges) { + // Ensures that index is within bounds of index vector. + auto secondIdAdjusted = + secondId < endValueIdsInput ? secondId + 1 : secondId; + relevantBlocks.insert( + relevantBlocks.end(), + input.begin() + std::distance(valueIdsInput.begin(), firstId) / 2, + // Round up, for Ids contained within the bounding Ids of firstTriple + // and lastTriple we have to include the respective metadata block + // (that block is partially relevant). 
+ input.begin() + + std::distance(valueIdsInput.begin(), secondIdAdjusted) / 2); + } + relevantBlocks.shrink_to_fit(); + // Merge mixedDatatypeBlocks into relevantBlocks while maintaining order and + // avoiding duplicates. + return getSetUnion(relevantBlocks, mixedDatatypeBlocks); +}; + +// SECTION LOGICAL OPERATIONS +//______________________________________________________________________________ +template +std::unique_ptr +LogicalExpression::logicalComplement() const { + using enum LogicalOperators; + // Source De-Morgan's laws: De Morgan's laws, Wikipedia. + // Reference: https://en.wikipedia.org/wiki/De_Morgan%27s_laws + if constexpr (Operation == OR) { + // De Morgan's law: not (A or B) = (not A) and (not B) + return std::make_unique(child1_->logicalComplement(), + child2_->logicalComplement()); + } else { + static_assert(Operation == AND); + // De Morgan's law: not (A and B) = (not A) or (not B) + return std::make_unique(child1_->logicalComplement(), + child2_->logicalComplement()); + } +}; + +//______________________________________________________________________________ +std::unique_ptr NotExpression::logicalComplement() const { + // Logically we complement (negate) a NOT here => NOT cancels out. + // Therefore, we can simply return the child of the respective NOT + // expression after undoing its previous complementation. 
+ return child_->logicalComplement(); +}; + +//______________________________________________________________________________ +template +std::vector LogicalExpression::evaluateImpl( + const std::vector& input, size_t evaluationColumn) const { + using enum LogicalOperators; + if constexpr (Operation == AND) { + auto resultChild1 = child1_->evaluate(input, evaluationColumn); + return child2_->evaluate(resultChild1, evaluationColumn); + } else { + static_assert(Operation == OR); + return getSetUnion(child1_->evaluate(input, evaluationColumn), + child2_->evaluate(input, evaluationColumn)); + } +}; + +//______________________________________________________________________________ +std::vector NotExpression::evaluateImpl( + const std::vector& input, size_t evaluationColumn) const { + return child_->evaluate(input, evaluationColumn); +}; + +//______________________________________________________________________________ +// Necessary instantiation of template specializations +template class RelationalExpression; +template class RelationalExpression; +template class RelationalExpression; +template class RelationalExpression; +template class RelationalExpression; +template class RelationalExpression; + +template class LogicalExpression; +template class LogicalExpression; + +} // namespace prefilterExpressions diff --git a/src/index/CompressedBlockPrefiltering.h b/src/index/CompressedBlockPrefiltering.h new file mode 100644 index 0000000000..28db52c0a7 --- /dev/null +++ b/src/index/CompressedBlockPrefiltering.h @@ -0,0 +1,163 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures +// Author: Hannes Baumann + +#pragma once + +#include +#include + +#include "global/Id.h" +#include "global/ValueIdComparators.h" +#include "index/CompressedRelation.h" + +namespace prefilterExpressions { + +// The compressed block metadata (see `CompressedRelation.h`) that we use to +// filter out the non-relevant blocks by checking their content of +// 
`firstTriple_` and `lastTriple_` (`PermutedTriple`) +using BlockMetadata = CompressedBlockMetadata; + +//______________________________________________________________________________ +/* +`PrefilterExpression` represents a base class for the following sub-classes that +implement the block-filtering procedure for the specific relational + logical +operations/expressions. + +Remark: We do not actually evaluate the respective SPARQL Expression. We only +pre-filter w.r.t. blocks that contain relevant data for the actual evaluation of +those expressions to make the evaluation procedure more efficient. + +The block-filtering is applied with the following operations: +Relational Expressions - `<=`, `>=`, `<`, `>`, `==` and `!=`. +Logical Operations - `and`, `or` and `not`. +*/ + +class PrefilterExpression { + public: + virtual ~PrefilterExpression() = default; + + // Needed for implementing the `NotExpression`. This method is required, + // because we logically operate on `BlockMetadata` values which define ranges + // given the `ValueIds` from last and first triple. + // E.g. the `BlockMetadata` that defines the range [IntId(0),... IntId(5)], + // should be considered relevant for the expression `?x >= IntId(3)`, but also + // for expression `!(?x >= IntId(3))`. Thus we can't retrieve the negation by + // simply taking the complementing set of `BlockMetadata`, instead we + // retrieve it by directly negating/complementing the child expression itself. + // Every derived class can return its respective logical complement + // (negation) when being called on `logicalComplement()`. E.g. for a call + // w.r.t. `RelationalExpression<LT>(IntId(5))` (< 5), the returned logical + // complement is `RelationalExpression<GE>(IntId(5))` (>= 5). On a + // `LogicalExpression` (`AND` or `OR`), we respectively apply De-Morgan's law + // and return the resulting `LogicalExpression`. 
In case of the + // `NotExpression`, we just return its child expression given that two + // negations (complementations) cancel out. For a more concise explanation + // take a look at the actual implementation for derived classes. + virtual std::unique_ptr logicalComplement() const = 0; + + // The respective metadata to the blocks is expected to be provided in + // a sorted order (w.r.t. the relevant column). + std::vector evaluate(const std::vector& input, + size_t evaluationColumn) const; + + private: + virtual std::vector evaluateImpl( + const std::vector& input, + size_t evaluationColumn) const = 0; +}; + +//______________________________________________________________________________ +// For the actual comparison of the relevant ValueIds from the metadata triples, +// we use the implementations from ValueIdComparators. +// +// Supported comparisons are: +// - LessThan, LessEqual, Equal, NotEqual, GreaterEqual, GreaterThan +using CompOp = valueIdComparators::Comparison; + +//______________________________________________________________________________ +template +class RelationalExpression : public PrefilterExpression { + private: + // The ValueId on which we perform the relational comparison on. + ValueId referenceId_; + + public: + explicit RelationalExpression(const ValueId referenceId) + : referenceId_(referenceId) {} + + std::unique_ptr logicalComplement() const override; + + private: + std::vector evaluateImpl( + const std::vector& input, + size_t evaluationColumn) const override; +}; + +//______________________________________________________________________________ +// Helper struct for a compact class implementation regarding the logical +// operations `AND` and `OR`. `NOT` is implemented separately given that the +// expression is unary (single child expression). 
+enum struct LogicalOperators { AND, OR }; + +//______________________________________________________________________________ +template +class LogicalExpression : public PrefilterExpression { + private: + std::unique_ptr child1_; + std::unique_ptr child2_; + + public: + // AND and OR + explicit LogicalExpression(std::unique_ptr child1, + std::unique_ptr child2) + : child1_(std::move(child1)), child2_(std::move(child2)) {} + + std::unique_ptr logicalComplement() const override; + + private: + std::vector evaluateImpl( + const std::vector& input, + size_t evaluationColumn) const override; +}; + +//______________________________________________________________________________ +class NotExpression : public PrefilterExpression { + private: + std::unique_ptr child_; + + public: + explicit NotExpression(std::unique_ptr child) + : child_(child->logicalComplement()) {} + + std::unique_ptr logicalComplement() const override; + + private: + std::vector evaluateImpl( + const std::vector& input, + size_t evaluationColumn) const override; +}; + +//______________________________________________________________________________ +// Definition of the RelationalExpression for LT, LE, EQ, NE, GE and GT. 
+using LessThanExpression = prefilterExpressions::RelationalExpression< + prefilterExpressions::CompOp::LT>; +using LessEqualExpression = prefilterExpressions::RelationalExpression< + prefilterExpressions::CompOp::LE>; +using EqualExpression = prefilterExpressions::RelationalExpression< + prefilterExpressions::CompOp::EQ>; +using NotEqualExpression = prefilterExpressions::RelationalExpression< + prefilterExpressions::CompOp::NE>; +using GreaterEqualExpression = prefilterExpressions::RelationalExpression< + prefilterExpressions::CompOp::GE>; +using GreaterThanExpression = prefilterExpressions::RelationalExpression< + prefilterExpressions::CompOp::GT>; + +//______________________________________________________________________________ +// Definition of the LogicalExpression for AND and OR. +using AndExpression = prefilterExpressions::LogicalExpression< + prefilterExpressions::LogicalOperators::AND>; +using OrExpression = prefilterExpressions::LogicalExpression< + prefilterExpressions::LogicalOperators::OR>; + +} // namespace prefilterExpressions diff --git a/src/index/CompressedRelation.h b/src/index/CompressedRelation.h index 55e9354cce..00cdb10dae 100644 --- a/src/index/CompressedRelation.h +++ b/src/index/CompressedRelation.h @@ -108,6 +108,16 @@ struct CompressedBlockMetadataNoBlockIndex { // Two of these are equal if all members are equal. bool operator==(const CompressedBlockMetadataNoBlockIndex&) const = default; + + // Format BlockMetadata contents for debugging. + friend std::ostream& operator<<( + std::ostream& str, + const CompressedBlockMetadataNoBlockIndex& blockMetadata) { + str << "#BlockMetadata\n(first) " << blockMetadata.firstTriple_ << "(last) " + << blockMetadata.lastTriple_ << "num. rows: " << blockMetadata.numRows_ + << "." << std::endl; + return str; + } }; // The same as the above struct, but this block additionally knows its index. 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index eaf4b037de..cd64d5c0b6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -291,6 +291,8 @@ addLinkAndDiscoverTest(AlgorithmTest) addLinkAndDiscoverTestSerial(CompressedRelationsTest index) +addLinkAndDiscoverTestSerial(CompressedBlockPrefilteringTest index) + addLinkAndDiscoverTest(ExceptionTest) addLinkAndDiscoverTestSerial(RandomExpressionTest index) diff --git a/test/CompressedBlockPrefilteringTest.cpp b/test/CompressedBlockPrefilteringTest.cpp new file mode 100644 index 0000000000..864906fbce --- /dev/null +++ b/test/CompressedBlockPrefilteringTest.cpp @@ -0,0 +1,567 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures +// Author: Hannes Baumann + +#include + +#include + +#include "./SparqlExpressionTestHelpers.h" +#include "index/CompressedBlockPrefiltering.h" +#include "util/DateYearDuration.h" +#include "util/GTestHelpers.h" +#include "util/IdTestHelpers.h" + +namespace { +using ad_utility::testing::BlankNodeId; +using ad_utility::testing::BoolId; +using ad_utility::testing::DateId; +using ad_utility::testing::DoubleId; +using ad_utility::testing::IntId; +using ad_utility::testing::UndefId; +using ad_utility::testing::VocabId; +constexpr auto DateParser = &DateYearOrDuration::parseXsdDate; +using namespace prefilterExpressions; + +namespace makeFilterExpr { +//______________________________________________________________________________ +// Make RelationalExpression +template +auto relExpr = + [](const ValueId& referenceId) -> std::unique_ptr { + return std::make_unique(referenceId); +}; + +// Make AndExpression or OrExpression +template +auto logExpr = [](std::unique_ptr child1, + std::unique_ptr child2) + -> std::unique_ptr { + return std::make_unique(std::move(child1), std::move(child2)); +}; + +// Make NotExpression +auto notExpr = [](std::unique_ptr child) + -> std::unique_ptr { + return std::make_unique(std::move(child)); +}; + +} // namespace 
makeFilterExpr +//______________________________________________________________________________ +// instantiation relational +// LESS THAN (`<`) +constexpr auto lt = makeFilterExpr::relExpr; +// LESS EQUAL (`<=`) +constexpr auto le = makeFilterExpr::relExpr; +// GREATER EQUAL (`>=`) +constexpr auto ge = makeFilterExpr::relExpr; +// GREATER THAN (`>`) +constexpr auto gt = makeFilterExpr::relExpr; +// EQUAL (`==`) +constexpr auto eq = makeFilterExpr::relExpr; +// NOT EQUAL (`!=`) +constexpr auto neq = makeFilterExpr::relExpr; +// AND (`&&`) +constexpr auto andExpr = makeFilterExpr::logExpr; +// OR (`||`) +constexpr auto orExpr = makeFilterExpr::logExpr; +// NOT (`!`) +constexpr auto notExpr = makeFilterExpr::notExpr; + +//______________________________________________________________________________ +/* +Our pre-filtering procedure expects blocks that are in correct (ascending) +order w.r.t. their contained ValueIds given the first and last triple. + +The correct order of the ValueIds is dependent on their type and underlying +representation. + +Short overview on the ascending order logic for the underlying values: +Order ValueIds for (signed) integer values - [0... max, -max... -1] +Order ValueIds for (signed) doubles values - [0.0... max, -0.0... -max] +Order ValueIds for Vocab and LocalVocab values given the vocabulary with +indices (up to N) - [VocabId(0), .... VocabId(N)] + +COLUMN 1 and COLUMN 2 contain fixed values, this is a necessary condition +that is also checked during the pre-filtering procedure. The actual evaluation +column (we filter w.r.t. values of COLUMN 0) contains mixed types. 
+*/ +//______________________________________________________________________________ +class TestPrefilterExprOnBlockMetadata : public ::testing::Test { + public: + const Id referenceDate1 = DateId(DateParser, "1999-11-11"); + const Id referenceDate2 = DateId(DateParser, "2005-02-27"); + const Id undef = Id::makeUndefined(); + const Id falseId = BoolId(false); + const Id trueId = BoolId(true); + const Id referenceDateEqual = DateId(DateParser, "2000-01-01"); + + // Fixed column ValueIds + const Id VocabId10 = VocabId(10); + const Id DoubleId33 = DoubleId(33); + const Id GraphId = VocabId(0); + + // Define BlockMetadata + const BlockMetadata b1 = makeBlock(undef, undef); + const BlockMetadata b2 = makeBlock(undef, falseId); + const BlockMetadata b3 = makeBlock(falseId, falseId); + const BlockMetadata b4 = makeBlock(trueId, IntId(0)); + const BlockMetadata b5 = makeBlock(IntId(0), IntId(0)); + const BlockMetadata b6 = makeBlock(IntId(0), IntId(5)); + const BlockMetadata b7 = makeBlock(IntId(5), IntId(6)); + const BlockMetadata b8 = makeBlock(IntId(8), IntId(9)); + const BlockMetadata b9 = makeBlock(IntId(-10), IntId(-8)); + const BlockMetadata b10 = makeBlock(IntId(-4), IntId(-4)); + const BlockMetadata b11 = makeBlock(IntId(-4), DoubleId(2)); + const BlockMetadata b12 = makeBlock(DoubleId(2), DoubleId(2)); + const BlockMetadata b13 = makeBlock(DoubleId(4), DoubleId(4)); + const BlockMetadata b14 = makeBlock(DoubleId(4), DoubleId(10)); + const BlockMetadata b15 = makeBlock(DoubleId(-1.23), DoubleId(-6.25)); + const BlockMetadata b16 = makeBlock(DoubleId(-6.25), DoubleId(-6.25)); + const BlockMetadata b17 = makeBlock(DoubleId(-10.42), DoubleId(-12.00)); + const BlockMetadata b18 = makeBlock(DoubleId(-14.01), VocabId(0)); + const BlockMetadata b19 = makeBlock(VocabId(10), VocabId(14)); + const BlockMetadata b20 = makeBlock(VocabId(14), VocabId(14)); + const BlockMetadata b21 = makeBlock(VocabId(14), VocabId(17)); + const BlockMetadata b22 = + makeBlock(VocabId(20), 
DateId(DateParser, "1999-12-12")); + const BlockMetadata b23 = makeBlock(DateId(DateParser, "2000-01-01"), + DateId(DateParser, "2000-01-01")); + const BlockMetadata b24 = + makeBlock(DateId(DateParser, "2024-10-08"), BlankNodeId(10)); + + // All blocks that contain mixed (ValueId) types over column 0 + const std::vector mixedBlocks = {b2, b4, b11, b18, b22, b24}; + + // Ordered and unique vector with BlockMetadata + const std::vector blocks = { + b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, + b13, b14, b15, b16, b17, b18, b19, b20, b21, b22, b23, b24}; + + const std::vector blocksInvalidOrder1 = { + b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, + b13, b14, b15, b16, b17, b18, b19, b20, b21, b22, b24, b23}; + + const std::vector blocksInvalidOrder2 = { + b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, + b14, b10, b15, b16, b17, b18, b19, b20, b21, b22, b23, b24}; + + const std::vector blocksWithDuplicate1 = { + b1, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, + b13, b14, b15, b16, b17, b18, b19, b20, b21, b22, b23, b24}; + + const std::vector blocksWithDuplicate2 = { + b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, + b14, b15, b16, b17, b18, b19, b20, b21, b22, b23, b24, b24}; + + // Function to create BlockMetadata + const BlockMetadata makeBlock(const ValueId& firstId, const ValueId& lastId) { + assert(firstId <= lastId); + static size_t blockIdx = 0; + ++blockIdx; + return {{{}, + 0, + // COLUMN 0 | COLUMN 1 | COLUMN 2 + {firstId, VocabId10, DoubleId33, GraphId}, // firstTriple + {lastId, VocabId10, DoubleId33, GraphId}, // lastTriple + {}, + false}, + blockIdx}; + } + + // Check if expected error is thrown. + auto makeTestErrorCheck(std::unique_ptr expr, + const std::vector& input, + const std::string& expected, + size_t evaluationColumn = 0) { + AD_EXPECT_THROW_WITH_MESSAGE(expr->evaluate(input, evaluationColumn), + ::testing::HasSubstr(expected)); + } + + // Check that the provided expression prefilters the correct blocks. 
+ auto makeTest(std::unique_ptr expr, + std::vector&& expected) { + std::vector expectedAdjusted; + // This is for convenience, we automatically insert all mixed blocks + // which must be always returned. + std::ranges::set_union( + expected, mixedBlocks, std::back_inserter(expectedAdjusted), + [](const BlockMetadata& b1, const BlockMetadata& b2) { + return b1.blockIndex_ < b2.blockIndex_; + }); + ASSERT_EQ(expr->evaluate(blocks, 0), expectedAdjusted); + } +}; + +} // namespace + +//______________________________________________________________________________ +TEST_F(TestPrefilterExprOnBlockMetadata, testBlockFormatForDebugging) { + EXPECT_EQ( + "#BlockMetadata\n(first) Triple: I:0 V:10 D:33.000000 V:0\n(last) " + "Triple: I:0 V:10 D:33.000000 V:0\nnum. rows: 0.\n", + (std::stringstream() << b5).str()); + EXPECT_EQ( + "#BlockMetadata\n(first) Triple: I:-4 V:10 D:33.000000 V:0\n(last) " + "Triple: D:2.000000 V:10 D:33.000000 V:0\nnum. rows: 0.\n", + (std::stringstream() << b11).str()); + EXPECT_EQ( + "#BlockMetadata\n(first) Triple: V:14 V:10 D:33.000000 V:0\n(last) " + "Triple: V:17 V:10 D:33.000000 V:0\nnum. rows: 0.\n", + (std::stringstream() << b21).str()); +} + +// Test Relational Expressions +//______________________________________________________________________________ +// Test LessThanExpression +// Note: the `makeTest` function automatically adds the blocks with mixed +// datatypes to the expected result. 
+TEST_F(TestPrefilterExprOnBlockMetadata, testLessThanExpressions) { + makeTest(lt(IntId(5)), + {b5, b6, b9, b10, b11, b12, b13, b14, b15, b16, b17, b18}); + makeTest(lt(IntId(-12)), {b18}); + makeTest(lt(IntId(0)), {b9, b10, b15, b16, b17, b18}); + makeTest(lt(DoubleId(-14.01)), {b18}); + makeTest(lt(DoubleId(-11.22)), {b17, b18}); + makeTest(lt(DoubleId(-4.121)), {b9, b15, b16, b17, b18}); + makeTest(lt(VocabId(0)), {b18}); + makeTest(lt(VocabId(12)), {b18, b19}); + makeTest(lt(VocabId(14)), {b18, b19}); + makeTest(lt(VocabId(16)), {b18, b19, b20, b21}); + makeTest(lt(IntId(100)), + {b5, b6, b7, b8, b9, b10, b12, b13, b14, b15, b16, b17, b18}); + makeTest(lt(undef), {}); + makeTest(lt(falseId), {}); + makeTest(lt(trueId), {b2, b3}); + makeTest(lt(referenceDate1), {}); + makeTest(lt(referenceDateEqual), {b22}); + makeTest(lt(referenceDate2), {b22, b23, b24}); + makeTest(lt(BlankNodeId(11)), {b24}); +} + +//______________________________________________________________________________ +// Test LessEqualExpression +// Note: the `makeTest` function automatically adds the blocks with mixed +// datatypes to the expected result. 
+TEST_F(TestPrefilterExprOnBlockMetadata, testLessEqualExpressions) { + makeTest(le(IntId(0)), {b5, b6, b9, b10, b11, b15, b16, b17, b18}); + makeTest(le(IntId(-6)), {b9, b11, b15, b16, b17, b18}); + makeTest(le(IntId(7)), + {b5, b6, b7, b9, b10, b11, b12, b13, b14, b15, b16, b17, b18}); + makeTest(le(IntId(-9)), {b9, b11, b17, b18}); + makeTest(le(DoubleId(-9.131)), {b9, b11, b17, b18}); + makeTest(le(DoubleId(1.1415)), {b5, b6, b9, b10, b11, b15, b16, b17, b18}); + makeTest(le(DoubleId(3.1415)), + {b5, b6, b9, b10, b11, b12, b15, b16, b17, b18}); + makeTest(le(DoubleId(-11.99999999999999)), {b17, b18}); + makeTest(le(DoubleId(-14.03)), {b18}); + makeTest(le(VocabId(0)), {b18}); + makeTest(le(VocabId(11)), {b18, b19}); + makeTest(le(VocabId(14)), {b18, b19, b20, b21}); + makeTest(le(undef), {}); + makeTest(le(falseId), {b2, b3}); + makeTest(le(trueId), {b2, b3, b4}); + makeTest(le(referenceDateEqual), {b22, b23}); + makeTest(le(BlankNodeId(11)), {b24}); +} + +//______________________________________________________________________________ +// Test GreaterThanExpression +// Note: the `makeTest` function automatically adds the blocks with mixed +// datatypes to the expected result. 
+TEST_F(TestPrefilterExprOnBlockMetadata, testGreaterThanExpression) { + makeTest(gt(DoubleId(5.5375)), {b7, b8, b11, b14, b18}); + makeTest(gt(DoubleId(9.9994)), {b14}); + makeTest(gt(IntId(-5)), {b5, b6, b7, b8, b10, b11, b12, b13, b14, b15}); + makeTest(gt(DoubleId(-5.5375)), + {b5, b6, b7, b8, b10, b11, b12, b13, b14, b15}); + makeTest(gt(DoubleId(-6.2499999)), + {b5, b6, b7, b8, b10, b11, b12, b13, b14, b15}); + makeTest(gt(IntId(1)), {b6, b7, b8, b11, b12, b13, b14}); + makeTest(gt(IntId(3)), {b6, b7, b8, b11, b13, b14}); + makeTest(gt(IntId(4)), {b6, b7, b8, b11, b14}); + makeTest(gt(IntId(-4)), {b5, b6, b7, b8, b11, b12, b13, b14, b15}); + makeTest(gt(IntId(33)), {}); + makeTest(gt(VocabId(22)), {b22}); + makeTest(gt(VocabId(14)), {b21, b22}); + makeTest(gt(VocabId(12)), {b19, b20, b21, b22}); + makeTest(gt(undef), {}); + makeTest(gt(falseId), {b4}); + makeTest(gt(trueId), {}); + makeTest(gt(referenceDateEqual), {b24}); + makeTest(gt(referenceDate1), {b22, b23, b24}); + makeTest(gt(referenceDate2), {b24}); +} + +//______________________________________________________________________________ +// Test GreaterEqualExpression +// Note: the `makeTest` function automatically adds the blocks with mixed +// datatypes to the expected result. 
+TEST_F(TestPrefilterExprOnBlockMetadata, testGreaterEqualExpression) { + makeTest(ge(IntId(0)), {b5, b6, b7, b8, b11, b12, b13, b14}); + makeTest(ge(IntId(8)), {b8, b11, b14}); + makeTest(ge(DoubleId(9.98)), {b11, b14}); + makeTest(ge(IntId(-3)), {b5, b6, b7, b8, b11, b12, b13, b14, b15}); + makeTest(ge(IntId(-10)), + {b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15, b16}); + makeTest(ge(DoubleId(-3.1415)), {b5, b6, b7, b8, b11, b12, b13, b14, b15}); + makeTest(ge(DoubleId(-4.000001)), + {b5, b6, b7, b8, b10, b11, b12, b13, b14, b15}); + makeTest(ge(DoubleId(10.000)), {b11, b14}); + makeTest(ge(DoubleId(-15.22)), + {b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15, b16, b17, b18}); + makeTest(ge(DoubleId(7.999999)), {b8, b11, b14}); + makeTest(ge(DoubleId(10.0001)), {}); + makeTest(ge(VocabId(14)), {b18, b19, b20, b21, b22}); + makeTest(ge(VocabId(10)), {b18, b19, b20, b21, b22}); + makeTest(ge(VocabId(17)), {b18, b21, b22}); + makeTest(ge(undef), {}); + makeTest(ge(falseId), {b2, b3, b4}); + makeTest(ge(trueId), {b4}); + makeTest(ge(referenceDateEqual), {b23, b24}); +} + +//______________________________________________________________________________ +// Test EqualExpression +// Note: the `makeTest` function automatically adds the blocks with mixed +// datatypes to the expected result. 
+TEST_F(TestPrefilterExprOnBlockMetadata, testEqualExpression) { + makeTest(eq(IntId(0)), {b4, b5, b6, b11}); + makeTest(eq(IntId(5)), {b6, b7, b11, b14}); + makeTest(eq(IntId(22)), {}); + makeTest(eq(IntId(-10)), {b9, b11, b18}); + makeTest(eq(DoubleId(-6.25)), {b15, b16}); + makeTest(eq(IntId(-11)), {b17}); + makeTest(eq(DoubleId(-14.02)), {b18}); + makeTest(eq(DoubleId(-0.001)), {b11}); + makeTest(eq(DoubleId(0)), {b4, b5, b6, b11}); + makeTest(eq(IntId(2)), {b6, b11, b12}); + makeTest(eq(DoubleId(5.5)), {b7, b11, b14}); + makeTest(eq(DoubleId(1.5)), {b6, b11}); + makeTest(eq(VocabId(1)), {b18}); + makeTest(eq(VocabId(14)), {b18, b19, b20, b21}); + makeTest(eq(VocabId(11)), {b18, b19}); + makeTest(eq(VocabId(17)), {b18, b21}); + makeTest(eq(IntId(-4)), {b10, b11, b15}); + makeTest(eq(trueId), {b4}); + makeTest(eq(referenceDate1), {b22}); + makeTest(eq(referenceDateEqual), {b23}); + makeTest(eq(referenceDate2), {}); +} + +//______________________________________________________________________________ +// Test NotEqualExpression +// Note: the `makeTest` function automatically adds the blocks with mixed +// datatypes to the expected result. 
+TEST_F(TestPrefilterExprOnBlockMetadata, testNotEqualExpression) { + makeTest(neq(DoubleId(0.00)), + {b6, b7, b8, b9, b10, b11, b12, b13, b14, b15, b16, b17, b18}); + makeTest(neq(IntId(-4)), + {b5, b6, b7, b8, b9, b11, b12, b13, b14, b15, b16, b17, b18}); + makeTest(neq(DoubleId(0.001)), + {b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15, b16, b17, b18}); + makeTest(neq(IntId(2)), + {b5, b6, b7, b8, b9, b10, b11, b13, b14, b15, b16, b17, b18}); + makeTest(neq(DoubleId(-6.2500)), + {b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15, b17, b18}); + makeTest(neq(IntId(5)), + {b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15, b16, b17, b18}); + makeTest(neq(DoubleId(-101.23)), + {b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15, b16, b17, b18}); + makeTest(neq(VocabId(0)), {b19, b20, b21, b22}); + makeTest(neq(VocabId(7)), {b18, b19, b20, b21, b22}); + makeTest(neq(VocabId(14)), {b18, b19, b21, b22}); + makeTest(neq(VocabId(17)), {b18, b19, b20, b21, b22}); + makeTest(neq(undef), {}); + makeTest(neq(falseId), {b4}); + makeTest(neq(referenceDateEqual), {b22, b24}); + makeTest(neq(referenceDate1), {b22, b23, b24}); +} + +// Test Logical Expressions +//______________________________________________________________________________ +// Test AndExpression +// Note: the `makeTest` function automatically adds the blocks with mixed +// datatypes to the expected result. 
+TEST_F(TestPrefilterExprOnBlockMetadata, testAndExpression) { + makeTest(andExpr(ge(VocabId(10)), gt(VocabId(10))), {b19, b20, b21, b22}); + makeTest(andExpr(ge(VocabId(10)), ge(VocabId(10))), {b19, b20, b21, b22}); + makeTest(andExpr(ge(VocabId(12)), gt(VocabId(17))), {b22}); + makeTest(andExpr(ge(VocabId(12)), gt(VocabId(17))), {b22}); + makeTest(andExpr(ge(VocabId(10)), lt(VocabId(14))), {b19}); + makeTest(andExpr(le(VocabId(0)), lt(VocabId(10))), {b18}); + makeTest(andExpr(le(VocabId(17)), lt(VocabId(17))), {b18, b19, b20, b21}); + makeTest(andExpr(ge(DoubleId(-6.25)), lt(IntId(-7))), {}); + makeTest(andExpr(gt(DoubleId(-6.25)), lt(DoubleId(-6.25))), {}); + makeTest(andExpr(gt(IntId(0)), lt(IntId(0))), {}); + makeTest(andExpr(gt(IntId(-10)), lt(DoubleId(0))), {b9, b10, b11, b15, b16}); + makeTest(andExpr(gt(IntId(0)), eq(DoubleId(0))), {b6, b11}); + makeTest(andExpr(ge(IntId(0)), eq(IntId(0))), {b5, b6, b11}); + makeTest(andExpr(gt(DoubleId(-34.23)), ge(DoubleId(15.1))), {}); + makeTest(andExpr(lt(IntId(0)), le(DoubleId(-4))), + {b9, b10, b11, b15, b16, b17, b18}); + makeTest(andExpr(neq(IntId(0)), neq(IntId(-4))), + {b6, b7, b8, b9, b11, b12, b13, b14, b15, b16, b17, b18}); + makeTest(andExpr(neq(DoubleId(-3.141)), eq(DoubleId(4.5))), + {b6, b11, b14, b18}); + makeTest(andExpr(neq(DoubleId(-6.25)), lt(IntId(0))), + {b9, b10, b11, b15, b17, b18}); + makeTest(andExpr(le(DoubleId(-4)), ge(DoubleId(1))), {}); + makeTest(andExpr(le(DoubleId(-2)), eq(IntId(-3))), {b11, b15}); + makeTest(andExpr(andExpr(le(IntId(10)), gt(DoubleId(0))), eq(undef)), {}); + makeTest(andExpr(gt(referenceDate1), le(IntId(10))), {}); + makeTest(andExpr(gt(IntId(4)), andExpr(gt(DoubleId(8)), lt(IntId(10)))), + {b8, b14}); + makeTest(andExpr(eq(IntId(0)), andExpr(lt(IntId(-20)), gt(IntId(30)))), {}); + makeTest(andExpr(eq(IntId(0)), andExpr(le(IntId(0)), ge(IntId(0)))), + {b4, b5, b6, b11}); +} + +//______________________________________________________________________________ +// Test 
OrExpression +// Note: the `makeTest` function automatically adds the blocks with mixed +// datatypes to the expected result. +TEST_F(TestPrefilterExprOnBlockMetadata, testOrExpression) { + makeTest(orExpr(lt(VocabId(22)), le(VocabId(0))), {b18, b19, b20, b21}); + makeTest(orExpr(le(VocabId(0)), ge(VocabId(16))), {b18, b21, b22}); + makeTest(orExpr(gt(VocabId(17)), ge(VocabId(17))), {b21, b22}); + makeTest(orExpr(lt(DoubleId(-5.95)), eq(VocabId(14))), + {b9, b15, b16, b17, b18, b19, b20, b21}); + makeTest(orExpr(eq(DoubleId(0)), neq(VocabId(14))), + {b5, b6, b11, b18, b19, b21}); + makeTest(orExpr(eq(DoubleId(0)), eq(DoubleId(-6.25))), + {b5, b6, b11, b15, b16, b18}); + makeTest(orExpr(gt(undef), le(IntId(-6))), {b9, b15, b16, b17, b18}); + makeTest(orExpr(le(trueId), gt(referenceDate1)), {b2, b3, b4, b22, b23, b24}); + makeTest(orExpr(eq(IntId(0)), orExpr(lt(IntId(-10)), gt(IntId(8)))), + {b5, b6, b8, b11, b14, b17, b18}); + makeTest(orExpr(gt(referenceDate2), eq(trueId)), {b4}); + makeTest(orExpr(eq(VocabId(17)), orExpr(lt(VocabId(0)), gt(VocabId(20)))), + {b21, b22}); + makeTest(orExpr(eq(undef), gt(referenceDateEqual)), {b24}); + makeTest(orExpr(gt(IntId(8)), gt(DoubleId(22.1))), {b8, b14}); + makeTest(orExpr(lt(DoubleId(-8.25)), le(IntId(-10))), {b9, b17, b18}); + makeTest(orExpr(eq(IntId(0)), neq(DoubleId(0.25))), + {b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15, b16, b17, b18}); + makeTest(orExpr(gt(referenceDate1), orExpr(gt(trueId), eq(IntId(0)))), + {b4, b5, b6, b11, b22, b23, b24}); + makeTest(orExpr(gt(DoubleId(-6.25)), lt(DoubleId(-6.25))), + {b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15, b17, b18}); + makeTest(orExpr(orExpr(eq(IntId(0)), eq(IntId(5))), + orExpr(eq(DoubleId(-6.25)), lt(DoubleId(-12)))), + {b4, b5, b6, b7, b11, b14, b15, b16, b18}); + makeTest(orExpr(le(trueId), gt(falseId)), {b2, b3, b4}); + makeTest(orExpr(eq(VocabId(0)), eq(DoubleId(0.25))), {b6, b11, b18}); +} + 
+//______________________________________________________________________________ +// Test NotExpression +// Note: the `makeTest` function automatically adds the blocks with mixed +// datatypes to the expected result. +TEST_F(TestPrefilterExprOnBlockMetadata, testNotExpression) { + makeTest(notExpr(eq(VocabId(2))), {b18, b19, b20, b21, b22}); + makeTest(notExpr(eq(VocabId(14))), {b18, b19, b21, b22}); + makeTest(notExpr(neq(VocabId(14))), {b19, b20, b21}); + makeTest(notExpr(gt(VocabId(2))), {b18}); + makeTest(notExpr(lt(DoubleId(-14.01))), + {b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15, b16, b17, b18}); + makeTest(notExpr(ge(DoubleId(-14.01))), {b18}); + makeTest(notExpr(gt(DoubleId(-4.00))), {b9, b10, b11, b15, b16, b17, b18}); + makeTest(notExpr(ge(DoubleId(-24.4))), {b18}); + makeTest(notExpr(gt(referenceDate2)), {b22, b23}); + makeTest(notExpr(le(trueId)), {}); + makeTest(notExpr(le(IntId(0))), {b6, b7, b8, b11, b12, b13, b14}); + makeTest(notExpr(gt(undef)), {}); + makeTest(notExpr(eq(DoubleId(-6.25))), + {b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15, b17, b18}); + makeTest(notExpr(neq(DoubleId(4))), {b6, b11, b13, b14, b18}); + makeTest(notExpr(gt(DoubleId(0))), + {b4, b5, b6, b9, b10, b11, b15, b16, b17, b18}); + makeTest(notExpr(notExpr(eq(IntId(0)))), {b4, b5, b6, b11}); + makeTest(notExpr(notExpr(neq(DoubleId(-6.25)))), + {b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15, b17, b18}); + makeTest(notExpr(notExpr(lt(VocabId(10)))), {b18}); + makeTest(notExpr(notExpr(ge(DoubleId(3.99)))), {b6, b7, b8, b11, b13, b14}); + makeTest(notExpr(andExpr(le(IntId(0)), ge(IntId(0)))), + {b6, b7, b8, b9, b10, b11, b12, b13, b14, b15, b16, b17, b18}); + makeTest(notExpr(andExpr(neq(IntId(-10)), neq(DoubleId(-14.02)))), {b9, b18}); + makeTest( + notExpr(andExpr(gt(IntId(10)), ge(DoubleId(-6.25)))), + {b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15, b16, b17, b18}); + makeTest( + notExpr(andExpr(lt(DoubleId(-7)), ge(IntId(6)))), + {b4, b5, b6, b7, b8, 
b9, b10, b11, b12, b13, b14, b15, b16, b17, b18}); + makeTest(notExpr(orExpr(le(IntId(0)), ge(DoubleId(6)))), + {b6, b7, b11, b12, b13, b14}); + makeTest(notExpr(orExpr(ge(DoubleId(0)), gt(IntId(-10)))), + {b9, b11, b17, b18}); + makeTest(notExpr(orExpr(lt(VocabId(10)), gt(VocabId(10)))), {b19}); + makeTest(notExpr(orExpr(lt(DoubleId(-4)), gt(IntId(-4)))), {b10, b11, b15}); + makeTest(notExpr(orExpr(gt(IntId(-42)), ge(VocabId(0)))), {b11}); + makeTest(notExpr(orExpr(ge(VocabId(14)), gt(VocabId(15)))), {b18, b19}); +} + +//______________________________________________________________________________ +// Test PrefilterExpressions mixed +// Note: the `makeTest` function automatically adds the blocks with mixed +// datatypes to the expected result. +TEST_F(TestPrefilterExprOnBlockMetadata, testGeneralPrefilterExprCombinations) { + makeTest(andExpr(notExpr(gt(DoubleId(-14.01))), lt(IntId(0))), {b18}); + makeTest( + orExpr(andExpr(gt(DoubleId(8.25)), le(IntId(10))), eq(DoubleId(-6.25))), + {b8, b14, b15, b16}); + makeTest( + orExpr(andExpr(gt(DoubleId(8.25)), le(IntId(10))), lt(DoubleId(-6.25))), + {b8, b9, b14, b17, b18}); + makeTest(andExpr(orExpr(ge(trueId), le(falseId)), eq(referenceDate1)), {}); + makeTest(andExpr(eq(IntId(0)), orExpr(lt(IntId(-11)), le(IntId(-12)))), {}); + makeTest( + andExpr(eq(DoubleId(-4)), orExpr(gt(IntId(-4)), lt(DoubleId(-1.25)))), + {b10, b11, b15}); + makeTest(orExpr(notExpr(andExpr(lt(IntId(10)), gt(IntId(5)))), eq(IntId(0))), + {b4, b5, b6, b7, b9, b10, b11, b12, b13, b14, b15, b16, b17, b18}); + makeTest(andExpr(orExpr(gt(VocabId(16)), le(VocabId(5))), gt(DoubleId(7.25))), + {}); + makeTest(andExpr(lt(falseId), orExpr(lt(IntId(10)), gt(DoubleId(17.25)))), + {}); + makeTest( + orExpr(andExpr(gt(VocabId(16)), ge(VocabId(17))), gt(DoubleId(7.25))), + {b8, b14, b18, b21, b22}); + makeTest(orExpr(eq(trueId), andExpr(gt(referenceDate1), lt(referenceDate2))), + {b4, b22, b23}); +} + 
+//______________________________________________________________________________ +// Test that correct errors are thrown for invalid input (condition) +TEST_F(TestPrefilterExprOnBlockMetadata, testInputConditionCheck) { + makeTestErrorCheck(le(IntId(5)), blocksWithDuplicate1, + "The provided data blocks must be unique."); + makeTestErrorCheck(andExpr(gt(VocabId(10)), le(VocabId(20))), + blocksWithDuplicate2, + "The provided data blocks must be unique."); + makeTestErrorCheck(gt(DoubleId(2)), blocksInvalidOrder1, + "The blocks must be provided in sorted order."); + makeTestErrorCheck(andExpr(gt(VocabId(10)), le(VocabId(20))), + blocksInvalidOrder2, + "The blocks must be provided in sorted order."); + makeTestErrorCheck( + gt(DoubleId(2)), blocks, + "The values in the columns up to the evaluation column must be " + "consistent.", + 1); + makeTestErrorCheck( + gt(DoubleId(2)), blocks, + "The values in the columns up to the evaluation column must be " + "consistent.", + 2); +} + +//______________________________________________________________________________ +// Check for correctness given only one BlockMetadata value is provided. 
+TEST_F(TestPrefilterExprOnBlockMetadata, testWithOneBlockMetadataValue) { + auto expr = orExpr(eq(DoubleId(-6.25)), eq(IntId(0))); + std::vector input = {b16}; + EXPECT_EQ(expr->evaluate(input, 0), input); + EXPECT_EQ(expr->evaluate(input, 1), std::vector{}); + EXPECT_EQ(expr->evaluate(input, 2), std::vector{}); +} diff --git a/test/SparqlExpressionTestHelpers.h b/test/SparqlExpressionTestHelpers.h index 6ded863c41..55a6734b51 100644 --- a/test/SparqlExpressionTestHelpers.h +++ b/test/SparqlExpressionTestHelpers.h @@ -72,30 +72,29 @@ struct TestContext { zz = getId("\"zz\"@en"); blank = Id::makeFromBlankNodeIndex(BlankNodeIndex::make(0)); - constexpr auto lit = [](std::string_view s) { - return ad_utility::triple_component::LiteralOrIri::literalWithoutQuotes( - s); + auto addLocalLiteral = [this](std::string_view s) { + return Id::makeFromLocalVocabIndex( + this->localVocab.getIndexAndAddIfNotContained( + ad_utility::triple_component::LiteralOrIri::literalWithoutQuotes( + s))); }; - constexpr auto iri = [](const std::string& s) { - return ad_utility::triple_component::LiteralOrIri::iriref(s); + + auto addLocalIri = [this](const std::string& s) { + return Id::makeFromLocalVocabIndex( + this->localVocab.getIndexAndAddIfNotContained( + ad_utility::triple_component::LiteralOrIri::iriref(s))); }; - notInVocabA = Id::makeFromLocalVocabIndex( - localVocab.getIndexAndAddIfNotContained(lit("notInVocabA"))); - notInVocabB = Id::makeFromLocalVocabIndex( - localVocab.getIndexAndAddIfNotContained(lit("notInVocabB"))); - notInVocabC = Id::makeFromLocalVocabIndex( - localVocab.getIndexAndAddIfNotContained(iri(""))); - notInVocabD = Id::makeFromLocalVocabIndex( - localVocab.getIndexAndAddIfNotContained(iri(""))); - notInVocabAelpha = Id::makeFromLocalVocabIndex( - localVocab.getIndexAndAddIfNotContained(lit("notInVocabÄlpha"))); + notInVocabA = addLocalLiteral("notInVocabA"); + notInVocabB = addLocalLiteral("notInVocabB"); + notInVocabC = addLocalIri(""); + notInVocabD = 
addLocalIri(""); + notInVocabAelpha = addLocalLiteral("notInVocabÄlpha"); + notInVocabAelpha = addLocalLiteral("notInVocabÄlpha"); notInVocabIri = - Id::makeFromLocalVocabIndex(localVocab.getIndexAndAddIfNotContained( - iri(""))); - notInVocabIriLit = - Id::makeFromLocalVocabIndex(localVocab.getIndexAndAddIfNotContained( - lit("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"))); + addLocalIri(""); + notInVocabIriLit = addLocalLiteral( + "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"); // Set up the `table` that represents the previous partial query results. It // has five columns/variables: ?ints (only integers), ?doubles (only diff --git a/test/util/IdTestHelpers.h b/test/util/IdTestHelpers.h index a8818ba296..dd5dfbd715 100644 --- a/test/util/IdTestHelpers.h +++ b/test/util/IdTestHelpers.h @@ -25,6 +25,10 @@ inline auto VocabId = [](const auto& v) { return Id::makeFromVocabIndex(VocabIndex::make(v)); }; +inline auto BlankNodeId = [](const auto& v) { + return Id::makeFromBlankNodeIndex(BlankNodeIndex::make(v)); +}; + inline auto LocalVocabId = [](std::integral auto v) { static ad_utility::Synchronized localVocab; using namespace ad_utility::triple_component; From 07d64d72c6aa8d9e280b509565cc6d89d64e8501 Mon Sep 17 00:00:00 2001 From: "c.u." Date: Mon, 28 Oct 2024 14:52:37 +0100 Subject: [PATCH 2/5] Fix bug in WKT point literal regex (#1593) This fixes a bug in the regular expression that is used for extracting points from WKT literals. The bug caused WKT literals with more than one digit but without a decimal point in the latitude coordinate to be falsely rejected as GeoPoints. 
Fixes #1579 --- src/util/GeoSparqlHelpers.cpp | 2 +- test/GeoSparqlHelpersTest.cpp | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/util/GeoSparqlHelpers.cpp b/src/util/GeoSparqlHelpers.cpp index 3cc6364912..796205dae1 100644 --- a/src/util/GeoSparqlHelpers.cpp +++ b/src/util/GeoSparqlHelpers.cpp @@ -26,7 +26,7 @@ static constexpr auto wktPointRegex = ctll::fixed_string( "^\\s*[Pp][Oo][Ii][Nn][Tt]\\s*\\(\\s*" "(-?[0-9]+|-?[0-9]+\\.[0-9]+)" "\\s+" - "(-?[0-9+]|-?[0-9]+\\.[0-9]+)" + "(-?[0-9]+|-?[0-9]+\\.[0-9]+)" "\\s*\\)\\s*$"); // Parse a single WKT point and returns a pair of longitude and latitude. If diff --git a/test/GeoSparqlHelpersTest.cpp b/test/GeoSparqlHelpersTest.cpp index f457785d6c..4addc25e78 100644 --- a/test/GeoSparqlHelpersTest.cpp +++ b/test/GeoSparqlHelpersTest.cpp @@ -42,7 +42,12 @@ TEST(GeoSparqlHelpers, ParseWktPoint) { testParseWktPointCorrect("pOiNt(7 -0.0)", 7.0, 0.0); testParseWktPointCorrect(" pOiNt\t( 7 \r -0.0 \n ) ", 7.0, 0.0); testParseWktPointCorrect("POINT(2.2945 48.8585)", 2.2945, 48.8585); + testParseWktPointCorrect("POINT(2 48.8585)", 2.0, 48.8585); + testParseWktPointCorrect("POINT(20 48.8585)", 20.0, 48.8585); testParseWktPointCorrect("POINT(7.8529 47.9957)", 7.8529, 47.9957); + testParseWktPointCorrect("POINT(7.8529 47)", 7.8529, 47.0); + testParseWktPointCorrect("POINT(17 47)", 17.0, 47.0); + testParseWktPointCorrect("POINT(7 47)", 7.0, 47.0); // Invalid WKT points because of issues unrelated to the number format (one of // the quotes missing, one of the parentheses missing, it must be exactly two From 05a3f69a25faa87b3b072fbba6a228c8ed57a805 Mon Sep 17 00:00:00 2001 From: unex <63149623+UNEXENU@users.noreply.github.com> Date: Mon, 28 Oct 2024 20:37:39 +0100 Subject: [PATCH 3/5] Add cli-arg `service-max-value-rows` (#1589) Adds the commandline argument `service-max-value-rows` allowing to set the maximum number of rows passed to a `Service` operation (see #1341 and #1502 for details). 
The default value is set to 10,000 rows. --- src/ServerMain.cpp | 4 ++++ src/global/RuntimeParameters.h | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/ServerMain.cpp b/src/ServerMain.cpp index b109de05ae..4dc007dbf8 100644 --- a/src/ServerMain.cpp +++ b/src/ServerMain.cpp @@ -107,6 +107,10 @@ int main(int argc, char** argv) { optionFactory.getProgramOption<"default-query-timeout">(), "Set the default timeout in seconds after which queries are cancelled" "automatically."); + add("service-max-value-rows,S", + optionFactory.getProgramOption<"service-max-value-rows">(), + "The maximal number of result rows to be passed to a SERVICE operation " + "as a VALUES clause to optimize its computation."); po::variables_map optionsMap; try { diff --git a/src/global/RuntimeParameters.h b/src/global/RuntimeParameters.h index 9c39ffb28e..f84f9023da 100644 --- a/src/global/RuntimeParameters.h +++ b/src/global/RuntimeParameters.h @@ -48,7 +48,7 @@ inline auto& RuntimeParameters() { Bool<"use-binsearch-transitive-path">{true}, Bool<"group-by-hash-map-enabled">{false}, Bool<"group-by-disable-index-scan-optimizations">{false}, - SizeT<"service-max-value-rows">{100}, + SizeT<"service-max-value-rows">{10'000}, SizeT<"query-planning-budget">{1500}}; }(); return params; From 9510d463f1591bd2d36400dbc991777af60ae71d Mon Sep 17 00:00:00 2001 From: Julian <14220769+Qup42@users.noreply.github.com> Date: Wed, 30 Oct 2024 22:29:40 +0100 Subject: [PATCH 4/5] Refactor `Server.h/.cpp` to prepare it for SPARQL UPDATE (#1590) Split the very large function `Server::processQuery` into several smaller functions, such that they can be reused by the `processUpdate` function (which for the most part still has to be implemented). 
--- src/engine/Server.cpp | 401 ++++++++++++++++++++----------------- src/engine/Server.h | 55 +++-- src/util/http/MediaTypes.h | 3 + test/ServerTest.cpp | 80 ++++++-- 4 files changed, 328 insertions(+), 211 deletions(-) diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp index f22ab3b799..cb9bf96a34 100644 --- a/src/engine/Server.cpp +++ b/src/engine/Server.cpp @@ -344,18 +344,25 @@ Awaitable Server::process( checkParameterNotPresent("default-graph-uri"); checkParameterNotPresent("named-graph-uri"); - auto checkParameter = [&parameters](std::string_view key, - std::optional value, - bool accessAllowed = true) { - return Server::checkParameter(parameters, key, std::move(value), - accessAllowed); - }; + // We always want to call `Server::checkParameter` with the same first + // parameter. + auto checkParameter = + std::bind_front(&Server::checkParameter, std::cref(parameters)); // Check the access token. If an access token is provided and the check fails, // throw an exception and do not process any part of the query (even if the // processing had been allowed without access token). bool accessTokenOk = checkAccessToken(checkParameter("access-token", std::nullopt)); + auto requireValidAccessToken = [&accessTokenOk]( + const std::string& actionName) { + if (!accessTokenOk) { + throw std::runtime_error(absl::StrCat( + actionName, + " requires a valid access token. No valid access token is present.", + "Processing of request aborted.")); + } + }; // Process all URL parameters known to QLever. 
If there is more than one, // QLever processes all of them, but only returns the result from the last @@ -382,8 +389,8 @@ Awaitable Server::process( logCommand(cmd, "clear the cache (unpinned elements only)"); cache_.clearUnpinnedOnly(); response = createJsonResponse(composeCacheStatsJson(), request); - } else if (auto cmd = - checkParameter("cmd", "clear-cache-complete", accessTokenOk)) { + } else if (auto cmd = checkParameter("cmd", "clear-cache-complete")) { + requireValidAccessToken("clear-cache-complete"); logCommand(cmd, "clear cache completely (including unpinned elements)"); cache_.clearAll(); response = createJsonResponse(composeCacheStatsJson(), request); @@ -394,8 +401,8 @@ Awaitable Server::process( logCommand(cmd, "get index ID"); response = createOkResponse(index_.getIndexId(), request, MediaType::textPlain); - } else if (auto cmd = - checkParameter("cmd", "dump-active-queries", accessTokenOk)) { + } else if (auto cmd = checkParameter("cmd", "dump-active-queries")) { + requireValidAccessToken("dump-active-queries"); logCommand(cmd, "dump active queries"); nlohmann::json json; for (auto& [key, value] : queryRegistry_.getActiveQueries()) { @@ -417,8 +424,8 @@ Awaitable Server::process( } // Set description of KB index. - if (auto description = - checkParameter("index-description", std::nullopt, accessTokenOk)) { + if (auto description = checkParameter("index-description", std::nullopt)) { + requireValidAccessToken("index-description"); LOG(INFO) << "Setting index description to: \"" << description.value() << "\"" << std::endl; index_.setKbName(std::string{description.value()}); @@ -426,8 +433,8 @@ Awaitable Server::process( } // Set description of text index. 
- if (auto description = - checkParameter("text-description", std::nullopt, accessTokenOk)) { + if (auto description = checkParameter("text-description", std::nullopt)) { + requireValidAccessToken("text-description"); LOG(INFO) << "Setting text description to: \"" << description.value() << "\"" << std::endl; index_.setTextName(std::string{description.value()}); @@ -436,7 +443,8 @@ Awaitable Server::process( // Set one or several of the runtime parameters. for (auto key : RuntimeParameters().getKeys()) { - if (auto value = checkParameter(key, std::nullopt, accessTokenOk)) { + if (auto value = checkParameter(key, std::nullopt)) { + requireValidAccessToken("setting runtime parameters"); LOG(INFO) << "Setting runtime parameter \"" << key << "\"" << " to value \"" << value.value() << "\"" << std::endl; RuntimeParameters().set(key, std::string{value.value()}); @@ -451,19 +459,31 @@ Awaitable Server::process( if (auto timeLimit = co_await verifyUserSubmittedQueryTimeout( checkParameter("timeout", std::nullopt), accessTokenOk, request, send)) { - co_return co_await processQuery(parameters, query.query_, requestTimer, - std::move(request), send, - timeLimit.value()); + co_return co_await processQueryOrUpdate( + parameters, query.query_, requestTimer, std::move(request), send, + timeLimit.value()); } else { // If the optional is empty, this indicates an error response has been // sent to the client already. We can stop here. 
co_return; } }; - auto visitUpdate = [](const ad_utility::url_parser::sparqlOperation::Update&) + auto visitUpdate = + [&checkParameter, &accessTokenOk, &request, &send, &parameters, + &requestTimer, + this](const ad_utility::url_parser::sparqlOperation::Update& update) -> Awaitable { - throw std::runtime_error( - "SPARQL 1.1 Update is currently not supported by QLever."); + if (auto timeLimit = co_await verifyUserSubmittedQueryTimeout( + checkParameter("timeout", std::nullopt), accessTokenOk, request, + send)) { + co_return co_await processQueryOrUpdate( + parameters, update.update_, requestTimer, std::move(request), send, + timeLimit.value()); + } else { + // If the optional is empty, this indicates an error response has been + // sent to the client already. We can stop here. + co_return; + } }; auto visitNone = [&response, &send, &request]( @@ -492,6 +512,37 @@ Awaitable Server::process( parsedHttpRequest.operation_); } +// ____________________________________________________________________________ +std::pair Server::determineResultPinning( + const ad_utility::url_parser::ParamValueMap& params) { + const bool pinSubtrees = + checkParameter(params, "pinsubtrees", "true").has_value(); + const bool pinResult = + checkParameter(params, "pinresult", "true").has_value(); + return {pinSubtrees, pinResult}; +} +// ____________________________________________________________________________ +Awaitable Server::setupPlannedQuery( + const ad_utility::url_parser::ParamValueMap& params, + const std::string& operation, QueryExecutionContext& qec, + SharedCancellationHandle handle, TimeLimit timeLimit, + const ad_utility::Timer& requestTimer) { + auto queryDatasets = ad_utility::url_parser::parseDatasetClauses(params); + std::optional plannedQuery = + co_await parseAndPlan(operation, queryDatasets, qec, handle, timeLimit); + AD_CORRECTNESS_CHECK(plannedQuery.has_value()); + auto& qet = plannedQuery.value().queryExecutionTree_; + qet.isRoot() = true; // allow pinning of the final 
result + auto timeForQueryPlanning = requestTimer.msecs(); + auto& runtimeInfoWholeQuery = + qet.getRootOperation()->getRuntimeInfoWholeQuery(); + runtimeInfoWholeQuery.timeQueryPlanning = timeForQueryPlanning; + LOG(INFO) << "Query planning done in " << timeForQueryPlanning.count() + << " ms" << std::endl; + LOG(TRACE) << qet.getCacheKey() << std::endl; + + co_return std::move(plannedQuery.value()); +} // _____________________________________________________________________________ json Server::composeErrorResponseJson( const string& query, const std::string& errorMsg, @@ -512,11 +563,6 @@ json Server::composeErrorResponseJson( j["metadata"]["stopIndex"] = value.stopIndex_; j["metadata"]["line"] = value.line_; j["metadata"]["positionInLine"] = value.charPositionInLine_; - // The ANTLR parser may not see the whole query. (The reason is value mixing - // of the old and new parser.) To detect/work with this we also transmit - // what ANTLR saw as query. - // TODO remove once the whole query is parsed with ANTLR. 
- j["metadata"]["query"] = value.query_; } return j; @@ -574,7 +620,6 @@ class QueryAlreadyInUseError : public std::runtime_error { }; // _____________________________________________ - ad_utility::websocket::OwningQueryId Server::getQueryId( const ad_utility::httpUtils::HttpRequest auto& request, std::string_view query) { @@ -592,7 +637,6 @@ ad_utility::websocket::OwningQueryId Server::getQueryId( } // _____________________________________________________________________________ - auto Server::cancelAfterDeadline( std::weak_ptr> cancellationHandle, TimeLimit timeLimit) @@ -665,20 +709,143 @@ Awaitable Server::sendStreamableResponse( } // ____________________________________________________________________________ -boost::asio::awaitable Server::processQuery( +class NoSupportedMediatypeError : public std::runtime_error { + public: + explicit NoSupportedMediatypeError(std::string_view msg) + : std::runtime_error{std::string{msg}} {} +}; + +// ____________________________________________________________________________ +MediaType Server::determineMediaType( + const ad_utility::url_parser::ParamValueMap& params, + const ad_utility::httpUtils::HttpRequest auto& request) { + // The following code block determines the media type to be used for the + // result. The media type is either determined by the "Accept:" header of + // the request or by the URL parameter "action=..." (for TSV and CSV export, + // for QLever-historical reasons). 
+ std::optional mediaType = std::nullopt; + + // The explicit `action=..._export` parameter have precedence over the + // `Accept:...` header field + if (checkParameter(params, "action", "csv_export")) { + mediaType = MediaType::csv; + } else if (checkParameter(params, "action", "tsv_export")) { + mediaType = MediaType::tsv; + } else if (checkParameter(params, "action", "qlever_json_export")) { + mediaType = MediaType::qleverJson; + } else if (checkParameter(params, "action", "sparql_json_export")) { + mediaType = MediaType::sparqlJson; + } else if (checkParameter(params, "action", "turtle_export")) { + mediaType = MediaType::turtle; + } else if (checkParameter(params, "action", "binary_export")) { + mediaType = MediaType::octetStream; + } + + std::string_view acceptHeader = request.base()[http::field::accept]; + + if (!mediaType.has_value()) { + mediaType = ad_utility::getMediaTypeFromAcceptHeader(acceptHeader); + } + AD_CORRECTNESS_CHECK(mediaType.has_value()); + + return mediaType.value(); +} + +// ____________________________________________________________________________ +Awaitable Server::processQuery( const ad_utility::url_parser::ParamValueMap& params, const string& query, ad_utility::Timer& requestTimer, const ad_utility::httpUtils::HttpRequest auto& request, auto&& send, TimeLimit timeLimit) { - using namespace ad_utility::httpUtils; + MediaType mediaType = determineMediaType(params, request); + LOG(INFO) << "Requested media type of result is \"" + << ad_utility::toString(mediaType) << "\"" << std::endl; + + // TODO use std::optional::transform + std::optional maxSend = std::nullopt; + auto parameterValue = + ad_utility::url_parser::getParameterCheckAtMostOnce(params, "send"); + if (parameterValue.has_value()) { + maxSend = std::stoul(parameterValue.value()); + } + // Limit JSON requests by default + if (!maxSend.has_value() && (mediaType == MediaType::sparqlJson || + mediaType == MediaType::qleverJson)) { + maxSend = MAX_NOF_ROWS_IN_RESULT; + } - auto 
sendJson = - [&request, &send]( - const json& jsonString, - http::status responseStatus) -> boost::asio::awaitable { - auto response = createJsonResponse(jsonString, request, responseStatus); - co_return co_await send(std::move(response)); - }; + auto queryHub = queryHub_.lock(); + AD_CORRECTNESS_CHECK(queryHub); + ad_utility::websocket::MessageSender messageSender{getQueryId(request, query), + *queryHub}; + + auto [cancellationHandle, cancelTimeoutOnDestruction] = + setupCancellationHandle(messageSender.getQueryId(), timeLimit); + + // Do the query planning. This creates a `QueryExecutionTree`, which will + // then be used to process the query. + auto [pinSubtrees, pinResult] = determineResultPinning(params); + LOG(INFO) << "Processing the following SPARQL query:" + << (pinResult ? " [pin result]" : "") + << (pinSubtrees ? " [pin subresults]" : "") << "\n" + << query << std::endl; + QueryExecutionContext qec(index_, &cache_, allocator_, + sortPerformanceEstimator_, std::ref(messageSender), + pinSubtrees, pinResult); + auto plannedQuery = co_await setupPlannedQuery( + params, query, qec, cancellationHandle, timeLimit, requestTimer); + auto qet = plannedQuery.queryExecutionTree_; + + if (plannedQuery.parsedQuery_.hasUpdateClause()) { + throw std::runtime_error("Expected Query but received Update."); + } + + // Apply stricter limit for export if present + if (maxSend.has_value()) { + auto& pq = plannedQuery.parsedQuery_; + pq._limitOffset._limit = + std::min(maxSend.value(), pq._limitOffset.limitOrDefault()); + } + // Make sure we don't underflow here + AD_CORRECTNESS_CHECK(plannedQuery.parsedQuery_._limitOffset._offset >= + qet.getRootOperation()->getLimit()._offset); + // Don't apply offset twice, if the offset was not applied to the operation + // then the exporter can safely apply it during export. 
+ plannedQuery.parsedQuery_._limitOffset._offset -= + qet.getRootOperation()->getLimit()._offset; + + // This actually processes the query and sends the result in the requested + // format. + co_await sendStreamableResponse(request, send, mediaType, plannedQuery, qet, + requestTimer, cancellationHandle); + + // Print the runtime info. This needs to be done after the query + // was computed. + + // Log that we are done with the query and how long it took. + // + // NOTE: We need to explicitly stop the `requestTimer` here because in the + // sending code above, it is done only in some cases and not in others (in + // particular, not for TSV and CSV because for those, the result does not + // contain timing information). + // + // TODO Also log an identifier of the query. + LOG(INFO) << "Done processing query and sending result" + << ", total time was " << requestTimer.msecs().count() << " ms" + << std::endl; + LOG(DEBUG) << "Runtime Info:\n" + << qet.getRootOperation()->runtimeInfo().toString() << std::endl; + co_return; +} + +// ____________________________________________________________________________ +template +Awaitable Server::processQueryOrUpdate( + const ad_utility::url_parser::ParamValueMap& params, + const string& queryOrUpdate, ad_utility::Timer& requestTimer, + const ad_utility::httpUtils::HttpRequest auto& request, auto&& send, + TimeLimit timeLimit) { + using namespace ad_utility::httpUtils; http::status responseStatus = http::status::ok; @@ -692,141 +859,13 @@ boost::asio::awaitable Server::processQuery( // access to the runtimeInformation in the case of an error. 
std::optional plannedQuery; try { - auto containsParam = [&params](const std::string& param, - const std::string& expected) { - auto parameterValue = - ad_utility::url_parser::getParameterCheckAtMostOnce(params, param); - return parameterValue.has_value() && parameterValue.value() == expected; - }; - const bool pinSubtrees = containsParam("pinsubtrees", "true"); - const bool pinResult = containsParam("pinresult", "true"); - LOG(INFO) << "Processing the following SPARQL query:" - << (pinResult ? " [pin result]" : "") - << (pinSubtrees ? " [pin subresults]" : "") << "\n" - << query << std::endl; - - // The following code block determines the media type to be used for the - // result. The media type is either determined by the "Accept:" header of - // the request or by the URL parameter "action=..." (for TSV and CSV export, - // for QLever-historical reasons). - - std::optional mediaType = std::nullopt; - - // The explicit `action=..._export` parameter have precedence over the - // `Accept:...` header field - if (containsParam("action", "csv_export")) { - mediaType = MediaType::csv; - } else if (containsParam("action", "tsv_export")) { - mediaType = MediaType::tsv; - } else if (containsParam("action", "qlever_json_export")) { - mediaType = MediaType::qleverJson; - } else if (containsParam("action", "sparql_json_export")) { - mediaType = MediaType::sparqlJson; - } else if (containsParam("action", "turtle_export")) { - mediaType = MediaType::turtle; - } else if (containsParam("action", "binary_export")) { - mediaType = MediaType::octetStream; - } - - std::string_view acceptHeader = request.base()[http::field::accept]; - - if (!mediaType.has_value()) { - mediaType = ad_utility::getMediaTypeFromAcceptHeader(acceptHeader); - } - - // TODO use std::optional::transform - std::optional maxSend = std::nullopt; - auto parameterValue = - ad_utility::url_parser::getParameterCheckAtMostOnce(params, "send"); - if (parameterValue.has_value()) { - maxSend = 
std::stoul(parameterValue.value()); - } - // Limit JSON requests by default - if (!maxSend.has_value() && (mediaType == MediaType::sparqlJson || - mediaType == MediaType::qleverJson)) { - maxSend = MAX_NOF_ROWS_IN_RESULT; - } - - if (!mediaType.has_value()) { - co_return co_await send(createBadRequestResponse( - absl::StrCat("Did not find any supported media type " - "in this \'Accept:\' header field: \"", - acceptHeader, "\". ", - ad_utility::getErrorMessageForSupportedMediaTypes()), - request)); - } - AD_CONTRACT_CHECK(mediaType.has_value()); - LOG(INFO) << "Requested media type of result is \"" - << ad_utility::toString(mediaType.value()) << "\"" << std::endl; - - auto queryHub = queryHub_.lock(); - AD_CORRECTNESS_CHECK(queryHub); - ad_utility::websocket::MessageSender messageSender{ - getQueryId(request, query), *queryHub}; - // Do the query planning. This creates a `QueryExecutionTree`, which will - // then be used to process the query. - // - // NOTE: This should come after determining the media type. Otherwise, it - // might happen that the query planner runs for a while (recall that it many - // do index scans) and then we get an error message afterwards that a - // certain media type is not supported. 
- QueryExecutionContext qec(index_, &cache_, allocator_, - sortPerformanceEstimator_, - std::ref(messageSender), pinSubtrees, pinResult); - auto [cancellationHandle, cancelTimeoutOnDestruction] = - setupCancellationHandle(messageSender.getQueryId(), timeLimit); - - auto queryDatasets = ad_utility::url_parser::parseDatasetClauses(params); - plannedQuery = co_await parseAndPlan(query, queryDatasets, qec, - cancellationHandle, timeLimit); - AD_CORRECTNESS_CHECK(plannedQuery.has_value()); - auto& qet = plannedQuery.value().queryExecutionTree_; - qet.isRoot() = true; // allow pinning of the final result - auto timeForQueryPlanning = requestTimer.msecs(); - auto& runtimeInfoWholeQuery = - qet.getRootOperation()->getRuntimeInfoWholeQuery(); - runtimeInfoWholeQuery.timeQueryPlanning = timeForQueryPlanning; - LOG(INFO) << "Query planning done in " << timeForQueryPlanning.count() - << " ms" << std::endl; - LOG(TRACE) << qet.getCacheKey() << std::endl; - - // Apply stricter limit for export if present - if (maxSend.has_value()) { - auto& pq = plannedQuery.value().parsedQuery_; - pq._limitOffset._limit = - std::min(maxSend.value(), pq._limitOffset.limitOrDefault()); + if constexpr (type == OperationType::Query) { + co_await processQuery(params, queryOrUpdate, requestTimer, request, send, + timeLimit); + } else { + throw std::runtime_error( + "SPARQL 1.1 Update is currently not supported by QLever."); } - // Make sure we don't underflow here - AD_CORRECTNESS_CHECK( - plannedQuery.value().parsedQuery_._limitOffset._offset >= - qet.getRootOperation()->getLimit()._offset); - // Don't apply offset twice, if the offset was not applied to the operation - // then the exporter can safely apply it during export. - plannedQuery.value().parsedQuery_._limitOffset._offset -= - qet.getRootOperation()->getLimit()._offset; - - // This actually processes the query and sends the result in the requested - // format. 
- co_await sendStreamableResponse(request, send, mediaType.value(), - plannedQuery.value(), qet, requestTimer, - cancellationHandle); - - // Print the runtime info. This needs to be done after the query - // was computed. - - // Log that we are done with the query and how long it took. - // - // NOTE: We need to explicitly stop the `requestTimer` here because in the - // sending code above, it is done only in some cases and not in others (in - // particular, not for TSV and CSV because for those, the result does not - // contain timing information). - // - // TODO Also log an identifier of the query. - LOG(INFO) << "Done processing query and sending result" - << ", total time was " << requestTimer.msecs().count() << " ms" - << std::endl; - LOG(DEBUG) << "Runtime Info:\n" - << qet.getRootOperation()->runtimeInfo().toString() << std::endl; } catch (const ParseException& e) { responseStatus = http::status::bad_request; exceptionErrorMsg = e.errorMessageWithoutPositionalInfo(); @@ -834,6 +873,9 @@ boost::asio::awaitable Server::processQuery( } catch (const QueryAlreadyInUseError& e) { responseStatus = http::status::conflict; exceptionErrorMsg = e.what(); + } catch (const NoSupportedMediatypeError& e) { + responseStatus = http::status::bad_request; + exceptionErrorMsg = e.what(); } catch (const ad_utility::CancellationException& e) { // Send 429 status code to indicate that the time limit was reached // or the query was cancelled because of some other reason. 
@@ -863,14 +905,16 @@ boost::asio::awaitable Server::processQuery( } } auto errorResponseJson = composeErrorResponseJson( - query, exceptionErrorMsg.value(), requestTimer, metadata); + queryOrUpdate, exceptionErrorMsg.value(), requestTimer, metadata); if (plannedQuery.has_value()) { errorResponseJson["runtimeInformation"] = nlohmann::ordered_json(plannedQuery.value() .queryExecutionTree_.getRootOperation() ->runtimeInfo()); } - co_return co_await sendJson(errorResponseJson, responseStatus); + auto response = + createJsonResponse(errorResponseJson, request, responseStatus); + co_return co_await send(std::move(response)); } } @@ -964,11 +1008,9 @@ bool Server::checkAccessToken( } // _____________________________________________________________________________ - std::optional Server::checkParameter( const ad_utility::url_parser::ParamValueMap& parameters, - std::string_view key, std::optional value, - bool accessAllowed) { + std::string_view key, std::optional value) { auto param = ad_utility::url_parser::getParameterCheckAtMostOnce(parameters, key); if (!param.has_value()) { @@ -983,12 +1025,5 @@ std::optional Server::checkParameter( } else if (value != parameterValue) { return std::nullopt; } - // Now that we have the value, check if there is a problem with the access. - // If yes, we abort the query processing at this point. - if (!accessAllowed) { - throw std::runtime_error(absl::StrCat( - "Access to \"", key, "=", value.value(), "\" denied", - " (requires a valid access token)", ", processing of request aborted")); - } return value; } diff --git a/src/engine/Server.h b/src/engine/Server.h index c863e93771..7abaf3eaf5 100644 --- a/src/engine/Server.h +++ b/src/engine/Server.h @@ -5,6 +5,8 @@ #pragma once +#include + #include #include @@ -118,9 +120,14 @@ class Server { Awaitable process( const ad_utility::httpUtils::HttpRequest auto& request, auto&& send); - /// Handle a http request that asks for the processing of a query. 
+ // Indicates which type of operation is being processed. + enum class OperationType { Query, Update }; + + /// Handle a http request that asks for the processing of a query or update. + /// This is only a wrapper for `processQuery` and `processUpdate` which + /// does the error handling. /// \param params The key-value-pairs sent in the HTTP GET request. - /// \param query The query. + /// \param queryOrUpdate The query or update. /// \param requestTimer Timer that measure the total processing /// time of this request. /// \param request The HTTP request. @@ -128,12 +135,37 @@ class Server { /// `HttpServer.h` for documentation). /// \param timeLimit Duration in seconds after which the query will be /// cancelled. + template + Awaitable processQueryOrUpdate( + const ad_utility::url_parser::ParamValueMap& params, + const string& queryOrUpdate, ad_utility::Timer& requestTimer, + const ad_utility::httpUtils::HttpRequest auto& request, auto&& send, + TimeLimit timeLimit); + // Do the actual execution of a query. Awaitable processQuery( const ad_utility::url_parser::ParamValueMap& params, const string& query, ad_utility::Timer& requestTimer, const ad_utility::httpUtils::HttpRequest auto& request, auto&& send, TimeLimit timeLimit); + // Determine the media type to be used for the result. The media type is + // determined (in this order) by the current action (e.g., + // "action=csv_export") and by the "Accept" header of the request. + static ad_utility::MediaType determineMediaType( + const ad_utility::url_parser::ParamValueMap& params, + const ad_utility::httpUtils::HttpRequest auto& request); + FRIEND_TEST(ServerTest, determineMediaType); + // Determine whether the subtrees and the result should be pinned. + static std::pair determineResultPinning( + const ad_utility::url_parser::ParamValueMap& params); + FRIEND_TEST(ServerTest, determineResultPinning); + // Sets up the PlannedQuery s.t. it is ready to be executed. 
+ Awaitable setupPlannedQuery( + const ad_utility::url_parser::ParamValueMap& params, + const std::string& operation, QueryExecutionContext& qec, + SharedCancellationHandle handle, TimeLimit timeLimit, + const ad_utility::Timer& requestTimer); + static json composeErrorResponseJson( const string& query, const std::string& errorMsg, ad_utility::Timer& requestTimer, @@ -204,19 +236,16 @@ class Server { /// HTTP error response. bool checkAccessToken(std::optional accessToken) const; - /// Checks if a URL parameter exists in the request, if we are allowed to - /// access it and it matches the expected `value`. If yes, return the value, - /// otherwise return `std::nullopt`. If `value` is `std::nullopt`, only check - /// if the key exists. We need this because we have parameters like - /// "cmd=stats", where a fixed combination of the key and value determines the - /// kind of action, as well as parameters like "index-decription=...", where - /// the key determines the kind of action. If the key is not found, always - /// return `std::nullopt`. If `accessAllowed` is false and a value is present, - /// throw an exception. + /// Checks if a URL parameter exists in the request, and it matches the + /// expected `value`. If yes, return the value, otherwise return + /// `std::nullopt`. If `value` is `std::nullopt`, only check if the key + /// exists. We need this because we have parameters like "cmd=stats", where a + /// fixed combination of the key and value determines the kind of action, as + /// well as parameters like "index-decription=...", where the key determines + /// the kind of action. If the key is not found, always return `std::nullopt`. 
static std::optional checkParameter( const ad_utility::url_parser::ParamValueMap& parameters, - std::string_view key, std::optional value, - bool accessAllowed); + std::string_view key, std::optional value); FRIEND_TEST(ServerTest, checkParameter); /// Check if user-provided timeout is authorized with a valid access-token or diff --git a/src/util/http/MediaTypes.h b/src/util/http/MediaTypes.h index 43a809e33b..0b2634b8ce 100644 --- a/src/util/http/MediaTypes.h +++ b/src/util/http/MediaTypes.h @@ -112,6 +112,9 @@ std::vector parseAcceptHeader( /// media types that appear earlier in the `SUPPORTED_MEDIA_TYPES`. If none of /// the `SUPPORTED_MEDIA_TYPES` is accepted by `acceptHeader`, then /// `std::nullopt` is returned. +// TODO: This function never returns `nullopt`, because an exception is thrown +// if no supported media type is found. Update the docstring and make the return +// type just `MediaType`. std::optional getMediaTypeFromAcceptHeader( std::string_view acceptHeader); diff --git a/test/ServerTest.cpp b/test/ServerTest.cpp index 4f98de679d..5ed589fc42 100644 --- a/test/ServerTest.cpp +++ b/test/ServerTest.cpp @@ -146,29 +146,79 @@ TEST(ServerTest, checkParameter) { const ParamValueMap exampleParams = {{"foo", {"bar"}}, {"baz", {"qux", "quux"}}}; - EXPECT_THAT(Server::checkParameter(exampleParams, "doesNotExist", "", false), + EXPECT_THAT(Server::checkParameter(exampleParams, "doesNotExist", ""), testing::Eq(std::nullopt)); - EXPECT_THAT(Server::checkParameter(exampleParams, "foo", "baz", false), + EXPECT_THAT(Server::checkParameter(exampleParams, "foo", "baz"), testing::Eq(std::nullopt)); - AD_EXPECT_THROW_WITH_MESSAGE( - Server::checkParameter(exampleParams, "foo", "bar", false), - testing::StrEq("Access to \"foo=bar\" denied (requires a valid access " - "token), processing of request aborted")); - EXPECT_THAT(Server::checkParameter(exampleParams, "foo", "bar", true), + EXPECT_THAT(Server::checkParameter(exampleParams, "foo", "bar"), 
testing::Optional(testing::StrEq("bar"))); AD_EXPECT_THROW_WITH_MESSAGE( - Server::checkParameter(exampleParams, "baz", "qux", false), + Server::checkParameter(exampleParams, "baz", "qux"), testing::StrEq("Parameter \"baz\" must be given exactly once. Is: 2")); - EXPECT_THAT(Server::checkParameter(exampleParams, "foo", std::nullopt, true), + EXPECT_THAT(Server::checkParameter(exampleParams, "foo", std::nullopt), testing::Optional(testing::StrEq("bar"))); AD_EXPECT_THROW_WITH_MESSAGE( - Server::checkParameter(exampleParams, "foo", std::nullopt, false), - testing::StrEq("Access to \"foo=bar\" denied (requires a valid access " - "token), processing of request aborted")); - AD_EXPECT_THROW_WITH_MESSAGE( - Server::checkParameter(exampleParams, "baz", std::nullopt, false), + Server::checkParameter(exampleParams, "baz", std::nullopt), testing::StrEq("Parameter \"baz\" must be given exactly once. Is: 2")); AD_EXPECT_THROW_WITH_MESSAGE( - Server::checkParameter(exampleParams, "baz", std::nullopt, true), + Server::checkParameter(exampleParams, "baz", std::nullopt), testing::StrEq("Parameter \"baz\" must be given exactly once. 
Is: 2")); } + +TEST(ServerTest, determineResultPinning) { + EXPECT_THAT(Server::determineResultPinning( + {{"pinsubtrees", {"true"}}, {"pinresult", {"true"}}}), + testing::Pair(true, true)); + EXPECT_THAT(Server::determineResultPinning({{"pinresult", {"true"}}}), + testing::Pair(false, true)); + EXPECT_THAT(Server::determineResultPinning({{"pinsubtrees", {"otherValue"}}}), + testing::Pair(false, false)); +} + +TEST(ServerTest, determineMediaType) { + auto MakeRequest = [](const std::optional& accept, + const http::verb method = http::verb::get, + const std::string& target = "/", + const std::string& body = "") { + auto req = http::request{method, target, 11}; + if (accept.has_value()) { + req.set(http::field::accept, accept.value()); + } + req.body() = body; + req.prepare_payload(); + return req; + }; + auto checkActionMediatype = [&](const std::string& actionName, + ad_utility::MediaType expectedMediaType) { + EXPECT_THAT(Server::determineMediaType({{"action", {actionName}}}, + MakeRequest(std::nullopt)), + testing::Eq(expectedMediaType)); + }; + // The media type associated with the action overrides the `Accept` header. + EXPECT_THAT(Server::determineMediaType( + {{"action", {"csv_export"}}}, + MakeRequest("application/sparql-results+json")), + testing::Eq(ad_utility::MediaType::csv)); + checkActionMediatype("csv_export", ad_utility::MediaType::csv); + checkActionMediatype("tsv_export", ad_utility::MediaType::tsv); + checkActionMediatype("qlever_json_export", ad_utility::MediaType::qleverJson); + checkActionMediatype("sparql_json_export", ad_utility::MediaType::sparqlJson); + checkActionMediatype("turtle_export", ad_utility::MediaType::turtle); + checkActionMediatype("binary_export", ad_utility::MediaType::octetStream); + EXPECT_THAT(Server::determineMediaType( + {}, MakeRequest("application/sparql-results+json")), + testing::Eq(ad_utility::MediaType::sparqlJson)); + // No supported media type in the `Accept` header. 
(Contrary to its docstring + // and interface) `ad_utility::getMediaTypeFromAcceptHeader` throws an + // exception if no supported media type is found. + AD_EXPECT_THROW_WITH_MESSAGE( + Server::determineMediaType({}, MakeRequest("text/css")), + testing::HasSubstr("Not a single media type known to this parser was " + "detected in \"text/css\".")); + // No `Accept` header means that any content type is allowed. + EXPECT_THAT(Server::determineMediaType({}, MakeRequest(std::nullopt)), + testing::Eq(ad_utility::MediaType::sparqlJson)); + // No `Accept` header and an empty `Accept` header are not distinguished. + EXPECT_THAT(Server::determineMediaType({}, MakeRequest("")), + testing::Eq(ad_utility::MediaType::sparqlJson)); +} From 7bd2438dffda2c5b63cdc3e47b2649cf205d9d3e Mon Sep 17 00:00:00 2001 From: DuDaAG <152475267+DuDaAG@users.noreply.github.com> Date: Fri, 1 Nov 2024 12:51:09 +0100 Subject: [PATCH 5/5] Correct `STRLEN` function for UTF-8 multibyte characters (#1584) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `STRLEN` function now correctly counts the number of Unicode code points in the input (previously: the number of bytes in the UTF-8 serialization). So for example `STRLEN("Bäh")` now correctly returns `3`. --- src/engine/sparqlExpressions/StringExpressions.cpp | 7 +++++-- test/SparqlExpressionTest.cpp | 5 +++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 93ba543005..47ee97bd23 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -125,9 +125,12 @@ using IriOrUriExpression = NARY<1, FV>; // STRLEN [[maybe_unused]] auto strlen = [](std::string_view s) { - return Id::makeFromInt(static_cast(s.size())); + // Count UTF-8 characters by skipping continuation bytes (those starting with + // "10"). 
+ auto utf8Len = std::ranges::count_if( + s, [](char c) { return (static_cast(c) & 0xC0) != 0x80; }); + return Id::makeFromInt(utf8Len); }; - using StrlenExpression = StringExpressionImpl<1, LiftStringFunction>; diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index 285f990bc8..f55d8ed0d1 100644 --- a/test/SparqlExpressionTest.cpp +++ b/test/SparqlExpressionTest.cpp @@ -587,8 +587,9 @@ TEST(SparqlExpression, stringOperators) { // Test the different (optimized) behavior depending on whether the STR() // function was applied to the argument. - checkStrlen(IdOrLiteralOrIriVec{lit("one"), I(1), D(3.6), lit("")}, - Ids{I(3), U, U, I(0)}); + checkStrlen( + IdOrLiteralOrIriVec{lit("one"), lit("tschüss"), I(1), D(3.6), lit("")}, + Ids{I(3), I(7), U, U, I(0)}); checkStrlenWithStrChild( IdOrLiteralOrIriVec{lit("one"), I(1), D(3.6), lit("")}, Ids{I(3), I(1), I(3), I(0)});