Skip to content

Commit

Permalink
Add Redis query parser via PEGTL for search module (#2192)
Browse files Browse the repository at this point in the history
  • Loading branch information
PragmaTwice committed Mar 23, 2024
1 parent 54d0084 commit 03a332b
Show file tree
Hide file tree
Showing 8 changed files with 477 additions and 93 deletions.
56 changes: 56 additions & 0 deletions src/search/common_parser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/

#pragma once

#include <tao/pegtl.hpp>

namespace kqir {

namespace peg = tao::pegtl;

// Boolean keywords. Both are matched as exact, lowercase character sequences.
struct True : peg::string<'t', 'r', 'u', 'e'> {};
struct False : peg::string<'f', 'a', 'l', 's', 'e'> {};
// A boolean literal: `true` is tried first, then `false`.
struct Boolean : peg::sor<True, False> {};

// One or more decimal digits.
struct Digits : peg::plus<peg::digit> {};
// Exponent suffix: `e` or `E`, an optional sign, then at least one digit (e.g. `E+10`).
struct NumberExp : peg::seq<peg::one<'e', 'E'>, peg::opt<peg::one<'-', '+'>>, Digits> {};
// Fractional suffix: `.` followed by at least one digit.
struct NumberFrac : peg::seq<peg::one<'.'>, Digits> {};
// Decimal number: optional leading `-`, integer digits, then an optional fraction and an optional exponent.
// A leading `+` is not accepted, and digits are required before the `.` (so `.5` does not match).
struct Number : peg::seq<peg::opt<peg::one<'-'>>, Digits, peg::opt<NumberFrac>, peg::opt<NumberExp>> {};

// Tail of a Unicode escape, matched after the leading backslash has already been consumed by `Char`:
// `uXXXX` (exactly four hex digits), optionally continued by more `\uXXXX` groups, since the list
// separator is a backslash.
struct UnicodeXDigit : peg::list<peg::seq<peg::one<'u'>, peg::rep<4, peg::xdigit>>, peg::one<'\\'>> {};
// The single characters that may follow a backslash: `"`, `\`, `b`, `f`, `n`, `r`, `t`.
struct EscapedSingleChar : peg::one<'"', '\\', 'b', 'f', 'n', 'r', 't'> {};
// Everything that is valid directly after a backslash.
struct EscapedChar : peg::sor<EscapedSingleChar, UnicodeXDigit> {};
// A UTF-8 encoded code point in [U+0020, U+10FFFF]; code points below U+0020 are excluded.
struct UnescapedChar : peg::utf8::range<0x20, 0x10FFFF> {};
// One logical character of a string: once a backslash is consumed a valid escape MUST follow,
// otherwise a single unescaped code point is matched.
struct Char : peg::if_then_else<peg::one<'\\'>, EscapedChar, UnescapedChar> {};

// Characters of a string literal up to, but not including, the closing quote.
// `at<>` only peeks, so the `"` that terminates the loop is left in the input; an escaped `\"`
// does not terminate it because `Char` consumes the backslash and the quote together.
struct StringContent : peg::until<peg::at<peg::one<'"'>>, Char> {};
// Double-quoted string literal. The trailing `any` consumes the closing quote that
// `StringContent` stopped in front of.
struct String : peg::seq<peg::one<'"'>, StringContent, peg::any> {};

// Identifier as defined by PEGTL's built-in `identifier` rule; wrapped in its own type so that it
// can be selected by name in a parse tree.
struct Identifier : peg::identifier {};

// A single whitespace character: space, tab, line feed or carriage return.
struct WhiteSpace : peg::one<' ', '\t', '\n', '\r'> {};
// Matches rule `T` surrounded by any amount (including none) of whitespace on both sides.
template <typename T>
struct WSPad : peg::pad<T, WhiteSpace> {};

// Non-negative integer: one or more decimal digits, no sign.
struct UnsignedInteger : Digits {};
// Integer with an optional leading `-` (a leading `+` is not accepted).
struct Integer : peg::seq<peg::opt<peg::one<'-'>>, Digits> {};

} // namespace kqir
91 changes: 91 additions & 0 deletions src/search/common_transformer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/

#pragma once

#include <tao/pegtl/contrib/parse_tree.hpp>
#include <tao/pegtl/contrib/unescape.hpp>
#include <tao/pegtl/demangle.hpp>

#include "common_parser.h"
#include "status.h"

namespace kqir {

// Shared helpers for turning a PEGTL parse tree into IR nodes.
// Language-specific transformers derive from this struct to reuse them.
struct TreeTransformer {
  using TreeNode = std::unique_ptr<peg::parse_tree::node>;

  // Returns true if `node` was produced by grammar rule `T`
  // (compares the node's type name with the demangled name of `T`).
  template <typename T>
  static bool Is(const TreeNode& node) {
    return node->type == peg::demangle<T>();
  }

  // Returns true if `node` carries no rule type, which is how the tree root is recognised here.
  static bool IsRoot(const TreeNode& node) { return node->type.empty(); }

  // Decodes a quoted string literal into the text it denotes.
  //
  // `str` is the literal including its first and last character (the quotes), which are dropped.
  // Supported escapes: \\ \" \b \f \n \r \t and \uXXXX (four hex digits, appended as UTF-8).
  //
  // Returns the decoded string, or Status::NotOK if the literal is too short, ends in the middle
  // of an escape sequence, uses an unknown escape character, or contains a \uXXXX value that
  // cannot be encoded. Input that already matched the `String` grammar rule never hits the
  // structural errors; they exist so that unvalidated input yields an error rather than
  // undefined behaviour.
  static StatusOr<std::string> UnescapeString(std::string_view str) {
    // Without both delimiters `size() - 2` would wrap around.
    if (str.size() < 2) {
      return {Status::NotOK, "string literal is too short to contain its quotes"};
    }
    str = str.substr(1, str.size() - 2);

    const auto is_hex_digit = [](char c) {
      return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F');
    };

    std::string result;
    result.reserve(str.size());  // the decoded text is never longer than the escaped text
    while (!str.empty()) {
      if (str[0] != '\\') {
        result.push_back(str[0]);
        str.remove_prefix(1);
        continue;
      }

      // Drop the backslash; the escape character itself is now str[0].
      str.remove_prefix(1);
      if (str.empty()) {
        return {Status::NotOK, "incomplete escape sequence at the end of string literal"};
      }

      switch (str[0]) {
        case '\\':
        case '"':
          result.push_back(str[0]);
          break;
        case 'b':
          result.push_back('\b');
          break;
        case 'f':
          result.push_back('\f');
          break;
        case 'n':
          result.push_back('\n');
          break;
        case 'r':
          result.push_back('\r');
          break;
        case 't':
          result.push_back('\t');
          break;
        case 'u': {
          // Need 'u' plus exactly four hex digits before touching str.data() + 1 .. + 5.
          const std::string_view hex = str.substr(1, 4);
          bool well_formed = hex.size() == 4;
          for (const char c : hex) {
            well_formed = well_formed && is_hex_digit(c);
          }
          if (!well_formed) {
            return {Status::NotOK, "incomplete or malformed Unicode escape sequence in string literal"};
          }

          if (!peg::unescape::utf8_append_utf32(
                  result, peg::unescape::unhex_string<unsigned>(str.data() + 1, str.data() + 5))) {
            return {Status::NotOK,
                    fmt::format("invalid Unicode code point '{}' in string literal", std::string(str.data() + 1, 4))};
          }
          str.remove_prefix(4);  // the four hex digits; 'u' is removed below
          break;
        }
        default:
          return {Status::NotOK, fmt::format("invalid escape character '{}' in string literal", str[0])};
      }
      str.remove_prefix(1);  // the escape character
    }

    return result;
  }
};

} // namespace kqir
67 changes: 67 additions & 0 deletions src/search/redis_query_parser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/

#pragma once

#include <tao/pegtl.hpp>

#include "common_parser.h"

namespace kqir {

namespace redis_query {

// Bring the PEGTL combinators (seq, sor, one, ...) into redis_query so the rules below can be
// written without the `peg::` prefix.
using namespace peg;

// Field reference: `@` immediately followed by an identifier, e.g. `@price`.
struct Field : seq<one<'@'>, Identifier> {};

// A single tag: either a bare identifier or a double-quoted string literal.
struct Tag : sor<Identifier, String> {};
// Tag list: `{ tag | tag | ... }` with at least one tag; whitespace is allowed around each tag.
struct TagList : seq<one<'{'>, WSPad<Tag>, star<seq<one<'|'>, WSPad<Tag>>>, one<'}'>> {};

// Infinity token: `inf`, `+inf` or `-inf` (the sign is optional).
struct Inf : seq<opt<one<'+', '-'>>, string<'i', 'n', 'f'>> {};
// Exclusive bound: `(` immediately followed by a number, e.g. `(10`.
struct ExclusiveNumber : seq<one<'('>, Number> {};
// One bound of a range. `Inf` is tried before `Number` so that `-inf` is not started as a
// negative number.
struct NumericRangePart : sor<Inf, ExclusiveNumber, Number> {};
// Numeric range: `[ lower upper ]`. Whitespace around each bound is optional, the grammar does
// not require a separator between the two bounds.
struct NumericRange : seq<one<'['>, WSPad<NumericRangePart>, WSPad<NumericRangePart>, one<']'>> {};

// A condition on one field: `@field : {tags}` or `@field : [lower upper]`.
struct FieldQuery : seq<WSPad<Field>, one<':'>, WSPad<sor<TagList, NumericRange>>> {};

// The `*` token.
struct Wildcard : one<'*'> {};

// Forward declaration: ParenExpr refers back to the top-level rule, which is defined last.
struct QueryExpr;

// Parenthesised sub-query: `( query )`, whitespace allowed around the parentheses.
struct ParenExpr : WSPad<seq<one<'('>, QueryExpr, one<')'>>> {};

// Forward declaration: BooleanExpr and NotExpr are mutually recursive.
struct NotExpr;

// Operand of the and/or operators: a field query, a parenthesised query, a negation or `*`.
struct BooleanExpr : sor<FieldQuery, ParenExpr, NotExpr, WSPad<Wildcard>> {};

// Negation: `-` followed by the expression it negates.
struct NotExpr : seq<WSPad<one<'-'>>, BooleanExpr> {};

// Conjunction: two or more operands written one after another, with no operator token.
struct AndExpr : seq<BooleanExpr, plus<seq<BooleanExpr>>> {};
// A conjunction if there are several operands, otherwise the single operand itself.
struct AndExprP : sor<AndExpr, BooleanExpr> {};

// Disjunction: two or more conjunction-level expressions separated by `|`.
struct OrExpr : seq<AndExprP, plus<seq<one<'|'>, AndExprP>>> {};
// A disjunction if `|` is present, otherwise the conjunction-level expression itself.
struct OrExprP : sor<OrExpr, AndExprP> {};

// Top-level rule of the query grammar.
struct QueryExpr : seq<OrExprP> {};

} // namespace redis_query

} // namespace kqir
159 changes: 159 additions & 0 deletions src/search/redis_query_transformer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/

#pragma once

#include <memory>

#include "common_transformer.h"
#include "ir.h"
#include "parse_util.h"
#include "redis_query_parser.h"
#include "search/common_parser.h"

namespace kqir {

namespace redis_query {

// Alias for the enclosing namespace. Several grammar rules in redis_query (AndExpr, OrExpr,
// NotExpr, QueryExpr) have the same name as entities in kqir, so those are spelled `ir::...` below.
namespace ir = kqir;

// Chooses which grammar rules produce a node in the parse tree:
//  - store_content: the node keeps the matched text (needed for literals, identifiers and `inf`);
//  - remove_content: the node is kept for its type and children only.
// Rules that are not listed here produce no node of their own.
template <typename Rule>
using TreeSelector =
parse_tree::selector<Rule, parse_tree::store_content::on<Number, String, Identifier, Inf>,
parse_tree::remove_content::on<TagList, NumericRange, ExclusiveNumber, FieldQuery, NotExpr,
AndExpr, OrExpr, Wildcard>>;

// Parses `in` as one complete query (a QueryExpr followed by end of input) and returns the
// resulting parse tree, filtered through TreeSelector.
// Returns Status::NotOK with "invalid syntax" when the input does not match.
template <typename Input>
StatusOr<std::unique_ptr<parse_tree::node>> ParseToTree(Input&& in) {
  auto tree = parse_tree::parse<seq<QueryExpr, eof>, TreeSelector>(std::forward<Input>(in));
  if (!tree) {
    // TODO: improve parse error message, with source location
    return {Status::NotOK, "invalid syntax"};
  }

  return tree;
}

// Converts a parse tree of the redis_query grammar into IR nodes.
struct Transformer : ir::TreeTransformer {
  // Recursively transforms `node` and everything below it.
  //
  // Handled node types: Number, Wildcard, FieldQuery (tag list or numeric range), NotExpr,
  // AndExpr, OrExpr and the tree root.
  //
  // Returns the resulting IR node, or Status::NotOK when
  //  - a numeric literal cannot be converted by ParseFloat,
  //  - a string tag cannot be unescaped,
  //  - the lower bound of a range is positive infinity or the upper bound is negative infinity,
  //  - an unexpected node type is encountered.
  static auto Transform(const TreeNode& node) -> StatusOr<std::unique_ptr<Node>> {
    if (Is<Number>(node)) {
      // The grammar accepts literals of any length, so the conversion may still fail;
      // propagate that failure instead of dereferencing an unchecked result.
      return Node::Create<ir::NumericLiteral>(GET_OR_RET(ParseFloat(node->string())));
    } else if (Is<Wildcard>(node)) {
      // `*` is represented as the constant `true`.
      return Node::Create<ir::BoolLiteral>(true);
    } else if (Is<FieldQuery>(node)) {
      // children: [0] the field's Identifier, [1] the TagList or NumericRange.
      CHECK(node->children.size() == 2);

      auto field = node->children[0]->string();
      const auto& query = node->children[1];

      if (Is<TagList>(query)) {
        // One TagContainExpr per tag; several tags are combined with OR.
        std::vector<std::unique_ptr<ir::QueryExpr>> exprs;

        for (const auto& tag : query->children) {
          // Identifiers are used verbatim, string literals are unquoted and unescaped first.
          auto tag_str = Is<Identifier>(tag) ? tag->string() : GET_OR_RET(UnescapeString(tag->string()));
          exprs.push_back(std::make_unique<ir::TagContainExpr>(std::make_unique<FieldRef>(field),
                                                               std::make_unique<StringLiteral>(tag_str)));
        }

        if (exprs.size() == 1) {
          return std::move(exprs[0]);
        } else {
          return std::make_unique<ir::OrExpr>(std::move(exprs));
        }
      } else {  // NumericRange
        // children: [0] lower bound, [1] upper bound.
        CHECK(query->children.size() == 2);

        // Each finite bound contributes one comparison; an infinite bound contributes none.
        std::vector<std::unique_ptr<ir::QueryExpr>> exprs;

        const auto& lhs = query->children[0];
        const auto& rhs = query->children[1];

        if (Is<ExclusiveNumber>(lhs)) {
          exprs.push_back(
              std::make_unique<NumericCompareExpr>(NumericCompareExpr::GT, std::make_unique<FieldRef>(field),
                                                   Node::As<NumericLiteral>(GET_OR_RET(Transform(lhs->children[0])))));
        } else if (Is<Number>(lhs)) {
          exprs.push_back(std::make_unique<NumericCompareExpr>(NumericCompareExpr::GET,
                                                               std::make_unique<FieldRef>(field),
                                                               Node::As<NumericLiteral>(GET_OR_RET(Transform(lhs)))));
        } else {  // Inf
          // The Inf rule matches "inf", "+inf" and "-inf"; only "-inf" is a usable lower bound.
          // Comparing against "-inf" also rejects the unsigned spelling "inf".
          if (lhs->string_view() != "-inf") {
            return {Status::NotOK, "it's not allowed to set the lower bound as positive infinity"};
          }
        }

        if (Is<ExclusiveNumber>(rhs)) {
          exprs.push_back(
              std::make_unique<NumericCompareExpr>(NumericCompareExpr::LT, std::make_unique<FieldRef>(field),
                                                   Node::As<NumericLiteral>(GET_OR_RET(Transform(rhs->children[0])))));
        } else if (Is<Number>(rhs)) {
          exprs.push_back(std::make_unique<NumericCompareExpr>(NumericCompareExpr::LET,
                                                               std::make_unique<FieldRef>(field),
                                                               Node::As<NumericLiteral>(GET_OR_RET(Transform(rhs)))));
        } else {  // Inf
          // "inf" and "+inf" are both valid upper bounds; only "-inf" is rejected.
          if (rhs->string_view() == "-inf") {
            return {Status::NotOK, "it's not allowed to set the upper bound as negative infinity"};
          }
        }

        if (exprs.empty()) {
          // Both bounds are infinite: the range does not restrict anything.
          return std::make_unique<BoolLiteral>(true);
        } else if (exprs.size() == 1) {
          return std::move(exprs[0]);
        } else {
          return std::make_unique<ir::AndExpr>(std::move(exprs));
        }
      }
    } else if (Is<NotExpr>(node)) {
      CHECK(node->children.size() == 1);

      return Node::Create<ir::NotExpr>(Node::As<ir::QueryExpr>(GET_OR_RET(Transform(node->children[0]))));
    } else if (Is<AndExpr>(node)) {
      std::vector<std::unique_ptr<ir::QueryExpr>> exprs;

      for (const auto& child : node->children) {
        exprs.push_back(Node::As<ir::QueryExpr>(GET_OR_RET(Transform(child))));
      }

      return Node::Create<ir::AndExpr>(std::move(exprs));
    } else if (Is<OrExpr>(node)) {
      std::vector<std::unique_ptr<ir::QueryExpr>> exprs;

      for (const auto& child : node->children) {
        exprs.push_back(Node::As<ir::QueryExpr>(GET_OR_RET(Transform(child))));
      }

      return Node::Create<ir::OrExpr>(std::move(exprs));
    } else if (IsRoot(node)) {
      // The root only wraps the top-level expression.
      CHECK(node->children.size() == 1);

      return Transform(node->children[0]);
    } else {
      // UNREACHABLE CODE, just for debugging here
      return {Status::NotOK, fmt::format("encountered invalid node type: {}", node->type)};
    }
  }  // NOLINT
};

// Parses `in` with the redis_query grammar and converts the result to IR in one step.
// Any error reported by ParseToTree or by Transformer::Transform is returned unchanged.
template <typename Input>
StatusOr<std::unique_ptr<ir::Node>> ParseToIR(Input&& in) {
  auto tree = GET_OR_RET(ParseToTree(std::forward<Input>(in)));
  return Transformer::Transform(tree);
}

} // namespace redis_query

} // namespace kqir
Loading

0 comments on commit 03a332b

Please sign in to comment.