From 47e6705cd1c3939b540f09bc022e2587f56015df Mon Sep 17 00:00:00 2001 From: Twice Date: Wed, 19 Jun 2024 18:38:12 +0900 Subject: [PATCH 1/3] feat(search): add a value type system to KQIR (#2369) --- src/commands/cmd_search.cc | 2 +- src/common/string_util.h | 2 +- src/search/common_parser.h | 2 +- src/search/executors/filter_executor.h | 10 +- .../executors/numeric_field_scan_executor.h | 3 +- src/search/executors/topn_sort_executor.h | 6 +- src/search/indexer.cc | 96 ++++++++++++----- src/search/indexer.h | 13 +-- src/search/ir.h | 2 +- src/search/plan_executor.cc | 9 +- src/search/plan_executor.h | 8 +- src/search/redis_query_parser.h | 2 +- src/search/redis_query_transformer.h | 2 +- src/search/sql_parser.h | 2 +- src/search/sql_transformer.h | 4 +- src/search/value.h | 101 ++++++++++++++++++ tests/cppunit/indexer_test.cc | 6 +- tests/cppunit/plan_executor_test.cc | 47 ++++---- 18 files changed, 236 insertions(+), 81 deletions(-) create mode 100644 src/search/value.h diff --git a/src/commands/cmd_search.cc b/src/commands/cmd_search.cc index 618e0cf6831..1928672c93a 100644 --- a/src/commands/cmd_search.cc +++ b/src/commands/cmd_search.cc @@ -150,7 +150,7 @@ static void DumpQueryResult(const std::vector &r output->append(MultiLen(fields.size() * 2)); for (const auto &[info, field] : fields) { output->append(redis::BulkString(info->name)); - output->append(redis::BulkString(field)); + output->append(redis::BulkString(field.ToString(info->metadata.get()))); } } } diff --git a/src/common/string_util.h b/src/common/string_util.h index da345b306ae..ac3b2904fae 100644 --- a/src/common/string_util.h +++ b/src/common/string_util.h @@ -42,7 +42,7 @@ std::string StringNext(std::string s); template std::string StringJoin( - const T &con, F &&f = [](const auto &v) -> decltype(auto) { return v; }, const std::string &sep = ", ") { + const T &con, F &&f = [](const auto &v) -> decltype(auto) { return v; }, std::string_view sep = ", ") { std::string res; bool is_first = true; for (const auto &v : con) { diff --git a/src/search/common_parser.h b/src/search/common_parser.h index 15f5bc36aff..7d617dbdc3d 100644 --- a/src/search/common_parser.h +++ b/src/search/common_parser.h @@ -42,7 +42,7 @@ struct UnescapedChar : peg::utf8::range<0x20, 0x10FFFF> {}; struct Char : peg::if_then_else, EscapedChar, UnescapedChar> {}; struct StringContent : peg::until>, Char> {}; -struct String : peg::seq, StringContent, peg::any> {}; +struct StringL : peg::seq, StringContent, peg::any> {}; struct Identifier : peg::identifier {}; diff --git a/src/search/executors/filter_executor.h b/src/search/executors/filter_executor.h index ea860fc642d..f668af7d620 100644 --- a/src/search/executors/filter_executor.h +++ b/src/search/executors/filter_executor.h @@ -74,17 +74,17 @@ struct QueryExprEvaluator { StatusOr Visit(TagContainExpr *v) const { auto val = GET_OR_RET(ctx->Retrieve(row, v->field->info)); - auto meta = v->field->info->MetadataAs(); - auto split = util::Split(val, std::string(1, meta->separator)); + CHECK(val.Is()); + auto split = val.Get(); return std::find(split.begin(), split.end(), v->tag->val) != split.end(); } StatusOr Visit(NumericCompareExpr *v) const { - auto l_str = GET_OR_RET(ctx->Retrieve(row, v->field->info)); + auto l_val = GET_OR_RET(ctx->Retrieve(row, v->field->info)); - // TODO: reconsider how to handle failure case here - auto l = GET_OR_RET(ParseFloat(l_str)); + CHECK(l_val.Is()); + auto l = l_val.Get(); auto r = v->num->val; switch (v->op) { diff --git a/src/search/executors/numeric_field_scan_executor.h b/src/search/executors/numeric_field_scan_executor.h index e9699aa4710..5c997df0fc1 100644 --- a/src/search/executors/numeric_field_scan_executor.h +++ b/src/search/executors/numeric_field_scan_executor.h @@ -26,6 +26,7 @@ #include "encoding.h" #include "search/plan_executor.h" #include "search/search_encoding.h" +#include "search/value.h" #include "storage/redis_db.h" #include "storage/redis_metadata.h" #include "storage/storage.h" @@ -108,7 +109,7 @@ struct NumericFieldScanExecutor : ExecutorNode { } else { iter->Prev(); } - return RowType{key_str, {{scan->field->info, std::to_string(curr)}}, scan->field->info->index}; + return RowType{key_str, {{scan->field->info, kqir::MakeValue(curr)}}, scan->field->info->index}; } }; diff --git a/src/search/executors/topn_sort_executor.h b/src/search/executors/topn_sort_executor.h index 741a968928b..163b1bc7f3c 100644 --- a/src/search/executors/topn_sort_executor.h +++ b/src/search/executors/topn_sort_executor.h @@ -57,9 +57,9 @@ struct TopNSortExecutor : ExecutorNode { auto &row = std::get(v); auto get_order = [this](RowType &row) -> StatusOr { - auto order_str = GET_OR_RET(ctx->Retrieve(row, sort->order->field->info)); - auto order = GET_OR_RET(ParseFloat(order_str)); - return order; + auto order_val = GET_OR_RET(ctx->Retrieve(row, sort->order->field->info)); + CHECK(order_val.Is()); + return order_val.Get(); }; if (rows.size() == total) { diff --git a/src/search/indexer.cc b/src/search/indexer.cc index 0ee1b39d118..80fea7a924c 100644 --- a/src/search/indexer.cc +++ b/src/search/indexer.cc @@ -26,6 +26,7 @@ #include "db_util.h" #include "parse_util.h" #include "search/search_encoding.h" +#include "search/value.h" #include "storage/redis_metadata.h" #include "storage/storage.h" #include "string_util.h" @@ -56,25 +57,67 @@ StatusOr FieldValueRetriever::Create(IndexOnDataType type, } } -rocksdb::Status FieldValueRetriever::Retrieve(std::string_view field, std::string *output) { +StatusOr FieldValueRetriever::Retrieve(std::string_view field, const redis::IndexFieldMetadata *type) { if (std::holds_alternative(db)) { auto &[hash, metadata, key] = std::get(db); std::string ns_key = hash.AppendNamespacePrefix(key); + LatestSnapShot ss(hash.storage_); rocksdb::ReadOptions read_options; read_options.snapshot = ss.GetSnapShot(); std::string sub_key = InternalKey(ns_key, field, metadata.version, hash.storage_->IsSlotIdEncoded()).Encode(); - return hash.storage_->Get(read_options, sub_key, output); + std::string value; + auto s = hash.storage_->Get(read_options, sub_key, &value); + if (s.IsNotFound()) return {Status::NotFound, s.ToString()}; + if (!s.ok()) return {Status::NotOK, s.ToString()}; + + if (auto numeric [[maybe_unused]] = dynamic_cast(type)) { + auto num = GET_OR_RET(ParseFloat(value)); + return kqir::MakeValue(num); + } else if (auto tag = dynamic_cast(type)) { + const char delim[] = {tag->separator, '\0'}; + auto vec = util::Split(value, delim); + return kqir::MakeValue(vec); + } else { + return {Status::NotOK, "unknown field type to retrieve"}; + } + } else if (std::holds_alternative(db)) { auto &value = std::get(db); + auto s = value.Get(field.front() == '$' ? field : fmt::format("$.{}", field)); - if (!s.IsOK()) return rocksdb::Status::Corruption(s.Msg()); + if (!s.IsOK()) return {Status::NotOK, s.Msg()}; if (s->value.size() != 1) - return rocksdb::Status::NotFound("json value specified by the field (json path) should exist and be unique"); - *output = s->value[0].as_string(); - return rocksdb::Status::OK(); + return {Status::NotFound, "json value specified by the field (json path) should exist and be unique"}; + auto val = s->value[0]; + + if (auto numeric [[maybe_unused]] = dynamic_cast(type)) { + if (val.is_string()) return {Status::NotOK, "json value cannot be string for numeric fields"}; + return kqir::MakeValue(val.as_double()); + } else if (auto tag = dynamic_cast(type)) { + if (val.is_string()) { + const char delim[] = {tag->separator, '\0'}; + auto vec = util::Split(val.as_string(), delim); + return kqir::MakeValue(vec); + } else if (val.is_array()) { + std::vector strs; + for (size_t i = 0; i < val.size(); ++i) { + if (!val[i].is_string()) + return {Status::NotOK, "json value should be string or array of strings for tag fields"}; + strs.push_back(val[i].as_string()); + } + return kqir::MakeValue(strs); + } else { + return {Status::NotOK, "json value should be string or array of strings for tag fields"}; + } + } else { + return {Status::NotOK, "unknown field type to retrieve"}; + } + + return Status::OK(); + } else { - __builtin_unreachable(); + return {Status::NotOK, "unknown redis data type to retrieve"}; } } @@ -102,22 +145,22 @@ StatusOr IndexUpdater::Record(std::string_view key) c continue; } - std::string value; - auto s = retriever.Retrieve(field, &value); - if (s.IsNotFound()) continue; - if (!s.ok()) return {Status::NotOK, s.ToString()}; + auto s = retriever.Retrieve(field, i.metadata.get()); + if (s.Is()) continue; + if (!s) return s; - values.emplace(field, value); + values.emplace(field, *s); } return values; } -Status IndexUpdater::UpdateTagIndex(std::string_view key, std::string_view original, std::string_view current, +Status IndexUpdater::UpdateTagIndex(std::string_view key, const kqir::Value &original, const kqir::Value ¤t, const SearchKey &search_key, const TagFieldMetadata *tag) const { - const char delim[] = {tag->separator, '\0'}; - auto original_tags = util::Split(original, delim); - auto current_tags = util::Split(current, delim); + CHECK(original.IsNull() || original.Is()); + CHECK(current.IsNull() || current.Is()); + auto original_tags = original.IsNull() ? std::vector() : original.Get(); + auto current_tags = current.IsNull() ? std::vector() : current.Get(); auto to_tag_set = [](const std::vector &tags, bool case_sensitive) -> std::set { if (case_sensitive) { @@ -167,22 +210,23 @@ Status IndexUpdater::UpdateTagIndex(std::string_view key, std::string_view origi return Status::OK(); } -Status IndexUpdater::UpdateNumericIndex(std::string_view key, std::string_view original, std::string_view current, +Status IndexUpdater::UpdateNumericIndex(std::string_view key, const kqir::Value &original, const kqir::Value ¤t, const SearchKey &search_key, const NumericFieldMetadata *num) const { + CHECK(original.IsNull() || original.Is()); + CHECK(original.IsNull() || original.Is()); + auto *storage = indexer->storage; auto batch = storage->GetWriteBatchBase(); auto cf_handle = storage->GetCFHandle(ColumnFamilyID::Search); - if (!original.empty()) { - auto original_num = GET_OR_RET(ParseFloat(std::string(original.begin(), original.end()))); - auto index_key = search_key.ConstructNumericFieldData(original_num, key); + if (!original.IsNull()) { + auto index_key = search_key.ConstructNumericFieldData(original.Get(), key); batch->Delete(cf_handle, index_key); } - if (!current.empty()) { - auto current_num = GET_OR_RET(ParseFloat(std::string(current.begin(), current.end()))); - auto index_key = search_key.ConstructNumericFieldData(current_num, key); + if (!current.IsNull()) { + auto index_key = search_key.ConstructNumericFieldData(current.Get(), key); batch->Put(cf_handle, index_key, Slice()); } @@ -192,8 +236,8 @@ Status IndexUpdater::UpdateNumericIndex(std::string_view key, std::string_view o return Status::OK(); } -Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view key, std::string_view original, - std::string_view current) const { +Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view key, const kqir::Value &original, + const kqir::Value ¤t) const { if (original == current) { // the value of this field is unchanged, no need to update return Status::OK(); @@ -225,7 +269,7 @@ Status IndexUpdater::Update(const FieldValues &original, std::string_view key) c continue; } - std::string_view original_val, current_val; + kqir::Value original_val, current_val; if (auto it = original.find(field); it != original.end()) { original_val = it->second; diff --git a/src/search/indexer.h b/src/search/indexer.h index 2bc7471b816..029944e24c5 100644 --- a/src/search/indexer.h +++ b/src/search/indexer.h @@ -36,6 +36,7 @@ #include "storage/storage.h" #include "types/redis_hash.h" #include "types/redis_json.h" +#include "value.h" namespace redis { @@ -63,11 +64,11 @@ struct FieldValueRetriever { explicit FieldValueRetriever(JsonValue json) : db(std::in_place_type, std::move(json)) {} - rocksdb::Status Retrieve(std::string_view field, std::string *output); + StatusOr Retrieve(std::string_view field, const redis::IndexFieldMetadata *type); }; struct IndexUpdater { - using FieldValues = std::map; + using FieldValues = std::map; const kqir::IndexInfo *info = nullptr; GlobalIndexer *indexer = nullptr; @@ -75,15 +76,15 @@ struct IndexUpdater { explicit IndexUpdater(const kqir::IndexInfo *info) : info(info) {} StatusOr Record(std::string_view key) const; - Status UpdateIndex(const std::string &field, std::string_view key, std::string_view original, - std::string_view current) const; + Status UpdateIndex(const std::string &field, std::string_view key, const kqir::Value &original, + const kqir::Value ¤t) const; Status Update(const FieldValues &original, std::string_view key) const; Status Build() const; - Status UpdateTagIndex(std::string_view key, std::string_view original, std::string_view current, + Status UpdateTagIndex(std::string_view key, const kqir::Value &original, const kqir::Value ¤t, const SearchKey &search_key, const TagFieldMetadata *tag) const; - Status UpdateNumericIndex(std::string_view key, std::string_view original, std::string_view current, + Status UpdateNumericIndex(std::string_view key, const kqir::Value &original, const kqir::Value ¤t, const SearchKey &search_key, const NumericFieldMetadata *num) const; }; diff --git a/src/search/ir.h b/src/search/ir.h index 4007890e730..72cb7351e03 100644 --- a/src/search/ir.h +++ b/src/search/ir.h @@ -38,7 +38,7 @@ #include "string_util.h" #include "type_util.h" -// kqir stands for Kvorcks Query Intermediate Representation +// kqir stands for Kvrocks Query Intermediate Representation namespace kqir { struct Node { diff --git a/src/search/plan_executor.cc b/src/search/plan_executor.cc index 7de9f5188bd..9140587e1e9 100644 --- a/src/search/plan_executor.cc +++ b/src/search/plan_executor.cc @@ -152,12 +152,11 @@ auto ExecutorContext::Retrieve(RowType &row, const FieldInfo *field) -> StatusOr auto retriever = GET_OR_RET( redis::FieldValueRetriever::Create(field->index->metadata.on_data_type, row.key, storage, field->index->ns)); - std::string result; - auto s = retriever.Retrieve(field->name, &result); - if (!s.ok()) return {Status::NotOK, s.ToString()}; + auto s = retriever.Retrieve(field->name, field->metadata.get()); + if (!s) return s; - row.fields.emplace(field, result); - return result; + row.fields.emplace(field, *s); + return *s; } } // namespace kqir diff --git a/src/search/plan_executor.h b/src/search/plan_executor.h index 82d8e73e6c0..0ead68701d2 100644 --- a/src/search/plan_executor.h +++ b/src/search/plan_executor.h @@ -24,6 +24,7 @@ #include "ir_plan.h" #include "search/index_info.h" +#include "search/value.h" #include "storage/storage.h" #include "string_util.h" @@ -33,7 +34,7 @@ struct ExecutorContext; struct ExecutorNode { using KeyType = std::string; - using ValueType = std::string; + using ValueType = kqir::Value; struct RowType { KeyType key; std::map fields; @@ -52,8 +53,9 @@ struct ExecutorNode { } else { os << row.key; } - return os << " {" << util::StringJoin(row.fields, [](const auto &v) { return v.first->name + ": " + v.second; }) - << "}"; + return os << " {" << util::StringJoin(row.fields, [](const auto &v) { + return v.first->name + ": " + v.second.ToString(); + }) << "}"; } }; diff --git a/src/search/redis_query_parser.h b/src/search/redis_query_parser.h index fd373830f32..d64d0bf0ed1 100644 --- a/src/search/redis_query_parser.h +++ b/src/search/redis_query_parser.h @@ -32,7 +32,7 @@ using namespace peg; struct Field : seq, Identifier> {}; -struct Tag : sor {}; +struct Tag : sor {}; struct TagList : seq, WSPad, star, WSPad>>, one<'}'>> {}; struct Inf : seq>, string<'i', 'n', 'f'>> {}; diff --git a/src/search/redis_query_transformer.h b/src/search/redis_query_transformer.h index 45734b3182a..0928ed592e6 100644 --- a/src/search/redis_query_transformer.h +++ b/src/search/redis_query_transformer.h @@ -36,7 +36,7 @@ namespace ir = kqir; template using TreeSelector = - parse_tree::selector, + parse_tree::selector, parse_tree::remove_content::on>; diff --git a/src/search/sql_parser.h b/src/search/sql_parser.h index 981ea0accde..26e3da6d5b4 100644 --- a/src/search/sql_parser.h +++ b/src/search/sql_parser.h @@ -31,7 +31,7 @@ namespace sql { using namespace peg; struct HasTag : string<'h', 'a', 's', 't', 'a', 'g'> {}; -struct HasTagExpr : WSPad, String>> {}; +struct HasTagExpr : WSPad, StringL>> {}; struct NumericAtomExpr : WSPad> {}; struct NumericCompareOp : sor', '='>, one<'=', '<', '>'>> {}; diff --git a/src/search/sql_transformer.h b/src/search/sql_transformer.h index 8386a66e021..972ae894b53 100644 --- a/src/search/sql_transformer.h +++ b/src/search/sql_transformer.h @@ -38,7 +38,7 @@ namespace ir = kqir; template using TreeSelector = parse_tree::selector< Rule, - parse_tree::store_content::on, + parse_tree::store_content::on, parse_tree::remove_content::on>; @@ -58,7 +58,7 @@ struct Transformer : ir::TreeTransformer { return Node::Create(node->string_view() == "true"); } else if (Is(node)) { return Node::Create(*ParseFloat(node->string())); - } else if (Is(node)) { + } else if (Is(node)) { return Node::Create(GET_OR_RET(UnescapeString(node->string_view()))); } else if (Is(node)) { CHECK(node->children.size() == 2); diff --git a/src/search/value.h b/src/search/value.h new file mode 100644 index 00000000000..f1a1e8b7130 --- /dev/null +++ b/src/search/value.h @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#pragma once + +#include +#include +#include +#include + +#include "fmt/core.h" +#include "search/search_encoding.h" +#include "string_util.h" + +namespace kqir { + +using Null = std::monostate; + +using Numeric = double; // used for numeric fields + +using String = std::string; // e.g. a single tag + +using NumericArray = std::vector; // used for vector fields +using StringArray = std::vector; // used for tag fields, e.g. a list for tags + +struct Value : std::variant { + using Base = std::variant; + + using Base::Base; + + bool IsNull() const { return Is(); } + + template + bool Is() const { + return std::holds_alternative(*this); + } + + template + const auto &Get() const { + CHECK(Is()); + return std::get(*this); + } + + template + auto &Get() { + CHECK(Is()); + return std::get(*this); + } + + std::string ToString(const std::string &sep = ",") const { + if (IsNull()) { + return ""; + } else if (Is()) { + return fmt::format("{}", Get()); + } else if (Is()) { + return util::StringJoin( + Get(), [](const auto &v) -> decltype(auto) { return v; }, sep); + } + + __builtin_unreachable(); + } + + std::string ToString(redis::IndexFieldMetadata *meta) const { + if (IsNull()) { + return ""; + } else if (Is()) { + return fmt::format("{}", Get()); + } else if (Is()) { + auto tag = dynamic_cast(meta); + char sep = tag ? tag->separator : ','; + return util::StringJoin( + Get(), [](const auto &v) -> decltype(auto) { return v; }, std::string(1, sep)); + } + + __builtin_unreachable(); + } +}; + +template +auto MakeValue(Args &&...args) { + return Value(std::in_place_type, std::forward(args)...); +} + +} // namespace kqir diff --git a/tests/cppunit/indexer_test.cc b/tests/cppunit/indexer_test.cc index d9b3656e800..c3a7769e5bf 100644 --- a/tests/cppunit/indexer_test.cc +++ b/tests/cppunit/indexer_test.cc @@ -30,6 +30,8 @@ #include "storage/redis_metadata.h" #include "types/redis_hash.h" +static auto T(const std::string& v) { return kqir::MakeValue(util::Split(v, ",")); } + struct IndexerTest : TestBase { redis::GlobalIndexer indexer; kqir::IndexMap map; @@ -115,7 +117,7 @@ TEST_F(IndexerTest, HashTag) { ASSERT_TRUE(s); ASSERT_EQ(s->updater.info->name, idxname); ASSERT_EQ(s->fields.size(), 1); - ASSERT_EQ(s->fields["x"], "food,kitChen,Beauty"); + ASSERT_EQ(s->fields["x"], T("food,kitChen,Beauty")); uint64_t cnt = 0; auto s_set = db.Set(key1, "x", "Clothing,FOOD,sport", &cnt); @@ -205,7 +207,7 @@ TEST_F(IndexerTest, JsonTag) { ASSERT_TRUE(s); ASSERT_EQ(s->updater.info->name, idxname); ASSERT_EQ(s->fields.size(), 1); - ASSERT_EQ(s->fields["$.x"], "food,kitChen,Beauty"); + ASSERT_EQ(s->fields["$.x"], T("food,kitChen,Beauty")); auto s_set = db.Set(key1, "$.x", "\"Clothing,FOOD,sport\""); ASSERT_TRUE(s_set.ok()); diff --git a/tests/cppunit/plan_executor_test.cc b/tests/cppunit/plan_executor_test.cc index 20f1f00978d..00e0c162021 100644 --- a/tests/cppunit/plan_executor_test.cc +++ b/tests/cppunit/plan_executor_test.cc @@ -29,6 +29,8 @@ #include "search/interval.h" #include "search/ir.h" #include "search/ir_plan.h" +#include "search/value.h" +#include "string_util.h" #include "test_base.h" #include "types/redis_json.h" @@ -81,12 +83,15 @@ TEST(PlanExecutorTest, Mock) { static auto IndexI() -> const IndexInfo* { return index_map.Find("ia", "search_ns")->second.get(); } static auto FieldI(const std::string& f) -> const FieldInfo* { return &IndexI()->fields.at(f); } +static auto N(double n) { return MakeValue(n); } +static auto T(const std::string& v) { return MakeValue(util::Split(v, ",")); } + TEST(PlanExecutorTest, TopNSort) { std::vector data{ - {"a", {{FieldI("f3"), "4"}}, IndexI()}, {"b", {{FieldI("f3"), "2"}}, IndexI()}, - {"c", {{FieldI("f3"), "7"}}, IndexI()}, {"d", {{FieldI("f3"), "3"}}, IndexI()}, - {"e", {{FieldI("f3"), "1"}}, IndexI()}, {"f", {{FieldI("f3"), "6"}}, IndexI()}, - {"g", {{FieldI("f3"), "8"}}, IndexI()}, + {"a", {{FieldI("f3"), N(4)}}, IndexI()}, {"b", {{FieldI("f3"), N(2)}}, IndexI()}, + {"c", {{FieldI("f3"), N(7)}}, IndexI()}, {"d", {{FieldI("f3"), N(3)}}, IndexI()}, + {"e", {{FieldI("f3"), N(1)}}, IndexI()}, {"f", {{FieldI("f3"), N(6)}}, IndexI()}, + {"g", {{FieldI("f3"), N(8)}}, IndexI()}, }; { auto op = std::make_unique( @@ -118,10 +123,10 @@ TEST(PlanExecutorTest, TopNSort) { TEST(PlanExecutorTest, Filter) { std::vector data{ - {"a", {{FieldI("f3"), "4"}}, IndexI()}, {"b", {{FieldI("f3"), "2"}}, IndexI()}, - {"c", {{FieldI("f3"), "7"}}, IndexI()}, {"d", {{FieldI("f3"), "3"}}, IndexI()}, - {"e", {{FieldI("f3"), "1"}}, IndexI()}, {"f", {{FieldI("f3"), "6"}}, IndexI()}, - {"g", {{FieldI("f3"), "8"}}, IndexI()}, + {"a", {{FieldI("f3"), N(4)}}, IndexI()}, {"b", {{FieldI("f3"), N(2)}}, IndexI()}, + {"c", {{FieldI("f3"), N(7)}}, IndexI()}, {"d", {{FieldI("f3"), N(3)}}, IndexI()}, + {"e", {{FieldI("f3"), N(1)}}, IndexI()}, {"f", {{FieldI("f3"), N(6)}}, IndexI()}, + {"g", {{FieldI("f3"), N(8)}}, IndexI()}, }; { auto field = std::make_unique("f3", FieldI("f3")); @@ -157,10 +162,10 @@ TEST(PlanExecutorTest, Filter) { ASSERT_EQ(ctx.Next().GetValue(), exe_end); } - data = {{"a", {{FieldI("f1"), "cpp,java"}}, IndexI()}, {"b", {{FieldI("f1"), "python,cpp,c"}}, IndexI()}, - {"c", {{FieldI("f1"), "c,perl"}}, IndexI()}, {"d", {{FieldI("f1"), "rust,python"}}, IndexI()}, - {"e", {{FieldI("f1"), "java,kotlin"}}, IndexI()}, {"f", {{FieldI("f1"), "c,rust"}}, IndexI()}, - {"g", {{FieldI("f1"), "c,cpp,java"}}, IndexI()}}; + data = {{"a", {{FieldI("f1"), T("cpp,java")}}, IndexI()}, {"b", {{FieldI("f1"), T("python,cpp,c")}}, IndexI()}, + {"c", {{FieldI("f1"), T("c,perl")}}, IndexI()}, {"d", {{FieldI("f1"), T("rust,python")}}, IndexI()}, + {"e", {{FieldI("f1"), T("java,kotlin")}}, IndexI()}, {"f", {{FieldI("f1"), T("c,rust")}}, IndexI()}, + {"g", {{FieldI("f1"), T("c,cpp,java")}}, IndexI()}}; { auto field = std::make_unique("f1", FieldI("f1")); auto op = std::make_unique( @@ -192,10 +197,10 @@ TEST(PlanExecutorTest, Filter) { TEST(PlanExecutorTest, Limit) { std::vector data{ - {"a", {{FieldI("f3"), "4"}}, IndexI()}, {"b", {{FieldI("f3"), "2"}}, IndexI()}, - {"c", {{FieldI("f3"), "7"}}, IndexI()}, {"d", {{FieldI("f3"), "3"}}, IndexI()}, - {"e", {{FieldI("f3"), "1"}}, IndexI()}, {"f", {{FieldI("f3"), "6"}}, IndexI()}, - {"g", {{FieldI("f3"), "8"}}, IndexI()}, + {"a", {{FieldI("f3"), N(4)}}, IndexI()}, {"b", {{FieldI("f3"), N(2)}}, IndexI()}, + {"c", {{FieldI("f3"), N(7)}}, IndexI()}, {"d", {{FieldI("f3"), N(3)}}, IndexI()}, + {"e", {{FieldI("f3"), N(1)}}, IndexI()}, {"f", {{FieldI("f3"), N(6)}}, IndexI()}, + {"g", {{FieldI("f3"), N(8)}}, IndexI()}, }; { auto op = std::make_unique(std::make_unique(data), std::make_unique(1, 2)); @@ -219,12 +224,12 @@ TEST(PlanExecutorTest, Limit) { TEST(PlanExecutorTest, Merge) { std::vector data1{ - {"a", {{FieldI("f3"), "4"}}, IndexI()}, - {"b", {{FieldI("f3"), "2"}}, IndexI()}, + {"a", {{FieldI("f3"), N(4)}}, IndexI()}, + {"b", {{FieldI("f3"), N(2)}}, IndexI()}, }; - std::vector data2{{"c", {{FieldI("f3"), "7"}}, IndexI()}, - {"d", {{FieldI("f3"), "3"}}, IndexI()}, - {"e", {{FieldI("f3"), "1"}}, IndexI()}}; + std::vector data2{{"c", {{FieldI("f3"), N(7)}}, IndexI()}, + {"d", {{FieldI("f3"), N(3)}}, IndexI()}, + {"e", {{FieldI("f3"), N(1)}}, IndexI()}}; { auto op = std::make_unique(Node::List(std::make_unique(data1), std::make_unique(data2))); From 7d48490ba1890d9bb0510c11350d7ae1a1f14dd0 Mon Sep 17 00:00:00 2001 From: Twice Date: Thu, 20 Jun 2024 00:49:49 +0900 Subject: [PATCH 2/3] feat(search): add vector type to kqir::Value (#2371) --- src/search/indexer.cc | 105 +++++++++++++++++++++++++++--------------- src/search/indexer.h | 3 ++ src/search/value.h | 9 +++- 3 files changed, 79 insertions(+), 38 deletions(-) diff --git a/src/search/indexer.cc b/src/search/indexer.cc index 80fea7a924c..7ce0b3d013b 100644 --- a/src/search/indexer.cc +++ b/src/search/indexer.cc @@ -57,6 +57,73 @@ StatusOr FieldValueRetriever::Create(IndexOnDataType type, } } +// placeholders, remove them after vector indexing is implemented +static bool IsVectorType(const redis::IndexFieldMetadata *) { return false; } +static size_t GetVectorDim(const redis::IndexFieldMetadata *) { return 1; } + +StatusOr FieldValueRetriever::ParseFromJson(const jsoncons::json &val, + const redis::IndexFieldMetadata *type) { + if (auto numeric [[maybe_unused]] = dynamic_cast(type)) { + if (!val.is_number() || val.is_string()) return {Status::NotOK, "json value cannot be string for numeric fields"}; + return kqir::MakeValue(val.as_double()); + } else if (auto tag = dynamic_cast(type)) { + if (val.is_string()) { + const char delim[] = {tag->separator, '\0'}; + auto vec = util::Split(val.as_string(), delim); + return kqir::MakeValue(vec); + } else if (val.is_array()) { + std::vector strs; + for (size_t i = 0; i < val.size(); ++i) { + if (!val[i].is_string()) + return {Status::NotOK, "json value should be string or array of strings for tag fields"}; + strs.push_back(val[i].as_string()); + } + return kqir::MakeValue(strs); + } else { + return {Status::NotOK, "json value should be string or array of strings for tag fields"}; + } + } else if (IsVectorType(type)) { + size_t dim = GetVectorDim(type); + if (!val.is_array()) return {Status::NotOK, "json value should be array of numbers for vector fields"}; + if (dim != val.size()) return {Status::NotOK, "the size of the json array is not equal to the dim of the vector"}; + std::vector nums; + for (size_t i = 0; i < dim; ++i) { + if (!val[i].is_number() || val[i].is_string()) + return {Status::NotOK, "json value should be array of numbers for vector fields"}; + nums.push_back(val[i].as_double()); + } + return kqir::MakeValue(nums); + } else { + return {Status::NotOK, "unknown field type to retrieve"}; + } +} + +StatusOr FieldValueRetriever::ParseFromHash(const std::string &value, + const redis::IndexFieldMetadata *type) { + if (auto numeric [[maybe_unused]] = dynamic_cast(type)) { + auto num = GET_OR_RET(ParseFloat(value)); + return kqir::MakeValue(num); + } else if (auto tag = dynamic_cast(type)) { + const char delim[] = {tag->separator, '\0'}; + auto vec = util::Split(value, delim); + return kqir::MakeValue(vec); + } else if (IsVectorType(type)) { + const size_t dim = GetVectorDim(type); + if (value.size() != dim * sizeof(double)) { + return {Status::NotOK, "field value is too short or too long to be parsed as a vector"}; + } + std::vector vec; + for (size_t i = 0; i < dim; ++i) { + // TODO: care about endian later + // TODO: currently only support 64bit floating point + vec.push_back(*(reinterpret_cast(value.data()) + i)); + } + return kqir::MakeValue(vec); + } else { + return {Status::NotOK, "unknown field type to retrieve"}; + } +} + StatusOr FieldValueRetriever::Retrieve(std::string_view field, const redis::IndexFieldMetadata *type) { if (std::holds_alternative(db)) { auto &[hash, metadata, key] = std::get(db); @@ -71,17 +138,7 @@ StatusOr FieldValueRetriever::Retrieve(std::string_view field, cons if (s.IsNotFound()) return {Status::NotFound, s.ToString()}; if (!s.ok()) return {Status::NotOK, s.ToString()}; - if (auto numeric [[maybe_unused]] = dynamic_cast(type)) { - auto num = GET_OR_RET(ParseFloat(value)); - return kqir::MakeValue(num); - } else if (auto tag = dynamic_cast(type)) { - const char delim[] = {tag->separator, '\0'}; - auto vec = util::Split(value, delim); - return kqir::MakeValue(vec); - } else { - return {Status::NotOK, "unknown field type to retrieve"}; - } - + return ParseFromHash(value, type); } else if (std::holds_alternative(db)) { auto &value = std::get(db); @@ -91,31 +148,7 @@ StatusOr FieldValueRetriever::Retrieve(std::string_view field, cons return {Status::NotFound, "json value specified by the field (json path) should exist and be unique"}; auto val = s->value[0]; - if (auto numeric [[maybe_unused]] = dynamic_cast(type)) { - if (val.is_string()) return {Status::NotOK, "json value cannot be string for numeric fields"}; - return kqir::MakeValue(val.as_double()); - } else if (auto tag = dynamic_cast(type)) { - if (val.is_string()) { - const char delim[] = {tag->separator, '\0'}; - auto vec = util::Split(val.as_string(), delim); - return kqir::MakeValue(vec); - } else if (val.is_array()) { - std::vector strs; - for (size_t i = 0; i < val.size(); ++i) { - if (!val[i].is_string()) - return {Status::NotOK, "json value should be string or array of strings for tag fields"}; - strs.push_back(val[i].as_string()); - } - return kqir::MakeValue(strs); - } else { - return {Status::NotOK, "json value should be string or array of strings for tag fields"}; - } - } else { - return {Status::NotOK, "unknown field type to retrieve"}; - } - - return Status::OK(); - + return ParseFromJson(val, type); } else { return {Status::NotOK, "unknown redis data type to retrieve"}; } diff --git a/src/search/indexer.h b/src/search/indexer.h index 029944e24c5..8ffd503b6ba 100644 --- a/src/search/indexer.h +++ b/src/search/indexer.h @@ -65,6 +65,9 @@ struct FieldValueRetriever { explicit FieldValueRetriever(JsonValue json) : db(std::in_place_type, std::move(json)) {} StatusOr Retrieve(std::string_view field, const redis::IndexFieldMetadata *type); + + static StatusOr ParseFromJson(const jsoncons::json &value, const redis::IndexFieldMetadata *type); + static StatusOr ParseFromHash(const std::string &value, const redis::IndexFieldMetadata *type); }; struct IndexUpdater { diff --git a/src/search/value.h b/src/search/value.h index f1a1e8b7130..f339571788d 100644 --- a/src/search/value.h +++ b/src/search/value.h @@ -40,8 +40,8 @@ using String = std::string; // e.g. a single tag using NumericArray = std::vector; // used for vector fields using StringArray = std::vector; // used for tag fields, e.g. a list for tags -struct Value : std::variant { - using Base = std::variant; +struct Value : std::variant { + using Base = std::variant; using Base::Base; @@ -72,6 +72,9 @@ struct Value : std::variant { } else if (Is()) { return util::StringJoin( Get(), [](const auto &v) -> decltype(auto) { return v; }, sep); + } else if (Is()) { + return util::StringJoin( + Get(), [](const auto &v) -> decltype(auto) { return std::to_string(v); }, sep); } __builtin_unreachable(); @@ -87,6 +90,8 @@ struct Value : std::variant { char sep = tag ? tag->separator : ','; return util::StringJoin( Get(), [](const auto &v) -> decltype(auto) { return v; }, std::string(1, sep)); + } else if (Is()) { + return util::StringJoin(Get(), [](const auto &v) -> decltype(auto) { return std::to_string(v); }); } __builtin_unreachable(); From 92df2b802cec24790a35ebee09f45e8c5beedc81 Mon Sep 17 00:00:00 2001 From: Aleks Lozovyuk Date: Thu, 20 Jun 2024 17:47:57 +0300 Subject: [PATCH 3/3] chore: Bump cpptrace to v0.6.2 (#2372) --- cmake/cpptrace.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/cpptrace.cmake b/cmake/cpptrace.cmake index 11baef532f5..370198bef81 100644 --- a/cmake/cpptrace.cmake +++ b/cmake/cpptrace.cmake @@ -20,8 +20,8 @@ include_guard() include(cmake/utils.cmake) FetchContent_DeclareGitHubWithMirror(cpptrace - jeremy-rifkin/cpptrace v0.6.1 - MD5=246eb8d730b44373573783f218bd3b01 + jeremy-rifkin/cpptrace v0.6.2 + MD5=b13786adcc1785cb900746ea96c50bee ) if (SYMBOLIZE_BACKEND STREQUAL "libbacktrace")