Skip to content

Commit

Permalink
Unify index info structure between indexer and sema checker (#2240)
Browse files Browse the repository at this point in the history
  • Loading branch information
PragmaTwice committed Apr 13, 2024
1 parent ed5937c commit 8fae903
Show file tree
Hide file tree
Showing 7 changed files with 120 additions and 90 deletions.
61 changes: 61 additions & 0 deletions src/search/index_info.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/

#pragma once

#include <map>
#include <memory>
#include <string>

#include "search_encoding.h"

namespace kqir {

struct IndexInfo;

/// Describes a single field of a search index.
struct FieldInfo {
  /// Field name; IndexInfo::Add uses it as the key in IndexInfo::fields.
  std::string name;
  /// Non-owning back-pointer to the containing index. Stays null until the
  /// field is registered through IndexInfo::Add, which assigns it.
  IndexInfo *index = nullptr;
  /// Owned metadata object for this field.
  std::unique_ptr<redis::SearchFieldMetadata> metadata;

  /// Builds a field that is not yet attached to any index (`index` is null).
  ///
  /// @param name      field name, taken by value and moved into place
  /// @param metadata  field metadata; ownership is transferred to this object
  FieldInfo(std::string name, std::unique_ptr<redis::SearchFieldMetadata> &&metadata)
      : name{std::move(name)}, metadata{std::move(metadata)} {}
};

struct IndexInfo {
using FieldMap = std::map<std::string, FieldInfo>;

std::string name;
SearchMetadata metadata;
FieldMap fields;
redis::SearchPrefixesMetadata prefixes;

IndexInfo(std::string name, SearchMetadata metadata) : name(std::move(name)), metadata(std::move(metadata)) {}

void Add(FieldInfo &&field) {
const auto &name = field.name;
field.index = this;
fields.emplace(name, std::move(field));
}
};

using IndexMap = std::map<std::string, IndexInfo>;

} // namespace kqir
44 changes: 22 additions & 22 deletions src/search/indexer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ rocksdb::Status FieldValueRetriever::Retrieve(std::string_view field, std::strin
}
}

StatusOr<IndexUpdater::FieldValues> IndexUpdater::Record(std::string_view key, const std::string &ns) {
StatusOr<IndexUpdater::FieldValues> IndexUpdater::Record(std::string_view key, const std::string &ns) const {
Database db(indexer->storage, ns);

RedisType type = kRedisNone;
Expand All @@ -87,16 +87,16 @@ StatusOr<IndexUpdater::FieldValues> IndexUpdater::Record(std::string_view key, c
// key not exist
if (type == kRedisNone) return FieldValues();

if (type != static_cast<RedisType>(metadata.on_data_type)) {
if (type != static_cast<RedisType>(info->metadata.on_data_type)) {
// not the expected type, stop record
return {Status::TypeMismatched};
}

auto retriever = GET_OR_RET(FieldValueRetriever::Create(metadata.on_data_type, key, indexer->storage, ns));
auto retriever = GET_OR_RET(FieldValueRetriever::Create(info->metadata.on_data_type, key, indexer->storage, ns));

FieldValues values;
for (const auto &[field, info] : fields) {
if (info->noindex) {
for (const auto &[field, i] : info->fields) {
if (i.metadata->noindex) {
continue;
}

Expand All @@ -112,20 +112,20 @@ StatusOr<IndexUpdater::FieldValues> IndexUpdater::Record(std::string_view key, c
}

Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view key, std::string_view original,
std::string_view current, const std::string &ns) {
std::string_view current, const std::string &ns) const {
if (original == current) {
// the value of this field is unchanged, no need to update
return Status::OK();
}

auto iter = fields.find(field);
if (iter == fields.end()) {
auto iter = info->fields.find(field);
if (iter == info->fields.end()) {
return {Status::NotOK, "No such field to do index updating"};
}

auto *metadata = iter->second.get();
auto *metadata = iter->second.metadata.get();
auto *storage = indexer->storage;
auto ns_key = ComposeNamespaceKey(ns, name, storage->IsSlotIdEncoded());
auto ns_key = ComposeNamespaceKey(ns, info->name, storage->IsSlotIdEncoded());
if (auto tag = dynamic_cast<SearchTagFieldMetadata *>(metadata)) {
const char delim[] = {tag->separator, '\0'};
auto original_tags = util::Split(original, delim);
Expand Down Expand Up @@ -163,14 +163,14 @@ Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view key,

for (const auto &tag : tags_to_delete) {
auto sub_key = ConstructTagFieldSubkey(field, tag, key);
auto index_key = InternalKey(ns_key, sub_key, this->metadata.version, storage->IsSlotIdEncoded());
auto index_key = InternalKey(ns_key, sub_key, info->metadata.version, storage->IsSlotIdEncoded());

batch->Delete(cf_handle, index_key.Encode());
}

for (const auto &tag : tags_to_add) {
auto sub_key = ConstructTagFieldSubkey(field, tag, key);
auto index_key = InternalKey(ns_key, sub_key, this->metadata.version, storage->IsSlotIdEncoded());
auto index_key = InternalKey(ns_key, sub_key, info->metadata.version, storage->IsSlotIdEncoded());

batch->Put(cf_handle, index_key.Encode(), Slice());
}
Expand All @@ -184,15 +184,15 @@ Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view key,
if (!original.empty()) {
auto original_num = GET_OR_RET(ParseFloat(std::string(original.begin(), original.end())));
auto sub_key = ConstructNumericFieldSubkey(field, original_num, key);
auto index_key = InternalKey(ns_key, sub_key, this->metadata.version, storage->IsSlotIdEncoded());
auto index_key = InternalKey(ns_key, sub_key, info->metadata.version, storage->IsSlotIdEncoded());

batch->Delete(cf_handle, index_key.Encode());
}

if (!current.empty()) {
auto current_num = GET_OR_RET(ParseFloat(std::string(current.begin(), current.end())));
auto sub_key = ConstructNumericFieldSubkey(field, current_num, key);
auto index_key = InternalKey(ns_key, sub_key, this->metadata.version, storage->IsSlotIdEncoded());
auto index_key = InternalKey(ns_key, sub_key, info->metadata.version, storage->IsSlotIdEncoded());

batch->Put(cf_handle, index_key.Encode(), Slice());
}
Expand All @@ -206,11 +206,11 @@ Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view key,
return Status::OK();
}

Status IndexUpdater::Update(const FieldValues &original, std::string_view key, const std::string &ns) {
Status IndexUpdater::Update(const FieldValues &original, std::string_view key, const std::string &ns) const {
auto current = GET_OR_RET(Record(key, ns));

for (const auto &[field, info] : fields) {
if (info->noindex) {
for (const auto &[field, i] : info->fields) {
if (i.metadata->noindex) {
continue;
}

Expand All @@ -230,24 +230,24 @@ Status IndexUpdater::Update(const FieldValues &original, std::string_view key, c
}

void GlobalIndexer::Add(IndexUpdater updater) {
auto &up = updaters.emplace_back(std::move(updater));
for (const auto &prefix : up.prefixes) {
prefix_map.insert(prefix, &up);
updater.indexer = this;
for (const auto &prefix : updater.info->prefixes.prefixes) {
prefix_map.insert(prefix, updater);
}
}

StatusOr<GlobalIndexer::RecordResult> GlobalIndexer::Record(std::string_view key, const std::string &ns) {
auto iter = prefix_map.longest_prefix(key);
if (iter != prefix_map.end()) {
auto updater = iter.value();
return std::make_pair(updater, GET_OR_RET(updater->Record(key, ns)));
return std::make_pair(updater, GET_OR_RET(updater.Record(key, ns)));
}

return {Status::NoPrefixMatched};
}

Status GlobalIndexer::Update(const RecordResult &original, std::string_view key, const std::string &ns) {
return original.first->Update(original.second, key, ns);
return original.first.Update(original.second, key, ns);
}

} // namespace redis
25 changes: 8 additions & 17 deletions src/search/indexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include "commands/commander.h"
#include "config/config.h"
#include "index_info.h"
#include "indexer.h"
#include "search/search_encoding.h"
#include "server/server.h"
Expand Down Expand Up @@ -69,32 +70,22 @@ struct FieldValueRetriever {
struct IndexUpdater {
using FieldValues = std::map<std::string, std::string>;

std::string name;
SearchMetadata metadata;
std::vector<std::string> prefixes;
std::map<std::string, std::unique_ptr<SearchFieldMetadata>> fields;
const kqir::IndexInfo *info = nullptr;
GlobalIndexer *indexer = nullptr;

IndexUpdater(const IndexUpdater &) = delete;
IndexUpdater(IndexUpdater &&) = default;
explicit IndexUpdater(const kqir::IndexInfo *info) : info(info) {}

IndexUpdater &operator=(IndexUpdater &&) = default;
IndexUpdater &operator=(const IndexUpdater &) = delete;

~IndexUpdater() = default;

StatusOr<FieldValues> Record(std::string_view key, const std::string &ns);
StatusOr<FieldValues> Record(std::string_view key, const std::string &ns) const;
Status UpdateIndex(const std::string &field, std::string_view key, std::string_view original,
std::string_view current, const std::string &ns);
Status Update(const FieldValues &original, std::string_view key, const std::string &ns);
std::string_view current, const std::string &ns) const;
Status Update(const FieldValues &original, std::string_view key, const std::string &ns) const;
};

struct GlobalIndexer {
using FieldValues = IndexUpdater::FieldValues;
using RecordResult = std::pair<IndexUpdater *, FieldValues>;
using RecordResult = std::pair<IndexUpdater, FieldValues>;

std::deque<IndexUpdater> updaters;
tsl::htrie_map<char, IndexUpdater *> prefix_map;
tsl::htrie_map<char, IndexUpdater> prefix_map;

engine::Storage *storage = nullptr;

Expand Down
3 changes: 3 additions & 0 deletions src/search/ir.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

#include "fmt/core.h"
#include "ir_iterator.h"
#include "search/index_info.h"
#include "string_util.h"
#include "type_util.h"

Expand Down Expand Up @@ -76,6 +77,7 @@ struct Ref : Node {};

struct FieldRef : Ref {
std::string name;
const FieldInfo *info = nullptr;

explicit FieldRef(std::string name) : name(std::move(name)) {}

Expand Down Expand Up @@ -348,6 +350,7 @@ struct SelectClause : Node {

struct IndexRef : Ref {
std::string name;
const IndexInfo *info = nullptr;

explicit IndexRef(std::string name) : name(std::move(name)) {}

Expand Down
43 changes: 6 additions & 37 deletions src/search/ir_sema_checker.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,57 +23,26 @@
#include <map>
#include <memory>

#include "index_info.h"
#include "ir.h"
#include "search_encoding.h"
#include "storage/redis_metadata.h"

namespace kqir {

struct IndexInfo;

struct FieldInfo {
std::string name;
IndexInfo *index = nullptr;
std::unique_ptr<redis::SearchFieldMetadata> metadata;

FieldInfo(std::string name, std::unique_ptr<redis::SearchFieldMetadata> &&metadata)
: name(std::move(name)), metadata(std::move(metadata)) {}
};

struct IndexInfo {
using FieldMap = std::map<std::string, FieldInfo>;

std::string name;
SearchMetadata metadata;
FieldMap fields;

IndexInfo(std::string name, SearchMetadata metadata) : name(std::move(name)), metadata(std::move(metadata)) {}

void Add(FieldInfo &&field) {
const auto &name = field.name;
field.index = this;
fields.emplace(name, std::move(field));
}
};

using IndexMap = std::map<std::string, IndexInfo>;

struct SemaChecker {
const IndexMap &index_map;

const IndexInfo *current_index = nullptr;

using Result = std::map<const Node *, std::variant<const FieldInfo *, const IndexInfo *>>;
Result result;

explicit SemaChecker(const IndexMap &index_map) : index_map(index_map) {}

Status Check(Node *node) {
if (auto v = dynamic_cast<SearchStmt *>(node)) {
auto index_name = v->index->name;
if (auto iter = index_map.find(index_name); iter != index_map.end()) {
current_index = &iter->second;
result.emplace(v->index.get(), current_index);
v->index->info = current_index;

GET_OR_RET(Check(v->select.get()));
GET_OR_RET(Check(v->query_expr.get()));
Expand All @@ -88,7 +57,7 @@ struct SemaChecker {
if (auto iter = current_index->fields.find(v->field->name); iter == current_index->fields.end()) {
return {Status::NotOK, fmt::format("field `{}` not found in index `{}`", v->field->name, current_index->name)};
} else {
result.emplace(v->field.get(), &iter->second);
v->field->info = &iter->second;
}
} else if (auto v = dynamic_cast<AndExpr *>(node)) {
for (const auto &n : v->inners) {
Expand All @@ -106,7 +75,7 @@ struct SemaChecker {
} else if (auto meta = dynamic_cast<redis::SearchTagFieldMetadata *>(iter->second.metadata.get()); !meta) {
return {Status::NotOK, fmt::format("field `{}` is not a tag field", v->field->name)};
} else {
result.emplace(v->field.get(), &iter->second);
v->field->info = &iter->second;

if (v->tag->val.empty()) {
return {Status::NotOK, "tag cannot be an empty string"};
Expand All @@ -122,14 +91,14 @@ struct SemaChecker {
} else if (!dynamic_cast<redis::SearchNumericFieldMetadata *>(iter->second.metadata.get())) {
return {Status::NotOK, fmt::format("field `{}` is not a numeric field", v->field->name)};
} else {
result.emplace(v->field.get(), &iter->second);
v->field->info = &iter->second;
}
} else if (auto v = dynamic_cast<SelectClause *>(node)) {
for (const auto &n : v->fields) {
if (auto iter = current_index->fields.find(n->name); iter == current_index->fields.end()) {
return {Status::NotOK, fmt::format("field `{}` not found in index `{}`", n->name, current_index->name)};
} else {
result.emplace(n.get(), &iter->second);
n->info = &iter->second;
}
}
} else if (auto v [[maybe_unused]] = dynamic_cast<BoolLiteral *>(node)) {
Expand Down
Loading

0 comments on commit 8fae903

Please sign in to comment.