Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into fix437
Browse files Browse the repository at this point in the history
  • Loading branch information
JinHai-CN committed Dec 25, 2024
2 parents ba245a6 + 31fa6b1 commit 4cca6c4
Show file tree
Hide file tree
Showing 8 changed files with 109 additions and 18 deletions.
14 changes: 10 additions & 4 deletions src/common/analyzer/analyzer_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ constexpr u64 Str2Int(const char *str, u64 last_value = basis) {
return (*str != '\0' && *str != '-') ? Str2Int(str + 1, (*str ^ last_value) * prime) : last_value;
}

// Exposes the file-local Str2Int hash so code outside this file can compute the
// same integer key that the `case Str2Int(...)` labels in GetAnalyzer use.
// Str2Int stops hashing at the terminating NUL or at the first '-', so a
// "-suffix" appended to an analyzer name does not affect the returned key.
u64 AnalyzerPool::AnalyzerNameToInt(const char *str) {
    const u64 name_key = Str2Int(str);
    return name_key;
}

// Case-insensitive comparison of two characters: each one is widened to int and
// passed through ToLower, and the lowered values are compared.
bool IcharEquals(char a, char b) {
    const auto lowered_a = ToLower(static_cast<int>(a));
    const auto lowered_b = ToLower(static_cast<int>(b));
    return lowered_a == lowered_b;
}

// Case-insensitive equality of two strings: true only when both views have the
// same length and every pair of characters matches under IcharEquals.
bool IEquals(std::string_view lhs, std::string_view rhs) {
    // The four-iterator overload compares lengths as well as contents.
    return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end(), IcharEquals);
}
Expand Down Expand Up @@ -321,11 +325,13 @@ Tuple<UniquePtr<Analyzer>, Status> AnalyzerPool::GetAnalyzer(const std::string_v
}
return {MakeUnique<NGramAnalyzer>(ngram), Status::OK()};
}
case Str2Int(KEYWORD.data()): {
return {MakeUnique<WhitespaceAnalyzer>(), Status::OK()};
}
case Str2Int(KEYWORD.data()):
case Str2Int(WHITESPACE.data()): {
return {MakeUnique<WhitespaceAnalyzer>(), Status::OK()};
const auto suffix_pos = name.find_first_of('-');
if (suffix_pos == std::string_view::npos || suffix_pos + 1 == name.size()) {
return {MakeUnique<WhitespaceAnalyzer>(), Status::OK()};
}
return {MakeUnique<WhitespaceAnalyzer>(name.substr(suffix_pos + 1)), Status::OK()};
}
default: {
if(std::filesystem::is_regular_file(name)) {
Expand Down
2 changes: 2 additions & 0 deletions src/common/analyzer/analyzer_pool.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ public:

Tuple<UniquePtr<Analyzer>, Status> GetAnalyzer(const std::string_view &name);

static u64 AnalyzerNameToInt(const char *str);

void Set(const std::string_view &name);

public:
Expand Down
40 changes: 34 additions & 6 deletions src/common/analyzer/whitespace_analyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,42 @@ import analyzer;

namespace infinity {

// Builds an analyzer that splits on a custom set of delimiter characters.
// The characters are stored sorted and de-duplicated, so delimiters_ holds each
// distinct delimiter exactly once.
WhitespaceAnalyzer::WhitespaceAnalyzer(const std::string_view delimiters) : delimiters_(delimiters) {
    std::sort(delimiters_.begin(), delimiters_.end());
    // std::unique only compacts adjacent duplicates, hence the sort above.
    delimiters_.erase(std::unique(delimiters_.begin(), delimiters_.end()), delimiters_.end());
}

int WhitespaceAnalyzer::AnalyzeImpl(const Term &input, void *data, HookType func) {
std::istringstream is(input.text_);
std::string t;
u32 offset = 0;
while (is >> t) {
func(data, t.data(), t.size(), offset++, 0, Term::AND, 0, false);
if (delimiters_.empty()) {
// split by std::isspace()
std::istringstream is(input.text_);
std::string t;
u32 offset = 0;
while (is >> t) {
func(data, t.data(), t.size(), offset++, 0, Term::AND, 0, false);
}
return 0;
} else {
// split by delimiters
const std::string_view delimiters = delimiters_;
const std::string_view input_text = input.text_;
u32 search_start = 0;
u32 offset = 0;
while (search_start < input_text.size()) {
const auto found = input_text.find_first_of(delimiters, search_start);
if (found == std::string_view::npos) {
func(data, input_text.data() + search_start, input_text.size() - search_start, offset++, 0, Term::AND, 0, false);
break;
}
if (found > search_start) {
func(data, input_text.data() + search_start, found - search_start, offset++, 0, Term::AND, 0, false);
}
search_start = found + 1;
}
return 0;
}
return 0;
}

} // namespace infinity
3 changes: 3 additions & 0 deletions src/common/analyzer/whitespace_analyzer.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@ import analyzer;
namespace infinity {

export class WhitespaceAnalyzer : public Analyzer {
String delimiters_{};

public:
WhitespaceAnalyzer() = default;
explicit WhitespaceAnalyzer(std::string_view delimiters);
~WhitespaceAnalyzer() override = default;

protected:
Expand Down
5 changes: 5 additions & 0 deletions src/network/peer_thrift_client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,11 @@ void PeerClient::HeartBeat(HeartBeatPeerTask *peer_task) {
peer_task->error_code_ = static_cast<i64>(ErrorCode::kCantConnectLeader);
return;
}
case TTransportExceptionType::NOT_OPEN: {
peer_task->error_message_ = thrift_exception.what();
peer_task->error_code_ = static_cast<i64>(ErrorCode::kCantConnectLeader);
return;
}
default: {
String error_message = "Heartbeat: error happens when data transfer to leader";
UnrecoverableError(error_message);
Expand Down
16 changes: 9 additions & 7 deletions src/storage/invertedindex/search/search_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,15 +147,16 @@ inline TermList GetTermListFromAnalyzer(const std::string &analyzer_name, Analyz
}

// Returns the analyzer name registered for `field` in `field2analyzer`.
// Falls back to "standard" when `field` is empty or has no entry in the map.
// An empty `field` is never looked up, so a map entry keyed by "" is ignored.
inline std::string GetAnalyzerName(const std::string &field, const std::map<std::string, std::string> &field2analyzer) {
    if (field.empty()) {
        return "standard";
    }
    const auto it = field2analyzer.find(field);
    return it != field2analyzer.end() ? it->second : std::string{"standard"};
}

// Integer key of AnalyzerPool::KEYWORD as produced by AnalyzerPool::AnalyzerNameToInt.
// Analyzer names are compared against this key instead of against a string literal.
inline static const auto keyword_analyzer_name_int = AnalyzerPool::AnalyzerNameToInt(AnalyzerPool::KEYWORD.data());

std::unique_ptr<QueryNode> SearchDriver::ParseSingle(const std::string &query, const std::string *default_field_ptr) const {
std::istringstream iss(query);
if (!iss.good()) {
Expand All @@ -166,7 +167,8 @@ std::unique_ptr<QueryNode> SearchDriver::ParseSingle(const std::string &query, c
}
const auto &default_field = *default_field_ptr;
const auto default_analyzer_name = GetAnalyzerName(default_field, field2analyzer_);
if (default_analyzer_name != "keyword" && operator_option_ == FulltextQueryOperatorOption::kInfinitySyntax) {
if (const auto default_analyzer_name_int = AnalyzerPool::AnalyzerNameToInt(default_analyzer_name.c_str());
default_analyzer_name_int != keyword_analyzer_name_int && operator_option_ == FulltextQueryOperatorOption::kInfinitySyntax) {
// use parser
std::unique_ptr<QueryNode> result;
const auto scanner = std::make_unique<SearchScannerInfinitySyntax>(&iss);
Expand All @@ -185,14 +187,14 @@ std::unique_ptr<QueryNode> SearchDriver::ParseSingle(const std::string &query, c
if (terms.empty()) {
return nullptr;
}
if (terms.size() == 1 && default_analyzer_name != "keyword") {
if (terms.size() == 1 && default_analyzer_name_int != keyword_analyzer_name_int) {
auto q = std::make_unique<TermQueryNode>();
q->term_ = terms.front().text_;
q->column_ = default_field;
return q;
}
std::unique_ptr<MultiQueryNode> multi_query;
if (default_analyzer_name == "keyword") {
if (default_analyzer_name_int == keyword_analyzer_name_int) {
multi_query = std::make_unique<KeywordQueryNode>();
} else if (operator_option_ == FulltextQueryOperatorOption::kOr) {
multi_query = std::make_unique<OrQueryNode>();
Expand Down Expand Up @@ -226,7 +228,7 @@ SearchDriver::AnalyzeAndBuildQueryNode(const std::string &field, const std::stri
if (terms.empty()) {
return nullptr;
}
if (analyzer_name == "keyword") {
if (AnalyzerPool::AnalyzerNameToInt(analyzer_name.c_str()) == keyword_analyzer_name_int) {
auto result = std::make_unique<KeywordQueryNode>();
for (const auto &term : terms) {
auto subquery = std::make_unique<TermQueryNode>();
Expand Down
2 changes: 1 addition & 1 deletion src/storage/io/s3_client_minio.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ Status S3ClientMinio::BucketExists(const String &bucket_name) {
return Status::MinioInvalidAccessKey(resp.Error().String());
}
default: {
UnrecoverableError(fmt::format("Unable to do bucket existence check: {}", resp.Error().String()));
UnrecoverableError(fmt::format("Unable to do bucket existence check: {}, Please check if the MINIO connection", resp.Error().String()));
return Status::OK();
}
}
Expand Down
45 changes: 45 additions & 0 deletions test/sql/dql/fulltext/fulltext_whitespace_sharp.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@

# Full-text search on a column indexed with the whitespace analyzer configured
# with a custom delimiter suffix ('whitespace-#@$').
statement ok
DROP TABLE IF EXISTS ft_whitespace_sharp;

statement ok
CREATE TABLE ft_whitespace_sharp(num int, doc varchar DEFAULT 'default text');

# Row 4 supplies no doc value, so it receives the column DEFAULT.
statement ok
INSERT INTO ft_whitespace_sharp VALUES (1, '2020-01-01#2023-01-01'), (2, '2023@01$01'), (3, '01 01#@2023'), (4);

# The characters after 'whitespace-' ('#', '@', '$') form the delimiter set;
# spaces and '-' are not delimiters for this index.
statement ok
CREATE INDEX ft_index ON ft_whitespace_sharp(doc) USING FULLTEXT WITH (analyzer = 'whitespace-#@$');

# Sanity check: all four rows are stored as inserted.
query I
SELECT * FROM ft_whitespace_sharp;
----
1 2020-01-01#2023-01-01
2 2023@01$01
3 01 01#@2023
4 default text

# '2023-01-01' is a single term (no '-' splitting); only row 1 is expected.
query I
SELECT * FROM ft_whitespace_sharp SEARCH MATCH TEXT ('doc^4.5', '2023-01-01^5.0', 'topn=10');
----
1 2020-01-01#2023-01-01

# The quoted text contains a space, which is not a delimiter; only row 3 holds '01 01' as one piece.
query II
SELECT * FROM ft_whitespace_sharp SEARCH MATCH TEXT ('doc^4.5', '"01 01"^3.3', 'topn=10');
----
3 01 01#@2023

# The quoted text contains the delimiter '#'; presumably it is split into the
# terms '01' and '01', which appear next to each other only in row 2.
query III
SELECT * FROM ft_whitespace_sharp SEARCH MATCH TEXT ('doc^4.5', '"01#01"^3.3', 'topn=10');
----
2 2023@01$01

# '2023' is a standalone term in rows 2 and 3; row 1 is not expected because
# there '2023' is only part of the longer piece '2023-01-01'.
query IV rowsort
SELECT * FROM ft_whitespace_sharp SEARCH MATCH TEXT ('doc^4.5', '2023^3.3', 'topn=10');
----
2 2023@01$01
3 01 01#@2023

# Clean up
statement ok
DROP TABLE ft_whitespace_sharp;

0 comments on commit 4cca6c4

Please sign in to comment.