Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,14 @@ file(GLOB_RECURSE TEST_SOURCES
"lib/**/*.cpp"
)

# Production sources that are compiled straight into the test binary so the
# unit tests can call them without linking the whole application.
set(REGEX_UTILS_SRC
  "${CMAKE_SOURCE_DIR}/vicinae/src/services/files-service/file-indexer/regex-utils.cpp"
)

# Single test executable: every globbed test source plus the code under test.
add_executable(all_tests ${TEST_SOURCES})
target_sources(all_tests PRIVATE ${REGEX_UTILS_SRC})

set(FIXTURE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/fixtures)
Expand All @@ -17,6 +23,11 @@ target_compile_definitions(all_tests PRIVATE
XDGPP_FIXTURE_DIR="${FIXTURE_DIR}/xdgpp"
)

# Include roots that let test sources pull in production headers by their
# in-tree path, e.g. "services/files-service/file-indexer/regex-utils.hpp".
target_include_directories(all_tests
  PRIVATE
    "${CMAKE_SOURCE_DIR}/vicinae/src"
    "${CMAKE_SOURCE_DIR}/vicinae/include"
)

target_link_libraries(all_tests
PRIVATE
xdgpp
Expand Down
155 changes: 155 additions & 0 deletions tests/lib/file-indexer/regex-utils.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
#include "services/files-service/file-indexer/regex-utils.hpp"
#include <catch2/catch_test_macros.hpp>

TEST_CASE("extractStaticCharsFromRegex - basic patterns", "[file-indexer]") {
    // Wildcards split the literal runs into words and the trailing caret is dropped.
    SECTION(".*config.*db^") {
        const auto extracted = extractStaticCharsFromRegex(".*config.*db^");
        REQUIRE(extracted == "config db");
    }

    // A leading character class contributes nothing, and the caret before it goes too.
    SECTION("^[ab]cd") {
        const auto extracted = extractStaticCharsFromRegex("^[ab]cd");
        REQUIRE(extracted == "cd");
    }

    // The caret survives in front of a literal; the class and the group each start a new word.
    SECTION("^ab[cd](test)k") {
        const auto extracted = extractStaticCharsFromRegex("^ab[cd](test)k");
        REQUIRE(extracted == "^ab test k");
    }
}

TEST_CASE("extractStaticCharsFromRegex - special regex characters", "[file-indexer]") {
    // '.', '*' and '+' leave both neighbours in place, separated by a space.
    SECTION("handles dot wildcard") {
        const auto extracted = extractStaticCharsFromRegex("a.b");
        REQUIRE(extracted == "a b");
    }

    SECTION("handles asterisk") {
        const auto extracted = extractStaticCharsFromRegex("a*b");
        REQUIRE(extracted == "a b");
    }

    SECTION("handles plus") {
        const auto extracted = extractStaticCharsFromRegex("a+b");
        REQUIRE(extracted == "a b");
    }

    // '?' makes the preceding character optional, so that character is not static.
    SECTION("handles question mark") {
        const auto extracted = extractStaticCharsFromRegex("a?b");
        REQUIRE(extracted == "b");
    }

    // With a top-level alternation neither side is guaranteed to appear.
    SECTION("handles pipe") {
        const auto extracted = extractStaticCharsFromRegex("a|b");
        REQUIRE(extracted == "");
    }

    // The end anchor is simply removed.
    SECTION("handles dollar sign") {
        const auto extracted = extractStaticCharsFromRegex("abc$");
        REQUIRE(extracted == "abc");
    }
}

// Characters that are neither alphanumeric nor whitespace never reach the output.
// Each section has its own name so that reports and `-c <section>` filters can tell them apart.
TEST_CASE("extractStaticCharsFromRegex - keeps only alphanumeric, space, tab, newline", "[file-indexer]") {
    // A trailing escaped dot is removed without leaving a separator behind.
    SECTION("strips escaped dot") { REQUIRE(extractStaticCharsFromRegex("abc\\.") == "abc"); }

    // Escaped backslash, '-', '$', '@' and '#' in front of the word are all dropped.
    SECTION("strips backslash and punctuation") {
        REQUIRE(extractStaticCharsFromRegex("\\\\-$@#abc\\.") == "abc");
    }

    // A hyphen between two words acts as a word separator.
    SECTION("replaces hyphen with a space") { REQUIRE(extractStaticCharsFromRegex("abc-def") == "abc def"); }
}

TEST_CASE("extractStaticCharsFromRegex - brackets", "[file-indexer]") {
    // A character class matches one of several characters, so none of them is static.
    SECTION("ignores content in square brackets") {
        const auto extracted = extractStaticCharsFromRegex("[abc]def");
        REQUIRE(extracted == "def");
    }

    // A plain group is transparent: its content joins the text that follows it.
    SECTION("keeps content in parentheses (groups)") {
        const auto extracted = extractStaticCharsFromRegex("(abc)def");
        REQUIRE(extracted == "abcdef");
    }
}

TEST_CASE("extractStaticCharsFromRegex - caret handling", "[file-indexer]") {
    // The caret is only kept when a literal character directly follows it.
    SECTION("keeps caret followed by regular character") {
        const auto extracted = extractStaticCharsFromRegex("^abc");
        REQUIRE(extracted == "^abc");
    }

    SECTION("removes caret not followed by character") {
        const auto extracted = extractStaticCharsFromRegex("^[abc]");
        REQUIRE(extracted == "");
    }

    SECTION("removes caret followed by special char") {
        const auto extracted = extractStaticCharsFromRegex("^.abc");
        REQUIRE(extracted == "abc");
    }

    SECTION("caret in middle without following char") {
        const auto extracted = extractStaticCharsFromRegex("abc^");
        REQUIRE(extracted == "abc");
    }
}

TEST_CASE("extractStaticCharsFromRegex - escaping", "[file-indexer]") {
    // Escaped punctuation between two letters ends up as a single space in the output.
    SECTION("escaped special characters become static") {
        const auto extracted = extractStaticCharsFromRegex("a\\.b");
        REQUIRE(extracted == "a b");
    }

    SECTION("escaped bracket") {
        const auto extracted = extractStaticCharsFromRegex("a\\[b");
        REQUIRE(extracted == "a b");
    }

    SECTION("escaped backslash") {
        const auto extracted = extractStaticCharsFromRegex("a\\\\b");
        REQUIRE(extracted == "a b");
    }

    // The caret is not kept when the next token is an escaped non-alphanumeric character.
    SECTION("caret with escaped character") {
        const auto extracted = extractStaticCharsFromRegex("^\\[test");
        REQUIRE(extracted == "test");
    }
}

TEST_CASE("extractStaticCharsFromRegex - empty and edge cases", "[file-indexer]") {
    SECTION("empty string") {
        const auto extracted = extractStaticCharsFromRegex("");
        REQUIRE(extracted == "");
    }

    // A pattern made only of metacharacters has no static text at all.
    SECTION("only special characters") {
        const auto extracted = extractStaticCharsFromRegex(".*+?|$");
        REQUIRE(extracted == "");
    }

    // The class is discarded while the group content is kept.
    SECTION("only brackets") {
        const auto extracted = extractStaticCharsFromRegex("[abc](def)");
        REQUIRE(extracted == "def");
    }

    SECTION("single character") {
        const auto extracted = extractStaticCharsFromRegex("a");
        REQUIRE(extracted == "a");
    }
}

TEST_CASE("extractStaticCharsFromRegex - quantifiers", "[file-indexer]") {
    // Counted repetitions keep the repeated character and start a new word after it.
    SECTION("exactly n - {3}") {
        const auto extracted = extractStaticCharsFromRegex("a{3}b");
        REQUIRE(extracted == "a b");
    }

    SECTION("n or more - {3,}") {
        const auto extracted = extractStaticCharsFromRegex("a{3,}b");
        REQUIRE(extracted == "a b");
    }

    SECTION("between m and n - {3,5}") {
        const auto extracted = extractStaticCharsFromRegex("a{3,5}b");
        REQUIRE(extracted == "a b");
    }

    // The lazy marker does not change what the underlying quantifier yields.
    SECTION("lazy quantifiers - ?? *? +?") {
        const auto lazyOptional = extractStaticCharsFromRegex("a??b");
        REQUIRE(lazyOptional == "b");

        const auto lazyStar = extractStaticCharsFromRegex("a*?b");
        REQUIRE(lazyStar == "a b");

        const auto lazyPlus = extractStaticCharsFromRegex("a+?b");
        REQUIRE(lazyPlus == "a b");
    }

    SECTION("multiple wildcards") {
        const auto extracted = extractStaticCharsFromRegex("a.*b.*c");
        REQUIRE(extracted == "a b c");
    }

    // Only the character made optional by '?' disappears.
    SECTION("combined quantifiers") {
        const auto extracted = extractStaticCharsFromRegex("a+b*c?d");
        REQUIRE(extracted == "a b d");
    }
}

TEST_CASE("extractStaticCharsFromRegex - word boundaries", "[file-indexer]") {
    // Boundary assertions are zero-width, so only the word between them remains.
    SECTION("word boundary \\b") {
        const auto extracted = extractStaticCharsFromRegex("\\btest\\b");
        REQUIRE(extracted == "test");
    }

    SECTION("not word boundary \\B") {
        const auto extracted = extractStaticCharsFromRegex("\\Btest\\B");
        REQUIRE(extracted == "test");
    }

    SECTION("start of word \\<") {
        const auto extracted = extractStaticCharsFromRegex("\\<test");
        REQUIRE(extracted == "test");
    }

    SECTION("end of word \\>") {
        const auto extracted = extractStaticCharsFromRegex("test\\>");
        REQUIRE(extracted == "test");
    }
}

TEST_CASE("extractStaticCharsFromRegex - character classes", "[file-indexer]") {
    // Shorthand classes match a set of characters and therefore yield no static text.
    SECTION("digit \\d") {
        const auto extracted = extractStaticCharsFromRegex("\\d");
        REQUIRE(extracted == "");
    }

    SECTION("not digit \\D") {
        const auto extracted = extractStaticCharsFromRegex("\\D");
        REQUIRE(extracted == "");
    }

    SECTION("whitespace \\s") {
        const auto extracted = extractStaticCharsFromRegex("\\s");
        REQUIRE(extracted == "");
    }

    SECTION("not whitespace \\S") {
        const auto extracted = extractStaticCharsFromRegex("\\S");
        REQUIRE(extracted == "");
    }

    SECTION("word character \\w") {
        const auto extracted = extractStaticCharsFromRegex("\\w");
        REQUIRE(extracted == "");
    }

    SECTION("not word character \\W") {
        const auto extracted = extractStaticCharsFromRegex("\\W");
        REQUIRE(extracted == "");
    }

    // A hex escape names exactly one character and is decoded to it.
    SECTION("hex digit \\x") {
        const auto extracted = extractStaticCharsFromRegex("\\x41");
        REQUIRE(extracted == "A");
    }
}

TEST_CASE("extractStaticCharsFromRegex - complex patterns", "[file-indexer]") {
    // Adjacent and nested groups are flattened into one continuous word.
    SECTION("multiple groups") {
        const auto extracted = extractStaticCharsFromRegex("(abc)(def)(ghi)");
        REQUIRE(extracted == "abcdefghi");
    }

    SECTION("nested groups") {
        const auto extracted = extractStaticCharsFromRegex("((ab)c)def");
        REQUIRE(extracted == "abcdef");
    }

    // Alternatives inside a group are dropped; the text after the group is kept.
    SECTION("alternation in group") {
        const auto extracted = extractStaticCharsFromRegex("(a|b)cd");
        REQUIRE(extracted == "cd");
    }

    SECTION("mixed brackets and groups") {
        const auto extracted = extractStaticCharsFromRegex("[abc](def)[ghi]jkl");
        REQUIRE(extracted == "def jkl");
    }

    // A quantified group keeps its content and ends the current word.
    SECTION("quantifiers with groups") {
        const auto extracted = extractStaticCharsFromRegex("(abc)+def");
        REQUIRE(extracted == "abc def");
    }
}

TEST_CASE("extractStaticCharsFromRegex - anchors", "[file-indexer]") {
    // The escaped anchors vanish from the output.
    SECTION("\\A start of string") {
        const auto extracted = extractStaticCharsFromRegex("\\Atest");
        REQUIRE(extracted == "test");
    }

    SECTION("\\Z end of string") {
        const auto extracted = extractStaticCharsFromRegex("test\\Z");
        REQUIRE(extracted == "test");
    }

    // The leading caret is kept in front of a literal while the dollar sign is removed.
    SECTION("combined anchors") {
        const auto extracted = extractStaticCharsFromRegex("^test$");
        REQUIRE(extracted == "^test");
    }
}

TEST_CASE("extractStaticCharsFromRegex - special escapes", "[file-indexer]") {
    // Whitespace escapes are decoded to the control character they stand for.
    SECTION("tab \\t") {
        const auto extracted = extractStaticCharsFromRegex("a\\tb");
        REQUIRE(extracted == "a\tb");
    }

    SECTION("newline \\n") {
        const auto extracted = extractStaticCharsFromRegex("a\\nb");
        REQUIRE(extracted == "a\nb");
    }

    SECTION("carriage return \\r") {
        const auto extracted = extractStaticCharsFromRegex("a\\rb");
        REQUIRE(extracted == "a\rb");
    }

    SECTION("vertical tab \\v") {
        const auto extracted = extractStaticCharsFromRegex("a\\vb");
        REQUIRE(extracted == "a\vb");
    }

    SECTION("form feed \\f") {
        const auto extracted = extractStaticCharsFromRegex("a\\fb");
        REQUIRE(extracted == "a\fb");
    }
}

TEST_CASE("extractStaticCharsFromRegex - minWordLength", "[file-indexer]") {
    // The second argument is the minimum length a word needs in order to stay in the result.
    SECTION("filters single character words") {
        const auto allTooShort = extractStaticCharsFromRegex("a b c", 2);
        REQUIRE(allTooShort == "");

        const auto allLongEnough = extractStaticCharsFromRegex("ab cd ef", 2);
        REQUIRE(allLongEnough == "ab cd ef");

        const auto onlyMiddleKept = extractStaticCharsFromRegex("a bc d", 2);
        REQUIRE(onlyMiddleKept == "bc");
    }

    SECTION("filters with minimum length 3") {
        const auto lastWordKept = extractStaticCharsFromRegex("a bc def", 3);
        REQUIRE(lastWordKept == "def");

        const auto firstTwoKept = extractStaticCharsFromRegex("foo bar ab", 3);
        REQUIRE(firstTwoKept == "foo bar");

        const auto firstWordKept = extractStaticCharsFromRegex("test a b", 3);
        REQUIRE(firstWordKept == "test");
    }

    SECTION("keeps all words when min is 0") {
        const auto singleLetters = extractStaticCharsFromRegex("a b c", 0);
        REQUIRE(singleLetters == "a b c");

        const auto mixedLengths = extractStaticCharsFromRegex("test a bc", 0);
        REQUIRE(mixedLengths == "test a bc");
    }

    // The length filter applies to the words extracted from a real pattern as well.
    SECTION("regex patterns with minWordLength") {
        const auto wildcardPattern = extractStaticCharsFromRegex(".*config.*db^", 3);
        REQUIRE(wildcardPattern == "config");

        const auto anchoredPattern = extractStaticCharsFromRegex("^ab[cd](test)k", 3);
        REQUIRE(anchoredPattern == "test");

        const auto singleLetterPattern = extractStaticCharsFromRegex("a.*b.*c", 2);
        REQUIRE(singleLetterPattern == "");
    }

    SECTION("empty result when all words too short") {
        const auto singleLetters = extractStaticCharsFromRegex("a b c d e", 3);
        REQUIRE(singleLetters == "");

        const auto twoLetterWords = extractStaticCharsFromRegex("ab cd", 3);
        REQUIRE(twoLetterWords == "");
    }

    // Surviving words stay separated by a single space once the short ones are removed.
    SECTION("preserves word boundaries") {
        const auto outerWordsKept = extractStaticCharsFromRegex("test a b config", 4);
        REQUIRE(outerWordsKept == "test config");

        const auto leadingLetterDropped = extractStaticCharsFromRegex("a very long test", 4);
        REQUIRE(leadingLetterDropped == "very long test");
    }
}
4 changes: 3 additions & 1 deletion vicinae/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ find_package(Qt6 REQUIRED COMPONENTS Core Widgets Sql Network Svg DBus Concurren
find_package(OpenSSL REQUIRED)
find_package(LibXml2 REQUIRED)

list(APPEND LIBS Qt6::Widgets Qt6::Sql Qt6::Network Qt6::Svg Qt6::DBus Qt6::Concurrent ${CMARK_LIBRARY} ${CMARK_EXT_LIBRARY} protobuf::libprotobuf minizip OpenSSL::Crypto wayland-client xdgpp qt6keychain LibXml2::LibXml2)
# sqlite3 is linked directly (in addition to Qt6::Sql) because the file indexer
# calls the SQLite C API itself (sqlite3_create_function) to register REGEXP.
list(APPEND LIBS Qt6::Widgets Qt6::Sql Qt6::Network Qt6::Svg Qt6::DBus Qt6::Concurrent ${CMARK_LIBRARY} ${CMARK_EXT_LIBRARY} protobuf::libprotobuf minizip OpenSSL::Crypto wayland-client xdgpp qt6keychain sqlite3 LibXml2::LibXml2)

set(WLR_CLIP_BIN ${CMAKE_BINARY_DIR}/wlr-clip/wlr-clip${CMAKE_EXECUTABLE_SUFFIX})
set(ASSET_PATH ${CMAKE_CURRENT_SOURCE_DIR}/assets)
Expand Down Expand Up @@ -504,6 +504,8 @@ set(SRCS
src/services/files-service/file-indexer/writer-worker.cpp
src/services/files-service/file-indexer/scan-dispatcher.cpp
src/services/files-service/file-indexer/abstract-scanner.hpp
src/services/files-service/file-indexer/regex-utils.hpp
src/services/files-service/file-indexer/regex-utils.cpp
src/services/files-service/file-indexer/file-indexer-query-engine.hpp

src/services/extension-registry/extension-registry.hpp
Expand Down
1 change: 1 addition & 0 deletions vicinae/database/file-indexer/migrations.qrc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
<RCC>
<qresource prefix="database/file-indexer">
<file>migrations/001_init.sql</file>
<file>migrations/002_tri_idx.sql</file>
</qresource>
</RCC>
13 changes: 13 additions & 0 deletions vicinae/database/file-indexer/migrations/002_tri_idx.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@

-- Trigram full-text index over indexed_file.name, used by substring and regex file search.
-- It is an external-content table: the text lives in indexed_file and tri_idx only stores the index.
-- content_rowid is spelled out because the triggers below and the search query both key on indexed_file.id.
-- NOTE(review): the trigram tokenizer needs SQLite 3.34 or newer - confirm the minimum bundled version.
CREATE VIRTUAL TABLE IF NOT EXISTS tri_idx USING fts5(name, content='indexed_file',
content_rowid='id', tokenize='trigram');

-- Index the rows that already exist in indexed_file.
INSERT INTO tri_idx(tri_idx) VALUES('rebuild');

-- Triggers to keep the FTS index up to date.
-- IF NOT EXISTS matches the table statement above, so re-running this file does not fail half-way.
-- NOTE(review): END stays on the same line as the trigger body, as in the original file. Presumably the
-- migration runner splits statements at line-terminated semicolons - confirm before reformatting.
-- NOTE(review): there is no AFTER UPDATE trigger, so a row whose name is changed in place would keep a
-- stale index entry - confirm that names are only ever inserted or deleted.

CREATE TRIGGER IF NOT EXISTS tri_idx_ai AFTER INSERT ON indexed_file BEGIN
INSERT INTO tri_idx(rowid, name) VALUES (new.id, new.name);END;

CREATE TRIGGER IF NOT EXISTS tri_idx_ad AFTER DELETE ON indexed_file BEGIN
INSERT INTO tri_idx(tri_idx, rowid, name) VALUES('delete', old.id, old.name);END;
7 changes: 6 additions & 1 deletion vicinae/src/extensions/file/file-extension.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,12 @@ class FileExtension : public BuiltinCommandRepository {
watcherPaths.setDescription("Semicolon-separated list of paths watched by experimental watcher");
watcherPaths.setDefaultValue("");

return {indexing, paths, excludedPaths, watcherPaths};
auto useRegex = Preference::makeCheckbox("useRegex");
useRegex.setTitle("Use regex search");
useRegex.setDescription("Enable regular expression matching for file searches");
useRegex.setDefaultValue(false);

return {indexing, paths, excludedPaths, watcherPaths, useRegex};
}

void preferenceValuesChanged(const QJsonObject &preferences) const override {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "extension-manifest.hpp"
#include "utils.hpp"
#include <QJsonArray>

namespace fs = std::filesystem;

Expand Down Expand Up @@ -100,7 +101,7 @@ Preference ExtensionManifest::parsePreferenceFromObject(const QJsonObject &obj)
for (const auto &child : data) {
auto obj = child.toObject();

options.push_back({.title = obj["title"].toString(), .value = obj["value"].toString()});
options.push_back(Preference::DropdownData::Option{obj["title"].toString(), obj["value"].toString()});
}

base.setData(Preference::DropdownData{options});
Expand Down Expand Up @@ -150,7 +151,7 @@ CommandArgument ExtensionManifest::parseArgumentFromObject(const QJsonObject &ob
for (const auto &child : data) {
auto obj = child.toObject();

options.push_back({.title = obj["title"].toString(), .value = obj["value"].toString()});
options.push_back(CommandArgument::DropdownData{obj["title"].toString(), obj["value"].toString()});
}

arg.data = options;
Expand Down
9 changes: 6 additions & 3 deletions vicinae/src/services/files-service/abstract-file-indexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,14 @@ struct Pagination {
int limit = 50;
};

/** Options that accompany a file-indexer search query. */
struct FileIndexerQueryParams {
    /** Offset/limit window applied to the result set. */
    Pagination pagination;
    /** When true the search text is handled as a regular expression. Off by default. */
    bool useRegex{false};
};

class AbstractFileIndexer : public QObject {
public:
struct QueryParams {
Pagination pagination;
};
using QueryParams = FileIndexerQueryParams;

public:
virtual void start() = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "services/files-service/file-indexer/relevancy-scorer.hpp"
#include "utils/migration-manager/migration-manager.hpp"
#include "utils/utils.hpp"
#include "regex-utils.hpp"
#include <chrono>
#include <qlogging.h>
#include <qsqldatabase.h>
Expand All @@ -12,6 +13,10 @@
#include <quuid.h>
#include <qdebug.h>
#include <QSqlError>
#include <QRegularExpression>
#include <QSqlDriver>
#include <QVariant>
#include <sqlite3.h>

// clang-format off
static const std::vector<std::string> SQLITE_PRAGMAS = {
Expand Down Expand Up @@ -247,7 +252,32 @@ QSqlDatabase *FileIndexerDatabase::database() { return &m_db; }

std::vector<fs::path> FileIndexerDatabase::search(std::string_view searchQuery,
const AbstractFileIndexer::QueryParams &params) {
auto queryString = QString(R"(
QString queryString;
std::string_view regexString;
std::string staticChars;
bool hasPattern = false;

if (params.useRegex) {
regexString = searchQuery;
// trigram search requires at least 3 characters
staticChars = extractStaticCharsFromRegex(searchQuery, 3);
searchQuery = staticChars;
bool hasSearchString = !searchQuery.empty();
hasPattern = !regexString.empty();

queryString =
QString(R"(
SELECT f.path, tri_idx.rank FROM indexed_file f
JOIN tri_idx ON tri_idx.rowid = f.id
WHERE %2%3
ORDER BY f.relevancy_score, tri_idx.rank
LIMIT :limit
OFFSET :offset
)")
.arg(hasSearchString ? "tri_idx MATCH '" + qStringFromStdView(searchQuery) + "'" : "1=1")
.arg(hasPattern ? " AND f.name REGEXP :pattern" : "");
} else {
queryString = QString(R"(
SELECT path, rank FROM indexed_file f
JOIN unicode_idx ON unicode_idx.rowid = f.id
WHERE
Expand All @@ -256,11 +286,13 @@ std::vector<fs::path> FileIndexerDatabase::search(std::string_view searchQuery,
LIMIT :limit
OFFSET :offset
)")
.arg(qStringFromStdView(searchQuery));
.arg(qStringFromStdView(searchQuery));
}

QSqlQuery query(m_db);

query.prepare(queryString);

if (params.useRegex && hasPattern) { query.bindValue(":pattern", qStringFromStdView(regexString)); }
query.bindValue(":limit", params.pagination.limit);
query.bindValue(":offset", params.pagination.offset);

Expand Down Expand Up @@ -424,6 +456,31 @@ void FileIndexerDatabase::indexFiles(const std::vector<std::filesystem::path> &p
if (!m_db.commit()) { qCritical() << "Failed to commit batchIndex" << m_db.lastError(); }
}

/**
 * SQLite scalar function backing the `X REGEXP Y` operator.
 *
 * SQLite rewrites `X REGEXP Y` into `regexp(Y, X)`, so argv[0] is the pattern and argv[1] is the
 * text being tested. The result is 1 on a match and 0 otherwise, NULL when either argument is NULL,
 * and an SQL error when the argument count is wrong or the pattern is not a valid
 * QRegularExpression.
 *
 * The QRegularExpression is cached as SQLite auxiliary data on argument 0. A query that tests
 * many rows against one pattern therefore builds the regex once instead of once per row.
 */
static void sqliteRegexpCallback(sqlite3_context *context, int argc, sqlite3_value **argv) {
    if (argc != 2) {
        sqlite3_result_error(context, "REGEXP requires 2 arguments", -1);
        return;
    }

    const char *pattern = reinterpret_cast<const char *>(sqlite3_value_text(argv[0]));
    const char *text = reinterpret_cast<const char *>(sqlite3_value_text(argv[1]));

    if (!pattern || !text) {
        sqlite3_result_null(context);
        return;
    }

    // SQLite only retains auxiliary data while argument 0 is constant for the statement (a literal
    // or a bound parameter) and discards it when that value changes, so a hit always belongs to
    // the current pattern.
    const auto *cached = static_cast<const QRegularExpression *>(sqlite3_get_auxdata(context, 0));
    const QRegularExpression regex = cached ? *cached : QRegularExpression(QString::fromUtf8(pattern));

    if (!regex.isValid()) {
        sqlite3_result_error(context, "Invalid regular expression", -1);
        return;
    }

    const bool matched = regex.match(QString::fromUtf8(text)).hasMatch();

    if (!cached) {
        // Hand SQLite its own heap copy only after matching: SQLite may run the destructor
        // straight away, and the local `regex` stays valid either way. QRegularExpression is
        // implicitly shared, so the copy reuses the already compiled pattern.
        sqlite3_set_auxdata(context, 0, new QRegularExpression(regex),
                            [](void *ptr) { delete static_cast<QRegularExpression *>(ptr); });
    }

    sqlite3_result_int(context, matched ? 1 : 0);
}

FileIndexerDatabase::FileIndexerDatabase() : m_connectionId(createRandomConnectionId()) {
m_db = QSqlDatabase::addDatabase("QSQLITE", m_connectionId);
m_db.setDatabaseName(getDatabasePath().c_str());
Expand All @@ -433,6 +490,15 @@ FileIndexerDatabase::FileIndexerDatabase() : m_connectionId(createRandomConnecti
return;
}

QVariant v = m_db.driver()->handle();
if (v.isValid() && qstrcmp(v.typeName(), "sqlite3*") == 0) {
sqlite3 *db_handle = *static_cast<sqlite3 **>(v.data());
if (db_handle) {
sqlite3_create_function(db_handle, "REGEXP", 2, SQLITE_UTF8, nullptr, sqliteRegexpCallback, nullptr,
nullptr);
}
}

QSqlQuery query(m_db);

for (const auto &pragma : SQLITE_PRAGMAS) {
Expand Down
Loading
Loading