Skip to content

Commit 955d4bb

Browse files
committed
feat(file-search): add support for regex
1 parent aae272f commit 955d4bb

15 files changed

Lines changed: 542 additions & 30 deletions

File tree

tests/CMakeLists.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,14 @@ file(GLOB_RECURSE TEST_SOURCES
55
"lib/**/*.cpp"
66
)
77

8+
# Add source files that are being tested
9+
# Application source compiled directly into the test binary so its functions can be unit-tested.
set(REGEX_UTILS_SRC ${CMAKE_SOURCE_DIR}/vicinae/src/services/files-service/file-indexer/regex-utils.cpp)
12+
813
# Single test executable: the globbed test sources plus the application sources under test.
add_executable(all_tests ${TEST_SOURCES} ${REGEX_UTILS_SRC})
1117

1218
set(FIXTURE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/fixtures)
@@ -17,6 +23,11 @@ target_compile_definitions(all_tests PRIVATE
1723
XDGPP_FIXTURE_DIR="${FIXTURE_DIR}/xdgpp"
1824
)
1925

26+
# Let the tests include application headers by the same relative paths the application uses.
target_include_directories(all_tests
    PRIVATE
        ${CMAKE_SOURCE_DIR}/vicinae/src
        ${CMAKE_SOURCE_DIR}/vicinae/include
)
30+
2031
target_link_libraries(all_tests
2132
PRIVATE
2233
xdgpp
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
#include "services/files-service/file-indexer/regex-utils.hpp"
2+
#include <catch2/catch_test_macros.hpp>
3+
4+
// End-to-end examples mixing wildcards, bracket expressions, groups and carets.
TEST_CASE("extractStaticCharsFromRegex - basic patterns", "[file-indexer]") {
    SECTION(".*config.*db^") {
        // Wildcards separate the literal runs; the trailing caret is discarded.
        REQUIRE(extractStaticCharsFromRegex(".*config.*db^") == "config db");
    }

    SECTION("^[ab]cd") {
        // The caret is dropped because a bracket expression follows it.
        REQUIRE(extractStaticCharsFromRegex("^[ab]cd") == "cd");
    }

    SECTION("^ab[cd](test)k") {
        // The caret survives when it is directly followed by literal text.
        REQUIRE(extractStaticCharsFromRegex("^ab[cd](test)k") == "^ab test k");
    }
}
9+
10+
// One section per regex metacharacter, each checking the literal text left over.
TEST_CASE("extractStaticCharsFromRegex - special regex characters", "[file-indexer]") {
    SECTION("handles dot wildcard") {
        REQUIRE(extractStaticCharsFromRegex("a.b") == "a b");
    }

    SECTION("handles asterisk") {
        REQUIRE(extractStaticCharsFromRegex("a*b") == "a b");
    }

    SECTION("handles plus") {
        REQUIRE(extractStaticCharsFromRegex("a+b") == "a b");
    }

    SECTION("handles question mark") {
        // The optional character is not guaranteed to be present, so only "b" remains.
        REQUIRE(extractStaticCharsFromRegex("a?b") == "b");
    }

    SECTION("handles pipe") {
        // With an alternation neither side is guaranteed, so nothing remains.
        REQUIRE(extractStaticCharsFromRegex("a|b") == "");
    }

    SECTION("handles dollar sign") {
        REQUIRE(extractStaticCharsFromRegex("abc$") == "abc");
    }
}
18+
19+
// Punctuation never reaches the output: it is either removed or turned into a word separator.
// Each section has a distinct name describing what it checks, so a failure report points at
// exactly one assertion.
TEST_CASE("extractStaticCharsFromRegex - keeps only alphanumerical, space, tab, newline", "[file-indexer]") {
    SECTION("strips escaped trailing dot") {
        REQUIRE(extractStaticCharsFromRegex("abc\\.") == "abc");
    }

    SECTION("strips leading punctuation and escaped trailing dot") {
        REQUIRE(extractStaticCharsFromRegex("\\\\-$@#abc\\.") == "abc");
    }

    SECTION("replaces dash between words with a space") {
        REQUIRE(extractStaticCharsFromRegex("abc-def") == "abc def");
    }
}
24+
25+
// Bracket expressions contribute nothing; parenthesised groups contribute their literal content.
TEST_CASE("extractStaticCharsFromRegex - brackets", "[file-indexer]") {
    SECTION("ignores content in square brackets") {
        REQUIRE(extractStaticCharsFromRegex("[abc]def") == "def");
    }

    SECTION("keeps content in parentheses (groups)") {
        REQUIRE(extractStaticCharsFromRegex("(abc)def") == "abcdef");
    }
}
32+
33+
// A caret is kept in the output only when literal text follows it directly.
TEST_CASE("extractStaticCharsFromRegex - caret handling", "[file-indexer]") {
    SECTION("keeps caret followed by regular character") {
        REQUIRE(extractStaticCharsFromRegex("^abc") == "^abc");
    }

    SECTION("removes caret not followed by character") {
        REQUIRE(extractStaticCharsFromRegex("^[abc]") == "");
    }

    SECTION("removes caret followed by special char") {
        REQUIRE(extractStaticCharsFromRegex("^.abc") == "abc");
    }

    SECTION("caret in middle without following char") {
        REQUIRE(extractStaticCharsFromRegex("abc^") == "abc");
    }
}
46+
47+
// Backslash-escaped punctuation separates words instead of appearing in the output.
TEST_CASE("extractStaticCharsFromRegex - escaping", "[file-indexer]") {
    SECTION("escaped special characters become static") {
        REQUIRE(extractStaticCharsFromRegex("a\\.b") == "a b");
    }

    SECTION("escaped bracket") {
        REQUIRE(extractStaticCharsFromRegex("a\\[b") == "a b");
    }

    SECTION("escaped backslash") {
        REQUIRE(extractStaticCharsFromRegex("a\\\\b") == "a b");
    }

    SECTION("caret with escaped character") {
        // The caret is dropped because what follows it is not alphanumeric.
        REQUIRE(extractStaticCharsFromRegex("^\\[test") == "test");
    }
}
56+
57+
// Degenerate inputs: nothing at all, nothing literal, or a single literal.
TEST_CASE("extractStaticCharsFromRegex - empty and edge cases", "[file-indexer]") {
    SECTION("empty string") {
        REQUIRE(extractStaticCharsFromRegex("") == "");
    }

    SECTION("only special characters") {
        REQUIRE(extractStaticCharsFromRegex(".*+?|$") == "");
    }

    SECTION("only brackets") {
        REQUIRE(extractStaticCharsFromRegex("[abc](def)") == "def");
    }

    SECTION("single character") {
        REQUIRE(extractStaticCharsFromRegex("a") == "a");
    }
}
63+
64+
// Counted, lazy and chained quantifiers.
TEST_CASE("extractStaticCharsFromRegex - quantifiers", "[file-indexer]") {
    SECTION("exactly n - {3}") {
        REQUIRE(extractStaticCharsFromRegex("a{3}b") == "a b");
    }

    SECTION("n or more - {3,}") {
        REQUIRE(extractStaticCharsFromRegex("a{3,}b") == "a b");
    }

    SECTION("between m and n - {3,5}") {
        REQUIRE(extractStaticCharsFromRegex("a{3,5}b") == "a b");
    }

    SECTION("lazy quantifiers - ?? *? +?") {
        // The lazy forms give the same output as their greedy counterparts.
        REQUIRE(extractStaticCharsFromRegex("a??b") == "b");
        REQUIRE(extractStaticCharsFromRegex("a*?b") == "a b");
        REQUIRE(extractStaticCharsFromRegex("a+?b") == "a b");
    }

    SECTION("multiple wildcards") {
        REQUIRE(extractStaticCharsFromRegex("a.*b.*c") == "a b c");
    }

    SECTION("combined quantifiers") {
        // "c?" is optional, so "c" is absent from the result.
        REQUIRE(extractStaticCharsFromRegex("a+b*c?d") == "a b d");
    }
}
78+
79+
// Boundary assertions are zero-width and must leave only the word itself.
TEST_CASE("extractStaticCharsFromRegex - word boundaries", "[file-indexer]") {
    SECTION("word boundary \\b") {
        REQUIRE(extractStaticCharsFromRegex("\\btest\\b") == "test");
    }

    SECTION("not word boundary \\B") {
        REQUIRE(extractStaticCharsFromRegex("\\Btest\\B") == "test");
    }

    SECTION("start of word \\<") {
        REQUIRE(extractStaticCharsFromRegex("\\<test") == "test");
    }

    SECTION("end of word \\>") {
        REQUIRE(extractStaticCharsFromRegex("test\\>") == "test");
    }
}
85+
86+
// Shorthand classes yield no literal text; a hex escape yields the character it encodes.
TEST_CASE("extractStaticCharsFromRegex - character classes", "[file-indexer]") {
    SECTION("digit \\d") {
        REQUIRE(extractStaticCharsFromRegex("\\d") == "");
    }

    SECTION("not digit \\D") {
        REQUIRE(extractStaticCharsFromRegex("\\D") == "");
    }

    SECTION("whitespace \\s") {
        REQUIRE(extractStaticCharsFromRegex("\\s") == "");
    }

    SECTION("not whitespace \\S") {
        REQUIRE(extractStaticCharsFromRegex("\\S") == "");
    }

    SECTION("word character \\w") {
        REQUIRE(extractStaticCharsFromRegex("\\w") == "");
    }

    SECTION("not word character \\W") {
        REQUIRE(extractStaticCharsFromRegex("\\W") == "");
    }

    SECTION("hex digit \\x") {
        REQUIRE(extractStaticCharsFromRegex("\\x41") == "A");
    }
}
95+
96+
// Groups, bracket expressions and quantifiers combined in one pattern.
TEST_CASE("extractStaticCharsFromRegex - complex patterns", "[file-indexer]") {
    SECTION("multiple groups") {
        REQUIRE(extractStaticCharsFromRegex("(abc)(def)(ghi)") == "abcdefghi");
    }

    SECTION("nested groups") {
        REQUIRE(extractStaticCharsFromRegex("((ab)c)def") == "abcdef");
    }

    SECTION("alternation in group") {
        // Neither alternative is guaranteed, so the group contributes nothing.
        REQUIRE(extractStaticCharsFromRegex("(a|b)cd") == "cd");
    }

    SECTION("mixed brackets and groups") {
        REQUIRE(extractStaticCharsFromRegex("[abc](def)[ghi]jkl") == "def jkl");
    }

    SECTION("quantifiers with groups") {
        REQUIRE(extractStaticCharsFromRegex("(abc)+def") == "abc def");
    }
}
107+
108+
// String anchors written as escapes are removed; a leading caret before literal text is kept.
TEST_CASE("extractStaticCharsFromRegex - anchors", "[file-indexer]") {
    SECTION("\\A start of string") {
        REQUIRE(extractStaticCharsFromRegex("\\Atest") == "test");
    }

    SECTION("\\Z end of string") {
        REQUIRE(extractStaticCharsFromRegex("test\\Z") == "test");
    }

    SECTION("combined anchors") {
        REQUIRE(extractStaticCharsFromRegex("^test$") == "^test");
    }
}
113+
114+
// Whitespace escapes are translated into the control character they denote.
TEST_CASE("extractStaticCharsFromRegex - special escapes", "[file-indexer]") {
    SECTION("tab \\t") {
        REQUIRE(extractStaticCharsFromRegex("a\\tb") == "a\tb");
    }

    SECTION("newline \\n") {
        REQUIRE(extractStaticCharsFromRegex("a\\nb") == "a\nb");
    }

    SECTION("carriage return \\r") {
        REQUIRE(extractStaticCharsFromRegex("a\\rb") == "a\rb");
    }

    SECTION("vertical tab \\v") {
        REQUIRE(extractStaticCharsFromRegex("a\\vb") == "a\vb");
    }

    SECTION("form feed \\f") {
        REQUIRE(extractStaticCharsFromRegex("a\\fb") == "a\fb");
    }
}
121+
122+
// The second argument drops every extracted word shorter than the given length.
TEST_CASE("extractStaticCharsFromRegex - minWordLength", "[file-indexer]") {
    SECTION("filters single character words") {
        REQUIRE(extractStaticCharsFromRegex("a b c", 2) == "");
        REQUIRE(extractStaticCharsFromRegex("ab cd ef", 2) == "ab cd ef");
        REQUIRE(extractStaticCharsFromRegex("a bc d", 2) == "bc");
    }

    SECTION("filters with minimum length 3") {
        REQUIRE(extractStaticCharsFromRegex("a bc def", 3) == "def");
        REQUIRE(extractStaticCharsFromRegex("foo bar ab", 3) == "foo bar");
        REQUIRE(extractStaticCharsFromRegex("test a b", 3) == "test");
    }

    SECTION("keeps all words when min is 0") {
        REQUIRE(extractStaticCharsFromRegex("a b c", 0) == "a b c");
        REQUIRE(extractStaticCharsFromRegex("test a bc", 0) == "test a bc");
    }

    SECTION("regex patterns with minWordLength") {
        // Filtering applies to the words left after the regex syntax has been removed.
        REQUIRE(extractStaticCharsFromRegex(".*config.*db^", 3) == "config");
        REQUIRE(extractStaticCharsFromRegex("^ab[cd](test)k", 3) == "test");
        REQUIRE(extractStaticCharsFromRegex("a.*b.*c", 2) == "");
    }

    SECTION("empty result when all words too short") {
        REQUIRE(extractStaticCharsFromRegex("a b c d e", 3) == "");
        REQUIRE(extractStaticCharsFromRegex("ab cd", 3) == "");
    }

    SECTION("preserves word boundaries") {
        // Surviving words stay separated by a single space.
        REQUIRE(extractStaticCharsFromRegex("test a b config", 4) == "test config");
        REQUIRE(extractStaticCharsFromRegex("a very long test", 4) == "very long test");
    }
}

vicinae/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ set(TARGET vicinae)
66
find_package(Qt6 REQUIRED COMPONENTS Core Widgets Sql Network Svg DBus)
77
find_package(OpenSSL REQUIRED)
88

9-
list(APPEND LIBS Qt6::Widgets Qt6::Sql Qt6::Network Qt6::Svg Qt6::DBus ${CMARK_LIBRARY} protobuf::libprotobuf minizip OpenSSL::Crypto wayland-client xdgpp qt6keychain)
9+
# Libraries linked into the vicinae target.
list(APPEND LIBS
    Qt6::Widgets
    Qt6::Sql
    Qt6::Network
    Qt6::Svg
    Qt6::DBus
    ${CMARK_LIBRARY}
    protobuf::libprotobuf
    minizip
    OpenSSL::Crypto
    wayland-client
    xdgpp
    qt6keychain
    sqlite3
)
1010

1111
set(WLR_CLIP_BIN ${CMAKE_BINARY_DIR}/wlr-clip/wlr-clip${CMAKE_EXECUTABLE_SUFFIX})
1212
set(ASSET_PATH ${CMAKE_CURRENT_SOURCE_DIR}/assets)
@@ -478,6 +478,8 @@ set(SRCS
478478
src/services/files-service/file-indexer/writer-worker.cpp
479479
src/services/files-service/file-indexer/scan-dispatcher.cpp
480480
src/services/files-service/file-indexer/abstract-scanner.hpp
481+
src/services/files-service/file-indexer/regex-utils.hpp
482+
src/services/files-service/file-indexer/regex-utils.cpp
481483

482484
src/services/extension-registry/extension-registry.hpp
483485
src/services/extension-registry/extension-registry.cpp
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
<RCC>
22
<qresource prefix="database/file-indexer">
33
<file>migrations/001_init.sql</file>
4+
<file>migrations/002_tri_idx.sql</file>
45
</qresource>
56
</RCC>
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
2+
-- Trigram full-text index over indexed_file.name, stored as an external-content FTS5 table.
-- NOTE(review): the triggers below pass indexed_file.id as the FTS rowid. This presumably
-- relies on id being an alias of the rowid of indexed_file. Confirm against 001_init.sql.
CREATE VIRTUAL TABLE IF NOT EXISTS tri_idx USING fts5(name, content='indexed_file',
tokenize='trigram');

-- Populate the index from the rows that already exist in indexed_file.
INSERT INTO tri_idx(tri_idx) VALUES('rebuild');

-- Triggers to keep the FTS index up to date.
-- The trigger body and its END are kept on one line, as in the original migration.
-- NOTE(review): there is no AFTER UPDATE trigger, so an in-place change of indexed_file.name
-- would not reach the index. Confirm rows are only ever inserted and deleted.

CREATE TRIGGER tri_idx_ai AFTER INSERT ON indexed_file BEGIN
INSERT INTO tri_idx(rowid, name) VALUES (new.id, new.name);END;

-- The FTS5 delete command is addressed to the column named after the table itself.
CREATE TRIGGER tri_idx_ad AFTER DELETE ON indexed_file BEGIN
INSERT INTO tri_idx(tri_idx, rowid, name) VALUES('delete', old.id, old.name);END;

vicinae/include/search-files-view.hpp

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,14 @@
77
#include "ui/image/url.hpp"
88
#include "service-registry.hpp"
99
#include "services/files-service/abstract-file-indexer.hpp"
10+
#include "services/files-service/file-indexer/file-indexer.hpp"
1011
#include "ui/omni-list/omni-list.hpp"
1112
#include "utils/utils.hpp"
1213
#include <filesystem>
1314
#include <qfuturewatcher.h>
1415
#include <qlocale.h>
1516
#include <qmimedatabase.h>
17+
#include <qtimer.h>
1618
#include "ui/text-file-viewer/text-file-viewer.hpp"
1719

1820
class FileListItemMetadata : public DetailWithMetadataWidget {
@@ -111,6 +113,7 @@ class SearchFilesView : public ListView {
111113
Watcher m_pendingFileResults;
112114
QString m_lastSearchText;
113115
QString currentQuery;
116+
QTimer m_debounceTimer;
114117

115118
void initialize() override {
116119
setSearchPlaceholderText("Search for files...");
@@ -133,13 +136,24 @@ class SearchFilesView : public ListView {
133136
});
134137
}
135138

136-
void generateFilteredList(const QString &query) {
139+
void executeSearch() {
137140
auto fileService = context()->services->fileService();
138141

139142
if (m_pendingFileResults.isRunning()) { m_pendingFileResults.cancel(); }
140-
m_lastSearchText = query;
143+
m_lastSearchText = currentQuery;
141144
setLoading(true);
142-
m_pendingFileResults.setFuture(fileService->queryAsync(query.toStdString()));
145+
m_pendingFileResults.setFuture(fileService->queryAsync(currentQuery.toStdString()));
146+
}
147+
148+
void generateFilteredList(const QString &query) {
149+
auto fileService = context()->services->fileService();
150+
auto indexer = dynamic_cast<FileIndexer *>(fileService->indexer());
151+
152+
if (indexer && indexer->useRegex()) {
153+
m_debounceTimer.start();
154+
} else {
155+
executeSearch();
156+
}
143157
}
144158

145159
void renderRecentFiles() {
@@ -161,6 +175,11 @@ class SearchFilesView : public ListView {
161175

162176
public:
163177
SearchFilesView() {
178+
// debounce is important for regex searches to avoid excessive computation, could be removed or reduced
179+
// for simple searches
180+
m_debounceTimer.setSingleShot(true);
181+
m_debounceTimer.setInterval(std::chrono::milliseconds(500));
182+
connect(&m_debounceTimer, &QTimer::timeout, this, &SearchFilesView::executeSearch);
164183
connect(&m_pendingFileResults, &Watcher::finished, this, &SearchFilesView::handleSearchResults);
165184
}
166185
};

vicinae/src/extensions/file/file-extension.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,12 @@ class FileExtension : public BuiltinCommandRepository {
7575
watcherPaths.setDescription("Semicolon-separated list of paths watched by experimental watcher");
7676
watcherPaths.setDefaultValue("");
7777

78-
return {paths, watcherPaths};
78+
auto useRegex = Preference::makeCheckbox("useRegex");
79+
useRegex.setTitle("Use regex search");
80+
useRegex.setDescription("Enable regular expression matching for file searches");
81+
useRegex.setDefaultValue(false);
82+
83+
return {paths, watcherPaths, useRegex};
7984
}
8085

8186
void preferenceValuesChanged(const QJsonObject &preferences) const override {

vicinae/src/services/extension-registry/extension-registry.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <qfilesystemwatcher.h>
88
#include <qjsonobject.h>
99
#include <qobject.h>
10+
#include <qtimer.h>
1011
#include <qtmetamacros.h>
1112
#include <vector>
1213
#include <QString>

vicinae/src/services/files-service/abstract-file-indexer.hpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,14 @@ struct Pagination {
3838
int limit = 50;
3939
};
4040

41+
struct FileIndexerQueryParams {
42+
Pagination pagination;
43+
bool useRegex = false;
44+
};
45+
4146
class AbstractFileIndexer : public QObject {
4247
public:
43-
struct QueryParams {
44-
Pagination pagination;
45-
};
48+
using QueryParams = FileIndexerQueryParams;
4649

4750
public:
4851
virtual void start() = 0;

0 commit comments

Comments
 (0)