From d531211030908bdc347db097d6e52b2e8d835386 Mon Sep 17 00:00:00 2001 From: Ryan Malia Date: Thu, 12 Mar 2026 00:14:21 -0700 Subject: [PATCH] fix: allow hyphenated words in vec/hyde queries (#383) The validateSemanticQuery regex rejected any hyphen followed by a word character, blocking common compound words (real-time, multi-client, kebab-case identifiers like better-sqlite3). Tighten the check to only match negation syntax at token boundaries (start of string or after whitespace). See https://github.com/tobi/qmd/issues/383 Co-Authored-By: Claude Opus 4.6 --- src/store.ts | 5 +-- test/structured-search.test.ts | 58 +++++++++++++++++++++++++++++++++- 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/src/store.ts b/src/store.ts index aa5fae4f..e8e5aa1f 100644 --- a/src/store.ts +++ b/src/store.ts @@ -2597,8 +2597,9 @@ function buildFTS5Query(query: string): string | null { * Returns error message if invalid, null if valid. */ export function validateSemanticQuery(query: string): string | null { - // Check for negation syntax - if (/-\w/.test(query) || /-"/.test(query)) { + // Check for negation syntax — only at token boundaries (start of string or after whitespace). + // Hyphenated words like "real-time" or "write-ahead" must not trigger this. + if (/(^|\s)-[\w"]/.test(query)) { return 'Negation (-term) is not supported in vec/hyde queries. Use lex for exclusions.'; } return null; diff --git a/test/structured-search.test.ts b/test/structured-search.test.ts index 5c4e97f3..764d745e 100644 --- a/test/structured-search.test.ts +++ b/test/structured-search.test.ts @@ -361,17 +361,73 @@ describe("lex query syntax", () => { expect(validateSemanticQuery("what is the CAP theorem")).toBeNull(); }); - test("rejects negation syntax", () => { + test("rejects negation at start of query", () => { + expect(validateSemanticQuery("-redis connection pooling")).toContain("Negation"); + }); + + test("rejects negation after space", () => { expect(validateSemanticQuery("performance -sports")).toContain("Negation"); + }); + + test("rejects negated quoted phrase", () => { expect(validateSemanticQuery('-"exact phrase"')).toContain("Negation"); }); + test("rejects multiple negations", () => { + expect(validateSemanticQuery("error handling -java -python")).toContain("Negation"); + }); + + test("rejects negation after leading whitespace", () => { + expect(validateSemanticQuery(" -term at start")).toContain("Negation"); + }); + + test("rejects negation after tab", () => { + expect(validateSemanticQuery("foo\t-bar")).toContain("Negation"); + }); + + test("accepts hyphenated compound words", () => { + expect(validateSemanticQuery("long-lived server shared across clients")).toBeNull(); + expect(validateSemanticQuery("real-time voice processing pipeline")).toBeNull(); + expect(validateSemanticQuery("how does the rate-limiter handle burst traffic")).toBeNull(); + expect(validateSemanticQuery("self-hosted deployment options")).toBeNull(); + expect(validateSemanticQuery("multi-client session architecture")).toBeNull(); + expect(validateSemanticQuery("cross-platform compatibility")).toBeNull(); + expect(validateSemanticQuery("non-blocking I/O model")).toBeNull(); + expect(validateSemanticQuery("in-memory caching strategy")).toBeNull(); + expect(validateSemanticQuery("write-ahead log for crash recovery")).toBeNull(); + expect(validateSemanticQuery("copy-on-write semantics")).toBeNull(); + }); + + test("accepts multiple hyphens in a phrase", () => { + expect(validateSemanticQuery("state-of-the-art embedding models")).toBeNull(); + expect(validateSemanticQuery("end-to-end testing")).toBeNull(); + expect(validateSemanticQuery("man-in-the-middle attack prevention")).toBeNull(); + }); + + test("accepts multiple hyphenated words in one query", () => { + expect(validateSemanticQuery("built-in vs add-on features")).toBeNull(); + }); + + test("accepts short hyphenated terms", () => { + expect(validateSemanticQuery("A-B testing for ML models")).toBeNull(); + expect(validateSemanticQuery("e-commerce platform")).toBeNull(); + }); + + test("accepts bare hyphen without word character", () => { + expect(validateSemanticQuery("-")).toBeNull(); + }); test("accepts hyde-style hypothetical answers", () => { expect(validateSemanticQuery( "The CAP theorem states that a distributed system cannot simultaneously provide consistency, availability, and partition tolerance." )).toBeNull(); }); + + test("accepts hyde with hyphenated words", () => { + expect(validateSemanticQuery( + "HTTP transport runs a single long-lived daemon shared across all clients, avoiding per-session model re-loading." + )).toBeNull(); + }); }); describe("validateLexQuery", () => {