Skip to content

Commit

Permalink
fix: tentative workaround for #383
Browse files Browse the repository at this point in the history
  • Loading branch information
scambier committed Jul 31, 2024
1 parent a778937 commit 439150a
Showing 1 changed file with 27 additions and 22 deletions.
49 changes: 27 additions & 22 deletions src/search/tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,36 +15,41 @@ export class Tokenizer {
* @returns
*/
public tokenizeForIndexing(text: string): string[] {
  try {
    const wholeWords = this.tokenizeWords(text)

    // Optionally extract markdown urls so they become searchable tokens.
    // Extraction failures are logged and ignored; indexing proceeds without urls.
    let extractedUrls: string[] = []
    if (this.plugin.settings.tokenizeUrls) {
      try {
        extractedUrls = markdownLinkExtractor(text)
      } catch (e) {
        logDebug('Error extracting urls', e)
      }
    }

    const baseTokens = this.tokenizeTokens(text, { skipChs: true })

    // Expansion order matters: hyphen splits are derived from the base
    // tokens, then camelCase splits are derived from that already-expanded
    // list, so hyphen fragments are themselves camelCase-split too.
    const withHyphenParts = [...baseTokens, ...baseTokens.flatMap(splitHyphens)]
    const withCamelParts = [
      ...withHyphenParts,
      ...withHyphenParts.flatMap(splitCamelCase),
    ]

    // Append the whole (untokenized) words, then any extracted urls.
    const combined = [...withCamelParts, ...wholeWords]
    if (extractedUrls.length) {
      combined.push(...extractedUrls)
    }

    // Dedupe while preserving first-occurrence order.
    return [...new Set(combined)]
  } catch (e) {
    // NOTE(review): blanket workaround for #383 — a document whose content
    // cannot be tokenized is skipped instead of aborting the whole indexing run.
    console.error('Error tokenizing text, skipping document', e)
    return []
  }
}

/**
Expand Down

0 comments on commit 439150a

Please sign in to comment.