Skip to content

Commit

Permalink
#363 - Gate url tokenization behind a setting
Browse files Browse the repository at this point in the history
  • Loading branch information
scambier committed Apr 15, 2024
1 parent e5ac29c commit 13c7ec7
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 5 deletions.
11 changes: 7 additions & 4 deletions src/search/tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
chsRegex,
getChsSegmenter,
} from 'src/globals'
import { settings } from 'src/settings'
import { logDebug, splitCamelCase, splitHyphens } from 'src/tools/utils'
const markdownLinkExtractor = require('markdown-link-extractor')

Expand All @@ -25,10 +26,12 @@ function tokenizeTokens(text: string): string[] {
export function tokenizeForIndexing(text: string): string[] {
const words = tokenizeWords(text)
let urls: string[] = []
try {
urls = markdownLinkExtractor(text)
} catch (e) {
logDebug('Error extracting urls', e)
if (settings.tokenizeUrls) {
try {
urls = markdownLinkExtractor(text)
} catch (e) {
logDebug('Error extracting urls', e)
}
}

let tokens = tokenizeTokens(text)
Expand Down
18 changes: 17 additions & 1 deletion src/settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ export interface OmnisearchSettings extends WeightingSettings {
welcomeMessage: string
/** If a query returns 0 result, try again with more relax conditions */
simpleSearch: boolean
tokenizeUrls: boolean
highlight: boolean
splitCamelCase: boolean
openInNewPane: boolean
Expand Down Expand Up @@ -199,7 +200,7 @@ export class SettingsTab extends PluginSettingTab {
.setValue(settings.unsupportedFilesIndexing)
.onChange(async v => {
await database.clearCache()
; (settings.unsupportedFilesIndexing as any) = v
;(settings.unsupportedFilesIndexing as any) = v
await saveSettings(this.plugin)
})
})
Expand Down Expand Up @@ -321,6 +322,20 @@ export class SettingsTab extends PluginSettingTab {
})
)

// "Tokenize URLs" toggle: shows the current value of `settings.tokenizeUrls`
// and, on every change, stores the new value and persists it via saveSettings().
new Setting(containerEl)
  .setName('Tokenize URLs')
  .setDesc(
    `Enable this if you want to be able to search for URLs as separate words.
This has a strong impact on indexing performance, and can crash Obsidian under certain conditions.`
  )
  .addToggle(toggle =>
    toggle.setValue(settings.tokenizeUrls).onChange(async v => {
      settings.tokenizeUrls = v
      // Persist immediately so the choice is written out as soon as it changes.
      await saveSettings(this.plugin)
    })
  )

// Open in new pane
new Setting(containerEl)
.setName('Open in new pane')
Expand Down Expand Up @@ -659,6 +674,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
highlight: true,
showPreviousQueryResults: true,
simpleSearch: false,
tokenizeUrls: false,
fuzziness: '1',

weightBasename: 3,
Expand Down

0 comments on commit 13c7ec7

Please sign in to comment.