From 13d6aa8f6e7950ffce7b66c30111b3b71ffcf79e Mon Sep 17 00:00:00 2001 From: sua yoo Date: Mon, 6 Oct 2025 14:02:20 -0700 Subject: [PATCH 01/12] add form section --- .../docs/docs/user-guide/workflow-setup.md | 4 ++ .../crawl-workflows/workflow-editor.ts | 46 +++++++++++++++++++ .../src/strings/crawl-workflows/section.ts | 1 + frontend/src/types/crawler.ts | 1 + frontend/src/utils/workflow.ts | 9 ++++ 5 files changed, 61 insertions(+) diff --git a/frontend/docs/docs/user-guide/workflow-setup.md b/frontend/docs/docs/user-guide/workflow-setup.md index 3f86223c5f..ae76d7263a 100644 --- a/frontend/docs/docs/user-guide/workflow-setup.md +++ b/frontend/docs/docs/user-guide/workflow-setup.md @@ -392,6 +392,10 @@ You can use a tool like [crontab.guru](https://crontab.guru/) to check Cron synt Cron schedules are always in [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time). +## Deduplication + +Prevent duplicate content from being crawled and stored. + ## Collections ### Auto-Add to Collection diff --git a/frontend/src/features/crawl-workflows/workflow-editor.ts b/frontend/src/features/crawl-workflows/workflow-editor.ts index 43c6490e21..ac34f1dbd3 100644 --- a/frontend/src/features/crawl-workflows/workflow-editor.ts +++ b/frontend/src/features/crawl-workflows/workflow-editor.ts @@ -204,6 +204,10 @@ const getDefaultProgressState = (hasConfigId = false): ProgressState => { error: false, completed: hasConfigId, }, + deduplication: { + error: false, + completed: hasConfigId, + }, collections: { error: false, completed: hasConfigId, @@ -386,6 +390,11 @@ export class WorkflowEditor extends BtrixElement { "": "", }; + private readonly dedupeTypeLabels: Record = { + collection: msg("Deduplicate using a collection"), + none: msg("No deduplication"), + }; + @query(`form[name="${formName}"]`) private readonly formElem?: HTMLFormElement; @@ -2283,6 +2292,38 @@ https://archiveweb.page/images/${"logo.svg"}`} `; }; + private renderDeduplication() { + return html` ${inputCol(html` + + this.updateFormState({ + dedupeType: (e.target as SlRadio).value as FormState["dedupeType"], + })} + > + ${this.dedupeTypeLabels["none"]} + ${this.dedupeTypeLabels["collection"]} + + `)} + ${this.renderHelpTextCol( + msg( + `Enable duplication checks before and during a crawl to avoid duplicate content in archived items.`, + ), + )} + ${when( + this.formState.dedupeType === "collection", + this.renderDedupeCollection, + )}`; + } + + private readonly renderDedupeCollection = () => { + return html`TODO`; + }; + private renderCollections() { return html` ${inputCol(html` @@ -2469,6 +2510,11 @@ https://archiveweb.page/images/${"logo.svg"}`} desc: msg("Schedule recurring crawls."), render: this.renderJobScheduling, }, + { + name: "deduplication", + desc: msg("Prevent duplicate content from being crawled and stored."), + render: this.renderDeduplication, + }, { name: "collections", desc: msg("Add crawls from this workflow to one or more collections."), diff --git a/frontend/src/strings/crawl-workflows/section.ts b/frontend/src/strings/crawl-workflows/section.ts index 1b09c14064..2e30505bae 100644 --- a/frontend/src/strings/crawl-workflows/section.ts +++ b/frontend/src/strings/crawl-workflows/section.ts @@ -8,6 +8,7 @@ const section: Record = { behaviors: msg("Page Behavior"), browserSettings: msg("Browser Settings"), scheduling: msg("Scheduling"), + deduplication: msg("Deduplication"), collections: msg("Collections"), metadata: msg("Metadata"), }; diff --git a/frontend/src/types/crawler.ts b/frontend/src/types/crawler.ts index 3794355138..931de351a4 100644 --- a/frontend/src/types/crawler.ts +++ b/frontend/src/types/crawler.ts @@ -70,6 +70,7 @@ export type WorkflowParams = { autoAddCollections: string[]; crawlerChannel: string; proxyId: string | null; + dedupCollId?: string; }; export type CrawlConfig = WorkflowParams & { diff --git a/frontend/src/utils/workflow.ts b/frontend/src/utils/workflow.ts index d6930f2fae..e0282e3caa 100644 --- a/frontend/src/utils/workflow.ts +++ b/frontend/src/utils/workflow.ts @@ -39,6 +39,7 @@ export const SECTIONS = [ "behaviors", "browserSettings", "scheduling", + "deduplication", "collections", "metadata", ] as const; @@ -51,6 +52,7 @@ export enum GuideHash { Behaviors = "page-behavior", BrowserSettings = "browser-settings", Scheduling = "scheduling", + Deduplication = "deduplication", Collections = "collections", Metadata = "metadata", } @@ -66,6 +68,7 @@ export const workflowTabToGuideHash: Record = { behaviors: GuideHash.Behaviors, browserSettings: GuideHash.BrowserSettings, scheduling: GuideHash.Scheduling, + deduplication: GuideHash.Deduplication, collections: GuideHash.Collections, metadata: GuideHash.Metadata, }; @@ -169,6 +172,7 @@ export type FormState = { * Custom schedule in cron format. */ scheduleCustom?: string; + dedupeType: "none" | "collection"; jobName: WorkflowParams["name"]; browserProfile: Profile | null; tags: Tags; @@ -231,6 +235,7 @@ export const getDefaultFormState = (): FormState => ({ minute: 0, period: "AM", }, + dedupeType: "collection", jobName: "", browserProfile: null, tags: [], @@ -335,6 +340,10 @@ export function getInitialFormState(params: { formState.autoAddCollections = params.initialWorkflow.autoAddCollections; } + if (params.initialWorkflow.dedupCollId) { + formState.dedupeType = "collection"; + } + const secondsToMinutes = (value: unknown, fallback = 0) => { if (typeof value === "number" && value > 0) return value / 60; return fallback; From 5b37fb460eabe2b5e1fc706982caa02171c0d177 Mon Sep 17 00:00:00 2001 From: sua yoo Date: Mon, 6 Oct 2025 14:09:43 -0700 Subject: [PATCH 02/12] wip field --- frontend/src/components/ui/search-combobox.ts | 10 +++++++++- .../features/crawl-workflows/workflow-editor.ts | 17 ++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/frontend/src/components/ui/search-combobox.ts b/frontend/src/components/ui/search-combobox.ts index 8681d63960..5b2c9ae49a 100644 --- a/frontend/src/components/ui/search-combobox.ts +++ b/frontend/src/components/ui/search-combobox.ts @@ -3,6 +3,7 @@ import type { SlInput, SlMenuItem } from "@shoelace-style/shoelace"; import Fuse from "fuse.js"; import { html, LitElement, nothing, type PropertyValues } from "lit"; import { customElement, property, query, state } from "lit/decorators.js"; +import { ifDefined } from "lit/directives/if-defined.js"; import { when } from "lit/directives/when.js"; import debounce from "lodash/fp/debounce"; @@ -44,6 +45,12 @@ export class SearchCombobox extends LitElement { @property({ type: String }) searchByValue = ""; + @property({ type: String }) + label?: string; + + @property({ type: String }) + size: SlInput["size"] = "small"; + private get hasSearchStr() { return this.searchByValue.length >= MIN_SEARCH_LENGTH; } @@ -115,8 +122,9 @@ export class SearchCombobox extends LitElement { }} > { diff --git a/frontend/src/features/crawl-workflows/workflow-editor.ts b/frontend/src/features/crawl-workflows/workflow-editor.ts index ac34f1dbd3..917a13afb4 100644 --- a/frontend/src/features/crawl-workflows/workflow-editor.ts +++ b/frontend/src/features/crawl-workflows/workflow-editor.ts @@ -2321,7 +2321,22 @@ https://archiveweb.page/images/${"logo.svg"}`} } private readonly renderDedupeCollection = () => { - return html`TODO`; + return html` + ${this.renderSectionHeading(msg("Set Collection"))} + ${inputCol(html` + + + `)} + ${this.renderHelpTextCol( + msg( + "Compare crawls from this workflow with all archived items in a specific collection. Crawls of this workflow will be automatically added to the collection.", + ), + )} + `; }; private renderCollections() { From e2761aaeb7cd62a73a3e29d76a3c5065f552c8f5 Mon Sep 17 00:00:00 2001 From: sua yoo Date: Mon, 6 Oct 2025 16:53:50 -0700 Subject: [PATCH 03/12] reduce calls to search values --- .../features/collections/collections-add.ts | 120 ++++++++---------- .../collections/context/collectionQuery.ts | 11 ++ .../controllers/collectionQueryProvider.ts | 74 +++++++++++ frontend/src/pages/org/collections-list.ts | 65 ++-------- frontend/src/pages/org/index.ts | 4 + 5 files changed, 155 insertions(+), 119 deletions(-) create mode 100644 frontend/src/features/collections/context/collectionQuery.ts create mode 100644 frontend/src/features/collections/controllers/collectionQueryProvider.ts diff --git a/frontend/src/features/collections/collections-add.ts b/frontend/src/features/collections/collections-add.ts index 0412f64f74..6134c7149c 100644 --- a/frontend/src/features/collections/collections-add.ts +++ b/frontend/src/features/collections/collections-add.ts @@ -1,13 +1,18 @@ +import { consume } from "@lit/context"; import { localized, msg } from "@lit/localize"; -import { Task, TaskStatus } from "@lit/task"; +import { Task } from "@lit/task"; import type { SlInput, SlMenuItem } from "@shoelace-style/shoelace"; -import Fuse from "fuse.js"; import { html, nothing } from "lit"; import { customElement, property, query, state } from "lit/decorators.js"; import { when } from "lit/directives/when.js"; import debounce from "lodash/fp/debounce"; import queryString from "query-string"; +import { + collectionQueryContext, + type CollectionQueryContext, +} from "./context/collectionQuery"; + import { BtrixElement } from "@/classes/BtrixElement"; import type { Combobox } from "@/components/ui/combobox"; import type { @@ -20,7 +25,7 @@ import type { APIPaginationQuery, APISortQuery, } from "@/types/api"; -import type { Collection, CollectionSearchValues } from "@/types/collection"; +import type { Collection } from "@/types/collection"; import type { UnderlyingFunction } from "@/types/utils"; import { TwoWayMap } from "@/utils/TwoWayMap"; @@ -45,6 +50,9 @@ export type CollectionsChangeEvent = CustomEvent<{ @customElement("btrix-collections-add") @localized() export class CollectionsAdd extends BtrixElement { + @consume({ context: collectionQueryContext, subscribe: true }) + private readonly collectionQuery?: CollectionQueryContext; + @property({ type: Array }) initialCollections?: string[]; @@ -78,26 +86,6 @@ export class CollectionsAdd extends BtrixElement { return this.searchByValue.length >= MIN_SEARCH_LENGTH; } - private readonly searchValuesTask = new Task(this, { - task: async (_args, { signal }) => { - const { names } = await this.getSearchValues(signal); - - return names; - }, - args: () => [] as const, - }); - - private readonly searchTask = new Task(this, { - task: async ([names], { signal }) => { - if (!names || signal.aborted) { - return; - } - - return new Fuse(names, { threshold: 0.4, minMatchCharLength: 2 }); - }, - args: () => [this.searchValuesTask.value] as const, - }); - private readonly searchResultsTask = new Task(this, { task: async ([searchByValue, hasSearchStr], { signal }) => { if (!hasSearchStr) return []; @@ -167,7 +155,7 @@ export class CollectionsAdd extends BtrixElement { } private renderSearch() { - const disabled = !this.searchValuesTask.value?.length; + const disabled = !this.collectionQuery?.records.length; return html` ) => { this.combobox?.hide(); const item = e.detail.item; - const name = item.dataset["key"]; + const name = item.dataset["value"]; const collections = await this.getCollections({ namePrefix: name }); const coll = collections.items.find((c) => c.name === name); @@ -215,7 +203,7 @@ export class CollectionsAdd extends BtrixElement { > ${when( - disabled && this.searchValuesTask.status === TaskStatus.COMPLETE, + disabled && this.collectionQuery?.records, () => html`
${msg("No collections found.")} @@ -234,47 +222,50 @@ export class CollectionsAdd extends BtrixElement { } private renderSearchResults() { - return this.searchTask.render({ - pending: () => html` + if (!this.collectionQuery) { + html` - `, - complete: (fuse) => { - if (!this.hasSearchStr) { - return html` - - ${msg("Start typing to search Collections.")} - - `; - } - - const results = fuse - ?.search(this.searchByValue) - // Filter out items that have been selected - .filter(({ item }) => !this.nameSearchMap.get(item)) - // Show first few results - .slice(0, 5); - - if (!results?.length) { - return html` - - ${msg("No matching Collections found.")} - - `; - } + `; + } + + if (!this.hasSearchStr) { + return html` + + ${msg("Start typing to search Collections.")} + + `; + } + + const results = this.collectionQuery + ?.search(this.searchByValue) + // Filter out items that have been selected + .filter(({ item }) => !this.nameSearchMap.get(item.name)) + // Show first few results + .slice(0, 5); + + if (!results?.length) { + return html` + + ${msg("No matching Collections found.")} + + `; + } + return html` + ${results.map(({ item }) => { return html` - ${results.map(({ item }: { item: string }) => { - return html` - - ${item} - - `; - })} + + ${item["name"]} + `; - }, - }); + })} + `; } private removeCollection(collectionId: string) { @@ -354,13 +345,6 @@ export class CollectionsAdd extends BtrixElement { return data; } - private async getSearchValues(signal: AbortSignal) { - return await this.api.fetch( - `/orgs/${this.orgId}/collections/search-values`, - { signal }, - ); - } - private async dispatchChange() { await this.updateComplete; this.dispatchEvent( diff --git a/frontend/src/features/collections/context/collectionQuery.ts b/frontend/src/features/collections/context/collectionQuery.ts new file mode 100644 index 0000000000..d931103f4e --- /dev/null +++ b/frontend/src/features/collections/context/collectionQuery.ts @@ -0,0 +1,11 @@ +import { createContext } from "@lit/context"; +import type Fuse from "fuse.js"; + +export type CollectionQueryContext = + | (Fuse<{ name: string }> & { + records: Fuse.FuseIndexRecords; + }) + | null; + +export const collectionQueryContext = + createContext("collectionQuery"); diff --git a/frontend/src/features/collections/controllers/collectionQueryProvider.ts b/frontend/src/features/collections/controllers/collectionQueryProvider.ts new file mode 100644 index 0000000000..fb9391d555 --- /dev/null +++ b/frontend/src/features/collections/controllers/collectionQueryProvider.ts @@ -0,0 +1,74 @@ +import { ContextProvider } from "@lit/context"; +import { Task } from "@lit/task"; +import Fuse from "fuse.js"; +import type { ReactiveController } from "lit"; + +import type { BtrixElement } from "@/classes/BtrixElement"; +import { + collectionQueryContext, + type CollectionQueryContext, +} from "@/features/collections/context/collectionQuery"; +import type { CollectionSearchValues } from "@/types/collection"; + +/** + * Provide searchable client db of collections. + * Currently only supports querying by name. + */ +export class CollectionQueryProvider implements ReactiveController { + private readonly host: BtrixElement; + readonly #searchValuesTask: Task; + readonly #dbProvider: ContextProvider< + { __context__: CollectionQueryContext }, + BtrixElement + >; + + constructor(host: CollectionQueryProvider["host"]) { + this.host = host; + this.#searchValuesTask = new Task(this.host, { + task: async (_args, { signal }) => { + const { names } = await this.getSearchValues(signal); + + if (signal.aborted) return; + + const fuse = new Fuse( + names.map((name) => ({ name })), + { + keys: ["name"], + threshold: 0.4, + minMatchCharLength: 2, + }, + ); + + this.#dbProvider.setValue( + Object.assign(fuse, { + get records() { + return fuse.getIndex().toJSON().records; + }, + }), + ); + }, + args: () => [] as const, + }); + this.#dbProvider = new ContextProvider(this.host, { + context: collectionQueryContext, + }); + + host.addController(this); + } + + hostConnected(): void {} + hostDisconnected(): void { + this.#searchValuesTask.abort(); + } + + public async refresh() { + return this.#searchValuesTask.run(); + } + + private async getSearchValues(signal: AbortSignal) { + return await this.host.api.fetch( + `/orgs/${this.host.appState.orgId}/collections/search-values`, + { signal }, + ); + } +} diff --git a/frontend/src/pages/org/collections-list.ts b/frontend/src/pages/org/collections-list.ts index d7d49ff0ed..5fad258b21 100644 --- a/frontend/src/pages/org/collections-list.ts +++ b/frontend/src/pages/org/collections-list.ts @@ -1,3 +1,4 @@ +import { consume } from "@lit/context"; import { localized, msg } from "@lit/localize"; import type { SlChangeEvent, @@ -5,7 +6,6 @@ import type { SlMenuItem, SlRadioGroup, } from "@shoelace-style/shoelace"; -import Fuse from "fuse.js"; import { html, nothing, type PropertyValues } from "lit"; import { customElement, property, query, state } from "lit/decorators.js"; import { choose } from "lit/directives/choose.js"; @@ -20,6 +20,10 @@ import { BtrixElement } from "@/classes/BtrixElement"; import { parsePage, type PageChangeEvent } from "@/components/ui/pagination"; import { ClipboardController } from "@/controllers/clipboard"; import type { CollectionSavedEvent } from "@/features/collections/collection-create-dialog"; +import { + collectionQueryContext, + type CollectionQueryContext, +} from "@/features/collections/context/collectionQuery"; import { SelectCollectionAccess } from "@/features/collections/select-collection-access"; import { emptyMessage } from "@/layouts/emptyMessage"; import { pageHeader } from "@/layouts/pageHeader"; @@ -27,11 +31,7 @@ import { RouteNamespace } from "@/routes"; import { metadata } from "@/strings/collections/metadata"; import { monthYearDateRange } from "@/strings/utils"; import type { APIPaginatedList, APIPaginationQuery } from "@/types/api"; -import { - CollectionAccess, - type Collection, - type CollectionSearchValues, -} from "@/types/collection"; +import { CollectionAccess, type Collection } from "@/types/collection"; import { SortDirection, type UnderlyingFunction } from "@/types/utils"; import { isApiError } from "@/utils/api"; import { pluralOf } from "@/utils/pluralize"; @@ -39,12 +39,6 @@ import { tw } from "@/utils/tailwind"; type Collections = APIPaginatedList; type SearchFields = "name"; -type SearchResult = { - item: { - key: SearchFields; - value: string; - }; -}; type SortField = | "modified" | "dateLatest" @@ -92,6 +86,9 @@ enum ListView { @customElement("btrix-collections-list") @localized() export class CollectionsList extends BtrixElement { + @consume({ context: collectionQueryContext, subscribe: true }) + private readonly collectionQuery?: CollectionQueryContext; + @property({ type: Boolean }) isCrawler?: boolean; @@ -138,13 +135,6 @@ export class CollectionsList extends BtrixElement { @query("sl-input") private readonly input?: SlInput | null; - // For fuzzy search: - private readonly fuse = new Fuse<{ key: "name"; value: string }>([], { - keys: ["value"], - shouldSort: false, - threshold: 0.2, // stricter; default is 0.6 - }); - private getShareLink(collection: Collection) { return `${window.location.protocol}//${window.location.hostname}${window.location.port ? `:${window.location.port}` : ""}/${collection.access === CollectionAccess.Private ? `${RouteNamespace.PrivateOrgs}/${this.orgSlugState}/collections/view` : `${RouteNamespace.PublicOrgs}/${this.orgSlugState}/collections`}/${collection.slug}`; } @@ -161,10 +151,6 @@ export class CollectionsList extends BtrixElement { } } - protected firstUpdated() { - void this.fetchSearchValues(); - } - render() { return html`
@@ -437,7 +423,8 @@ export class CollectionsList extends BtrixElement { `; } - const searchResults = this.fuse.search(this.searchByValue).slice(0, 10); + const searchResults = + this.collectionQuery?.search(this.searchByValue).slice(0, 10) || []; if (!searchResults.length) { return html` html` - - ${item.value} + ({ item }) => html` + + ${item.name} `, )} @@ -811,26 +794,6 @@ export class CollectionsList extends BtrixElement { } } - private async fetchSearchValues() { - try { - const searchValues: CollectionSearchValues = await this.api.fetch( - `/orgs/${this.orgId}/collections/search-values`, - ); - const names = searchValues.names; - - // Update search/filter collection - const toSearchItem = - (key: SearchFields) => - (value: string): SearchResult["item"] => ({ - key, - value, - }); - this.fuse.setCollection([...names.map(toSearchItem("name"))]); - } catch (e) { - console.debug(e); - } - } - private async fetchCollections(params?: APIPaginationQuery) { this.fetchErrorStatusCode = undefined; diff --git a/frontend/src/pages/org/index.ts b/frontend/src/pages/org/index.ts index e7c1980e83..ecd0dfe71e 100644 --- a/frontend/src/pages/org/index.ts +++ b/frontend/src/pages/org/index.ts @@ -21,6 +21,7 @@ import { proxiesContext, type ProxiesContext } from "@/context/org"; import type { QuotaUpdateDetail } from "@/controllers/api"; import needLogin from "@/decorators/needLogin"; import type { CollectionSavedEvent } from "@/features/collections/collection-create-dialog"; +import { CollectionQueryProvider } from "@/features/collections/controllers/collectionQueryProvider"; import type { SelectJobTypeEvent } from "@/features/crawl-workflows/new-workflow-dialog"; import { OrgTab, RouteNamespace, WorkflowTab } from "@/routes"; import type { ProxiesAPIResponse } from "@/types/crawler"; @@ -122,6 +123,8 @@ export class Org extends BtrixElement { @state() private isCreateDialogVisible = false; + private readonly collectionQueryProvider = new CollectionQueryProvider(this); + connectedCallback() { if ( !this.orgTab || @@ -177,6 +180,7 @@ export class Org extends BtrixElement { } else if (changedProperties.has("orgTab") && this.orgId) { // Get most up to date org data void this.updateOrg(); + void this.collectionQueryProvider.refresh(); } if (changedProperties.has("openDialogName")) { // Sync URL to create dialog From cb6dccb2fe432f7b163934fde633da958f03ad7c Mon Sep 17 00:00:00 2001 From: sua yoo Date: Tue, 7 Oct 2025 11:48:33 -0700 Subject: [PATCH 04/12] generalize org search --- .../search-org/SearchOrgContextController.ts | 97 +++++++++++++++++++ .../search-org/WithSearchOrgContext.ts | 32 ++++++ .../src/context/search-org/connectFuse.ts | 14 +++ frontend/src/context/search-org/index.ts | 5 + frontend/src/context/search-org/search-org.ts | 20 ++++ frontend/src/context/search-org/types.ts | 4 + .../features/collections/collections-add.ts | 21 ++-- .../collections/context/collectionQuery.ts | 11 --- .../controllers/collectionQueryProvider.ts | 74 -------------- frontend/src/pages/org/collections-list.ts | 13 +-- frontend/src/pages/org/index.ts | 8 +- 11 files changed, 187 insertions(+), 112 deletions(-) create mode 100644 frontend/src/context/search-org/SearchOrgContextController.ts create mode 100644 frontend/src/context/search-org/WithSearchOrgContext.ts create mode 100644 frontend/src/context/search-org/connectFuse.ts create mode 100644 frontend/src/context/search-org/index.ts create mode 100644 frontend/src/context/search-org/search-org.ts create mode 100644 frontend/src/context/search-org/types.ts delete mode 100644 frontend/src/features/collections/context/collectionQuery.ts delete mode 100644 frontend/src/features/collections/controllers/collectionQueryProvider.ts diff --git a/frontend/src/context/search-org/SearchOrgContextController.ts b/frontend/src/context/search-org/SearchOrgContextController.ts new file mode 100644 index 0000000000..65a7e07225 --- /dev/null +++ b/frontend/src/context/search-org/SearchOrgContextController.ts @@ -0,0 +1,97 @@ +import { ContextProvider } from "@lit/context"; +import { Task } from "@lit/task"; +import { type ReactiveController } from "lit"; + +import { connectFuse } from "./connectFuse"; +import { + searchOrgContext, + searchOrgInitialValue, + type SearchOrgContext, +} from "./search-org"; +import { type SearchOrgKey, type SearchQuery } from "./types"; + +import type { BtrixElement } from "@/classes/BtrixElement"; +import type { CollectionSearchValues } from "@/types/collection"; + +/** + * Provides org-wide search data to all descendents of a component. + * + * @example Usage: + * ```ts + * class Component extends BtrixElement { + * readonly [searchOrgContextKey] = new SearchOrgContextController(this); + * } + * ``` + */ +export class SearchOrgContextController implements ReactiveController { + readonly #host: BtrixElement; + readonly #context: ContextProvider<{ __context__: SearchOrgContext }>; + readonly #tasks = new Map(); + + constructor(host: BtrixElement) { + this.#host = host; + this.#context = new ContextProvider(this.#host, { + context: searchOrgContext, + initialValue: searchOrgInitialValue, + }); + + this.addTask("collections", this.getCollectionsSearchValues); + + host.addController(this); + } + + hostConnected(): void {} + hostDisconnected(): void {} + + public async refresh(key?: SearchOrgKey) { + if (key) { + void this.#tasks.get(key)?.run(); + } else { + for (const [_key, task] of this.#tasks) { + void task.run(); + } + } + } + + private addTask( + key: SearchOrgKey, + request: (orgId: string, signal: AbortSignal) => Promise, + ) { + this.#tasks.set( + key, + new Task(this.#host, { + task: async ([orgId], { signal }) => { + if (!orgId) return null; + + const values = await request(orgId, signal); + + if (signal.aborted) return; + + this.#context.setValue({ + ...this.#context.value, + [key]: connectFuse(values), + }); + }, + args: () => [this.#host.appState.orgId] as const, + }), + ); + } + + private readonly getCollectionsSearchValues = async ( + orgId: string, + signal: AbortSignal, + ) => { + try { + const { names } = await this.#host.api.fetch( + `/orgs/${orgId}/collections/search-values`, + { signal }, + ); + + return names.map((name) => ({ name })); + } catch (err) { + console.debug(err); + } + + return []; + }; +} diff --git a/frontend/src/context/search-org/WithSearchOrgContext.ts b/frontend/src/context/search-org/WithSearchOrgContext.ts new file mode 100644 index 0000000000..df09c942ae --- /dev/null +++ b/frontend/src/context/search-org/WithSearchOrgContext.ts @@ -0,0 +1,32 @@ +import { ContextConsumer } from "@lit/context"; +import type { LitElement } from "lit"; +import type { Constructor } from "type-fest"; + +import { searchOrgContext, searchOrgInitialValue } from "./search-org"; +import type { SearchOrgKey } from "./types"; + +/** + * Consume search data. + * + * @example Usage: + * ```ts + * class Component extends WithSearchOrgContext(BtrixElement) {} + * ``` + */ +export const WithSearchOrgContext = >( + superClass: T, +) => + class extends superClass { + readonly #searchOrg = new ContextConsumer(this, { + context: searchOrgContext, + subscribe: true, + }); + + public get searchOrg() { + return this.#searchOrg.value || searchOrgInitialValue; + } + + public listSearchValuesFor(key: SearchOrgKey) { + return this.searchOrg[key]?.getIndex().toJSON().records || null; + } + }; diff --git a/frontend/src/context/search-org/connectFuse.ts b/frontend/src/context/search-org/connectFuse.ts new file mode 100644 index 0000000000..ad17e12ae4 --- /dev/null +++ b/frontend/src/context/search-org/connectFuse.ts @@ -0,0 +1,14 @@ +/** + * Enable fuzzy search on available values. + */ +import Fuse from "fuse.js"; + +import { searchQueryKeys, type SearchQuery } from "./types"; + +export function connectFuse(values: SearchQuery[]) { + return new Fuse(values, { + keys: searchQueryKeys, + threshold: 0.4, + minMatchCharLength: 2, + }); +} diff --git a/frontend/src/context/search-org/index.ts b/frontend/src/context/search-org/index.ts new file mode 100644 index 0000000000..e6685e59d6 --- /dev/null +++ b/frontend/src/context/search-org/index.ts @@ -0,0 +1,5 @@ +import { searchOrgContext, type SearchOrgContext } from "./search-org"; + +export type { SearchOrgContext }; + +export default searchOrgContext; diff --git a/frontend/src/context/search-org/search-org.ts b/frontend/src/context/search-org/search-org.ts new file mode 100644 index 0000000000..afe44cb889 --- /dev/null +++ b/frontend/src/context/search-org/search-org.ts @@ -0,0 +1,20 @@ +/** + * Store org-wide searchable data, like collection names. + */ +import { createContext } from "@lit/context"; +import type Fuse from "fuse.js"; + +import { + searchOrgContextKey, + type SearchOrgKey, + type SearchQuery, +} from "./types"; + +export type SearchOrgContext = Record | null>; + +export const searchOrgInitialValue = { + collections: null, +} as const satisfies SearchOrgContext; + +export const searchOrgContext = + createContext(searchOrgContextKey); diff --git a/frontend/src/context/search-org/types.ts b/frontend/src/context/search-org/types.ts new file mode 100644 index 0000000000..5fa0acafcc --- /dev/null +++ b/frontend/src/context/search-org/types.ts @@ -0,0 +1,4 @@ +export const searchQueryKeys = ["name"]; +export const searchOrgContextKey = Symbol("search-values"); +export type SearchQuery = Record<(typeof searchQueryKeys)[number], string>; +export type SearchOrgKey = "collections"; diff --git a/frontend/src/features/collections/collections-add.ts b/frontend/src/features/collections/collections-add.ts index 6134c7149c..f980c099cf 100644 --- a/frontend/src/features/collections/collections-add.ts +++ b/frontend/src/features/collections/collections-add.ts @@ -1,4 +1,3 @@ -import { consume } from "@lit/context"; import { localized, msg } from "@lit/localize"; import { Task } from "@lit/task"; import type { SlInput, SlMenuItem } from "@shoelace-style/shoelace"; @@ -8,13 +7,9 @@ import { when } from "lit/directives/when.js"; import debounce from "lodash/fp/debounce"; import queryString from "query-string"; -import { - collectionQueryContext, - type CollectionQueryContext, -} from "./context/collectionQuery"; - import { BtrixElement } from "@/classes/BtrixElement"; import type { Combobox } from "@/components/ui/combobox"; +import { WithSearchOrgContext } from "@/context/search-org/WithSearchOrgContext"; import type { BtrixLoadedLinkedCollectionEvent, BtrixRemoveLinkedCollectionEvent, @@ -49,10 +44,7 @@ export type CollectionsChangeEvent = CustomEvent<{ */ @customElement("btrix-collections-add") @localized() -export class CollectionsAdd extends BtrixElement { - @consume({ context: collectionQueryContext, subscribe: true }) - private readonly collectionQuery?: CollectionQueryContext; - +export class CollectionsAdd extends WithSearchOrgContext(BtrixElement) { @property({ type: Array }) initialCollections?: string[]; @@ -155,7 +147,8 @@ export class CollectionsAdd extends BtrixElement { } private renderSearch() { - const disabled = !this.collectionQuery?.records.length; + const collections = this.listSearchValuesFor("collections"); + const disabled = !collections?.length; return html` ${when( - disabled && this.collectionQuery?.records, + disabled && collections, () => html`
${msg("No collections found.")} @@ -222,7 +215,7 @@ export class CollectionsAdd extends BtrixElement { } private renderSearchResults() { - if (!this.collectionQuery) { + if (!this.searchOrg.collections) { html` @@ -238,7 +231,7 @@ export class CollectionsAdd extends BtrixElement { `; } - const results = this.collectionQuery + const results = this.searchOrg.collections ?.search(this.searchByValue) // Filter out items that have been selected .filter(({ item }) => !this.nameSearchMap.get(item.name)) diff --git a/frontend/src/features/collections/context/collectionQuery.ts b/frontend/src/features/collections/context/collectionQuery.ts deleted file mode 100644 index d931103f4e..0000000000 --- a/frontend/src/features/collections/context/collectionQuery.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { createContext } from "@lit/context"; -import type Fuse from "fuse.js"; - -export type CollectionQueryContext = - | (Fuse<{ name: string }> & { - records: Fuse.FuseIndexRecords; - }) - | null; - -export const collectionQueryContext = - createContext("collectionQuery"); diff --git a/frontend/src/features/collections/controllers/collectionQueryProvider.ts b/frontend/src/features/collections/controllers/collectionQueryProvider.ts deleted file mode 100644 index fb9391d555..0000000000 --- a/frontend/src/features/collections/controllers/collectionQueryProvider.ts +++ /dev/null @@ -1,74 +0,0 @@ -import { ContextProvider } from "@lit/context"; -import { Task } from "@lit/task"; -import Fuse from "fuse.js"; -import type { ReactiveController } from "lit"; - -import type { BtrixElement } from "@/classes/BtrixElement"; -import { - collectionQueryContext, - type CollectionQueryContext, -} from "@/features/collections/context/collectionQuery"; -import type { CollectionSearchValues } from "@/types/collection"; - -/** - * Provide searchable client db of collections. - * Currently only supports querying by name. - */ -export class CollectionQueryProvider implements ReactiveController { - private readonly host: BtrixElement; - readonly #searchValuesTask: Task; - readonly #dbProvider: ContextProvider< - { __context__: CollectionQueryContext }, - BtrixElement - >; - - constructor(host: CollectionQueryProvider["host"]) { - this.host = host; - this.#searchValuesTask = new Task(this.host, { - task: async (_args, { signal }) => { - const { names } = await this.getSearchValues(signal); - - if (signal.aborted) return; - - const fuse = new Fuse( - names.map((name) => ({ name })), - { - keys: ["name"], - threshold: 0.4, - minMatchCharLength: 2, - }, - ); - - this.#dbProvider.setValue( - Object.assign(fuse, { - get records() { - return fuse.getIndex().toJSON().records; - }, - }), - ); - }, - args: () => [] as const, - }); - this.#dbProvider = new ContextProvider(this.host, { - context: collectionQueryContext, - }); - - host.addController(this); - } - - hostConnected(): void {} - hostDisconnected(): void { - this.#searchValuesTask.abort(); - } - - public async refresh() { - return this.#searchValuesTask.run(); - } - - private async getSearchValues(signal: AbortSignal) { - return await this.host.api.fetch( - `/orgs/${this.host.appState.orgId}/collections/search-values`, - { signal }, - ); - } -} diff --git a/frontend/src/pages/org/collections-list.ts b/frontend/src/pages/org/collections-list.ts index 5fad258b21..135a1e5cd6 100644 --- a/frontend/src/pages/org/collections-list.ts +++ b/frontend/src/pages/org/collections-list.ts @@ -1,4 +1,3 @@ -import { consume } from "@lit/context"; import { localized, msg } from "@lit/localize"; import type { SlChangeEvent, @@ -18,12 +17,9 @@ import type { SelectNewDialogEvent } from "."; import { BtrixElement } from "@/classes/BtrixElement"; import { parsePage, type PageChangeEvent } from "@/components/ui/pagination"; +import { WithSearchOrgContext } from "@/context/search-org/WithSearchOrgContext"; import { ClipboardController } from "@/controllers/clipboard"; import type { CollectionSavedEvent } from "@/features/collections/collection-create-dialog"; -import { - collectionQueryContext, - type CollectionQueryContext, -} from "@/features/collections/context/collectionQuery"; import { SelectCollectionAccess } from "@/features/collections/select-collection-access"; import { emptyMessage } from "@/layouts/emptyMessage"; import { pageHeader } from "@/layouts/pageHeader"; @@ -85,10 +81,7 @@ enum ListView { @customElement("btrix-collections-list") @localized() -export class CollectionsList extends BtrixElement { - @consume({ context: collectionQueryContext, subscribe: true }) - private readonly collectionQuery?: CollectionQueryContext; - +export class CollectionsList extends WithSearchOrgContext(BtrixElement) { @property({ type: Boolean }) isCrawler?: boolean; @@ -424,7 +417,7 @@ export class CollectionsList extends BtrixElement { } const searchResults = - this.collectionQuery?.search(this.searchByValue).slice(0, 10) || []; + this.searchOrg.collections?.search(this.searchByValue).slice(0, 10) || []; if (!searchResults.length) { return html` Date: Tue, 7 Oct 2025 11:51:23 -0700 Subject: [PATCH 05/12] revert field changes --- .../docs/docs/user-guide/workflow-setup.md | 4 -- .../crawl-workflows/workflow-editor.ts | 61 ------------------- .../src/strings/crawl-workflows/section.ts | 1 - frontend/src/utils/workflow.ts | 9 --- 4 files changed, 75 deletions(-) diff --git a/frontend/docs/docs/user-guide/workflow-setup.md b/frontend/docs/docs/user-guide/workflow-setup.md index ae76d7263a..3f86223c5f 100644 --- a/frontend/docs/docs/user-guide/workflow-setup.md +++ b/frontend/docs/docs/user-guide/workflow-setup.md @@ -392,10 +392,6 @@ You can use a tool like [crontab.guru](https://crontab.guru/) to check Cron synt Cron schedules are always in [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time). -## Deduplication - -Prevent duplicate content from being crawled and stored. - ## Collections ### Auto-Add to Collection diff --git a/frontend/src/features/crawl-workflows/workflow-editor.ts b/frontend/src/features/crawl-workflows/workflow-editor.ts index 917a13afb4..43c6490e21 100644 --- a/frontend/src/features/crawl-workflows/workflow-editor.ts +++ b/frontend/src/features/crawl-workflows/workflow-editor.ts @@ -204,10 +204,6 @@ const getDefaultProgressState = (hasConfigId = false): ProgressState => { error: false, completed: hasConfigId, }, - deduplication: { - error: false, - completed: hasConfigId, - }, collections: { error: false, completed: hasConfigId, @@ -390,11 +386,6 @@ export class WorkflowEditor extends BtrixElement { "": "", }; - private readonly dedupeTypeLabels: Record = { - collection: msg("Deduplicate using a collection"), - none: msg("No deduplication"), - }; - @query(`form[name="${formName}"]`) private readonly formElem?: HTMLFormElement; @@ -2292,53 +2283,6 @@ https://archiveweb.page/images/${"logo.svg"}`} `; }; - private renderDeduplication() { - return html` ${inputCol(html` - - this.updateFormState({ - dedupeType: (e.target as SlRadio).value as FormState["dedupeType"], - })} - > - ${this.dedupeTypeLabels["none"]} - ${this.dedupeTypeLabels["collection"]} - - `)} - ${this.renderHelpTextCol( - msg( - `Enable duplication checks before and during a crawl to avoid duplicate content in archived items.`, - ), - )} - ${when( - this.formState.dedupeType === "collection", - this.renderDedupeCollection, - )}`; - } - - private readonly renderDedupeCollection = () => { - return html` - ${this.renderSectionHeading(msg("Set Collection"))} - ${inputCol(html` - - - `)} - ${this.renderHelpTextCol( - msg( - "Compare crawls from this workflow with all archived items in a specific collection. Crawls of this workflow will be automatically added to the collection.", - ), - )} - `; - }; - private renderCollections() { return html` ${inputCol(html` @@ -2525,11 +2469,6 @@ https://archiveweb.page/images/${"logo.svg"}`} desc: msg("Schedule recurring crawls."), render: this.renderJobScheduling, }, - { - name: "deduplication", - desc: msg("Prevent duplicate content from being crawled and stored."), - render: this.renderDeduplication, - }, { name: "collections", desc: msg("Add crawls from this workflow to one or more collections."), diff --git a/frontend/src/strings/crawl-workflows/section.ts b/frontend/src/strings/crawl-workflows/section.ts index 2e30505bae..1b09c14064 100644 --- a/frontend/src/strings/crawl-workflows/section.ts +++ b/frontend/src/strings/crawl-workflows/section.ts @@ -8,7 +8,6 @@ const section: Record = { behaviors: msg("Page Behavior"), browserSettings: msg("Browser Settings"), scheduling: msg("Scheduling"), - deduplication: msg("Deduplication"), collections: msg("Collections"), metadata: msg("Metadata"), }; diff --git a/frontend/src/utils/workflow.ts b/frontend/src/utils/workflow.ts index e0282e3caa..d6930f2fae 100644 --- a/frontend/src/utils/workflow.ts +++ b/frontend/src/utils/workflow.ts @@ -39,7 +39,6 @@ export const SECTIONS = [ "behaviors", "browserSettings", "scheduling", - "deduplication", "collections", "metadata", ] as const; @@ -52,7 +51,6 @@ export enum GuideHash { Behaviors = "page-behavior", BrowserSettings = "browser-settings", Scheduling = "scheduling", - Deduplication = "deduplication", Collections = "collections", Metadata = "metadata", } @@ -68,7 +66,6 @@ export const workflowTabToGuideHash: Record = { behaviors: GuideHash.Behaviors, browserSettings: GuideHash.BrowserSettings, scheduling: GuideHash.Scheduling, - deduplication: GuideHash.Deduplication, collections: GuideHash.Collections, metadata: GuideHash.Metadata, }; @@ -172,7 +169,6 @@ export type FormState = { * Custom schedule in cron format. */ scheduleCustom?: string; - dedupeType: "none" | "collection"; jobName: WorkflowParams["name"]; browserProfile: Profile | null; tags: Tags; @@ -235,7 +231,6 @@ export const getDefaultFormState = (): FormState => ({ minute: 0, period: "AM", }, - dedupeType: "collection", jobName: "", browserProfile: null, tags: [], @@ -340,10 +335,6 @@ export function getInitialFormState(params: { formState.autoAddCollections = params.initialWorkflow.autoAddCollections; } - if (params.initialWorkflow.dedupCollId) { - formState.dedupeType = "collection"; - } - const secondsToMinutes = (value: unknown, fallback = 0) => { if (typeof value === "number" && value > 0) return value / 60; return fallback; From 904340bb88e0743f49711809b8e2ecd5cd4a39eb Mon Sep 17 00:00:00 2001 From: sua yoo Date: Tue, 7 Oct 2025 11:56:29 -0700 Subject: [PATCH 06/12] revert combobox change --- frontend/src/components/ui/search-combobox.ts | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/frontend/src/components/ui/search-combobox.ts b/frontend/src/components/ui/search-combobox.ts index 5b2c9ae49a..8681d63960 100644 --- a/frontend/src/components/ui/search-combobox.ts +++ b/frontend/src/components/ui/search-combobox.ts @@ -3,7 +3,6 @@ import type { SlInput, SlMenuItem } from "@shoelace-style/shoelace"; import Fuse from "fuse.js"; import { html, LitElement, nothing, type PropertyValues } from "lit"; import { customElement, property, query, state } from "lit/decorators.js"; -import { ifDefined } from "lit/directives/if-defined.js"; import { when } from "lit/directives/when.js"; import debounce from "lodash/fp/debounce"; @@ -45,12 +44,6 @@ export class SearchCombobox extends LitElement { @property({ type: String }) searchByValue = ""; - @property({ type: String }) - label?: string; - - @property({ type: String }) - size: SlInput["size"] = "small"; - private get hasSearchStr() { return this.searchByValue.length >= MIN_SEARCH_LENGTH; } @@ -122,9 +115,8 @@ export class SearchCombobox extends LitElement { }} > { From 03102445923cbd06bbf16894a07d95e6176c68f4 Mon Sep 17 00:00:00 2001 From: sua yoo Date: Tue, 7 Oct 2025 12:02:49 -0700 Subject: [PATCH 07/12] revert crawler type change --- frontend/src/types/crawler.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/frontend/src/types/crawler.ts b/frontend/src/types/crawler.ts index 931de351a4..3794355138 100644 --- a/frontend/src/types/crawler.ts +++ b/frontend/src/types/crawler.ts @@ -70,7 +70,6 @@ export type WorkflowParams = { autoAddCollections: string[]; crawlerChannel: string; proxyId: string | null; - dedupCollId?: string; }; export type CrawlConfig = WorkflowParams & { From 6a154f1f1ce1fdd358fda5c24f6114634b58dcf2 Mon Sep 17 00:00:00 2001 From: sua yoo Date: Tue, 7 Oct 2025 12:28:57 -0700 Subject: [PATCH 08/12] remove min search matches --- frontend/src/context/search-org/connectFuse.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/frontend/src/context/search-org/connectFuse.ts b/frontend/src/context/search-org/connectFuse.ts index ad17e12ae4..5c76e8edab 100644 --- a/frontend/src/context/search-org/connectFuse.ts +++ b/frontend/src/context/search-org/connectFuse.ts @@ -9,6 +9,5 @@ export function connectFuse(values: SearchQuery[]) { return new Fuse(values, { keys: searchQueryKeys, threshold: 0.4, - minMatchCharLength: 2, }); } From 4ebc5ebdc7429997e375b10228ad162e4e88982d Mon Sep 17 00:00:00 2001 From: sua yoo Date: Tue, 7 Oct 2025 12:36:20 -0700 Subject: [PATCH 09/12] remove unused task --- .../features/collections/collections-add.ts | 77 ++++--------------- 1 file changed, 13 insertions(+), 64 deletions(-) diff --git a/frontend/src/features/collections/collections-add.ts b/frontend/src/features/collections/collections-add.ts index f980c099cf..5ecebb7365 100644 --- a/frontend/src/features/collections/collections-add.ts +++ b/frontend/src/features/collections/collections-add.ts @@ -1,10 +1,12 @@ import { localized, msg } from "@lit/localize"; -import { Task } from "@lit/task"; -import type { SlInput, SlMenuItem } from "@shoelace-style/shoelace"; +import type { + SlInput, + SlInputEvent, + SlMenuItem, +} from "@shoelace-style/shoelace"; import { html, nothing } from "lit"; import { customElement, property, query, state } from "lit/decorators.js"; import { when } from "lit/directives/when.js"; -import debounce from "lodash/fp/debounce"; import queryString from "query-string"; import { BtrixElement } from "@/classes/BtrixElement"; @@ -21,10 +23,8 @@ import type { APISortQuery, } from "@/types/api"; import type { Collection } from "@/types/collection"; -import type { UnderlyingFunction } from "@/types/utils"; import { TwoWayMap } from "@/utils/TwoWayMap"; -const INITIAL_PAGE_SIZE = 10; const MIN_SEARCH_LENGTH = 1; export type CollectionsChangeEvent = CustomEvent<{ @@ -57,6 +57,9 @@ export class CollectionsAdd extends WithSearchOrgContext(BtrixElement) { @state() private collections: CollectionLikeItem[] = []; + @state() + private searchByValue = ""; + @query("#search-input") private readonly input?: SlInput | null; @@ -70,27 +73,10 @@ export class CollectionsAdd extends WithSearchOrgContext(BtrixElement) { return this.collections.map(({ id }) => id); } - private get searchByValue() { - return this.input ? this.input.value.trim() : ""; - } - private get hasSearchStr() { return this.searchByValue.length >= MIN_SEARCH_LENGTH; } - private readonly searchResultsTask = new Task(this, { - task: async ([searchByValue, hasSearchStr], { signal }) => { - if (!hasSearchStr) return []; - const data = await this.fetchCollectionsByPrefix(searchByValue, signal); - let searchResults: Collection[] = []; - if (data?.items.length) { - searchResults = this.filterOutSelectedCollections(data.items); - } - return searchResults; - }, - args: () => [this.searchByValue, this.hasSearchStr] as const, - }); - public focus() { // Move focus to search input this.input?.focus(); @@ -190,9 +176,11 @@ export class CollectionsAdd extends WithSearchOrgContext(BtrixElement) { this.combobox.show(); } }} - @sl-input=${this.onSearchInput as UnderlyingFunction< - typeof this.onSearchInput - >} + @sl-input=${(e: SlInputEvent) => { + const input = e.target as SlInput; + + this.searchByValue = input.value.trim(); + }} > ${when( @@ -275,49 +263,10 @@ export class CollectionsAdd extends WithSearchOrgContext(BtrixElement) { } } - private readonly onSearchInput = debounce(400)(() => { - void this.searchResultsTask.run(); - }); - private findCollectionIndexById(collectionId: string) { return this.collections.findIndex(({ id }) => id === collectionId); } - private filterOutSelectedCollections(results: Collection[]) { - return results.filter( - (result) => this.findCollectionIndexById(result.id) > -1, - ); - } - - private async fetchCollectionsByPrefix( - namePrefix: string, - signal?: AbortSignal, - ) { - try { - const results = await this.getCollections( - { - oid: this.orgId, - namePrefix: namePrefix, - sortBy: "name", - pageSize: INITIAL_PAGE_SIZE, - }, - signal, - ); - return results; - } catch (e) { - if ((e as Error).name === "AbortError") { - console.debug("Fetch aborted to throttle"); - } else { - this.notify.toast({ - message: msg("Sorry, couldn't retrieve Collections at this time."), - variant: "danger", - icon: "exclamation-octagon", - id: "collection-fetch-throttled", - }); - } - } - } - private async getCollections( params?: Partial<{ oid?: string; From 077bb977831485cb505aa97ab49ec0e9eafcbe29 Mon Sep 17 00:00:00 2001 From: sua yoo Date: Wed, 8 Oct 2025 09:54:57 -0700 Subject: [PATCH 10/12] limit search results --- .../src/context/search-org/connectFuse.ts | 1 + .../features/collections/collections-add.ts | 25 +++++++++++++------ frontend/src/pages/org/collections-list.ts | 5 +++- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/frontend/src/context/search-org/connectFuse.ts b/frontend/src/context/search-org/connectFuse.ts index 5c76e8edab..c9814b1693 100644 --- a/frontend/src/context/search-org/connectFuse.ts +++ b/frontend/src/context/search-org/connectFuse.ts @@ -9,5 +9,6 @@ export function connectFuse(values: SearchQuery[]) { return new Fuse(values, { keys: searchQueryKeys, threshold: 0.4, + useExtendedSearch: true, }); } diff --git a/frontend/src/features/collections/collections-add.ts b/frontend/src/features/collections/collections-add.ts index 5ecebb7365..baa6c87fa7 100644 --- a/frontend/src/features/collections/collections-add.ts +++ b/frontend/src/features/collections/collections-add.ts @@ -26,6 +26,7 @@ import type { Collection } from "@/types/collection"; import { TwoWayMap } from "@/utils/TwoWayMap"; const MIN_SEARCH_LENGTH = 1; +const MAX_SEARCH_RESULTS = 5; export type CollectionsChangeEvent = CustomEvent<{ collections: string[]; @@ -67,7 +68,10 @@ export class CollectionsAdd extends WithSearchOrgContext(BtrixElement) { private readonly combobox?: Combobox | null; // Map collection names to ID for managing search options - private readonly nameSearchMap = new TwoWayMap(); + private readonly nameSearchMap = new TwoWayMap< + /* name: */ string, + /* ID: */ string + >(); private get collectionIds() { return this.collections.map(({ id }) => id); @@ -219,12 +223,19 @@ export class CollectionsAdd extends WithSearchOrgContext(BtrixElement) { `; } - const results = this.searchOrg.collections - ?.search(this.searchByValue) - // Filter out items that have been selected - .filter(({ item }) => !this.nameSearchMap.get(item.name)) - // Show first few results - .slice(0, 5); + // Use search pattern that excludes selected names + const includePattern = `"${this.searchByValue}"`; + const excludePattern = this.nameSearchMap + .keys() + .map((name) => `!"${name}"`) + .join(" "); + const pattern = + includePattern + (excludePattern ? ` ${excludePattern}` : ""); + + // TODO Evaluate performance of searching in render, which will block the main thread + const results = this.searchOrg.collections?.search(pattern, { + limit: MAX_SEARCH_RESULTS, + }); if (!results?.length) { return html` diff --git a/frontend/src/pages/org/collections-list.ts b/frontend/src/pages/org/collections-list.ts index 135a1e5cd6..c31214354a 100644 --- a/frontend/src/pages/org/collections-list.ts +++ b/frontend/src/pages/org/collections-list.ts @@ -73,6 +73,7 @@ const sortableFields: Record< }, }; const MIN_SEARCH_LENGTH = 2; +const MAX_SEARCH_RESULTS = 5; enum ListView { List = "list", @@ -417,7 +418,9 @@ export class CollectionsList extends WithSearchOrgContext(BtrixElement) { } const searchResults = - this.searchOrg.collections?.search(this.searchByValue).slice(0, 10) || []; + this.searchOrg.collections?.search(this.searchByValue, { + limit: MAX_SEARCH_RESULTS, + }) || []; if (!searchResults.length) { return html` Date: Wed, 8 Oct 2025 12:02:50 -0700 Subject: [PATCH 11/12] limit combobox results --- frontend/src/components/ui/search-combobox.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/frontend/src/components/ui/search-combobox.ts b/frontend/src/components/ui/search-combobox.ts index 8681d63960..6907612a27 100644 --- a/frontend/src/components/ui/search-combobox.ts +++ b/frontend/src/components/ui/search-combobox.ts @@ -155,9 +155,10 @@ export class SearchCombobox extends LitElement { `; } - const searchResults = this.fuse - .search(this.searchByValue) - .slice(0, MAX_SEARCH_RESULTS); + const searchResults = this.fuse.search(this.searchByValue, { + limit: MAX_SEARCH_RESULTS, + }); + if (!searchResults.length) { return html` Date: Wed, 8 Oct 2025 17:35:34 -0700 Subject: [PATCH 12/12] manually filter out quoted names --- .../src/context/search-org/connectFuse.ts | 2 +- .../features/collections/collections-add.ts | 29 ++++++++++++++----- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/frontend/src/context/search-org/connectFuse.ts b/frontend/src/context/search-org/connectFuse.ts index c9814b1693..5e080c0a09 100644 --- a/frontend/src/context/search-org/connectFuse.ts +++ b/frontend/src/context/search-org/connectFuse.ts @@ -8,7 +8,7 @@ import { searchQueryKeys, type SearchQuery } from "./types"; export function connectFuse(values: SearchQuery[]) { return new Fuse(values, { keys: searchQueryKeys, - threshold: 0.4, + threshold: 0.3, useExtendedSearch: true, }); } diff --git a/frontend/src/features/collections/collections-add.ts b/frontend/src/features/collections/collections-add.ts index baa6c87fa7..d9cec2ac87 100644 --- a/frontend/src/features/collections/collections-add.ts +++ b/frontend/src/features/collections/collections-add.ts @@ -225,17 +225,32 @@ export class CollectionsAdd extends WithSearchOrgContext(BtrixElement) { // Use search pattern that excludes selected names const includePattern = `"${this.searchByValue}"`; - const excludePattern = this.nameSearchMap - .keys() - .map((name) => `!"${name}"`) - .join(" "); + // Fuse doesn't support escaping quotes or operators in expressions yet, + // so we still need to manually filter out collection names with quotes + // from the search results + // https://github.com/krisk/Fuse/issues/765 + const excludeWithQuotes: string[] = []; + const excludeWithoutQuotes: string[] = []; + + this.nameSearchMap.keys().forEach((name) => { + if (name.includes('"')) { + excludeWithQuotes.push(name); + } else { + excludeWithoutQuotes.push(`!"${name}"`); + } + }); + + const excludePattern = excludeWithoutQuotes.join(" "); const pattern = includePattern + (excludePattern ? ` ${excludePattern}` : ""); // TODO Evaluate performance of searching in render, which will block the main thread - const results = this.searchOrg.collections?.search(pattern, { - limit: MAX_SEARCH_RESULTS, - }); + const results = this.searchOrg.collections + ?.search(pattern, { + limit: MAX_SEARCH_RESULTS + excludeWithQuotes.length, + }) + .filter(({ item }) => !excludeWithQuotes.includes(item["name"])) + .slice(0, MAX_SEARCH_RESULTS); if (!results?.length) { return html`