diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 250eb1ef..be8d80af 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -24,6 +24,30 @@ jobs: e2e: uses: ./.github/workflows/e2e.yml + search-api: + runs-on: ubuntu-24.04 + timeout-minutes: 10 + environment: + name: production + steps: + - name: checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + + - name: setup bun + uses: oven-sh/setup-bun@v2 + + - name: install dependencies + run: bun install --frozen-lockfile + + - name: deploy search api to cloudflare workers + uses: cloudflare/wrangler-action@da0e0dfe58b7a431659754fdf3f186c529afbe65 # v3.14.1 + with: + apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }} + accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} + packageManager: bun + workingDirectory: packages/search-api + command: deploy + website: uses: ./.github/workflows/website.yml with: diff --git a/.github/workflows/website.yml b/.github/workflows/website.yml index b2a1a4df..0c65c285 100644 --- a/.github/workflows/website.yml +++ b/.github/workflows/website.yml @@ -28,6 +28,24 @@ jobs: env: NODE_OPTIONS: "--max-old-space-size=8192" + - name: export search indexes + run: bun run search:export + working-directory: packages/documentation + + # TODO: add R2 lifecycle cleanup/retention for per-commit preview search indexes. 
+ - name: upload search index to r2 + working-directory: packages/documentation + env: + AWS_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} + AWS_DEFAULT_REGION: auto + CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} + SEARCH_INDEX_KEY: ${{ github.sha }} + run: | + aws s3 cp ./.output/search/latest.json \ + "s3://hive-docs-search-index/search/${SEARCH_INDEX_KEY}.json" \ + --endpoint-url "https://${CLOUDFLARE_ACCOUNT_ID}.r2.cloudflarestorage.com" + - name: deploy to cloudflare workers uses: cloudflare/wrangler-action@da0e0dfe58b7a431659754fdf3f186c529afbe65 # v3.14.1 id: deploy @@ -37,7 +55,7 @@ jobs: packageManager: bun workingDirectory: packages/documentation command: | - ${{ github.ref == 'refs/heads/main' && 'deploy' || 'versions upload' }} + ${{ github.ref == 'refs/heads/main' && format('deploy --var SEARCH_INDEX_KEY:{0}', github.sha) || format('versions upload --var SEARCH_INDEX_KEY:{0}', github.sha) }} gitHubToken: ${{ secrets.GITHUB_TOKEN }} - name: find deployment comment diff --git a/bun.lock b/bun.lock index 4d6f3f75..5beb6c1e 100644 --- a/bun.lock +++ b/bun.lock @@ -115,6 +115,15 @@ "wrangler": "^4.69.0", }, }, + "packages/search-api": { + "name": "search-api", + "dependencies": { + "fumadocs-core": "16.4.7", + }, + "devDependencies": { + "wrangler": "^4.69.0", + }, + }, }, "patchedDependencies": { "@tanstack/start-plugin-core@1.147.1": "patches/@tanstack%2Fstart-plugin-core@1.147.1.patch", @@ -2367,6 +2376,8 @@ "scslre": ["scslre@0.3.0", "", { "dependencies": { "@eslint-community/regexpp": "^4.8.0", "refa": "^0.12.0", "regexp-ast-analysis": "^0.7.0" } }, "sha512-3A6sD0WYP7+QrjbfNA2FN3FsOaGGFoekCVgTyypy53gPxhbkCIjtO6YWgdrfM+n/8sI8JeXZOIxsHjMTNxQ4nQ=="], + "search-api": ["search-api@workspace:packages/search-api"], + "semver": ["semver@6.3.1", "", { "bin": { "semver": "bin/semver.js" } }, 
"sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA=="], "seroval": ["seroval@1.5.1", "", {}, "sha512-OwrZRZAfhHww0WEnKHDY8OM0U/Qs8OTfIDWhUD4BLpNJUfXK4cGmjiagGze086m+mhI+V2nD0gfbHEnJjb9STA=="], diff --git a/packages/documentation/package.json b/packages/documentation/package.json index 99019b9d..ce2860eb 100644 --- a/packages/documentation/package.json +++ b/packages/documentation/package.json @@ -9,6 +9,7 @@ "scripts": { "dev": "vite dev", "build": "node --max-old-space-size=8192 ./node_modules/vite/bin/vite.js build", + "search:export": "bun --bun ./tools/export-search-indexes.ts ./.output/search/latest.json", "check-seo": "bun ./tools/check-seo.ts", "start": "bun .output/server/index.mjs", "typecheck": "bun --bun fumadocs-mdx && bun --bun tsc --noEmit", diff --git a/packages/documentation/src/lib/search-indexes.ts b/packages/documentation/src/lib/search-indexes.ts new file mode 100644 index 00000000..d0efce69 --- /dev/null +++ b/packages/documentation/src/lib/search-indexes.ts @@ -0,0 +1,155 @@ +import type { StructuredData } from "fumadocs-core/mdx-plugins/remark-structure"; +import type { AdvancedIndex } from "fumadocs-core/search/server"; + +import { CHANGELOG_PAGE_URL } from "@/lib/deployment-changelog"; +import { pathToSlug } from "@/lib/path-to-slug"; +import { getSource } from "@/lib/source"; +import { structure } from "fumadocs-core/mdx-plugins/remark-structure"; +import { findPath } from "fumadocs-core/page-tree"; + +function getDocsBreadcrumbs( + source: Awaited>, + pageUrl: string, +): string[] | undefined { + const pageTree = source.getPageTree(); + const path = findPath( + pageTree.children, + (node) => node.type === "page" && node.url === pageUrl, + ); + if (!path) return undefined; + path.pop(); + const breadcrumbs: string[] = []; + if (typeof pageTree.name === "string" && pageTree.name.length > 0) { + breadcrumbs.push(pageTree.name); + } + for (const segment of path) { + if (typeof segment.name === 
"string" && segment.name.length > 0) { + breadcrumbs.push(segment.name); + } + } + return breadcrumbs; +} + +type DataWithStructuredData = { + structuredData: StructuredData; +}; + +type DataWithLoader = { + load(): Promise; +}; + +async function resolveStructuredData(data: unknown): Promise { + if ( + typeof data === "object" && + data !== null && + "structuredData" in data && + "load" in data === false + ) { + return (data as DataWithStructuredData).structuredData; + } + + if ( + typeof data === "object" && + data !== null && + "load" in data && + typeof (data as DataWithLoader).load === "function" + ) { + const loaded = await (data as DataWithLoader).load(); + return loaded.structuredData; + } + + throw new Error("Cannot resolve structuredData from page"); +} + +async function getChangelogStructuredData(): Promise { + try { + const snapshotModule = + (await import("virtual:deployment-changelog-snapshot")) as { + deploymentChangelogSnapshot?: string; + }; + + if (!snapshotModule.deploymentChangelogSnapshot) { + return { contents: [], headings: [] }; + } + + return structure(snapshotModule.deploymentChangelogSnapshot); + } catch { + return { contents: [], headings: [] }; + } +} + +export async function buildIndexes(): Promise { + const source = await getSource(); + const { blog, caseStudies, productUpdates } = + await import("fumadocs-mdx:collections/server"); + + const changelogStructuredData = getChangelogStructuredData(); + + const docsIndexes = await Promise.all( + source.getPages().map(async (page) => ({ + breadcrumbs: getDocsBreadcrumbs(source, page.url), + description: page.data.description, + id: page.url, + structuredData: + page.url === CHANGELOG_PAGE_URL + ? await changelogStructuredData + : await resolveStructuredData(page.data), + title: page.data.title ?? 
page.url, + url: page.url, + })), + ); + + const caseStudyIndexes = await Promise.all( + caseStudies.map(async (entry) => { + const { structuredData } = await entry.load(); + const slug = pathToSlug(entry.info.path); + return { + breadcrumbs: ["Case Studies"], + description: entry.excerpt, + id: `/case-studies/${slug}`, + structuredData, + title: entry.title, + url: `/case-studies/${slug}`, + }; + }), + ); + + const productUpdateIndexes = await Promise.all( + productUpdates.map(async (entry) => { + const { structuredData } = await entry.load(); + const slug = pathToSlug(entry.info.path); + return { + breadcrumbs: ["Product Updates"], + description: entry.description, + id: `/product-updates/${slug}`, + structuredData, + title: entry.title ?? slug, + url: `/product-updates/${slug}`, + }; + }), + ); + + const blogIndexes = await Promise.all( + blog.map(async (entry) => { + const { structuredData } = await entry.load(); + const slug = entry.info.path + .replace(/\.mdx?$/, "") + .replace(/\/index$/, ""); + return { + breadcrumbs: ["Blog"], + description: entry.description, + id: `/blog/${slug}`, + structuredData, + title: entry.title ?? 
slug, + url: `/blog/${slug}`, + }; + }), + ); + + return [ + ...docsIndexes, + ...caseStudyIndexes, + ...productUpdateIndexes, + ...blogIndexes, + ]; +} diff --git a/packages/documentation/src/lib/seo.ts b/packages/documentation/src/lib/seo.ts index 2a1c7431..7347f0f7 100644 --- a/packages/documentation/src/lib/seo.ts +++ b/packages/documentation/src/lib/seo.ts @@ -10,6 +10,7 @@ type MetaTag = { }; type LinkTag = { + as?: string; href: string; rel: string; }; diff --git a/packages/documentation/src/routeTree.gen.ts b/packages/documentation/src/routeTree.gen.ts index 07842804..bf20e8f8 100644 --- a/packages/documentation/src/routeTree.gen.ts +++ b/packages/documentation/src/routeTree.gen.ts @@ -14,6 +14,7 @@ import { Route as LlmsFullDottxtRouteImport } from './routes/llms-full[.]txt' import { Route as FeedDotxmlRouteImport } from './routes/feed[.]xml' import { Route as LandingRouteImport } from './routes/_landing' import { Route as DocsSplatRouteImport } from './routes/docs/$' +import { Route as ApiSearchDotjsonRouteImport } from './routes/api/search[.]json' import { Route as ApiSearchRouteImport } from './routes/api/search' import { Route as LandingLightOnlyRouteImport } from './routes/_landing/_light-only' import { Route as LandingProductUpdatesIndexRouteImport } from './routes/_landing/product-updates/index' @@ -57,6 +58,11 @@ const DocsSplatRoute = DocsSplatRouteImport.update({ path: '/docs/$', getParentRoute: () => rootRouteImport, } as any) +const ApiSearchDotjsonRoute = ApiSearchDotjsonRouteImport.update({ + id: '/api/search.json', + path: '/api/search.json', + getParentRoute: () => rootRouteImport, +} as any) const ApiSearchRoute = ApiSearchRouteImport.update({ id: '/api/search', path: '/api/search', @@ -158,6 +164,7 @@ export interface FileRoutesByFullPath { '/llms-full.txt': typeof LlmsFullDottxtRoute '/llms.txt': typeof LlmsDottxtRoute '/api/search': typeof ApiSearchRoute + '/api/search.json': typeof ApiSearchDotjsonRoute '/docs/$': typeof DocsSplatRoute 
'/ecosystem': typeof LandingLightOnlyEcosystemRoute '/gateway': typeof LandingLightOnlyGatewayRoute @@ -181,6 +188,7 @@ export interface FileRoutesByTo { '/llms-full.txt': typeof LlmsFullDottxtRoute '/llms.txt': typeof LlmsDottxtRoute '/api/search': typeof ApiSearchRoute + '/api/search.json': typeof ApiSearchDotjsonRoute '/docs/$': typeof DocsSplatRoute '/ecosystem': typeof LandingLightOnlyEcosystemRoute '/gateway': typeof LandingLightOnlyGatewayRoute @@ -207,6 +215,7 @@ export interface FileRoutesById { '/llms.txt': typeof LlmsDottxtRoute '/_landing/_light-only': typeof LandingLightOnlyRouteWithChildren '/api/search': typeof ApiSearchRoute + '/api/search.json': typeof ApiSearchDotjsonRoute '/docs/$': typeof DocsSplatRoute '/_landing/_light-only/ecosystem': typeof LandingLightOnlyEcosystemRoute '/_landing/_light-only/gateway': typeof LandingLightOnlyGatewayRoute @@ -232,6 +241,7 @@ export interface FileRouteTypes { | '/llms-full.txt' | '/llms.txt' | '/api/search' + | '/api/search.json' | '/docs/$' | '/ecosystem' | '/gateway' @@ -255,6 +265,7 @@ export interface FileRouteTypes { | '/llms-full.txt' | '/llms.txt' | '/api/search' + | '/api/search.json' | '/docs/$' | '/ecosystem' | '/gateway' @@ -280,6 +291,7 @@ export interface FileRouteTypes { | '/llms.txt' | '/_landing/_light-only' | '/api/search' + | '/api/search.json' | '/docs/$' | '/_landing/_light-only/ecosystem' | '/_landing/_light-only/gateway' @@ -305,6 +317,7 @@ export interface RootRouteChildren { LlmsFullDottxtRoute: typeof LlmsFullDottxtRoute LlmsDottxtRoute: typeof LlmsDottxtRoute ApiSearchRoute: typeof ApiSearchRoute + ApiSearchDotjsonRoute: typeof ApiSearchDotjsonRoute DocsSplatRoute: typeof DocsSplatRoute LlmsDotmdxDocsSplatRoute: typeof LlmsDotmdxDocsSplatRoute } @@ -346,6 +359,13 @@ declare module '@tanstack/react-router' { preLoaderRoute: typeof DocsSplatRouteImport parentRoute: typeof rootRouteImport } + '/api/search.json': { + id: '/api/search.json' + path: '/api/search.json' + fullPath: 
'/api/search.json' + preLoaderRoute: typeof ApiSearchDotjsonRouteImport + parentRoute: typeof rootRouteImport + } '/api/search': { id: '/api/search' path: '/api/search' @@ -531,6 +551,7 @@ const rootRouteChildren: RootRouteChildren = { LlmsFullDottxtRoute: LlmsFullDottxtRoute, LlmsDottxtRoute: LlmsDottxtRoute, ApiSearchRoute: ApiSearchRoute, + ApiSearchDotjsonRoute: ApiSearchDotjsonRoute, DocsSplatRoute: DocsSplatRoute, LlmsDotmdxDocsSplatRoute: LlmsDotmdxDocsSplatRoute, } diff --git a/packages/documentation/src/routes/__root.tsx b/packages/documentation/src/routes/__root.tsx index 6ca19805..c6e88438 100644 --- a/packages/documentation/src/routes/__root.tsx +++ b/packages/documentation/src/routes/__root.tsx @@ -16,6 +16,12 @@ export const Route = createRootRoute({ component: RootComponent, errorComponent: RootErrorComponent, head: seo({ + links: [ + { + href: withBasePath("/api/search"), + rel: "prefetch", + }, + ], meta: [ { // eslint-disable-next-line unicorn/text-encoding-identifier-case @@ -87,7 +93,8 @@ function RootDocument({ children }: { children: React.ReactNode }) { search={{ options: { api: withBasePath("/api/search"), - type: "static", + delayMs: 500, + type: "fetch", }, }} theme={ diff --git a/packages/documentation/src/routes/api/search.ts b/packages/documentation/src/routes/api/search.ts index 8c43e92e..0fd67a68 100644 --- a/packages/documentation/src/routes/api/search.ts +++ b/packages/documentation/src/routes/api/search.ts @@ -1,142 +1,27 @@ -import type { StructuredData } from "fumadocs-core/mdx-plugins/remark-structure"; -import type { AdvancedIndex } from "fumadocs-core/search/server"; - -import { CHANGELOG_PAGE_URL } from "@/lib/deployment-changelog"; -import { pathToSlug } from "@/lib/path-to-slug"; -import { getSource } from "@/lib/source"; +import { buildIndexes } from "@/lib/search-indexes"; import { createFileRoute } from "@tanstack/react-router"; -import { structure } from "fumadocs-core/mdx-plugins/remark-structure"; -import { findPath } 
from "fumadocs-core/page-tree"; -import { createSearchAPI } from "fumadocs-core/search/server"; -import { deploymentChangelogSnapshot } from "virtual:deployment-changelog-snapshot"; - -function getDocsBreadcrumbs( - source: Awaited>, - pageUrl: string, -): string[] | undefined { - const pageTree = source.getPageTree(); - const path = findPath( - pageTree.children, - (node) => node.type === "page" && node.url === pageUrl, - ); - if (!path) return undefined; - path.pop(); - const breadcrumbs: string[] = []; - if (typeof pageTree.name === "string" && pageTree.name.length > 0) { - breadcrumbs.push(pageTree.name); - } - for (const segment of path) { - if (typeof segment.name === "string" && segment.name.length > 0) { - breadcrumbs.push(segment.name); - } - } - return breadcrumbs; -} - -// eslint-disable-next-line @typescript-eslint/no-explicit-any -- mirrors fumadocs' defaultBuildIndex runtime check for sync/async DocCollectionEntry -async function resolveStructuredData(data: any): Promise { - if ("structuredData" in data) return data.structuredData; - if (typeof data.load === "function") { - const loaded = await data.load(); - return loaded.structuredData; - } - throw new Error("Cannot resolve structuredData from page"); -} - -async function getChangelogStructuredData(): Promise { - if (!deploymentChangelogSnapshot) return { contents: [], headings: [] }; - return structure(deploymentChangelogSnapshot); -} - -async function buildIndexes(): Promise { - const source = await getSource(); - const { blog, caseStudies, productUpdates } = - await import("fumadocs-mdx:collections/server"); - - const changelogStructuredData = getChangelogStructuredData(); +import { createSearchAPI, type SearchAPI } from "fumadocs-core/search/server"; - const docsIndexes = await Promise.all( - source.getPages().map(async (page) => ({ - breadcrumbs: getDocsBreadcrumbs(source, page.url), - description: page.data.description, - id: page.url, - structuredData: - page.url === CHANGELOG_PAGE_URL - ? 
await changelogStructuredData - : await resolveStructuredData(page.data), - title: page.data.title ?? page.url, - url: page.url, - })), - ); - - const caseStudyIndexes = await Promise.all( - caseStudies.map(async (entry) => { - const { structuredData } = await entry.load(); - const slug = pathToSlug(entry.info.path); - return { - breadcrumbs: ["Case Studies"], - description: entry.excerpt, - id: `/case-studies/${slug}`, - structuredData, - title: entry.title, - url: `/case-studies/${slug}`, - }; - }), - ); - - const productUpdateIndexes = await Promise.all( - productUpdates.map(async (entry) => { - const { structuredData } = await entry.load(); - const slug = pathToSlug(entry.info.path); - return { - breadcrumbs: ["Product Updates"], - description: entry.description, - id: `/product-updates/${slug}`, - structuredData, - title: entry.title ?? slug, - url: `/product-updates/${slug}`, - }; - }), - ); +let searchServerPromise: Promise | undefined; - const blogIndexes = await Promise.all( - blog.map(async (entry) => { - const { structuredData } = await entry.load(); - const slug = entry.info.path - .replace(/\.mdx?$/, "") - .replace(/\/index$/, ""); - return { - breadcrumbs: ["Blog"], - description: entry.description, - id: `/blog/${slug}`, - structuredData, - title: entry.title ?? slug, - url: `/blog/${slug}`, - }; +async function getSearchServer() { + searchServerPromise ??= buildIndexes().then((indexes) => + createSearchAPI("advanced", { + indexes, + language: "english", }), ); - return [ - ...docsIndexes, - ...caseStudyIndexes, - ...productUpdateIndexes, - ...blogIndexes, - ]; + return searchServerPromise; } -// In prod this is usually paid during build via prerendered `/api/search`. -// In local dev, the first hit can still be slow because Vite computes it on demand. 
-const searchAPIPromise = createSearchAPI("advanced", { - indexes: await buildIndexes(), - language: "english", -}); - export const Route = createFileRoute("/api/search")({ server: { handlers: { - GET: async () => { - const server = await searchAPIPromise; - return server.staticGET(); + GET: async ({ request }) => { + const server = await getSearchServer(); + + return server.GET(request); }, }, }, diff --git a/packages/documentation/src/routes/api/search[.]json.ts b/packages/documentation/src/routes/api/search[.]json.ts new file mode 100644 index 00000000..23f00e41 --- /dev/null +++ b/packages/documentation/src/routes/api/search[.]json.ts @@ -0,0 +1,17 @@ +import { buildIndexes } from "@/lib/search-indexes"; +import { createFileRoute } from "@tanstack/react-router"; + +export const Route = createFileRoute("/api/search.json")({ + server: { + handlers: { + GET: async () => { + return Response.json(await buildIndexes(), { + headers: { + "Content-Type": "application/json", + }, + status: 200, + }); + }, + }, + }, +}); diff --git a/packages/documentation/src/routes/docs/-$.test.ts b/packages/documentation/src/routes/docs/-$.test.ts index 561b9a02..696f77c6 100644 --- a/packages/documentation/src/routes/docs/-$.test.ts +++ b/packages/documentation/src/routes/docs/-$.test.ts @@ -295,7 +295,9 @@ describe("deployment changelog", () => { }); test("api search indexes the changelog source", async () => { - const res = await fetch(`${BASE_URL}/api/search`, { redirect: "follow" }); + const res = await fetch(`${BASE_URL}/api/search.json`, { + redirect: "follow", + }); expect(res.status).toBe(200); const text = await res.text(); diff --git a/packages/documentation/src/server/cloudflare-entry.ts b/packages/documentation/src/server/cloudflare-entry.ts index 3929469b..a68f372c 100644 --- a/packages/documentation/src/server/cloudflare-entry.ts +++ b/packages/documentation/src/server/cloudflare-entry.ts @@ -15,6 +15,8 @@ type AssetFetcher = { type CloudflareEnv = Record & { ASSETS?: 
AssetFetcher;
+  SEARCH_API?: AssetFetcher;
+  SEARCH_INDEX_KEY?: string;
 };
 
 type CloudflareContext = {
@@ -128,6 +130,17 @@ function isServerFnPath(pathname: string) {
   );
 }
 
+/**
+ * Whether `pathname` is the search endpoint, either bare or prefixed with the
+ * configured `baseURL`.
+ */
+function isSearchAPIPath(pathname: string) {
+  return (
+    pathname === "/api/search" ||
+    (baseURL !== "" && pathname === `${baseURL}/api/search`)
+  );
+}
+
 function createHandler(hooks: HandlerHooks) {
   return {
     email(message: unknown, env: CloudflareEnv, context: CloudflareContext) {
@@ -352,6 +365,52 @@ async function tryServeAsset(
   return assetResponse.status === 404 ? undefined : assetResponse;
 }
 
+/**
+ * Forwards a search request to the `SEARCH_API` service binding, tagging it
+ * with the `x-search-index-key` header taken from `env.SEARCH_INDEX_KEY`.
+ *
+ * Resolves to `undefined` when either binding is missing so the caller can
+ * fall through to its regular request handling.
+ */
+async function proxySearchAPI(request: Request, env: CloudflareEnv) {
+  if (!env.SEARCH_API || !env.SEARCH_INDEX_KEY) {
+    return;
+  }
+
+  // Only the query string is forwarded; the upstream URL is a fixed
+  // placeholder handed to the service binding.
+  const searchURL = new URL("https://search-api.internal/");
+  searchURL.search = new URL(request.url).search;
+
+  const requestHeaders = new Headers(request.headers);
+  requestHeaders.set("x-search-index-key", env.SEARCH_INDEX_KEY);
+
+  const init: RequestInit & { duplex?: "half" } = {
+    headers: requestHeaders,
+    method: request.method,
+    redirect: request.redirect,
+  };
+
+  if (request.method !== "GET" && request.method !== "HEAD") {
+    init.body = request.body;
+    // A streamed request body has to be sent with `duplex: "half"`.
+    init.duplex = "half";
+  }
+
+  const res = await env.SEARCH_API.fetch(new Request(searchURL, init));
+
+  // Set "Cache-Control: private, no-cache": the search URL is unversioned, so
+  // browsers and shared caches must not keep results from an older index.
+  const responseHeaders = new Headers(res.headers);
+  responseHeaders.set("Cache-Control", "private, no-cache");
+
+  return new Response(res.body, {
+    headers: responseHeaders,
+    status: res.status,
+    statusText: res.statusText,
+  });
+}
+
 async function getWebsocketHandler() {
   if (!importMeta._websocket) {
     return;
@@ -404,6 +463,15 @@ export default createHandler({
     const requestURL = new URL(request.url);
     const isAliasedRequest = aliasedRequest !== cfRequest;
 
+    if (isSearchAPIPath(requestURL.pathname)) {
+      // Await before testing the result: proxySearchAPI resolves to undefined
+      // when the bindings are missing, and a bare Promise is always truthy.
+      const searchAPIResponse = await proxySearchAPI(request, env);
+      if (searchAPIResponse) {
+        return searchAPIResponse;
+      }
+    }
+
     const assetResponse = await
tryServeAsset( request, env, diff --git a/packages/documentation/tools/export-search-indexes.ts b/packages/documentation/tools/export-search-indexes.ts new file mode 100644 index 00000000..3342b16f --- /dev/null +++ b/packages/documentation/tools/export-search-indexes.ts @@ -0,0 +1,19 @@ +import { copyFile, mkdir } from "node:fs/promises"; +import { dirname, resolve } from "node:path"; + +const prerenderedSearchIndexPath = + "./.output/public/graphql/hive/api/search.json"; + +async function main() { + const outputArg = process.argv[2] || "./.output/search/indexes.json"; + const outputPath = resolve(outputArg); + + await mkdir(dirname(outputPath), { recursive: true }); + await copyFile(resolve(prerenderedSearchIndexPath), outputPath); + + process.stdout.write( + `Exported search indexes from ${resolve(prerenderedSearchIndexPath)} to ${outputPath}\n`, + ); +} + +await main(); diff --git a/packages/documentation/vite.config.ts b/packages/documentation/vite.config.ts index 597d1257..72407533 100644 --- a/packages/documentation/vite.config.ts +++ b/packages/documentation/vite.config.ts @@ -86,7 +86,7 @@ export default defineConfig(async ({ command }) => ({ projects: ["./tsconfig.json"], }), tanstackStart({ - pages: [{ path: "/api/search" }], + pages: [{ path: "/api/search.json" }], prerender: { crawlLinks: true, enabled: true, diff --git a/packages/documentation/wrangler.toml b/packages/documentation/wrangler.toml index 2ccf7334..218fd68f 100644 --- a/packages/documentation/wrangler.toml +++ b/packages/documentation/wrangler.toml @@ -5,3 +5,7 @@ main = ".output/server/index.mjs" [assets] directory = ".output/public" + +[[services]] +binding = "SEARCH_API" +service = "hive-search-api" diff --git a/packages/search-api/README.md b/packages/search-api/README.md new file mode 100644 index 00000000..f9f1ad90 --- /dev/null +++ b/packages/search-api/README.md @@ -0,0 +1,145 @@ +# Search API + +Cloudflare Worker that serves the Fumadocs/orama-based search endpoint for the 
documentation site. + +The Worker does not generate search indexes on its own; it reads a build-specific search index from R2 and caches the Search Server API in Worker isolate memory. + +## Request Flow + +1. The documentation app configures Fumadocs search with `withBasePath("/api/search")`. +2. In production this resolves to `/graphql/hive/api/search` on the docs Worker origin. +3. The docs Worker intercepts that path and proxies it to this Worker through the Cloudflare service binding `SEARCH_API`. +4. The docs Worker forwards `SEARCH_INDEX_KEY` as the `x-search-index-key` request header. +5. This Worker maps that header to the R2 object key `search/<SEARCH_INDEX_KEY>.json`. +6. The Worker loads that R2 object and caches the Search Server API in Worker isolate memory. + +## Search Index Generation + +The search index is generated by `packages/documentation`, not by this package. + +During docs build, TanStack Start prerenders the static route configured in `packages/documentation/vite.config.ts`: + +```ts +tanstackStart({ + pages: [{ path: "/api/search.json" }], +}); +``` + +That produces: + +```txt +packages/documentation/.output/public/graphql/hive/api/search.json +``` + +Then `packages/documentation` runs: + +```sh +bun run search:export +``` + +which copies the prerendered file to: + +```txt +packages/documentation/.output/search/latest.json +``` + +The `latest.json` filename is only a local CI artifact name. It is not used as the runtime R2 key. + +## R2 Upload + +`.github/workflows/website.yml` uploads the generated index to R2 using the GitHub commit SHA: + +```txt +hive-docs-search-index/search/<commit-sha>.json +``` + +The docs Worker is deployed with: + +```txt +SEARCH_INDEX_KEY=<commit-sha> +``` + +At runtime, the docs Worker forwards that value to this Worker as: + +```txt +x-search-index-key: <commit-sha> +``` + +This Worker then reads: + +```txt +search/<commit-sha>.json +``` + +from the `SEARCH_INDEX` R2 binding. 
+ +## Worker Isolate Caching + +The Worker caches each loaded Search Server API in module-level isolate memory, selected by the immutable R2 key: + +```ts +searchServers.get("search/<commit-sha>.json"); +``` + +Each Worker isolate caches its `SearchAPI` instance after loading the R2 object once. This cache is opportunistic: Cloudflare may evict Worker isolates, and different isolates or regions may load the same index independently. + +Do not use mutable keys such as `latest.json` for requests. Worker isolates cache by object name, so reusing a mutable key could keep serving an old in-memory index after R2 has been overwritten. + +For that reason, `x-search-index-key` is required. Requests without it return `400`. + +Query responses are also cached through Cloudflare's Cache API using a synthetic key that includes the immutable R2 object key. Responses served to browsers through the docs Worker use `Cache-Control: private, no-cache` so browsers and shared caches do not keep stale results across deployments for the unversioned `/api/search?query=...` URL. + +## Production Deployment + +On `main`, `.github/workflows/main.yml` deploys this Worker before deploying the docs Worker: + +```txt +search-api deploy --> docs build/export/upload --> docs worker deploy +``` + +This ensures the docs Worker service binding points to an existing, up-to-date `hive-search-api` Worker. + +## Preview Deployments + +PR previews do not deploy this Worker. + +They reuse the shared `hive-search-api` Worker and isolate search data through `SEARCH_INDEX_KEY`: + +```txt +docs preview build --> upload search/<commit-sha>.json --> docs worker version upload with SEARCH_INDEX_KEY=<commit-sha> +``` + +The shared Search API Worker loads a separate in-memory Search Server API per preview SHA, so preview indexes do not share cached search state with production or other previews. 
+
+## Configuration
+
+`wrangler.toml` defines:
+
+```toml
+name = "hive-search-api"
+main = "index.ts"
+compatibility_date = "2026-04-30"
+
+[[r2_buckets]]
+binding = "SEARCH_INDEX"
+bucket_name = "hive-docs-search-index"
+```
+
+The Worker has no Durable Object bindings; the R2 bucket above is its only
+external state.
+
+## Local Validation
+
+Validate the Worker bundle without deploying:
+
+```sh
+bunx wrangler deploy --dry-run
+```
+
+Run the Worker locally from this package:
+
+```sh
+bun run dev
+```
+
+Direct requests to this Worker must include `x-search-index-key`, and the referenced `search/<key>.json` object must exist in the bound R2 bucket.
diff --git a/packages/search-api/index.ts b/packages/search-api/index.ts
new file mode 100644
index 00000000..0bbcd71c
--- /dev/null
+++ b/packages/search-api/index.ts
@@ -0,0 +1,223 @@
+import {
+  createSearchAPI,
+  type AdvancedIndex,
+  type SearchAPI,
+} from "fumadocs-core/search/server";
+
+const CACHE_TTL_SECONDS = 60 * 60 * 24 * 30; // 30 days
+const CORS_HEADERS = {
+  "Access-Control-Allow-Origin": "*",
+  "Access-Control-Allow-Methods": "GET,HEAD,POST,OPTIONS",
+  "Access-Control-Allow-Headers": "Content-Type",
+};
+
+/** Search servers keyed by R2 object key, shared within this isolate. */
+const searchServers = new Map<string, Promise<SearchAPI>>();
+/** ISO timestamp of each completed index load; echoed in error payloads. */
+const loadedAtByIndex = new Map<string, string>();
+
+type SearchApiEnv = {
+  SEARCH_INDEX: {
+    get(key: string): Promise<{ json<T>(): Promise<T> } | null>;
+  };
+};
+
+/** The part of the Worker execution context this module relies on. */
+type WorkerContext = {
+  waitUntil(promise: Promise<unknown>): void;
+};
+
+/**
+ * Builds the synthetic Cache API key for a request: its URL with the index
+ * key folded into the path, so every index gets separate cache entries.
+ */
+function getCacheKey(request: Request, searchIndexKey: string) {
+  const url = new URL(request.url);
+
+  url.pathname = `/__search-cache/v3/${searchIndexKey}${url.pathname}`;
+  url.search = new URLSearchParams(url.searchParams).toString();
+
+  return new Request(url.toString(), {
+    method: request.method,
+    headers: {
+      accept: request.headers.get("accept") ?? "",
+    },
+  });
+}
+
+/** Copies `response` with a long-lived, immutable `Cache-Control` header. */
+function cacheableResponse(response: Response) {
+  const headers = new Headers(response.headers);
+
+  headers.set(
+    "Cache-Control",
+    `public, max-age=${CACHE_TTL_SECONDS}, s-maxage=${CACHE_TTL_SECONDS}, immutable`,
+  );
+
+  return new Response(response.body, {
+    status: response.status,
+    statusText: response.statusText,
+    headers,
+  });
+}
+
+/**
+ * Creates a JSON response that carries the CORS headers.
+ *
+ * `init.headers` may be any `HeadersInit` (plain object, entry list or
+ * `Headers` instance); its entries override the defaults.
+ */
+function json(data: unknown, init: ResponseInit = {}) {
+  const headers = new Headers({
+    ...CORS_HEADERS,
+    "Content-Type": "application/json",
+  });
+
+  // Object spread would drop the entries of a `Headers` instance, so merge
+  // the caller's headers through the Headers API instead.
+  new Headers(init.headers).forEach((value, key) => {
+    headers.set(key, value);
+  });
+
+  return Response.json(data, { ...init, headers });
+}
+
+/** Copies `response` with the CORS headers applied. */
+function withCors(response: Response) {
+  const headers = new Headers(response.headers);
+
+  for (const [key, value] of Object.entries(CORS_HEADERS)) {
+    headers.set(key, value);
+  }
+
+  return new Response(response.body, {
+    status: response.status,
+    statusText: response.statusText,
+    headers,
+  });
+}
+
+/** Maps the `x-search-index-key` header to its R2 object key, or `null`. */
+function getSearchIndexKey(request: Request): string | null {
+  const indexKey = request.headers.get("x-search-index-key");
+
+  return indexKey ? `search/${indexKey}.json` : null;
+}
+
+/**
+ * Reads one index from R2 and builds a search server from it.
+ *
+ * @throws Error when `searchIndexKey` does not exist in the bucket.
+ */
+async function loadSearchServer(
+  env: SearchApiEnv,
+  searchIndexKey: string,
+): Promise<SearchAPI> {
+  const blob = await env.SEARCH_INDEX.get(searchIndexKey);
+  if (!blob) {
+    throw new Error(`${searchIndexKey} not found in R2`);
+  }
+
+  // NOTE(review): the payload is assumed to be the index array exported by
+  // the docs build; it is not validated here.
+  const indexes = await blob.json<AdvancedIndex[]>();
+  const server = createSearchAPI("advanced", {
+    indexes,
+    language: "english",
+  });
+
+  loadedAtByIndex.set(searchIndexKey, new Date().toISOString());
+
+  return server;
+}
+
+/**
+ * Returns the search server for `searchIndexKey`, loading it on first use.
+ * Concurrent callers share the in-flight load; a failed load is evicted so
+ * that the next request retries instead of reusing the rejection.
+ */
+async function getSearchServer(env: SearchApiEnv, searchIndexKey: string) {
+  let searchServerPromise = searchServers.get(searchIndexKey);
+
+  if (!searchServerPromise) {
+    searchServerPromise = loadSearchServer(env, searchIndexKey).catch(
+      (error: unknown) => {
+        searchServers.delete(searchIndexKey);
+        loadedAtByIndex.delete(searchIndexKey);
+        throw error;
+      },
+    );
+    searchServers.set(searchIndexKey, searchServerPromise);
+  }
+
+  return searchServerPromise;
+}
+
+export default {
+  /**
+   * Answers CORS preflights, rejects requests without an index key with 400,
+   * and otherwise runs the search, serving repeated GET queries from the
+   * Cache API.
+   */
+  async fetch(request: Request, env: SearchApiEnv, ctx: WorkerContext) {
+    if (request.method === "OPTIONS") {
+      return new Response(null, {
+        status: 204,
+        headers: CORS_HEADERS,
+      });
+    }
+
+    const searchIndexKey = getSearchIndexKey(request);
+    if (!searchIndexKey) {
+      return json(
+        {
+          ok: false,
+          error: "Missing x-search-index-key header",
+        },
+        { status: 400 },
+      );
+    }
+
+    const requestURL = new URL(request.url);
+    // The Cache API only stores GET requests (`cache.put` rejects other
+    // methods), so HEAD/POST queries always go to the search server.
+    const isCacheable =
+      request.method === "GET" && requestURL.searchParams.has("query");
+
+    const cache = caches.default;
+    const cacheKey = getCacheKey(request, searchIndexKey);
+
+    if (isCacheable) {
+      const cached = await cache.match(cacheKey);
+      if (cached) {
+        return cached;
+      }
+    }
+
+    try {
+      const server = await getSearchServer(env, searchIndexKey);
+      const response = withCors(await server.GET(request));
+
+      if (!isCacheable) {
+        return response;
+      }
+
+      const responseToReturn = cacheableResponse(response);
+
+      if (responseToReturn.ok) {
+        ctx.waitUntil(cache.put(cacheKey, responseToReturn.clone()));
+      }
+
+      return responseToReturn;
+    } catch (error) {
+      return json(
+        {
+          ok: false,
+          error: error instanceof Error ? error.message : String(error),
+          loadedAt: loadedAtByIndex.get(searchIndexKey) ?? null,
+        },
+        { status: 500 },
+      );
+    }
+  },
+};
diff --git a/packages/search-api/package.json b/packages/search-api/package.json
new file mode 100644
index 00000000..bfe1a287
--- /dev/null
+++ b/packages/search-api/package.json
@@ -0,0 +1,16 @@
+{
+  "name": "search-api",
+  "private": true,
+  "type": "module",
+  "sideEffects": false,
+  "scripts": {
+    "dev": "wrangler dev",
+    "deploy": "wrangler deploy"
+  },
+  "dependencies": {
+    "fumadocs-core": "16.4.7"
+  },
+  "devDependencies": {
+    "wrangler": "^4.69.0"
+  }
+}
diff --git a/packages/search-api/wrangler.toml b/packages/search-api/wrangler.toml
new file mode 100644
index 00000000..c105da56
--- /dev/null
+++ b/packages/search-api/wrangler.toml
@@ -0,0 +1,7 @@
+name = "hive-search-api"
+main = "index.ts"
+compatibility_date = "2026-04-30"
+
+[[r2_buckets]]
+binding = "SEARCH_INDEX"
+bucket_name = "hive-docs-search-index"