CortexReach · vicjayjay · Mar 19, 2026 · Mar 19, 2026 · Mar 22, 2026 · Mar 22, 2026
diff --git a/src/embedder.ts b/src/embedder.ts
@@ -107,6 +107,7 @@ type EmbeddingProviderProfile =
   | "azure-openai"
   | "jina"
   | "voyage-compatible"
+  | "nvidia"
   | "generic-openai-compatible";
 
 interface EmbeddingCapabilities {
@@ -207,6 +208,7 @@ function getProviderLabel(baseURL: string | undefined, model: string): string {
     if (profile === "voyage-compatible" && /api\.voyageai\.com/i.test(base)) return "Voyage";
     if (profile === "openai" && /api\.openai\.com/i.test(base)) return "OpenAI";
     if (profile === "azure-openai" || /\.openai\.azure\.com/i.test(base)) return "Azure OpenAI";
+    if (profile === "nvidia") return "NVIDIA NIM";
 
     try {
       return new URL(base).host;
@@ -223,6 +225,8 @@ function getProviderLabel(baseURL: string | undefined, model: string): string {
     case "openai":
     case "azure-openai":
       return "OpenAI";
+    case "nvidia":
+      return "NVIDIA NIM";
     default:
       return "embedding provider";
   }
@@ -234,12 +238,19 @@ function detectEmbeddingProviderProfile(
 ): EmbeddingProviderProfile {
   const base = baseURL || "";
 
+  // Host-based detection runs first — endpoint owner semantics take precedence
+  // over model-name heuristics to avoid misclassifying e.g. a jina-xxx model
+  // served from .nvidia.com as Jina instead of NVIDIA.
   if (/api\.openai\.com/i.test(base)) return "openai";
   if (/\.openai\.azure\.com/i.test(base)) return "azure-openai";
-  if (/api\.jina\.ai/i.test(base) || /^jina-/i.test(model)) return "jina";
-  if (/api\.voyageai\.com/i.test(base) || /^voyage\b/i.test(model)) {
-    return "voyage-compatible";
-  }
+  if (/api\.jina\.ai/i.test(base)) return "jina";
+  if (/api\.voyageai\.com/i.test(base)) return "voyage-compatible";
+  if (/\.nvidia\.com/i.test(base)) return "nvidia";
+
+  // Model-prefix fallback — only when baseURL didn't match a known host
+  if (/^jina-/i.test(model)) return "jina";
+  if (/^voyage\b/i.test(model)) return "voyage-compatible";
+  if (/^nvidia\//i.test(model) || /^nv-embed/i.test(model)) return "nvidia";
 
   return "generic-openai-compatible";
 }
@@ -273,6 +284,19 @@ function getEmbeddingCapabilities(profile: EmbeddingProviderProfile): EmbeddingC
         },
         dimensionsField: "output_dimension",
       };
+    case "nvidia":
+      return {
+        encoding_format: true,
+        normalized: false,
+        taskField: "input_type",
+        taskValueMap: {
+          "retrieval.query": "query",
+          "retrieval.passage": "passage",
+          "query": "query",
+          "passage": "passage",
+        },
+        dimensionsField: "dimensions",
+      };
     case "generic-openai-compatible":
     default:
       return {
@@ -633,7 +657,11 @@ export class Embedder {
       payload.normalized = this._normalized;
     }
 
-    // Task hint: field name and optional value translation are provider-defined.
+    // Task hint: only injected when BOTH the provider profile defines a taskField
+    // AND the caller passes a task value (from user-configured taskQuery/taskPassage).
+    // This means broad provider detection (e.g. any .nvidia.com host) is safe —
+    // non-retriever models that don't expect input_type are unaffected unless the
+    // user explicitly configures task hints.
     if (this._capabilities.taskField && task) {
       const cap = this._capabilities;
       const value = cap.taskValueMap?.[task] ?? task;

diff --git a/test/nvidia-nim-provider-profile.test.mjs b/test/nvidia-nim-provider-profile.test.mjs
@@ -0,0 +1,211 @@
+import assert from "node:assert/strict";
+import http from "node:http";
+import { describe, it } from "node:test";
+
+import jitiFactory from "jiti";
+
+const jiti = jitiFactory(import.meta.url, { interopDefault: true });
+const { Embedder, formatEmbeddingProviderError } = jiti("../src/embedder.ts");
+
+/**
+ * Run `fn` against a throwaway loopback HTTP server, then close the server.
+ * POST /v1/embeddings is answered with one fixed vector of `dims` numbers and
+ * its parsed JSON body is kept for `getCaptured()` (null until a request lands).
+ */
+async function withCaptureServer(dims, fn) {
+  let lastRequestBody = null;
+  const embedding = Array.from({ length: dims }, (_, index) => index * 0.01);
+  const responseJson = JSON.stringify({
+    object: "list",
+    data: [{ object: "embedding", index: 0, embedding }],
+    usage: { prompt_tokens: 5, total_tokens: 5 },
+  });
+
+  const server = http.createServer((req, res) => {
+    const isEmbeddingsCall = req.method === "POST" && req.url === "/v1/embeddings";
+    if (!isEmbeddingsCall) {
+      res.writeHead(404);
+      res.end("not found");
+      return;
+    }
+    const parts = [];
+    req.on("data", (part) => parts.push(part));
+    req.on("end", () => {
+      lastRequestBody = JSON.parse(Buffer.concat(parts).toString());
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(responseJson);
+    });
+  });
+
+  await new Promise((resolve) => server.listen(0, "127.0.0.1", resolve));
+  const bound = server.address();
+  const port = bound && typeof bound === "object" ? bound.port : 0;
+  try {
+    await fn({ baseURL: `http://127.0.0.1:${port}/v1`, port, getCaptured: () => lastRequestBody });
+  } finally {
+    await new Promise((resolve) => server.close(resolve));
+  }
+}
+
+describe("NVIDIA NIM provider profile", () => {
+  it("sends input_type=query for NVIDIA NIM (nv-embed model prefix)", async () => {
+    // The loopback baseURL matches no provider host; the nv-embed* model name selects NVIDIA.
+    const vectorSize = 128;
+    await withCaptureServer(vectorSize, async (capture) => {
+      const nim = new Embedder({
+        baseURL: capture.baseURL,
+        model: "nv-embedqa-e5-v5",
+        apiKey: "test-key",
+        dimensions: vectorSize,
+        taskQuery: "retrieval.query",
+        taskPassage: "retrieval.passage",
+      });
+      await nim.embedQuery("test query");
+
+      const sent = capture.getCaptured();
+      assert.ok(sent, "Request body should be captured");
+      assert.equal(sent.input_type, "query", "Should send input_type=query for NVIDIA");
+      assert.equal(sent.task, undefined, "Should NOT send task field for NVIDIA");
+    });
+  });
+
+  it("maps retrieval.passage → passage for NVIDIA NIM", async () => {
+    // Passage-side counterpart of the query test: the configured taskPassage value is translated.
+    const vectorSize = 128;
+    await withCaptureServer(vectorSize, async (capture) => {
+      const nim = new Embedder({
+        baseURL: capture.baseURL,
+        model: "nv-embedqa-e5-v5",
+        apiKey: "test-key",
+        dimensions: vectorSize,
+        taskQuery: "retrieval.query",
+        taskPassage: "retrieval.passage",
+      });
+      await nim.embedPassage("test document");
+
+      const sent = capture.getCaptured();
+      assert.ok(sent, "Request body should be captured");
+      assert.equal(sent.input_type, "passage", "Should map retrieval.passage → passage");
+      assert.equal(sent.task, undefined, "Should NOT send task field for NVIDIA");
+    });
+  });
+
+  it("detects NVIDIA from nvidia/ model prefix", async () => {
+    // Uses the short query/passage task values, which the NVIDIA task map passes through as-is.
+    const vectorSize = 128;
+    await withCaptureServer(vectorSize, async (capture) => {
+      const nim = new Embedder({
+        baseURL: capture.baseURL,
+        model: "nvidia/llama-3.2-nv-embedqa-1b-v2",
+        apiKey: "test-key",
+        dimensions: vectorSize,
+        taskQuery: "query",
+        taskPassage: "passage",
+      });
+      await nim.embedQuery("test");
+
+      const sent = capture.getCaptured();
+      assert.ok(sent, "Request body should be captured");
+      assert.equal(sent.input_type, "query", "nvidia/ model prefix should trigger input_type");
+      assert.equal(sent.task, undefined, "nvidia/ model prefix should NOT send task");
+    });
+  });
+
+  it("detects NVIDIA from a .nvidia.com baseURL", () => {
+    // The model name carries no provider hint, so only the host can yield the NVIDIA label.
+    const failure = new Error("boom");
+    const context = { baseURL: "https://build.nvidia.com/v1", model: "custom-embed-model", mode: "single" };
+
+    const formatted = formatEmbeddingProviderError(failure, context);
+
+    assert.equal(formatted, "Failed to generate embedding from NVIDIA NIM: boom");
+  });
+
+  it(".nvidia.com baseURL with conflicting jina- model prefix → NVIDIA wins", () => {
+    // Host-based detection must win over the model-name fallback: a jina-*
+    // model served from a .nvidia.com endpoint is an NVIDIA deployment, so it
+    // must be labelled as NVIDIA rather than Jina.
+    //
+    // Detection is observed through the provider label embedded in the
+    // formatted error message. formatEmbeddingProviderError is called
+    // synchronously and returns the message string, so no capture server or
+    // Embedder instance is needed; an Embedder pointed at the loopback capture
+    // server could never reach the .nvidia.com host branch anyway.
+    const nvidiaBaseURL = "https://integrate.api.nvidia.com/v1";
+    const conflictingModel = "jina-embeddings-v3";
+
+    const message = formatEmbeddingProviderError(new Error("test"), {
+      baseURL: nvidiaBaseURL,
+      model: conflictingModel,
+      mode: "single",
+    });
+
+    assert.equal(
+      message,
+      "Failed to generate embedding from NVIDIA NIM: test",
+      ".nvidia.com host should win over jina- model prefix",
+    );
+  });
+
+  it("NVIDIA profile (nvidia/ model prefix) without taskQuery/taskPassage → no input_type injected", async () => {
+    const dims = 128;
+    await withCaptureServer(dims, async ({ baseURL, getCaptured }) => {
+      // baseURL is the loopback capture server, so the NVIDIA profile is
+      // selected by the nvidia/ model prefix here, not by a .nvidia.com host.
+      const embedder = new Embedder({
+        baseURL,
+        model: "nvidia/nv-clip-v1",
+        apiKey: "test-key",
+        dimensions: dims,
+        // Deliberately omit taskQuery and taskPassage
+      });
+      await embedder.embedQuery("test query");
+      const body = getCaptured();
+
+      assert.ok(body, "Request body should be captured");
+      assert.equal(body.input_type, undefined,
+        "NVIDIA profile without taskQuery/taskPassage should NOT inject input_type");
+      assert.equal(body.task, undefined,
+        "NVIDIA profile without taskQuery/taskPassage should NOT inject task");
+    });
+  });
+
+  it("non-NVIDIA: Jina sends task field", async () => {
+    // Control case: a jina-* model on a loopback host must keep using the `task` field.
+    const vectorSize = 128;
+    await withCaptureServer(vectorSize, async (capture) => {
+      const jina = new Embedder({
+        baseURL: capture.baseURL,
+        model: "jina-embeddings-v5-text-small",
+        apiKey: "test-key",
+        dimensions: vectorSize,
+        taskQuery: "retrieval.query",
+        taskPassage: "retrieval.passage",
+      });
+      await jina.embedQuery("test query");
+
+      const sent = capture.getCaptured();
+      assert.ok(sent, "Request body should be captured");
+      assert.equal(sent.task, "retrieval.query", "Jina should send task field");
+      assert.equal(sent.input_type, undefined, "Jina should NOT send input_type");
+    });
+  });
+
+  it("non-NVIDIA: generic OpenAI-compatible sends neither task nor input_type", async () => {
+    // Control case: no task hints configured and a model name with no provider prefix.
+    const vectorSize = 128;
+    await withCaptureServer(vectorSize, async (capture) => {
+      const generic = new Embedder({
+        baseURL: capture.baseURL,
+        model: "custom-embed-model",
+        apiKey: "test-key",
+        dimensions: vectorSize,
+      });
+      await generic.embedQuery("test query");
+
+      const sent = capture.getCaptured();
+      assert.ok(sent, "Request body should be captured");
+      assert.equal(sent.task, undefined, "Generic provider should NOT send task");
+      assert.equal(sent.input_type, undefined, "Generic provider should NOT send input_type");
+    });
+  });
+});