Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion src/services/code-index/__tests__/service-factory.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ describe("CodeIndexServiceFactory", () => {
// Arrange
const testConfig = {
embedderProvider: "gemini",
modelId: "text-embedding-004",
modelId: "gemini-embedding-001",
geminiOptions: {
apiKey: "test-gemini-api-key",
},
Expand All @@ -297,6 +297,25 @@ describe("CodeIndexServiceFactory", () => {
factory.createEmbedder()

// Assert
expect(MockedGeminiEmbedder).toHaveBeenCalledWith("test-gemini-api-key", "gemini-embedding-001")
})

it("should pass deprecated text-embedding-004 modelId to GeminiEmbedder (migration happens inside GeminiEmbedder)", () => {
	// Given: a Gemini configuration that still names the deprecated model.
	const geminiApiKey = "test-gemini-api-key"
	const legacyModelId = "text-embedding-004"
	mockConfigManager.getConfig.mockReturnValue({
		embedderProvider: "gemini",
		modelId: legacyModelId,
		geminiOptions: { apiKey: geminiApiKey },
	} as any)

	// When: the factory builds the embedder from that configuration.
	factory.createEmbedder()

	// Then: the configured model ID is forwarded unchanged. Rewriting it to the
	// replacement model is GeminiEmbedder's job, not the factory's.
	expect(MockedGeminiEmbedder).toHaveBeenCalledWith(geminiApiKey, legacyModelId)
})

Expand Down
27 changes: 22 additions & 5 deletions src/services/code-index/embedders/__tests__/gemini.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ describe("GeminiEmbedder", () => {
it("should create an instance with specified model", () => {
// Arrange
const apiKey = "test-gemini-api-key"
const modelId = "text-embedding-004"
const modelId = "gemini-embedding-001"

// Act
embedder = new GeminiEmbedder(apiKey, modelId)
Expand All @@ -53,7 +53,24 @@ describe("GeminiEmbedder", () => {
expect(MockedOpenAICompatibleEmbedder).toHaveBeenCalledWith(
"https://generativelanguage.googleapis.com/v1beta/openai/",
apiKey,
"text-embedding-004",
"gemini-embedding-001",
2048,
)
})

it("should migrate deprecated text-embedding-004 to gemini-embedding-001", () => {
	// Given: the endpoint, key, and the model ID that the migration should produce.
	const geminiBaseUrl = "https://generativelanguage.googleapis.com/v1beta/openai/"
	const geminiApiKey = "test-gemini-api-key"
	const replacementModelId = "gemini-embedding-001"

	// When: the embedder is constructed with the deprecated model ID.
	embedder = new GeminiEmbedder(geminiApiKey, "text-embedding-004")

	// Then: the wrapped OpenAI-compatible embedder receives the replacement ID,
	// never the deprecated one.
	expect(MockedOpenAICompatibleEmbedder).toHaveBeenCalledWith(geminiBaseUrl, geminiApiKey, replacementModelId, 2048)
})
Expand Down Expand Up @@ -109,8 +126,8 @@ describe("GeminiEmbedder", () => {
})

it("should use provided model parameter when specified", async () => {
// Arrange
embedder = new GeminiEmbedder("test-api-key", "text-embedding-004")
// Arrange - construct with gemini-embedding-001 (a current, non-deprecated model)
embedder = new GeminiEmbedder("test-api-key", "gemini-embedding-001")
const texts = ["test text 1", "test text 2"]
const mockResponse = {
embeddings: [
Expand All @@ -120,7 +137,7 @@ describe("GeminiEmbedder", () => {
}
mockCreateEmbeddings.mockResolvedValue(mockResponse)

// Act
// Act - pass the model explicitly as the runtime parameter
const result = await embedder.createEmbeddings(texts, "gemini-embedding-001")

// Assert
Expand Down
29 changes: 25 additions & 4 deletions src/services/code-index/embedders/gemini.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,33 @@ import { TelemetryService } from "@roo-code/telemetry"
* with configuration for Google's Gemini embedding API.
*
* Supported models:
* - text-embedding-004 (dimension: 768)
* - gemini-embedding-001 (dimension: 2048)
* - gemini-embedding-001 (dimension: 3072)
*
* Note: text-embedding-004 has been deprecated and is automatically
* migrated to gemini-embedding-001 for backward compatibility.
*/
export class GeminiEmbedder implements IEmbedder {
private readonly openAICompatibleEmbedder: OpenAICompatibleEmbedder
private static readonly GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
private static readonly DEFAULT_MODEL = "gemini-embedding-001"
/**
* Deprecated models that are automatically migrated to their replacements.
* Users with these models configured will be silently migrated without interruption.
*/
private static readonly DEPRECATED_MODEL_MIGRATIONS: Record<string, string> = {
"text-embedding-004": "gemini-embedding-001",
}
private readonly modelId: string

/**
* Migrates deprecated model IDs to their replacements.
* @param modelId The model ID to potentially migrate
* @returns The migrated model ID, or the original if no migration is needed
*/
private static migrateModelId(modelId: string): string {
	const migrations = GeminiEmbedder.DEPRECATED_MODEL_MIGRATIONS

	// Consult own entries only. The migration table is a plain object literal, so
	// indexing it with an inherited key such as "constructor", "toString" or
	// "__proto__" would yield an Object.prototype member rather than a model ID.
	if (!Object.prototype.hasOwnProperty.call(migrations, modelId)) {
		return modelId
	}

	// The `??` fallback keeps the return type a plain string even when indexed
	// access is typed as possibly undefined.
	return migrations[modelId] ?? modelId
}

/**
* Creates a new Gemini embedder
* @param apiKey The Gemini API key for authentication
Expand All @@ -29,8 +47,11 @@ export class GeminiEmbedder implements IEmbedder {
throw new Error(t("embeddings:validation.apiKeyRequired"))
}

// Use provided model or default
this.modelId = modelId || GeminiEmbedder.DEFAULT_MODEL
// Migrate deprecated models to their replacements silently
const migratedModelId = modelId ? GeminiEmbedder.migrateModelId(modelId) : undefined

// Use provided model (after migration) or default
this.modelId = migratedModelId || GeminiEmbedder.DEFAULT_MODEL

// Create an OpenAI Compatible embedder with Gemini's configuration
this.openAICompatibleEmbedder = new OpenAICompatibleEmbedder(
Expand Down
95 changes: 95 additions & 0 deletions src/shared/__tests__/embeddingModels.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import { describe, it, expect } from "vitest"
import {
getModelDimension,
getModelScoreThreshold,
getDefaultModelId,
EMBEDDING_MODEL_PROFILES,
} from "../embeddingModels"

describe("embeddingModels", () => {
	describe("EMBEDDING_MODEL_PROFILES", () => {
		it("should have gemini provider with gemini-embedding-001 model", () => {
			const { gemini } = EMBEDDING_MODEL_PROFILES
			expect(gemini).toBeDefined()

			const currentProfile = gemini!["gemini-embedding-001"]
			expect(currentProfile).toBeDefined()
			expect(currentProfile.dimension).toBe(3072)
		})

		it("should have deprecated text-embedding-004 in gemini profiles for backward compatibility", () => {
			// Configurations that still name text-embedding-004 must keep resolving
			// to a profile, even though GeminiEmbedder swaps the model for
			// gemini-embedding-001 when it is constructed.
			const { gemini } = EMBEDDING_MODEL_PROFILES
			expect(gemini).toBeDefined()

			const legacyProfile = gemini!["text-embedding-004"]
			expect(legacyProfile).toBeDefined()
			expect(legacyProfile.dimension).toBe(3072)
		})
	})

	describe("getModelDimension", () => {
		it("should return dimension for gemini-embedding-001", () => {
			expect(getModelDimension("gemini", "gemini-embedding-001")).toBe(3072)
		})

		it("should return dimension for deprecated text-embedding-004", () => {
			// The deprecated ID reports 3072, the same as gemini-embedding-001.
			// That is the model GeminiEmbedder actually uses after migration, and
			// the vector store dimension has to match the embeddings it produces.
			expect(getModelDimension("gemini", "text-embedding-004")).toBe(3072)
		})

		it("should return undefined for unknown model", () => {
			expect(getModelDimension("gemini", "unknown-model")).toBeUndefined()
		})

		it("should return undefined for unknown provider", () => {
			expect(getModelDimension("unknown-provider" as any, "some-model")).toBeUndefined()
		})

		it("should return correct dimensions for openai models", () => {
			const smallDimension = getModelDimension("openai", "text-embedding-3-small")
			expect(smallDimension).toBe(1536)

			const largeDimension = getModelDimension("openai", "text-embedding-3-large")
			expect(largeDimension).toBe(3072)

			const adaDimension = getModelDimension("openai", "text-embedding-ada-002")
			expect(adaDimension).toBe(1536)
		})
	})

	describe("getModelScoreThreshold", () => {
		it("should return score threshold for gemini-embedding-001", () => {
			expect(getModelScoreThreshold("gemini", "gemini-embedding-001")).toBe(0.4)
		})

		it("should return score threshold for deprecated text-embedding-004", () => {
			expect(getModelScoreThreshold("gemini", "text-embedding-004")).toBe(0.4)
		})

		it("should return undefined for unknown model", () => {
			expect(getModelScoreThreshold("gemini", "unknown-model")).toBeUndefined()
		})
	})

	describe("getDefaultModelId", () => {
		it("should return gemini-embedding-001 for gemini provider", () => {
			expect(getDefaultModelId("gemini")).toBe("gemini-embedding-001")
		})

		it("should return text-embedding-3-small for openai provider", () => {
			expect(getDefaultModelId("openai")).toBe("text-embedding-3-small")
		})

		it("should return codestral-embed-2505 for mistral provider", () => {
			expect(getDefaultModelId("mistral")).toBe("codestral-embed-2505")
		})
	})
})
4 changes: 3 additions & 1 deletion src/shared/embeddingModels.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@ export const EMBEDDING_MODEL_PROFILES: EmbeddingModelProfiles = {
},
},
gemini: {
"text-embedding-004": { dimension: 768 },
"gemini-embedding-001": { dimension: 3072, scoreThreshold: 0.4 },
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing text-embedding-004 from the profiles breaks the silent migration for users who have it configured. When createVectorStore() in service-factory.ts calls getModelDimension("gemini", "text-embedding-004"), it will return undefined because the model is no longer in the profiles, causing an error. The migration in GeminiEmbedder only affects embedder creation, not dimension lookup.

Consider either: (1) keeping text-embedding-004 in the profiles pointing to the migrated model's dimension (3072), or (2) exporting the migration logic from GeminiEmbedder and applying it in service-factory.ts before calling getModelDimension().

Fix it with Roo Code or mention @roomote and request a fix.

// Deprecated: text-embedding-004 is migrated to gemini-embedding-001 in GeminiEmbedder
// Kept here for backward-compatible dimension lookup in createVectorStore()
"text-embedding-004": { dimension: 3072, scoreThreshold: 0.4 },
},
mistral: {
"codestral-embed-2505": { dimension: 1536, scoreThreshold: 0.4 },
Expand Down
Loading