diff --git a/src/storage/seekdb/seekdb.ts b/src/storage/seekdb/seekdb.ts
index 0235c0b..a2b6294 100644
--- a/src/storage/seekdb/seekdb.ts
+++ b/src/storage/seekdb/seekdb.ts
@@ -15,15 +15,18 @@ export interface SeekDBStoreOptions {
 }
 
 export class SeekDBStore implements VectorStore {
-  
+
   private client: any;
-  
+
   private collection: any;
 
-  
-  private constructor(client: any, collection: any) {
+  private readonly distanceMetric: 'cosine' | 'l2' | 'inner_product';
+
+
+  private constructor(client: any, collection: any, distanceMetric: 'cosine' | 'l2' | 'inner_product') {
     this.client = client;
     this.collection = collection;
+    this.distanceMetric = distanceMetric;
   }
 
   static async create(options: SeekDBStoreOptions): Promise<SeekDBStore> {
@@ -49,7 +52,36 @@ export class SeekDBStore implements VectorStore {
       schema,
     });
 
-    return new SeekDBStore(client, collection);
+    return new SeekDBStore(client, collection, distance);
+  }
+
+  // ─── Distance → Score conversion ─────────────────────────────────────
+
+  /**
+   * Convert distance to similarity score (0–1, higher = more similar); a missing or NaN distance scores 0.
+   * Formula depends on the configured distance metric, matching Python's OceanBase implementation.
+   */
+  private distanceToScore(distance: number | null | undefined): number {
+    if (distance == null || Number.isNaN(distance)) return 0;
+
+    switch (this.distanceMetric) {
+      case 'l2':
+        // L2: smaller distance = more similar → 1 / (1 + distance)
+        return 1 / (1 + Math.abs(distance));
+
+      case 'cosine':
+        // Cosine distance range [0, 2] → 1 - distance / 2, clamped to [0, 1] so floating-point drift cannot exceed 1
+        return Math.max(0, Math.min(1, 1 - distance / 2));
+
+      case 'inner_product': {
+        // Inner product returned as negative distance → negate, then (ip + 1) / 2, clamped to [0, 1]
+        const innerProd = -distance;
+        return Math.max(0, Math.min(1, (innerProd + 1) / 2));
+      }
+
+      default:
+        return 0;
+    }
   }
 
   // ─── Payload ↔ Metadata mapping ──────────────────────────────────────
@@ -230,7 +262,7 @@ export class SeekDBStore implements VectorStore {
       matches.push({
         id: result.ids[0][i],
         content: result.documents?.[0]?.[i] ?? '',
-        score: Math.max(0, 1 - distance),
+        score: this.distanceToScore(distance),
         metadata: metadata.metadata_b64 ? JSON.parse(Buffer.from(metadata.metadata_b64, 'base64').toString()) : (metadata.metadata_json ? JSON.parse(metadata.metadata_json) : undefined),
         createdAt: metadata.created_at || undefined,
         updatedAt: metadata.updated_at || undefined,
diff --git a/tests/unit/storage/seekdb.test.ts b/tests/unit/storage/seekdb.test.ts
index 46f1038..e1ea03f 100644
--- a/tests/unit/storage/seekdb.test.ts
+++ b/tests/unit/storage/seekdb.test.ts
@@ -251,6 +251,61 @@ describeIf('SeekDBStore', () => {
     });
   });
 
+  describe('seekdb-specific: distanceToScore conversion', () => {
+    it('cosine: distance 0 → score 1, distance 2 → score 0', () => {
+      // Default store uses cosine
+      const fn = (store as any).distanceToScore.bind(store);
+      expect(fn(0)).toBeCloseTo(1);
+      expect(fn(2)).toBeCloseTo(0);
+      expect(fn(1)).toBeCloseTo(0.5);
+    });
+
+    it('cosine: out-of-range distance is clamped to [0, 1]', () => {
+      const fn = (store as any).distanceToScore.bind(store);
+      expect(fn(3)).toBe(0); // distance > 2 should clamp to 0
+      expect(fn(-0.5)).toBe(1); // negative distance (floating-point drift) should clamp to 1
+    });
+
+    it('l2: distance 0 → score 1, distance 1 → score 0.5', async () => {
+      const l2Store = await SeekDBStore.create({
+        path: tmpDir,
+        database: 'test',
+        collectionName: `l2_${Date.now()}`,
+        distance: 'l2',
+        dimension: 3,
+      });
+      const fn = (l2Store as any).distanceToScore.bind(l2Store);
+      expect(fn(0)).toBeCloseTo(1);
+      expect(fn(1)).toBeCloseTo(0.5);
+      expect(fn(9)).toBeCloseTo(0.1);
+      await l2Store.close();
+    });
+
+    it('inner_product: negative distance (high similarity) → score near 1', async () => {
+      const ipStore = await SeekDBStore.create({
+        path: tmpDir,
+        database: 'test',
+        collectionName: `ip_${Date.now()}`,
+        distance: 'inner_product',
+        dimension: 3,
+      });
+      const fn = (ipStore as any).distanceToScore.bind(ipStore);
+      // distance = -1 → innerProd = 1 → (1+1)/2 = 1.0
+      expect(fn(-1)).toBeCloseTo(1);
+      // distance = 0 → innerProd = 0 → (0+1)/2 = 0.5
+      expect(fn(0)).toBeCloseTo(0.5);
+      // distance = 1 → innerProd = -1 → (-1+1)/2 = 0
+      expect(fn(1)).toBeCloseTo(0);
+      await ipStore.close();
+    });
+
+    it('null/undefined distance returns 0', () => {
+      const fn = (store as any).distanceToScore.bind(store);
+      expect(fn(null)).toBe(0);
+      expect(fn(undefined)).toBe(0);
+    });
+  });
+
   describe('seekdb-specific: search score conversion', () => {
     it('identical vectors produce score close to 1', async () => {
       await store.insert('1', [1, 0, 0], makePayload({ data: 'exact' }));