Skip to content

Commit 6b8fbd1

Browse files
authored
fix(schema): Use sample instead of find for schema sampling (#580)
1 parent 0501bdb commit 6b8fbd1

File tree

4 files changed

+90
-16
lines changed

4 files changed

+90
-16
lines changed

src/helpers/isObjectEmpty.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/**
 * The shapes that {@link isObjectEmpty} narrows to when it returns `true`:
 * `null`, `undefined`, or an object type whose string keys can hold no value.
 */
type EmptyObject = { [x: string]: never } | null | undefined;

/**
 * Reports whether `value` is nullish or has no own enumerable string-keyed
 * properties.
 *
 * The loop returns at the first own property it meets, so no intermediate
 * key/entry array is allocated for the check.
 *
 * @param value - The object to inspect; `null` and `undefined` count as empty.
 * @returns `true` when `value` is `null`, `undefined`, or has no own
 * enumerable string-keyed property; `false` otherwise.
 */
export function isObjectEmpty(value: object | null | undefined): value is EmptyObject {
    if (!value) {
        return true;
    }

    for (const prop in value) {
        // `for...in` also walks inherited enumerable keys; only own ones count.
        if (Object.prototype.hasOwnProperty.call(value, prop)) {
            return false;
        }
    }

    return true;
}
src/tools/mongodb/metadata/collectionSchema.ts

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,49 @@
11
import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
22
import { DbOperationArgs, MongoDBToolBase } from "../mongodbTool.js";
3-
import type { ToolArgs, OperationType } from "../../tool.js";
3+
import type { ToolArgs, OperationType, ToolExecutionContext } from "../../tool.js";
44
import { formatUntrustedData } from "../../tool.js";
55
import { getSimplifiedSchema } from "mongodb-schema";
6+
import z from "zod";
7+
import { ONE_MB } from "../../../helpers/constants.js";
8+
import { collectCursorUntilMaxBytesLimit } from "../../../helpers/collectCursorUntilMaxBytes.js";
9+
import { isObjectEmpty } from "../../../helpers/isObjectEmpty.js";
10+
11+
const MAXIMUM_SAMPLE_SIZE_HARD_LIMIT = 50_000;
612

713
export class CollectionSchemaTool extends MongoDBToolBase {
814
public name = "collection-schema";
915
protected description = "Describe the schema for a collection";
10-
protected argsShape = DbOperationArgs;
16+
protected argsShape = {
17+
...DbOperationArgs,
18+
sampleSize: z.number().optional().default(50).describe("Number of documents to sample for schema inference"),
19+
responseBytesLimit: z
20+
.number()
21+
.optional()
22+
.default(ONE_MB)
23+
.describe(
24+
`The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded.`
25+
),
26+
};
1127

1228
public operationType: OperationType = "metadata";
1329

14-
protected async execute({ database, collection }: ToolArgs<typeof DbOperationArgs>): Promise<CallToolResult> {
30+
protected async execute(
31+
{ database, collection, sampleSize, responseBytesLimit }: ToolArgs<typeof this.argsShape>,
32+
{ signal }: ToolExecutionContext
33+
): Promise<CallToolResult> {
1534
const provider = await this.ensureConnected();
16-
const documents = await provider.find(database, collection, {}, { limit: 5 }).toArray();
35+
const cursor = provider.aggregate(database, collection, [
36+
{ $sample: { size: Math.min(sampleSize, MAXIMUM_SAMPLE_SIZE_HARD_LIMIT) } },
37+
]);
38+
const { cappedBy, documents } = await collectCursorUntilMaxBytesLimit({
39+
cursor,
40+
configuredMaxBytesPerQuery: this.config.maxBytesPerQuery,
41+
toolResponseBytesLimit: responseBytesLimit,
42+
abortSignal: signal,
43+
});
1744
const schema = await getSimplifiedSchema(documents);
1845

19-
const fieldsCount = Object.entries(schema).length;
20-
if (fieldsCount === 0) {
46+
if (isObjectEmpty(schema)) {
2147
return {
2248
content: [
2349
{
@@ -28,11 +54,15 @@ export class CollectionSchemaTool extends MongoDBToolBase {
2854
};
2955
}
3056

57+
const fieldsCount = Object.keys(schema).length;
58+
const header = `Found ${fieldsCount} fields in the schema for "${database}.${collection}"`;
59+
const cappedWarning =
60+
cappedBy !== undefined
61+
? `\nThe schema was inferred from a subset of documents due to the response size limit. (${cappedBy})`
62+
: "";
63+
3164
return {
32-
content: formatUntrustedData(
33-
`Found ${fieldsCount} fields in the schema for "${database}.${collection}"`,
34-
JSON.stringify(schema)
35-
),
65+
content: formatUntrustedData(`${header}${cappedWarning}`, JSON.stringify(schema)),
3666
};
3767
}
3868
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import { isObjectEmpty } from "../../../src/helpers/isObjectEmpty.js";
2+
import { describe, expect, it } from "vitest";
3+
4+
describe("isObjectEmpty", () => {
5+
it("returns true for null", () => {
6+
expect(isObjectEmpty(null)).toBe(true);
7+
});
8+
9+
it("returns true for undefined", () => {
10+
expect(isObjectEmpty(undefined)).toBe(true);
11+
});
12+
13+
it("returns true for empty object", () => {
14+
expect(isObjectEmpty({})).toBe(true);
15+
});
16+
17+
it("returns false for object with properties", () => {
18+
expect(isObjectEmpty({ a: 1 })).toBe(false);
19+
});
20+
});

tests/integration/tools/mongodb/metadata/collectionSchema.test.ts

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,21 @@ import type { SimplifiedSchema } from "mongodb-schema";
1515
import { describe, expect, it } from "vitest";
1616

1717
describeWithMongoDB("collectionSchema tool", (integration) => {
18-
validateToolMetadata(
19-
integration,
20-
"collection-schema",
21-
"Describe the schema for a collection",
22-
databaseCollectionParameters
23-
);
18+
validateToolMetadata(integration, "collection-schema", "Describe the schema for a collection", [
19+
...databaseCollectionParameters,
20+
{
21+
name: "sampleSize",
22+
type: "number",
23+
description: "Number of documents to sample for schema inference",
24+
required: false,
25+
},
26+
{
27+
name: "responseBytesLimit",
28+
type: "number",
29+
description: `The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded.`,
30+
required: false,
31+
},
32+
]);
2433

2534
validateThrowsForInvalidArguments(integration, "collection-schema", databaseCollectionInvalidArgs);
2635

0 commit comments

Comments
 (0)