1
1
import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js" ;
2
2
import { DbOperationArgs , MongoDBToolBase } from "../mongodbTool.js" ;
3
- import type { ToolArgs , OperationType } from "../../tool.js" ;
3
+ import type { ToolArgs , OperationType , ToolExecutionContext } from "../../tool.js" ;
4
4
import { formatUntrustedData } from "../../tool.js" ;
5
5
import { getSimplifiedSchema } from "mongodb-schema" ;
6
+ import z from "zod" ;
7
+ import { ONE_MB } from "../../../helpers/constants.js" ;
8
+ import { collectCursorUntilMaxBytesLimit } from "../../../helpers/collectCursorUntilMaxBytes.js" ;
9
+ import { isObjectEmpty } from "../../../helpers/isObjectEmpty.js" ;
10
+
11
// Upper bound for the `$sample` stage size: the caller-supplied `sampleSize` is clamped to this
// value with `Math.min` before the aggregation is issued.
+ const MAXIMUM_SAMPLE_SIZE_HARD_LIMIT = 50_000 ;
6
12
7
13
// Tool that samples documents from a collection and reports a simplified schema for them.
// NOTE(review): this listing is a rendered diff — lines starting with `-` are the previous
// implementation, lines starting with `+` are the replacement, and bare numbers are gutter
// line numbers, not code.
export class CollectionSchemaTool extends MongoDBToolBase {
8
14
public name = "collection-schema" ;
9
15
protected description = "Describe the schema for a collection" ;
10
- protected argsShape = DbOperationArgs ;
16
// Arguments: everything in DbOperationArgs plus `sampleSize` (defaults to 50) and
// `responseBytesLimit` (defaults to ONE_MB).
// NOTE(review): `sampleSize` is declared as a plain `z.number()` — no integer or positive
// constraint is visible here, and only an upper clamp is applied in `execute`. Confirm that
// zero, negative or fractional values are rejected somewhere before reaching `$sample`.
+ protected argsShape = {
17
+ ...DbOperationArgs ,
18
+ sampleSize : z . number ( ) . optional ( ) . default ( 50 ) . describe ( "Number of documents to sample for schema inference" ) ,
19
+ responseBytesLimit : z
20
+ . number ( )
21
+ . optional ( )
22
+ . default ( ONE_MB )
23
+ . describe (
24
+ `The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded.`
25
+ ) ,
26
+ } ;
11
27
12
28
public operationType : OperationType = "metadata" ;
13
29
14
- protected async execute ( { database, collection } : ToolArgs < typeof DbOperationArgs > ) : Promise < CallToolResult > {
30
// Samples documents from `database.collection`, derives a simplified schema from them, and
// returns it as tool output. The second parameter supplies the abort `signal` that is
// forwarded to the cursor-collection helper.
+ protected async execute (
31
+ { database, collection, sampleSize, responseBytesLimit } : ToolArgs < typeof this . argsShape > ,
32
+ { signal } : ToolExecutionContext
33
+ ) : Promise < CallToolResult > {
15
34
const provider = await this . ensureConnected ( ) ;
16
- const documents = await provider . find ( database , collection , { } , { limit : 5 } ) . toArray ( ) ;
35
// The previous version read the first 5 documents with `find`; the replacement runs a
// `$sample` aggregation whose size is the requested `sampleSize`, clamped to
// MAXIMUM_SAMPLE_SIZE_HARD_LIMIT.
+ const cursor = provider . aggregate ( database , collection , [
36
+ { $sample : { size : Math . min ( sampleSize , MAXIMUM_SAMPLE_SIZE_HARD_LIMIT ) } } ,
37
+ ] ) ;
38
// The helper receives both the configured `maxBytesPerQuery` and the per-call
// `responseBytesLimit`. `cappedBy` is compared against `undefined` further down to decide
// whether a truncation warning is added to the response.
// NOTE(review): the exact capping rules live in collectCursorUntilMaxBytesLimit, which is not
// visible here — verify against that helper.
+ const { cappedBy, documents } = await collectCursorUntilMaxBytesLimit ( {
39
+ cursor,
40
+ configuredMaxBytesPerQuery : this . config . maxBytesPerQuery ,
41
+ toolResponseBytesLimit : responseBytesLimit ,
42
+ abortSignal : signal ,
43
+ } ) ;
17
44
const schema = await getSimplifiedSchema ( documents ) ;
18
45
19
- const fieldsCount = Object . entries ( schema ) . length ;
20
- if ( fieldsCount === 0 ) {
46
// An empty schema takes a separate early-return path.
// NOTE(review): the body of that return is cut off by the hunk header below, so its message
// text is not visible in this listing.
+ if ( isObjectEmpty ( schema ) ) {
21
47
return {
22
48
content : [
23
49
{
@@ -28,11 +54,15 @@ export class CollectionSchemaTool extends MongoDBToolBase {
28
54
} ;
29
55
}
30
56
57
// Build the trusted description: a header with the number of top-level schema keys, followed
// by a warning line only when `cappedBy` is defined.
+ const fieldsCount = Object . keys ( schema ) . length ;
58
+ const header = `Found ${ fieldsCount } fields in the schema for "${ database } .${ collection } "` ;
59
+ const cappedWarning =
60
+ cappedBy !== undefined
61
+ ? `\nThe schema was inferred from a subset of documents due to the response size limit. (${ cappedBy } )`
62
+ : "" ;
63
+
31
64
// The serialized schema is passed as the second argument to formatUntrustedData, separately
// from the description text.
return {
32
- content : formatUntrustedData (
33
- `Found ${ fieldsCount } fields in the schema for "${ database } .${ collection } "` ,
34
- JSON . stringify ( schema )
35
- ) ,
65
+ content : formatUntrustedData ( `${ header } ${ cappedWarning } ` , JSON . stringify ( schema ) ) ,
36
66
} ;
37
67
}
38
68
}
0 commit comments