Skip to content

Commit cf6213e

Browse files
committed
Add librarian subagent
1 parent f70a222 commit cf6213e

File tree

2 files changed

+449
-0
lines changed

2 files changed

+449
-0
lines changed

agents/librarian/librarian.test.ts

Lines changed: 294 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,294 @@
1+
/**
 * E2E test script for the librarian agent.
 *
 * Runs the agent on repo-analysis tasks one at a time, writing the full event
 * trace of each run to a file for later analysis. Each task produces one trace
 * file in debug/librarian-traces/.
 *
 * Usage:
 *   bun agents/librarian/librarian.test.ts [taskIndex]
 *
 * If taskIndex is provided, runs only that task (0-based). Otherwise runs all tasks.
 */
12+
13+
import * as fs from 'fs'
14+
import * as path from 'path'
15+
16+
import { CodebuffClient, loadLocalAgents } from '@codebuff/sdk'
17+
18+
import type { AgentDefinition } from '@codebuff/sdk'
19+
20+
const TRACE_DIR = path.join(process.cwd(), 'debug', 'librarian-traces')
21+
22+
/** One repo-analysis task to hand to the librarian agent. */
interface TaskDefinition {
  /** Short identifier; used in console output and in the trace file name. */
  name: string
  /** Question passed to the agent as its prompt. */
  prompt: string
  /** Repository URL passed to the agent as the `repoUrl` param. */
  repoUrl: string
}
27+
28+
/**
 * Tasks run by this script, in order. The optional CLI argument is a 0-based
 * index into this array.
 */
const TASKS: TaskDefinition[] = [
  {
    name: 'express-overview',
    prompt:
      'What is the main entry point of this project? What are its key dependencies and what does it do?',
    repoUrl: 'https://github.com/expressjs/express',
  },
  {
    name: 'zod-api-surface',
    prompt:
      'What are the main public API exports of this library? List the key functions and types a user would import.',
    repoUrl: 'https://github.com/colinhacks/zod',
  },
]
42+
43+
/** A single agent event as recorded in the trace file. */
interface TraceEvent {
  /** ISO-8601 time at which the event was received by this script. */
  timestamp: string
  /** The event's `type` discriminator, copied from the raw event. */
  type: string
  /** The raw event object as delivered to `handleEvent`. */
  data: Record<string, unknown>
}
48+
49+
/** Structured output this script expects the librarian agent to return. */
interface LibrarianOutput {
  /** The agent's answer to the task prompt. */
  answer: string
  /** Paths of the files the agent reports as relevant to the answer. */
  relevantFiles: string[]
  /** Directory the agent reports having cloned the repository into. */
  cloneDir: string
}
54+
55+
/**
 * Checks a structured output value against the expected
 * `{ answer, relevantFiles, cloneDir }` shape and verifies on disk that the
 * clone directory and every listed file exist.
 *
 * @param value - The `value` of a `structuredOutput` result; may be anything.
 * @returns One human-readable message per problem found (empty when valid).
 */
function validateLibrarianOutput(value: unknown): string[] {
  if (typeof value !== 'object' || value === null) {
    return ['Structured output value is not an object']
  }

  const data = value as Record<string, unknown>
  const errors: string[] = []

  if (typeof data.answer !== 'string' || !data.answer) {
    errors.push('Missing or empty "answer" field in output')
  }

  // Only entries that really are strings are checked on disk later, so a
  // malformed entry is reported once instead of also as "not found".
  const filePaths: string[] = []
  if (!Array.isArray(data.relevantFiles)) {
    errors.push('Missing "relevantFiles" array in output')
  } else {
    if (data.relevantFiles.length === 0) {
      errors.push('"relevantFiles" array is empty')
    }
    for (const entry of data.relevantFiles as unknown[]) {
      if (typeof entry === 'string') {
        filePaths.push(entry)
      } else {
        errors.push(
          `relevantFiles contains non-string: ${JSON.stringify(entry)}`,
        )
      }
    }
  }

  if (typeof data.cloneDir !== 'string' || !data.cloneDir) {
    errors.push('Missing or empty "cloneDir" field in output')
  } else if (!fs.existsSync(data.cloneDir)) {
    errors.push(`cloneDir does not exist: ${data.cloneDir}`)
  } else {
    // Files are only checked once the clone directory itself is known to exist.
    for (const filePath of filePaths) {
      if (!fs.existsSync(filePath)) {
        errors.push(`relevantFile not found: ${filePath}`)
      }
    }
  }

  return errors
}

/**
 * Guards the recursive delete of an agent-reported directory.
 *
 * @returns `false` when `dir` is the current working directory or one of its
 *   ancestors (including the filesystem root), `true` otherwise.
 */
function isSafeToRemove(dir: string): boolean {
  const fromDirToCwd = path.relative(path.resolve(dir), process.cwd())
  const cwdIsOutsideDir =
    fromDirToCwd === '..' ||
    fromDirToCwd.startsWith(`..${path.sep}`) ||
    path.isAbsolute(fromDirToCwd) // e.g. a different drive on Windows
  return cwdIsOutsideDir
}

/**
 * Runs one task through the librarian agent, validates its structured output,
 * writes a JSON trace of the run to `TRACE_DIR`, prints a per-task report, and
 * finally removes the directory the agent reports having cloned into.
 *
 * A failure thrown by `client.run` is recorded as a validation error rather
 * than propagated, so the events collected up to that point still reach the
 * trace file.
 *
 * @param client - SDK client used to run the agent.
 * @param task - Task whose prompt and repo URL are sent to the agent.
 * @param agentDefinitions - Agent definitions forwarded to the run.
 * @param taskIndex - Index of the task, used for console output only.
 * @returns Whether validation passed, the trace file path, the raw run output
 *   (`undefined` if the run threw), and the validation messages.
 */
async function runTask(
  client: CodebuffClient,
  task: TaskDefinition,
  agentDefinitions: AgentDefinition[],
  taskIndex: number,
): Promise<{
  success: boolean
  traceFile: string
  output: unknown
  validationErrors: string[]
}> {
  const events: TraceEvent[] = []
  const validationErrors: string[] = []
  const startTime = Date.now()

  console.log(`\n${'='.repeat(60)}`)
  console.log(`Task ${taskIndex}: ${task.name}`)
  console.log(`Repo: ${task.repoUrl}`)
  console.log(`Prompt: ${task.prompt}`)
  console.log(`${'='.repeat(60)}\n`)

  let runState: Awaited<ReturnType<CodebuffClient['run']>> | undefined
  try {
    runState = await client.run({
      agent: 'librarian',
      prompt: task.prompt,
      params: { repoUrl: task.repoUrl },
      agentDefinitions,
      maxAgentSteps: 40,
      handleEvent: (event) => {
        events.push({
          timestamp: new Date().toISOString(),
          type: event.type,
          data: event as Record<string, unknown>,
        })

        if (event.type === 'text') {
          process.stdout.write(event.text ?? '')
        } else if (event.type === 'tool_call') {
          console.log(`\n[Tool Call] ${event.toolName}`)
        } else if (event.type === 'tool_result') {
          // JSON.stringify yields undefined for an undefined output; show an
          // empty preview then, and add the ellipsis only when truncating.
          const serialized = JSON.stringify(event.output) ?? ''
          const preview =
            serialized.length > 200
              ? `${serialized.slice(0, 200)}...`
              : serialized
          console.log(`[Tool Result] ${preview}`)
        } else if (event.type === 'error') {
          console.error(`[Error] ${event.message}`)
        } else if (event.type === 'subagent_start') {
          console.log(`[Subagent Start] ${event.agentType}`)
        } else if (event.type === 'subagent_finish') {
          console.log(`[Subagent Finish] ${event.agentType}`)
        }
      },
    })
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err)
    validationErrors.push(`Agent run threw: ${reason}`)
  }

  const duration = ((Date.now() - startTime) / 1000).toFixed(1)
  const output = runState?.output

  // Validate structured output (skipped when the run itself threw).
  if (runState !== undefined) {
    if (output?.type === 'structuredOutput') {
      validationErrors.push(...validateLibrarianOutput(output.value))
    } else if (output?.type === 'error') {
      validationErrors.push(`Agent returned error: ${output.message}`)
    } else {
      validationErrors.push(
        `Expected structuredOutput, got: ${output?.type ?? 'null'}`,
      )
    }
  }

  // Narrowed once here and reused by both the report and the cleanup below.
  const structured =
    output?.type === 'structuredOutput' &&
    typeof output.value === 'object' &&
    output.value !== null
      ? (output.value as Partial<LibrarianOutput>)
      : undefined
  const cloneDir =
    typeof structured?.cloneDir === 'string' ? structured.cloneDir : ''

  try {
    const trace = {
      task: {
        name: task.name,
        prompt: task.prompt,
        repoUrl: task.repoUrl,
      },
      duration: `${duration}s`,
      output,
      validationErrors,
      eventCount: events.length,
      events,
    }

    // ':' and '.' are replaced so the timestamp is usable in a file name.
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-')
    const traceFile = path.join(TRACE_DIR, `${timestamp}_${task.name}.json`)
    fs.writeFileSync(traceFile, JSON.stringify(trace, null, 2))

    const success = validationErrors.length === 0

    console.log(`\n${'─'.repeat(60)}`)
    console.log(`Result: ${success ? '✅ SUCCESS' : '❌ FAILURE'}`)
    console.log(`Duration: ${duration}s`)
    console.log(`Events: ${events.length}`)
    console.log(`Trace: ${traceFile}`)

    if (validationErrors.length > 0) {
      console.log(`Validation Errors:`)
      for (const err of validationErrors) {
        console.log(`  ❌ ${err}`)
      }
    }

    if (structured) {
      console.log(`Answer length: ${structured.answer?.length ?? 0} chars`)
      console.log(`Relevant files: ${structured.relevantFiles?.length ?? 0}`)
      console.log(`Clone dir: ${structured.cloneDir}`)
    }
    console.log(`${'─'.repeat(60)}`)

    return { success, traceFile, output, validationErrors }
  } finally {
    // Clean up the cloned repo after validation, even if writing the trace
    // failed. The path comes from agent output, so refuse a delete that would
    // take the working directory with it.
    if (cloneDir && fs.existsSync(cloneDir)) {
      if (isSafeToRemove(cloneDir)) {
        console.log(`Cleaning up ${cloneDir}...`)
        fs.rmSync(cloneDir, { recursive: true, force: true })
      } else {
        console.warn(
          `Refusing to remove ${cloneDir}: it contains the current working directory`,
        )
      }
    }
  }
}
213+
214+
/**
 * Script entry point: selects the tasks to run (all of `TASKS`, or the single
 * one named by the optional 0-based index in `process.argv[2]`), loads the
 * local agent definitions, runs each task sequentially, and prints a summary.
 *
 * Exits the process with code 1 when the index is invalid, when no agent with
 * id `librarian` is found, or when any task fails validation.
 */
async function main(): Promise<void> {
  fs.mkdirSync(TRACE_DIR, { recursive: true })

  const taskIndexArg = process.argv[2]
  let tasksToRun: Array<{ task: TaskDefinition; index: number }>
  if (taskIndexArg === undefined) {
    tasksToRun = TASKS.map((task, index) => ({ task, index }))
  } else {
    // Accept plain decimal digits only, so input such as "1abc" or "" is
    // rejected instead of being partially parsed.
    const index = /^\d+$/.test(taskIndexArg) ? Number(taskIndexArg) : NaN
    const task = Number.isInteger(index) ? TASKS[index] : undefined
    if (!task) {
      console.error(
        `Invalid task index: ${taskIndexArg}. Available: 0-${TASKS.length - 1}`,
      )
      process.exit(1)
    }
    tasksToRun = [{ task, index }]
  }

  const agents = await loadLocalAgents({
    agentsPath: path.join(process.cwd(), 'agents'),
    verbose: true,
  })
  const agentDefinitions = Object.values(agents) as AgentDefinition[]

  const librarianAgent = agentDefinitions.find((a) => a.id === 'librarian')
  if (!librarianAgent) {
    console.error('librarian agent not found in agents/ directory')
    process.exit(1)
  }
  console.log(`Loaded librarian agent (model: ${librarianAgent.model})`)

  const client = new CodebuffClient({
    apiKey: process.env.CODEBUFF_API_KEY,
    cwd: process.cwd(),
  })

  const results: Array<{
    name: string
    success: boolean
    traceFile: string
    validationErrors: string[]
  }> = []

  // Tasks run strictly one at a time so their console output does not interleave.
  for (const { task, index } of tasksToRun) {
    const result = await runTask(client, task, agentDefinitions, index)
    results.push({
      name: task.name,
      success: result.success,
      traceFile: result.traceFile,
      validationErrors: result.validationErrors,
    })
  }

  console.log(`\n${'='.repeat(60)}`)
  console.log('SUMMARY')
  console.log(`${'='.repeat(60)}`)
  for (const r of results) {
    console.log(`  ${r.success ? '✅' : '❌'} ${r.name} → ${r.traceFile}`)
    for (const err of r.validationErrors) {
      console.log(`     ❌ ${err}`)
    }
  }
  const passed = results.filter((r) => r.success).length
  console.log(`\n${passed}/${results.length} tasks passed`)

  if (passed < results.length) {
    process.exit(1)
  }
}
288+
289+
// Run main() only when the `import.meta.main` flag is set, so that merely
// importing this module does not start a run. Any error that escapes main()
// is logged and turned into a non-zero exit code.
if (import.meta.main) {
  main().catch((err) => {
    console.error('Fatal error:', err)
    process.exit(1)
  })
}

0 commit comments

Comments
 (0)