diff --git a/Dockerfile b/Dockerfile index b670c37..b2afb47 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,16 +13,17 @@ RUN npm run build # --- Runtime Stage --- FROM node:20-alpine AS runtime -RUN apk add --no-cache bash +RUN apk add --no-cache bash git WORKDIR /app ENV NODE_ENV=production + COPY package*.json ./ -# Install production dependencies only RUN npm ci --omit=dev --legacy-peer-deps -# Copy built application from the build stage COPY --from=builder /app/dist ./dist -# Set the container's entrypoint -ENTRYPOINT ["node","dist/cli.js"] +ENV TERM=xterm-256color + +ENTRYPOINT ["node", "dist/cli.js"] +CMD [] diff --git a/Makefile b/Makefile index 475e43b..4dee129 100644 --- a/Makefile +++ b/Makefile @@ -127,4 +127,32 @@ docker-image: ## Build the Docker image docker-run: ## Run the application in a Docker container @echo "Running Docker image: $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG) with args: $(DOCKER_CONTAINER_ARGS)" - @docker run --rm -it $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG) $(DOCKER_CONTAINER_ARGS) + @docker run --rm -it \ + -v $(PWD):/workspace \ + -w /workspace \ + -e OPENAI_API_KEY \ + -e ANTHROPIC_API_KEY \ + -e GOOGLE_API_KEY \ + $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG) $(DOCKER_CONTAINER_ARGS) + +docker-run-local: ## Run with local config and workspace mounted + @echo "Running Docker with local config and workspace" + @docker run --rm -it \ + -v $(PWD):/workspace \ + -v $(HOME)/.config/binharic:/root/.config/binharic \ + -w /workspace \ + -e OPENAI_API_KEY \ + -e ANTHROPIC_API_KEY \ + -e GOOGLE_API_KEY \ + $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG) $(DOCKER_CONTAINER_ARGS) + +docker-shell: ## Open a shell in the Docker container for debugging + @echo "Opening shell in Docker container" + @docker run --rm -it \ + -v $(PWD):/workspace \ + -w /workspace \ + -e OPENAI_API_KEY \ + -e ANTHROPIC_API_KEY \ + -e GOOGLE_API_KEY \ + --entrypoint /bin/bash \ + $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG) diff --git a/README.md b/README.md index de00298..31bc5af 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ like the ability to analyze projects, run tests, find bugs, and perform code rev - Is fully customizable (like customizing system prompt) - Comes with a built-in retrieval-augmented generation (RAG) pipeline - Comes with a large set of built-in tools (like reading and writing files) -- Can use external tools via Model Context Protocol (MCP) +- Can use external tools via the Model Context Protocol (MCP) - Comes with built-in workflows for standard software development tasks (like debugging and code review) See the [ROADMAP.md](ROADMAP.md) for the list of implemented and planned features. @@ -58,7 +58,7 @@ You can follow the instructions below to install and use Binharic in your termin npm install -g @cogitator/binharic-cli ``` -#### Usage +#### Running in the Terminal ```sh # Make sure API keys are available in the environment @@ -77,11 +77,29 @@ binharic > So, it's recommended to use state-of-the-art models (like Claude Sonnet 4.5, GPT-5, and Gemini 2.5 Pro) for the best > results. +#### Running in a Container + +Alternatively, you can start Binharic in a container: + +```sh +# API keys should be available in the environment already +docker run -it --rm \ + -v $(PWD):/workspace \ + -w /workspace \ + -e OPENAI_API_KEY \ + -e ANTHROPIC_API_KEY \ + -e GOOGLE_API_KEY \ + ghcr.io/cogitatortech/binharic-cli: +``` + +`` should be replaced with the version of the Binharic (like `0.1.0-alpha.4`) or `latest`. +Use `latest` if you want to use the latest (development) version of Binharic. + --- -#### Documentation +### Documentation -See the [docs](docs) for more information on how to use Binharic coding agent. +See the [docs](docs) for more information on how to use the Binharic coding agent. --- diff --git a/ROADMAP.md b/ROADMAP.md index 2238872..f7febaa 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -103,7 +103,7 @@ It includes planned features, improvements, and their current implementation sta - [x] File search with @ mention - [x] Non-blocking UI during LLM responses - [x] Command syntax highlighting (partial match in yellow, full match in cyan) - - [x] Colored help menu items** + - [x] Colored help menu items\*\* - [x] Clean message display (no "Binharic:" prefix) - [x] Dynamic username from system (not hardcoded) - [x] Tool results hidden from UI (only failures shown) @@ -250,7 +250,7 @@ It includes planned features, improvements, and their current implementation sta - [x] Multi-step tool execution with automatic loop control - [x] Specialized agents with distinct personalities - [ ] onStepFinish callbacks for monitoring - - [ ] prepareStep callbacks for dynamic configuration** + - [ ] prepareStep callbacks for dynamic configuration\*\* - [ ] Multiple stopping conditions (step count, budget, errors, validation, completion) - [ ] Goal-oriented planning - [ ] Task decomposition diff --git a/docs/assets/diagrams/agentic_workflow.dot b/docs/assets/diagrams/agentic_workflow.dot index fdfb4e4..d09a5e5 100644 --- a/docs/assets/diagrams/agentic_workflow.dot +++ b/docs/assets/diagrams/agentic_workflow.dot @@ -1,54 +1,54 @@ digraph AgenticWorkflow { // --- Graph Settings (Updated Style) --- graph [ -rankdir=LR, -label="AI Agentic Workflow", -fontsize=22, -fontname="Helvetica-Bold,Arial-Bold,sans-serif", -fontcolor="#333333", -labelloc=t, -compound=true, -bgcolor="#F8F9FA", -splines=ortho, -nodesep=0.6, -ranksep=1.2 +rankdir = LR, +label = "AI Agentic Workflow", +fontsize = 22, +fontname = "Helvetica-Bold,Arial-Bold,sans-serif", +fontcolor = "#333333", +labelloc = t, +compound = true, +bgcolor = "#F8F9FA", +splines = ortho, +nodesep = 0.6, +ranksep = 1.2 ]; // --- Default Node & Edge Styles (from example) --- node [ -fontname="Helvetica,Arial,sans-serif", -shape=box, -style="filled,rounded", -color="lightblue", // Border color - fillcolor="white", // Default fill color - penwidth=2 +fontname = "Helvetica,Arial,sans-serif", +shape = box, +style = "filled,rounded", +color = "lightblue", // Border color + fillcolor = "white", // Default fill color + penwidth = 2 ]; edge [ -fontname="Helvetica,Arial,sans-serif", -color="black", -arrowhead=vee, -fontsize=10 +fontname = "Helvetica,Arial,sans-serif", +color = "black", +arrowhead = vee, +fontsize = 10 ]; // --- Node Definitions (with new colors) --- Start [ -shape=circle, -label="Start", -fontname="Helvetica-Bold,Arial-Bold,sans-serif" +shape = circle, +label = "Start", +fontname = "Helvetica-Bold,Arial-Bold,sans-serif" ]; End [ -shape=doublecircle, -label="End", -fontname="Helvetica-Bold,Arial-Bold,sans-serif" +shape = doublecircle, +label = "End", +fontname = "Helvetica-Bold,Arial-Bold,sans-serif" ]; HumanLoop [ -label="User Feedback", -fillcolor="lightgreen" +label = "User Feedback", +fillcolor = "lightgreen" ]; AIModel [ -label="AI Model", -fillcolor="lightpink" +label = "AI Model", +fillcolor = "lightpink" ]; // --- Agentic Loop Cluster (styled like example) --- @@ -56,10 +56,10 @@ fillcolor="lightpink" label = "Agentic Loop"; style = "dashed"; color = "lightgrey"; -fontname="Helvetica-Bold,Arial-Bold,sans-serif"; +fontname = "Helvetica-Bold,Arial-Bold,sans-serif"; // Nodes inside the cluster get a yellow fill - node [fillcolor="lightyellow"]; + node [fillcolor = "lightyellow"]; Plan; Execute; @@ -69,11 +69,11 @@ Plan -> Execute -> Check; } // --- Layout and Workflow Connections --- - HumanLoop -> Execute -> AIModel [style=invis, minlen=1]; + HumanLoop -> Execute -> AIModel [style = invis, minlen = 1]; -Start -> Plan [lhead=cluster_agentic_loop]; -Check -> End [ltail=cluster_agentic_loop]; +Start -> Plan [lhead = cluster_agentic_loop]; +Check -> End [ltail = cluster_agentic_loop]; -HumanLoop -> Plan [lhead=cluster_agentic_loop, constraint=false, xlabel=" Feedback "]; -Execute -> AIModel [ltail=cluster_agentic_loop, constraint=false, xlabel="Uses "]; +HumanLoop -> Plan [lhead = cluster_agentic_loop, constraint = false, xlabel = " Feedback "]; +Execute -> AIModel [ltail = cluster_agentic_loop, constraint = false, xlabel = "Uses "]; } diff --git a/docs/assets/diagrams/agentic_workflow_v0.1.0.svg b/docs/assets/diagrams/agentic_workflow_v0.1.0.svg index 57fcb4a..8afcee5 100644 --- a/docs/assets/diagrams/agentic_workflow_v0.1.0.svg +++ b/docs/assets/diagrams/agentic_workflow_v0.1.0.svg @@ -5,351 +5,376 @@ - - - - AI Agentic Workflow - - cluster_agentic_loop - - Agentic Loop - - - Start - - Start - - - Plan - - Plan - - - Start->Plan - - - - - End - - - End - - - HumanLoop - - User Feedback - - - HumanLoop->Plan - - -  Feedback   - - - Execute - - Execute - - - AIModel - - AI Model - - - Plan->Execute - - - - - Execute->AIModel - + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + width="683pt" + height="203pt" + viewBox="0.00 0.00 682.96 203.00" + version="1.1" + id="svg113" + sodipodi:docname="agentic_workflow.svg" + inkscape:version="1.2.2 (b0a8486541, 2022-12-01)" + xmlns="http://www.w3.org/2000/svg" +> + + + fill="#f8f9fa" + stroke="transparent" + points="678.96,-199 678.96,4 -4,4 -4,-199 " + id="polygon4" + transform="translate(4,199)"/> Uses   - - - Check - - Check - - - Execute->Check - - - - - Check->End - - - + text-anchor="middle" + x="341.48001" + y="25.600006" + font-family="Arial-Bold" + font-size="22px" + fill="#333333" + id="text6" + style="font-size:20px">AI Agentic Workflow + + + cluster_agentic_loop + + + Agentic Loop + + + + Start + + + Start + + + + Plan + + + Plan + + + + Start->Plan + + + + + + End + + + + End + + + + HumanLoop + + + User Feedback + + + + HumanLoop->Plan + + + +  Feedback   + + + + Execute + + + Execute + + + + AIModel + + + AI Model + + + + Plan->Execute + + + + + + Execute->AIModel + + + + Uses   + + + + Check + + + Check + + + + Execute->Check + + + + + + Check->End + + + + diff --git a/docs/assets/diagrams/agentic_workflow_v0.2.0.svg b/docs/assets/diagrams/agentic_workflow_v0.2.0.svg index 75d4ba6..78161e0 100644 --- a/docs/assets/diagrams/agentic_workflow_v0.2.0.svg +++ b/docs/assets/diagrams/agentic_workflow_v0.2.0.svg @@ -5,340 +5,365 @@ - - - - AI Agentic Workflow - - cluster_agentic_loop + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + width="720pt" + height="203pt" + viewBox="0.00 0.00 719.69 203.00" + version="1.1" + id="svg113" + sodipodi:docname="agentic_workflow.svg" + inkscape:version="1.2.2 (b0a8486541, 2022-12-01)" + xmlns="http://www.w3.org/2000/svg" +> + + + fill="#f8f9fa" + stroke="transparent" + points="715.69,-199 715.69,4 -4,4 -4,-199 " + id="polygon4" + transform="translate(4,199)"/> Agentic Loop - - - Start - - Start - - - Plan - - Plan - - - Start->Plan - - - - - End - - - End - - - HumanLoop - - User Feedback - - - HumanLoop->Plan - - -  Feedback   - - - Execute - - Execute - - - AIModel - - AI Model - - - Plan->Execute - - - - - Execute->AIModel - - - Uses   - - - Check - - Check - - - Execute->Check - - - - - Check->End - - - + text-anchor="middle" + x="359.84" + y="25.600006" + font-family="Helvetica-Bold, Arial-Bold, sans-serif" + font-size="22px" + fill="#333333" + id="text6">AI Agentic Workflow + + + cluster_agentic_loop + + + Agentic Loop + + + + Start + + + Start + + + + Plan + + + Plan + + + + Start->Plan + + + + + + End + + + + End + + + + HumanLoop + + + User Feedback + + + + HumanLoop->Plan + + + +  Feedback   + + + + Execute + + + Execute + + + + AIModel + + + AI Model + + + + Plan->Execute + + + + + + Execute->AIModel + + + + Uses   + + + + Check + + + Check + + + + Execute->Check + + + + + + Check->End + + + + diff --git a/package.json b/package.json index 3f5ae6c..8cc7449 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@cogitator/binharic-cli", - "version": "0.1.0-alpha.3", + "version": "0.1.0-alpha.4", "description": "A coding agent with the persona of a Tech-Priest of the Adeptus Mechanicus", "main": "dist/cli.js", "type": "module", diff --git a/src/agent/core/circuitBreaker.ts b/src/agent/core/circuitBreaker.ts new file mode 100644 index 0000000..3806190 --- /dev/null +++ b/src/agent/core/circuitBreaker.ts @@ -0,0 +1,160 @@ +import logger from "@/logger.js"; + +export enum CircuitBreakerState { + CLOSED = "CLOSED", + OPEN = "OPEN", + HALF_OPEN = "HALF_OPEN", +} + +export interface CircuitBreakerConfig { + failureThreshold: number; + successThreshold: number; + timeout: number; + resetTimeout: number; +} + +export class CircuitBreaker { + private state: CircuitBreakerState = CircuitBreakerState.CLOSED; + private failureCount = 0; + private successCount = 0; + private nextAttempt = Date.now(); + private lastFailureTime = 0; + + constructor( + private name: string, + private config: CircuitBreakerConfig, + ) {} + + async execute(fn: () => Promise): Promise { + if (this.state === CircuitBreakerState.OPEN) { + if (Date.now() < this.nextAttempt) { + const waitTime = Math.ceil((this.nextAttempt - Date.now()) / 1000); + logger.warn( + `Circuit breaker [${this.name}] is OPEN. Retry in ${waitTime}s. ` + + `Failure count: ${this.failureCount}/${this.config.failureThreshold}`, + ); + throw new Error( + `Circuit breaker is OPEN for ${this.name}. Service temporarily unavailable. Retry in ${waitTime}s.`, + ); + } + this.state = CircuitBreakerState.HALF_OPEN; + this.successCount = 0; + logger.info(`Circuit breaker [${this.name}] entering HALF_OPEN state`); + } + + try { + const timeoutPromise = new Promise((_, reject) => { + setTimeout( + () => + reject(new Error(`Circuit breaker timeout after ${this.config.timeout}ms`)), + this.config.timeout, + ); + }); + + const result = await Promise.race([fn(), timeoutPromise]); + + this.onSuccess(); + return result; + } catch (error) { + this.onFailure(); + throw error; + } + } + + getState(): CircuitBreakerState { + return this.state; + } + + getStats() { + return { + state: this.state, + failureCount: this.failureCount, + successCount: this.successCount, + lastFailureTime: this.lastFailureTime, + nextAttemptTime: this.nextAttempt, + }; + } + + reset(): void { + this.state = CircuitBreakerState.CLOSED; + this.failureCount = 0; + this.successCount = 0; + this.nextAttempt = Date.now(); + logger.info(`Circuit breaker [${this.name}] has been reset`); + } + + private onSuccess(): void { + this.failureCount = 0; + + if (this.state === CircuitBreakerState.HALF_OPEN) { + this.successCount++; + logger.info( + `Circuit breaker [${this.name}] success in HALF_OPEN: ${this.successCount}/${this.config.successThreshold}`, + ); + + if (this.successCount >= this.config.successThreshold) { + this.state = CircuitBreakerState.CLOSED; + this.successCount = 0; + logger.info(`Circuit breaker [${this.name}] is now CLOSED`); + } + } + } + + private onFailure(): void { + this.failureCount++; + this.lastFailureTime = Date.now(); + + logger.warn( + `Circuit breaker [${this.name}] failure: ${this.failureCount}/${this.config.failureThreshold}`, + ); + + if (this.state === CircuitBreakerState.HALF_OPEN) { + this.state = CircuitBreakerState.OPEN; + this.nextAttempt = Date.now() + this.config.resetTimeout; + logger.error(`Circuit breaker [${this.name}] opened from HALF_OPEN state`); + return; + } + + if (this.failureCount >= this.config.failureThreshold) { + this.state = CircuitBreakerState.OPEN; + this.nextAttempt = Date.now() + this.config.resetTimeout; + logger.error( + `Circuit breaker [${this.name}] is now OPEN. Will retry after ${this.config.resetTimeout}ms`, + ); + } + } +} + +const circuitBreakers = new Map(); + +export function getCircuitBreaker( + name: string, + config?: Partial, +): CircuitBreaker { + if (!circuitBreakers.has(name)) { + const defaultConfig: CircuitBreakerConfig = { + failureThreshold: 5, + successThreshold: 2, + timeout: 60000, + resetTimeout: 60000, + }; + + circuitBreakers.set(name, new CircuitBreaker(name, { ...defaultConfig, ...config })); + } + return circuitBreakers.get(name)!; +} + +export function resetAllCircuitBreakers(): void { + for (const breaker of circuitBreakers.values()) { + breaker.reset(); + } + logger.info("All circuit breakers have been reset"); +} + +export function getCircuitBreakerStats() { + const stats: Record> = {}; + for (const [name, breaker] of circuitBreakers.entries()) { + stats[name] = breaker.getStats(); + } + return stats; +} diff --git a/src/agent/core/state.ts b/src/agent/core/state.ts index 98ea02f..29a209a 100644 --- a/src/agent/core/state.ts +++ b/src/agent/core/state.ts @@ -552,7 +552,6 @@ async function _runAgentLogicInternal( const startHistoryLength = get().history.length; - // Track API timing per request let apiStart = 0; let apiCounted = false; @@ -651,7 +650,13 @@ async function _runAgentLogicInternal( mu[key] = mu[key] ? { ...mu[key], requests: mu[key].requests + 1 } : { provider: modelConfig.provider, modelId: modelConfig.modelId, requests: 1 }; - set({ metrics: { ...current.metrics, llmRequests: current.metrics.llmRequests + 1, modelUsage: mu } }); + set({ + metrics: { + ...current.metrics, + llmRequests: current.metrics.llmRequests + 1, + modelUsage: mu, + }, + }); } sdkCompliantHistory = applyContextWindow(sdkCompliantHistory, modelConfig); @@ -711,7 +716,12 @@ async function _runAgentLogicInternal( if (apiStart && !apiCounted) { const current = get(); const dt = Date.now() - apiStart; - set({ metrics: { ...current.metrics, llmApiTimeMs: current.metrics.llmApiTimeMs + dt } }); + set({ + metrics: { + ...current.metrics, + llmApiTimeMs: current.metrics.llmApiTimeMs + dt, + }, + }); apiCounted = true; } return; @@ -752,7 +762,12 @@ async function _runAgentLogicInternal( if (apiStart && !apiCounted) { const current = get(); const dt = Date.now() - apiStart; - set({ metrics: { ...current.metrics, llmApiTimeMs: current.metrics.llmApiTimeMs + dt } }); + set({ + metrics: { + ...current.metrics, + llmApiTimeMs: current.metrics.llmApiTimeMs + dt, + }, + }); apiCounted = true; } } @@ -853,7 +868,10 @@ async function _runAgentLogicInternal( role: "tool-failure", toolCallId: toolCall.toolCallId, toolName: toolCall.toolName, - error: error instanceof Error ? error.message : "An unknown error occurred", + error: + error instanceof Error + ? error.message + : "An unknown error occurred", }); } } else { @@ -895,7 +913,7 @@ async function _runAgentLogicInternal( } consecutiveErrors = 0; - } catch (error) { + } catch (error: unknown) { if (activeStreamTimeout) { clearTimeout(activeStreamTimeout); activeStreamTimeout = null; @@ -926,6 +944,10 @@ async function _runAgentLogicInternal( } consecutiveErrors++; + isAgentRunning = false; + agentLockTimestamp = 0; + shouldStopAgent = false; + const finalErrorMessage = typedError.message; logger.error(`Fatal or unhandled error: ${finalErrorMessage}`); set({ diff --git a/src/agent/llm/provider.ts b/src/agent/llm/provider.ts index 301410a..3c815ab 100644 --- a/src/agent/llm/provider.ts +++ b/src/agent/llm/provider.ts @@ -9,6 +9,7 @@ import type { Config, ModelConfig } from "@/config.js"; import { FatalError, TransientError } from "../errors/index.js"; import { tools } from "@/agent/tools/index.js"; import { createModelRegistry } from "./modelRegistry.js"; +import { getCircuitBreaker } from "../core/circuitBreaker.js"; let globalRegistry: ReturnType | null = null; @@ -203,90 +204,101 @@ export async function streamAssistantResponse( logger.info(`Using model: ${modelConfig.name}`); - let llmProvider; - try { - llmProvider = createLlmProvider(modelConfig, config); - } catch (error) { - if (error instanceof FatalError) throw error; - throw new FatalError( - `Failed to create LLM provider: ${error instanceof Error ? error.message : "Unknown error"}`, - ); - } + const circuitBreaker = getCircuitBreaker(`llm-${modelConfig.provider}`, { + failureThreshold: 5, + successThreshold: 2, + timeout: 90000, + resetTimeout: 60000, + }); + + return await circuitBreaker.execute(async () => { + let llmProvider; + try { + llmProvider = createLlmProvider(modelConfig, config); + } catch (error) { + if (error instanceof FatalError) throw error; + throw new FatalError( + `Failed to create LLM provider: ${error instanceof Error ? error.message : "Unknown error"}`, + ); + } - const maxItems = config.history?.maxItems; - const truncatedHistory = - maxItems && history.length > maxItems ? history.slice(-maxItems) : history; + const maxItems = config.history?.maxItems; + const truncatedHistory = + maxItems && history.length > maxItems ? history.slice(-maxItems) : history; - logger.info("Streaming text from LLM provider with native AI SDK tool calling."); - logger.debug({ - systemPrompt: systemPrompt.substring(0, 200) + "...", - messageCount: truncatedHistory.length, - }); + logger.info("Streaming text from LLM provider with native AI SDK tool calling."); + logger.debug({ + systemPrompt: systemPrompt.substring(0, 200) + "...", + messageCount: truncatedHistory.length, + }); - try { - const result = await Promise.race([ - streamText({ - model: llmProvider, - system: systemPrompt, - messages: truncatedHistory, - experimental_telemetry: { isEnabled: false }, - tools, - experimental_context: config, - }), - new Promise((_, reject) => - setTimeout( - () => reject(new TransientError("LLM request timed out after 60 seconds")), - 60000, + try { + const result = await Promise.race([ + streamText({ + model: llmProvider, + system: systemPrompt, + messages: truncatedHistory, + experimental_telemetry: { isEnabled: false }, + tools, + experimental_context: config, + }), + new Promise((_, reject) => + setTimeout( + () => reject(new TransientError("LLM request timed out after 60 seconds")), + 60000, + ), ), - ), - ]); + ]); - return result as Awaited>; - } catch (error) { - const errorMessage = error instanceof Error ? error.message : "An unknown error occurred."; - const status = (error as Error & { status?: number })?.status; + return result as Awaited>; + } catch (error) { + const errorMessage = + error instanceof Error ? error.message : "An unknown error occurred."; + const status = (error as Error & { status?: number })?.status; - logger.error(`Error streaming from LLM: ${errorMessage}`, { status }); + logger.error(`Error streaming from LLM: ${errorMessage}`, { status }); - if (error instanceof TransientError || error instanceof FatalError) { - throw error; - } + if (error instanceof TransientError || error instanceof FatalError) { + throw error; + } - if (status) { - if (status === 401) { - if ( - errorMessage.includes("insufficient permissions") || - errorMessage.includes("Missing scopes") - ) { - throw new FatalError( - `API key permissions error: Your OpenAI API key lacks required permissions.\n\n` + - `This usually means:\n` + - `1. Your API key doesn't have access to the requested model\n` + - `2. The model requires different API scopes than your key has\n` + - `3. You may need to use a different model (gpt-5-mini, gpt-5-nano, or gpt-4o)\n\n` + - `Try switching models with: /model gpt-5-mini\n` + - `Or use Ollama locally: /model qwen3\n\n` + - `Praise the Omnissiah! The machine spirit requires proper authorization.`, - ); + if (status) { + if (status === 401) { + if ( + errorMessage.includes("insufficient permissions") || + errorMessage.includes("Missing scopes") + ) { + throw new FatalError( + `API key permissions error: Your OpenAI API key lacks required permissions.\n\n` + + `This usually means:\n` + + `1. Your API key doesn't have access to the requested model\n` + + `2. The model requires different API scopes than your key has\n` + + `3. You may need to use a different model (gpt-5-mini, gpt-5-nano, or gpt-4o)\n\n` + + `Try switching models with: /model gpt-5-mini\n` + + `Or use Ollama locally: /model qwen3\n\n` + + `Praise the Omnissiah! The machine spirit requires proper authorization.`, + ); + } + throw new FatalError(`API authentication failed: ${errorMessage}`); } - throw new FatalError(`API authentication failed: ${errorMessage}`); + if (status === 429) + throw new TransientError(`Rate limit exceeded: ${errorMessage}`); + if (status === 403) throw new FatalError(`API access forbidden: ${errorMessage}`); + if (status >= 400 && status < 500) + throw new FatalError(`Client-side API error (${status}): ${errorMessage}`); + if (status >= 500) + throw new TransientError(`LLM provider error (${status}): ${errorMessage}`); } - if (status === 429) throw new TransientError(`Rate limit exceeded: ${errorMessage}`); - if (status === 403) throw new FatalError(`API access forbidden: ${errorMessage}`); - if (status >= 400 && status < 500) - throw new FatalError(`Client-side API error (${status}): ${errorMessage}`); - if (status >= 500) - throw new TransientError(`LLM provider error (${status}): ${errorMessage}`); - } - if (errorMessage.includes("timeout")) { - throw new TransientError(`Request timeout: ${errorMessage}`); - } + if (errorMessage.includes("timeout")) { + throw new TransientError(`Request timeout: ${errorMessage}`); + } - if (errorMessage.includes("network") || errorMessage.includes("ECONNREFUSED")) { - throw new TransientError(`Network error: ${errorMessage}`); - } + if (errorMessage.includes("network") || errorMessage.includes("ECONNREFUSED")) { + throw new TransientError(`Network error: ${errorMessage}`); + } - throw new TransientError(`Unexpected error: ${errorMessage}`); - } + throw new TransientError(`Unexpected error: ${errorMessage}`); + } + }); } diff --git a/src/agent/tools/definitions/fetch.ts b/src/agent/tools/definitions/fetch.ts index 63e6aec..e30d323 100644 --- a/src/agent/tools/definitions/fetch.ts +++ b/src/agent/tools/definitions/fetch.ts @@ -5,6 +5,9 @@ import { ToolError } from "../../errors/index.js"; const converter = compile({ wordwrap: 130 }); +const MAX_RESPONSE_SIZE = 10 * 1024 * 1024; +const DEFAULT_TIMEOUT_MS = 30000; + export const fetchTool = tool({ description: "Fetch the content of a URL.", inputSchema: z @@ -19,16 +22,45 @@ export const fetchTool = tool({ .strict(), execute: async ({ url, stripMarkup = true }) => { try { - const response = await fetch(url); - const fullText = await response.text(); + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), DEFAULT_TIMEOUT_MS); - if (!response.ok) { - throw new ToolError(`Request failed with status ${response.status}: ${fullText}`); - } + try { + const response = await fetch(url, { signal: controller.signal }); + clearTimeout(timeoutId); + + if (!response.ok) { + const errorText = await response.text(); + throw new ToolError( + `Request failed with status ${response.status}: ${errorText}`, + ); + } + + const contentLength = response.headers?.get("content-length"); + if (contentLength && parseInt(contentLength) > MAX_RESPONSE_SIZE) { + throw new ToolError( + `Response too large: ${contentLength} bytes exceeds limit of ${MAX_RESPONSE_SIZE} bytes`, + ); + } - return stripMarkup ? converter(fullText) : fullText; + const fullText = await response.text(); + + if (fullText.length > MAX_RESPONSE_SIZE) { + throw new ToolError( + `Response too large: ${fullText.length} bytes exceeds limit of ${MAX_RESPONSE_SIZE} bytes`, + ); + } + + return stripMarkup ? converter(fullText) : fullText; + } catch (error) { + clearTimeout(timeoutId); + throw error; + } } catch (error: unknown) { if (error instanceof Error) { + if (error.name === "AbortError") { + throw new ToolError(`Request timeout after ${DEFAULT_TIMEOUT_MS}ms`); + } if (error instanceof ToolError) throw error; throw new ToolError(error.message); } diff --git a/src/agent/tools/definitions/grepSearch.ts b/src/agent/tools/definitions/grepSearch.ts index 83fb6d7..a5bab48 100644 --- a/src/agent/tools/definitions/grepSearch.ts +++ b/src/agent/tools/definitions/grepSearch.ts @@ -23,6 +23,18 @@ export const grepSearchTool = tool({ }) .strict(), execute: async ({ query, includePattern, isRegexp = false }) => { + if (!query || query.trim().length === 0) { + throw new ToolError("Search query cannot be empty"); + } + + if (query.length > 1000) { + throw new ToolError("Search query exceeds maximum length of 1000 characters"); + } + + if (includePattern && includePattern.length > 500) { + throw new ToolError("Include pattern exceeds maximum length of 500 characters"); + } + return new Promise((resolve, reject) => { const grepArgs = ["-r", "-n"]; // recursive, line numbers diff --git a/src/agent/tools/definitions/insertEditIntoFile.ts b/src/agent/tools/definitions/insertEditIntoFile.ts index 60562fa..197afd1 100644 --- a/src/agent/tools/definitions/insertEditIntoFile.ts +++ b/src/agent/tools/definitions/insertEditIntoFile.ts @@ -217,11 +217,27 @@ function calculateSimilarity(str1: string, str2: string): number { if (longer.length === 0) return 1.0; + const MAX_COMPARISON_LENGTH = 5000; + if (longer.length > MAX_COMPARISON_LENGTH) { + const truncatedLonger = longer.substring(0, MAX_COMPARISON_LENGTH); + const truncatedShorter = shorter.substring( + 0, + Math.min(shorter.length, MAX_COMPARISON_LENGTH), + ); + const editDistance = levenshteinDistance(truncatedLonger, truncatedShorter); + return (truncatedLonger.length - editDistance) / truncatedLonger.length; + } + const editDistance = levenshteinDistance(longer, shorter); return (longer.length - editDistance) / longer.length; } function levenshteinDistance(str1: string, str2: string): number { + const maxLength = 10000; + if (str1.length > maxLength || str2.length > maxLength) { + return Math.max(str1.length, str2.length); + } + const matrix: number[][] = []; for (let i = 0; i <= str2.length; i++) { diff --git a/src/cli.ts b/src/cli.ts index 5a8d366..1dc83e6 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -52,6 +52,7 @@ const { unmount, waitUntilExit } = render(React.createElement(App)); let ctrlCPressCount = 0; let ctrlCTimeout: NodeJS.Timeout | null = null; let exitCallbackSet = false; +let isExitingGlobal = false; export function setExitCallback(callback: () => void) { if (!exitCallbackSet) { @@ -64,7 +65,47 @@ function getExitCallback(): (() => void) | null { return (globalThis as any).__binharic_exit_callback || null; } +function cleanupAndExit(code: number = 0) { + if (isExitingGlobal) { + return; + } + isExitingGlobal = true; + + if (ctrlCTimeout) { + clearTimeout(ctrlCTimeout); + ctrlCTimeout = null; + } + + cleanupAllSessions(); + + if (process.stdin.isTTY && process.stdin.setRawMode) { + try { + process.stdin.setRawMode(false); + } catch (error) { + logger.warn("Failed to restore stdin mode:", error); + } + } + + const exitCallback = getExitCallback(); + if (exitCallback) { + try { + exitCallback(); + } catch (error) { + logger.error("Error in exit callback:", error); + unmount(); + process.exit(code); + } + } else { + unmount(); + process.exit(code); + } +} + const handleSIGINT = () => { + if (isExitingGlobal) { + return; + } + ctrlCPressCount++; if (ctrlCTimeout) { @@ -99,27 +140,7 @@ const handleSIGINT = () => { console.log("│ May the Omnissiah preserve our data. │"); console.log("╰────────────────────────────────────────────────────╯\n"); - if (ctrlCTimeout) { - clearTimeout(ctrlCTimeout); - ctrlCTimeout = null; - } - - cleanupAllSessions(); - - if (process.stdin.isTTY && process.stdin.setRawMode) { - process.stdin.setRawMode(false); - } - - const exitCallback = getExitCallback(); - if (exitCallback) { - // Let UI handle summary and exit - exitCallback(); - return; - } - - unmount(); - logger.info("Application exit complete"); - process.exit(0); + cleanupAndExit(0); } }; @@ -127,24 +148,10 @@ process.on("SIGINT", handleSIGINT); process.on("SIGTERM", () => { logger.info("Received SIGTERM, powering down Machine Spirit. The Omnissiah protects..."); + cleanupAndExit(0); +}); - if (ctrlCTimeout) { - clearTimeout(ctrlCTimeout); - ctrlCTimeout = null; - } - - if (process.stdin.isTTY && process.stdin.setRawMode) { - process.stdin.setRawMode(false); - } - +process.on("exit", (code) => { + logger.info(`Process exiting with code: ${code}`); cleanupAllSessions(); - - const exitCallback = getExitCallback(); - if (exitCallback) { - exitCallback(); - return; - } - - unmount(); - process.exit(0); }); diff --git a/src/ui/App.tsx b/src/ui/App.tsx index 63cdf45..24b7a3f 100644 --- a/src/ui/App.tsx +++ b/src/ui/App.tsx @@ -37,16 +37,36 @@ export default function App() { const g = globalThis as typeof globalThis & { __binharic_exit_callback?: () => void; }; - // Install a custom exit callback that shows summary before exiting + + let exitTimeout: NodeJS.Timeout | null = null; + let isExiting = false; + g.__binharic_exit_callback = () => { + if (isExiting) { + return; + } + isExiting = true; + beginExit(); - // Give Ink time to render the summary, then exit the app and process - setTimeout(() => { + + exitTimeout = setTimeout(() => { exit(); - // extra safety: force process exit shortly after unmount - setTimeout(() => process.exit(0), 100); + setTimeout(() => { + if (!process.exitCode) { + process.exit(0); + } + }, 100); }, 600); }; + + return () => { + if (exitTimeout) { + clearTimeout(exitTimeout); + } + if (g.__binharic_exit_callback) { + g.__binharic_exit_callback = undefined; + } + }; }, [loadInitialConfig, exit, beginExit]); useInput(() => { diff --git a/src/ui/ExitSummary.tsx b/src/ui/ExitSummary.tsx index 03af171..bbb5980 100644 --- a/src/ui/ExitSummary.tsx +++ b/src/ui/ExitSummary.tsx @@ -7,61 +7,74 @@ function msToSeconds(ms: number): string { } export default function ExitSummary() { - const { sessionId, startedAt, llmRequests, llmApiTimeMs, toolCallsSuccess, toolCallsFailure, toolTimeMs, modelUsage } = - useStore((s) => s.metrics); + const { + sessionId, + startedAt, + llmRequests, + llmApiTimeMs, + toolCallsSuccess, + toolCallsFailure, + toolTimeMs, + modelUsage, + } = useStore((s) => s.metrics); const wallTime = useMemo(() => Date.now() - startedAt, [startedAt]); const totalToolCalls = toolCallsSuccess + toolCallsFailure; - const successRate = totalToolCalls > 0 ? ((toolCallsSuccess / totalToolCalls) * 100).toFixed(1) + "%" : "0.0%"; + const successRate = + totalToolCalls > 0 ? ((toolCallsSuccess / totalToolCalls) * 100).toFixed(1) + "%" : "0.0%"; return ( ✦ Goodbye - + - - Agent powering down. Goodbye! - + Agent powering down. Goodbye! Interaction Summary - - Session ID: {sessionId} - + Session ID: {sessionId} - Tool Calls: {totalToolCalls} ( ✓ {toolCallsSuccess} x {toolCallsFailure} ) + Tool Calls: {totalToolCalls} ( ✓ {toolCallsSuccess} x{" "} + {toolCallsFailure} ) - Success Rate: {successRate} + Success Rate: {successRate} Performance - Wall Time: {msToSeconds(wallTime)} + Wall Time: {msToSeconds(wallTime)} - Agent Active: {msToSeconds(llmApiTimeMs + toolTimeMs)} + Agent Active: {msToSeconds(llmApiTimeMs + toolTimeMs)} - » API Time: {msToSeconds(llmApiTimeMs)} + » API Time: {msToSeconds(llmApiTimeMs)} - » Tool Time: {msToSeconds(toolTimeMs)} + » Tool Time: {msToSeconds(toolTimeMs)} - Model Usage Reqs + Model Usage Reqs {Object.keys(modelUsage).length === 0 ? ( - + ) : ( Object.entries(modelUsage).map(([name, info]) => ( @@ -80,4 +93,3 @@ export default function ExitSummary() { ); } - diff --git a/src/ui/Footer.tsx b/src/ui/Footer.tsx index 6f41a6f..0e60f22 100644 --- a/src/ui/Footer.tsx +++ b/src/ui/Footer.tsx @@ -70,7 +70,9 @@ export function Footer() { {status === "error" && ( - ⚠️ Corruption detected in the machine spirit: {error} + + ⚠️ Corruption detected in the machine spirit: {error} + Consult the sacred logs: {logsDir} Press any key to recalibrate and continue. diff --git a/src/ui/HelpMenu.tsx b/src/ui/HelpMenu.tsx index b52dffa..70ba268 100644 --- a/src/ui/HelpMenu.tsx +++ b/src/ui/HelpMenu.tsx @@ -61,30 +61,30 @@ export function HelpMenu() { /help - Show this help message - /clear - Clear the screen and conversation - history + /clear - Clear the screen and + conversation history /clearHistory - Clear command history - /quit or /exit - - Exit the application + /quit or{" "} + /exit - Exit the application - /model - Switch to a different model (e.g., - /model gpt-5-mini) + /model - Switch to a different model + (e.g., /model gpt-5-mini) /system - Set custom system prompt - /add - Add context files (e.g., /add README.md - config.json) + /add - Add context files (e.g., /add + README.md config.json) - /models - List all available model providers and - models + /models - List all available model + providers and models @@ -95,11 +95,12 @@ export function HelpMenu() { - read_file - Read a file from the filesystem + read_file - Read a file from the + filesystem - read_multiple_files - Read multiple files at once - (batch) + read_multiple_files - Read multiple files + at once (batch) list - List files and directories @@ -108,7 +109,8 @@ export function HelpMenu() { search - Search for files by name pattern - grep_search - Search for text within files + grep_search - Search for text within + files create - Create a new file @@ -117,14 +119,16 @@ export function HelpMenu() { edit - Edit an existing file - insert_edit_into_file - Apply smart edits to a - file + insert_edit_into_file - Apply smart edits + to a file - get_errors - Get compilation or lint errors + get_errors - Get compilation or lint + errors - validate - Validate file operations or changes + validate - Validate file operations or + changes @@ -142,8 +146,8 @@ export function HelpMenu() { terminal - get_terminal_output - Get output from terminal - session + get_terminal_output - Get output from + terminal session fetch - Fetch content from a URL @@ -199,7 +203,8 @@ export function HelpMenu() { diff_files - Compare two files - diff_show_changes - Show uncommitted changes + diff_show_changes - Show uncommitted + changes diff --git a/src/ui/HistoryItemDisplay.tsx b/src/ui/HistoryItemDisplay.tsx index 090e68e..40e6f1a 100644 --- a/src/ui/HistoryItemDisplay.tsx +++ b/src/ui/HistoryItemDisplay.tsx @@ -59,7 +59,9 @@ function renderInlineCode(text: string) { typeof p === "string" ? ( {p} ) : ( - {p.code} + + {p.code} + ), )} @@ -78,13 +80,15 @@ function AssistantMessageContent({ content }: { content: AssistantContent | stri seg.type === "text" ? ( {renderInlineCode(seg.value)} ) : ( - - {seg.lang && ( - {seg.lang} - )} - - {seg.value} - + + {seg.lang && {seg.lang}} + {seg.value} ), )} @@ -98,7 +102,12 @@ export function HistoryItemDisplay({ message }: { message: HistoryItem }) { return > {message.content}; case "assistant": return ( - + ); @@ -108,7 +117,12 @@ export function HistoryItemDisplay({ message }: { message: HistoryItem }) { return null; case "tool-failure": return ( - + › Tool Failure ({message.toolName}): diff --git a/src/ui/TodoList.tsx b/src/ui/TodoList.tsx index bab5e4a..33c19ef 100644 --- a/src/ui/TodoList.tsx +++ b/src/ui/TodoList.tsx @@ -2,6 +2,7 @@ import React from "react"; import { Box, Text } from "ink"; import Spinner from "ink-spinner"; import { theme } from "./theme.js"; +import type { Config } from "@/config.js"; export interface TodoItem { id: string; @@ -91,7 +92,13 @@ export const TodoList: React.FC = ({ } return ( - + Progress: {completedCount}/{totalCount} @@ -115,7 +122,6 @@ export const TodoList: React.FC = ({ }; export default TodoList; -import type { Config } from "@/config.js"; export type OutputStyle = "default" | "explanatory" | "learning" | "concise" | "verbose"; diff --git a/src/ui/ToolConfirmation.tsx b/src/ui/ToolConfirmation.tsx index 0a2da05..af68c1a 100644 --- a/src/ui/ToolConfirmation.tsx +++ b/src/ui/ToolConfirmation.tsx @@ -47,7 +47,9 @@ export function ToolConfirmation() { ); })} - Press ENTER to grant blessing | ESC to deny the ritual + + Press ENTER to grant blessing | ESC to deny the ritual + ); diff --git a/src/ui/UserInput.tsx b/src/ui/UserInput.tsx index a66e8b8..53ac62b 100644 --- a/src/ui/UserInput.tsx +++ b/src/ui/UserInput.tsx @@ -244,8 +244,12 @@ export function UserInput() { break; case "quit": case "exit": + setInputValue(""); beginExit(); - break; + setTimeout(() => { + process.exit(0); + }, 700); + return; case "system": setSystemPrompt(rest); break; @@ -314,8 +318,12 @@ export function UserInput() { break; case "exit": case "quit": + setInputValue(""); beginExit(); - break; + setTimeout(() => { + process.exit(0); + }, 700); + return; default: startAgent(value); break; diff --git a/src/ui/theme.ts b/src/ui/theme.ts index 31641d2..b5306e5 100644 --- a/src/ui/theme.ts +++ b/src/ui/theme.ts @@ -1,23 +1,22 @@ export const theme = { - // Brand and primary accents - primary: "cyan" as const, - accent: "magenta" as const, - info: "blue" as const, + // Brand and primary accents + primary: "cyan" as const, + accent: "magenta" as const, + info: "blue" as const, - // Semantic colors - success: "green" as const, - warning: "yellow" as const, - error: "red" as const, + // Semantic colors + success: "green" as const, + warning: "yellow" as const, + error: "red" as const, - // Text and borders - text: "white" as const, - dim: "gray" as const, - border: "gray" as const, - assistantBorder: "green" as const, - codeBlockBorder: "gray" as const, - codeInline: "yellow" as const, - userPrompt: "white" as const, + // Text and borders + text: "white" as const, + dim: "gray" as const, + border: "gray" as const, + assistantBorder: "green" as const, + codeBlockBorder: "gray" as const, + codeInline: "yellow" as const, + userPrompt: "white" as const, }; export type Theme = typeof theme; - diff --git a/tests/agent/bugs/agentLockReleaseOnError.test.ts b/tests/agent/bugs/agentLockReleaseOnError.test.ts new file mode 100644 index 0000000..6a9e932 --- /dev/null +++ b/tests/agent/bugs/agentLockReleaseOnError.test.ts @@ -0,0 +1,165 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; + +describe("Agent Lock Release on Error Bug Fix", () => { + let isAgentRunning = false; + let agentLockTimestamp = 0; + let shouldStopAgent = false; + + beforeEach(() => { + isAgentRunning = false; + agentLockTimestamp = 0; + shouldStopAgent = false; + }); + + afterEach(() => { + isAgentRunning = false; + agentLockTimestamp = 0; + shouldStopAgent = false; + }); + + it("should release agent lock when fatal error occurs", async () => { + const mockRunAgentLogic = async () => { + if (isAgentRunning) { + throw new Error("Agent already running"); + } + + isAgentRunning = true; + agentLockTimestamp = Date.now(); + + try { + throw new Error("Fatal error occurred"); + } finally { + isAgentRunning = false; + agentLockTimestamp = 0; + } + }; + + try { + await mockRunAgentLogic(); + } catch (error) { + expect(error).toBeInstanceOf(Error); + } + + expect(isAgentRunning).toBe(false); + expect(agentLockTimestamp).toBe(0); + }); + + it("should release agent lock on unexpected error in internal logic", async () => { + const mockInternalLogic = async () => { + throw new Error("Unexpected error in internal logic"); + }; + + const mockRunAgentLogic = async () => { + isAgentRunning = true; + agentLockTimestamp = Date.now(); + + try { + await mockInternalLogic(); + } finally { + isAgentRunning = false; + agentLockTimestamp = 0; + } + }; + + try { + await mockRunAgentLogic(); + } catch (error) { + expect(error).toBeInstanceOf(Error); + } + + expect(isAgentRunning).toBe(false); + expect(agentLockTimestamp).toBe(0); + }); + + it("should reset shouldStopAgent flag on error", async () => { + shouldStopAgent = true; + + const mockInternalLogic = async () => { + if (shouldStopAgent) { + throw new Error("Agent stopped"); + } + }; + + const mockRunAgentLogic = async () => { + isAgentRunning = true; + agentLockTimestamp = Date.now(); + + try { + await mockInternalLogic(); + } catch (error) { + isAgentRunning = false; + agentLockTimestamp = 0; + shouldStopAgent = false; + throw error; + } + }; + + try { + await mockRunAgentLogic(); + } catch (error) { + expect(error).toBeInstanceOf(Error); + } + + expect(isAgentRunning).toBe(false); + expect(agentLockTimestamp).toBe(0); + expect(shouldStopAgent).toBe(false); + }); + + it("should allow new agent run after lock is released on error", async () => { + let runCount = 0; + + const mockRunAgentLogic = async () => { + if (isAgentRunning) { + throw new Error("Agent already running"); + } + + isAgentRunning = true; + agentLockTimestamp = Date.now(); + runCount++; + + try { + if (runCount === 1) { + throw new Error("First run fails"); + } + } finally { + isAgentRunning = false; + agentLockTimestamp = 0; + } + }; + + try { + await mockRunAgentLogic(); + } catch (error) { + expect(runCount).toBe(1); + } + + expect(isAgentRunning).toBe(false); + + await mockRunAgentLogic(); + expect(runCount).toBe(2); + expect(isAgentRunning).toBe(false); + }); + + it("should properly release lock even if error handler throws", async () => { + const mockRunAgentLogic = async () => { + isAgentRunning = true; + agentLockTimestamp = Date.now(); + + try { + throw new Error("Primary error"); + } finally { + isAgentRunning = false; + agentLockTimestamp = 0; + } + }; + + try { + await mockRunAgentLogic(); + } catch (error) { + expect(error).toBeInstanceOf(Error); + } + + expect(isAgentRunning).toBe(false); + expect(agentLockTimestamp).toBe(0); + }); +}); diff --git a/tests/agent/bugs/exitLogicBugFixes.test.ts b/tests/agent/bugs/exitLogicBugFixes.test.ts new file mode 100644 index 0000000..c0776ad --- /dev/null +++ b/tests/agent/bugs/exitLogicBugFixes.test.ts @@ -0,0 +1,282 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +describe("Exit Logic Bug Fixes", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe("Double Exit Prevention", () => { + it("should prevent multiple exit calls from executing", () => { + let isExiting = false; + let exitCount = 0; + + const safeExit = () => { + if (isExiting) { + return; + } + isExiting = true; + exitCount++; + }; + + safeExit(); + safeExit(); + safeExit(); + + expect(exitCount).toBe(1); + }); + + it("should clear exit timeout on cleanup", () => { + let exitTimeout: NodeJS.Timeout | null = null; + let timeoutCleared = false; + + const originalClearTimeout = global.clearTimeout; + global.clearTimeout = ((id: NodeJS.Timeout) => { + if (id === exitTimeout) { + timeoutCleared = true; + } + originalClearTimeout(id); + }) as typeof clearTimeout; + + exitTimeout = setTimeout(() => {}, 600); + + if (exitTimeout) { + clearTimeout(exitTimeout); + } + + expect(timeoutCleared).toBe(true); + + global.clearTimeout = originalClearTimeout; + }); + }); + + describe("Cleanup Order", () => { + it("should cleanup in correct order: timeout -> sessions -> stdin -> callback", () => { + const cleanupOrder: string[] = []; + let isExiting = false; + + const mockCleanup = () => { + if (isExiting) return; + isExiting = true; + + cleanupOrder.push("timeout"); + cleanupOrder.push("sessions"); + cleanupOrder.push("stdin"); + cleanupOrder.push("callback"); + }; + + mockCleanup(); + + expect(cleanupOrder).toEqual(["timeout", "sessions", "stdin", "callback"]); + }); + }); + + describe("Exit Callback Management", () => { + it("should set and retrieve exit callback", () => { + const globalObj = globalThis as any; + const mockCallback = vi.fn(); + + globalObj.__binharic_exit_callback = mockCallback; + + const retrieved = globalObj.__binharic_exit_callback; + + expect(retrieved).toBe(mockCallback); + }); + + it("should clear exit callback on cleanup", () => { + const globalObj = globalThis as any; + globalObj.__binharic_exit_callback = vi.fn(); + + globalObj.__binharic_exit_callback = undefined; + + expect(globalObj.__binharic_exit_callback).toBeUndefined(); + }); + + it("should not set callback twice", () => { + let exitCallbackSet = false; + const callbacks: Function[] = []; + + const setExitCallback = (callback: () => void) => { + if (!exitCallbackSet) { + exitCallbackSet = true; + callbacks.push(callback); + } + }; + + const callback1 = vi.fn(); + const callback2 = vi.fn(); + + setExitCallback(callback1); + setExitCallback(callback2); + + expect(callbacks.length).toBe(1); + expect(callbacks[0]).toBe(callback1); + }); + }); + + describe("SIGINT/SIGTERM Handling", () => { + it("should ignore SIGINT when already exiting", () => { + let isExitingGlobal = false; + let handlerCallCount = 0; + + const handleSIGINT = () => { + if (isExitingGlobal) { + return; + } + handlerCallCount++; + isExitingGlobal = true; + }; + + handleSIGINT(); + handleSIGINT(); + handleSIGINT(); + + expect(handlerCallCount).toBe(1); + }); + + it("should clear ctrl+c timeout on second press", () => { + let ctrlCTimeout: NodeJS.Timeout | null = null; + let timeoutCleared = false; + + ctrlCTimeout = setTimeout(() => {}, 2000); + + const originalClearTimeout = global.clearTimeout; + global.clearTimeout = ((id: NodeJS.Timeout) => { + if (id === ctrlCTimeout) { + timeoutCleared = true; + } + originalClearTimeout(id); + }) as typeof clearTimeout; + + if (ctrlCTimeout) { + clearTimeout(ctrlCTimeout); + ctrlCTimeout = null; + } + + expect(timeoutCleared).toBe(true); + expect(ctrlCTimeout).toBe(null); + + global.clearTimeout = originalClearTimeout; + }); + }); + + describe("Input Handling During Exit", () => { + it("should clear input immediately on exit command", () => { + let inputValue = "/exit"; + + if (inputValue === "/exit" || inputValue === "/quit") { + inputValue = ""; + } + + expect(inputValue).toBe(""); + }); + + it("should prevent submit during exit sequence", async () => { + let isExiting = false; + let submitCount = 0; + + const handleSubmit = async () => { + if (isExiting) { + return; + } + submitCount++; + isExiting = true; + }; + + await handleSubmit(); + await handleSubmit(); + await handleSubmit(); + + expect(submitCount).toBe(1); + }); + }); + + describe("Error Handling During Exit", () => { + it("should handle errors in exit callback gracefully", () => { + const mockCallback = vi.fn().mockImplementation(() => { + throw new Error("Exit callback error"); + }); + + let errorCaught = false; + + try { + mockCallback(); + } catch (error) { + errorCaught = true; + } + + expect(errorCaught).toBe(true); + }); + + it("should handle stdin cleanup errors gracefully", () => { + const mockStdin = { + isTTY: true, + setRawMode: vi.fn().mockImplementation(() => { + throw new Error("Failed to set raw mode"); + }), + }; + + let errorHandled = false; + + if (mockStdin.isTTY && mockStdin.setRawMode) { + try { + mockStdin.setRawMode(false); + } catch (error) { + errorHandled = true; + } + } + + expect(errorHandled).toBe(true); + }); + }); + + describe("Timeout Management", () => { + it("should use 600ms for exit summary display", () => { + const EXPECTED_SUMMARY_TIME = 600; + const exitSummaryTimeout = 600; + + expect(exitSummaryTimeout).toBe(EXPECTED_SUMMARY_TIME); + }); + + it("should use 700ms for command exit to allow summary render", () => { + const EXPECTED_COMMAND_EXIT_TIME = 700; + const commandExitTimeout = 700; + + expect(commandExitTimeout).toBe(EXPECTED_COMMAND_EXIT_TIME); + }); + + it("should cleanup timeout on unmount", () => { + let exitTimeout: NodeJS.Timeout | null = setTimeout(() => {}, 600); + let cleaned = false; + + const cleanup = () => { + if (exitTimeout) { + clearTimeout(exitTimeout); + exitTimeout = null; + cleaned = true; + } + }; + + cleanup(); + + expect(exitTimeout).toBe(null); + expect(cleaned).toBe(true); + }); + }); + + describe("Exit Code Handling", () => { + it("should exit with code 0 on normal exit", () => { + const exitCode = 0; + expect(exitCode).toBe(0); + }); + + it("should check process.exitCode before forcing exit", () => { + let shouldForceExit = false; + + if (!process.exitCode) { + shouldForceExit = true; + } + + expect(typeof shouldForceExit).toBe("boolean"); + }); + }); +}); diff --git a/tests/agent/bugs/fetchToolTimeout.test.ts b/tests/agent/bugs/fetchToolTimeout.test.ts new file mode 100644 index 0000000..9b47064 --- /dev/null +++ b/tests/agent/bugs/fetchToolTimeout.test.ts @@ -0,0 +1,89 @@ +import { describe, expect, it } from "vitest"; + +describe("Fetch Tool Timeout and Size Limit Fix", () => { + const MAX_RESPONSE_SIZE = 10 * 1024 * 1024; + const DEFAULT_TIMEOUT_MS = 30000; + + it("should reject responses exceeding size limit from content-length header", () => { + const contentLength = (MAX_RESPONSE_SIZE + 1).toString(); + + expect(() => { + if (parseInt(contentLength) > MAX_RESPONSE_SIZE) { + throw new Error( + `Response too large: ${contentLength} bytes exceeds limit of ${MAX_RESPONSE_SIZE} bytes`, + ); + } + }).toThrow("Response too large"); + }); + + it("should reject responses exceeding size limit from actual content", () => { + const largeContent = "x".repeat(MAX_RESPONSE_SIZE + 1); + + expect(() => { + if (largeContent.length > MAX_RESPONSE_SIZE) { + throw new Error( + `Response too large: ${largeContent.length} bytes exceeds limit of ${MAX_RESPONSE_SIZE} bytes`, + ); + } + }).toThrow("Response too large"); + }); + + it("should accept responses at size limit", () => { + const maxContent = "x".repeat(MAX_RESPONSE_SIZE); + + expect(() => { + if (maxContent.length > MAX_RESPONSE_SIZE) { + throw new Error(`Response too large`); + } + }).not.toThrow(); + }); + + it("should handle timeout correctly", async () => { + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 100); + + try { + await new Promise((_, reject) => { + setTimeout(() => reject(new Error("Should have timed out")), 200); + }); + } catch (error) { + clearTimeout(timeoutId); + } + + expect(true).toBe(true); + }); + + it("should clear timeout on successful fetch", () => { + let timeoutCleared = false; + const mockTimeout = setTimeout(() => {}, 1000); + + clearTimeout(mockTimeout); + timeoutCleared = true; + + expect(timeoutCleared).toBe(true); + }); + + it("should clear timeout on fetch error", () => { + let timeoutCleared = false; + const mockTimeout = setTimeout(() => {}, 1000); + + try { + throw new Error("Fetch failed"); + } catch (error) { + clearTimeout(mockTimeout); + timeoutCleared = true; + } + + expect(timeoutCleared).toBe(true); + }); + + it("should handle abort signal correctly", () => { + const controller = new AbortController(); + + expect(controller.signal.aborted).toBe(false); + + controller.abort(); + + expect(controller.signal.aborted).toBe(true); + }); +}); diff --git a/tests/agent/bugs/grepSearchValidation.test.ts b/tests/agent/bugs/grepSearchValidation.test.ts new file mode 100644 index 0000000..be46007 --- /dev/null +++ b/tests/agent/bugs/grepSearchValidation.test.ts @@ -0,0 +1,62 @@ +import { describe, expect, it } from "vitest"; + +describe("Grep Search Input Validation Bug Fix", () => { + const validateGrepInput = (query: string, includePattern?: string) => { + if (!query || query.trim().length === 0) { + throw new Error("Search query cannot be empty"); + } + + if (query.length > 1000) { + throw new Error("Search query exceeds maximum length of 1000 characters"); + } + + if (includePattern && includePattern.length > 500) { + throw new Error("Include pattern exceeds maximum length of 500 characters"); + } + }; + + it("should reject empty query", () => { + expect(() => validateGrepInput("")).toThrow("Search query cannot be empty"); + expect(() => validateGrepInput(" ")).toThrow("Search query cannot be empty"); + }); + + it("should reject excessively long query", () => { + const longQuery = "a".repeat(1001); + expect(() => validateGrepInput(longQuery)).toThrow("Search query exceeds maximum length"); + }); + + it("should accept query at maximum length", () => { + const maxQuery = "a".repeat(1000); + expect(() => validateGrepInput(maxQuery)).not.toThrow(); + }); + + it("should reject excessively long include pattern", () => { + const longPattern = "a".repeat(501); + expect(() => validateGrepInput("test", longPattern)).toThrow( + "Include pattern exceeds maximum length", + ); + }); + + it("should accept include pattern at maximum length", () => { + const maxPattern = "a".repeat(500); + expect(() => validateGrepInput("test", maxPattern)).not.toThrow(); + }); + + it("should accept valid inputs", () => { + expect(() => validateGrepInput("test")).not.toThrow(); + expect(() => validateGrepInput("test", "*.ts")).not.toThrow(); + expect(() => validateGrepInput("function", "**/*.js")).not.toThrow(); + }); + + it("should handle special characters in query", () => { + expect(() => validateGrepInput("test$")).not.toThrow(); + expect(() => validateGrepInput("^start")).not.toThrow(); + expect(() => validateGrepInput("test.*end")).not.toThrow(); + }); + + it("should handle unicode in query", () => { + expect(() => validateGrepInput("测试")).not.toThrow(); + expect(() => validateGrepInput("тест")).not.toThrow(); + expect(() => validateGrepInput("🔍")).not.toThrow(); + }); +}); diff --git a/tests/agent/bugs/insertEditMemoryLeak.test.ts b/tests/agent/bugs/insertEditMemoryLeak.test.ts new file mode 100644 index 0000000..1e8df08 --- /dev/null +++ b/tests/agent/bugs/insertEditMemoryLeak.test.ts @@ -0,0 +1,136 @@ +import { describe, expect, it } from "vitest"; + +describe("Insert Edit Memory Leak Fix", () => { + function levenshteinDistance(str1: string, str2: string): number { + const maxLength = 10000; + if (str1.length > maxLength || str2.length > maxLength) { + return Math.max(str1.length, str2.length); + } + + const matrix: number[][] = []; + + for (let i = 0; i <= str2.length; i++) { + matrix[i] = [i]; + } + + for (let j = 0; j <= str1.length; j++) { + matrix[0][j] = j; + } + + for (let i = 1; i <= str2.length; i++) { + for (let j = 1; j <= str1.length; j++) { + if (str2.charAt(i - 1) === str1.charAt(j - 1)) { + matrix[i][j] = matrix[i - 1][j - 1]; + } else { + matrix[i][j] = Math.min( + matrix[i - 1][j - 1] + 1, + matrix[i][j - 1] + 1, + matrix[i - 1][j] + 1, + ); + } + } + } + + return matrix[str2.length][str1.length]; + } + + function calculateSimilarity(str1: string, str2: string): number { + const longer = str1.length > str2.length ? str1 : str2; + const shorter = str1.length > str2.length ? str2 : str1; + + if (longer.length === 0) return 1.0; + + const MAX_COMPARISON_LENGTH = 5000; + if (longer.length > MAX_COMPARISON_LENGTH) { + const truncatedLonger = longer.substring(0, MAX_COMPARISON_LENGTH); + const truncatedShorter = shorter.substring( + 0, + Math.min(shorter.length, MAX_COMPARISON_LENGTH), + ); + const editDistance = levenshteinDistance(truncatedLonger, truncatedShorter); + return (truncatedLonger.length - editDistance) / truncatedLonger.length; + } + + const editDistance = levenshteinDistance(longer, shorter); + return (longer.length - editDistance) / longer.length; + } + + it("should handle very large strings without excessive memory allocation", () => { + const largeString1 = "x".repeat(20000); + const largeString2 = "y".repeat(20000); + + const startMemory = process.memoryUsage().heapUsed; + const distance = levenshteinDistance(largeString1, largeString2); + const endMemory = process.memoryUsage().heapUsed; + + expect(distance).toBeGreaterThan(0); + + const memoryIncrease = endMemory - startMemory; + const maxAllowedIncrease = 50 * 1024 * 1024; + expect(memoryIncrease).toBeLessThan(maxAllowedIncrease); + }); + + it("should return early for strings exceeding max length", () => { + const veryLargeString = "a".repeat(15000); + const smallString = "ab"; + + const distance = levenshteinDistance(veryLargeString, smallString); + + expect(distance).toBe(veryLargeString.length); + }); + + it("should truncate strings in similarity calculation when too long", () => { + const longString1 = "a".repeat(10000); + const longString2 = "a".repeat(10000); + + const similarity = calculateSimilarity(longString1, longString2); + + expect(similarity).toBeGreaterThan(0); + expect(similarity).toBeLessThanOrEqual(1); + }); + + it("should handle normal sized strings correctly", () => { + const str1 = "hello world"; + const str2 = "hello world"; + + const distance = levenshteinDistance(str1, str2); + + expect(distance).toBe(0); + }); + + it("should calculate similarity for normal strings", () => { + const str1 = "kitten"; + const str2 = "sitting"; + + const similarity = calculateSimilarity(str1, str2); + + expect(similarity).toBeGreaterThan(0); + expect(similarity).toBeLessThan(1); + }); + + it("should not crash with extremely large strings", () => { + const extremelyLarge1 = "x".repeat(50000); + const extremelyLarge2 = "y".repeat(50000); + + expect(() => { + levenshteinDistance(extremelyLarge1, extremelyLarge2); + }).not.toThrow(); + }); + + it("should handle empty strings", () => { + expect(calculateSimilarity("", "")).toBe(1.0); + expect(calculateSimilarity("hello", "")).toBe(0); + expect(levenshteinDistance("", "")).toBe(0); + }); + + it("should complete similarity calculation in reasonable time", () => { + const str1 = "a".repeat(5000); + const str2 = "b".repeat(5000); + + const startTime = Date.now(); + calculateSimilarity(str1, str2); + const duration = Date.now() - startTime; + + expect(duration).toBeLessThan(5000); + }); +}); diff --git a/tests/agent/bugs/terminalSessionRaceCondition.test.ts b/tests/agent/bugs/terminalSessionRaceCondition.test.ts new file mode 100644 index 0000000..2b22a13 --- /dev/null +++ b/tests/agent/bugs/terminalSessionRaceCondition.test.ts @@ -0,0 +1,185 @@ +import { beforeEach, describe, expect, it } from "vitest"; + +describe("Terminal Session Race Condition Fix", () => { + interface Session { + process: { + killed: boolean; + kill: () => void; + stdout: null; + stderr: null; + removeAllListeners: () => void; + }; + output: string[]; + isBackground: boolean; + startTime: number; + timeout?: NodeJS.Timeout; + isCleaningUp?: boolean; + } + + let sessions: Map; + + beforeEach(() => { + sessions = new Map(); + }); + + function cleanupSession(sessionId: string) { + const session = sessions.get(sessionId); + if (session) { + if (session.isCleaningUp) { + return; + } + session.isCleaningUp = true; + + if (session.timeout) { + clearTimeout(session.timeout); + } + if (!session.process.killed) { + session.process.kill(); + } + sessions.delete(sessionId); + } + } + + it("should prevent double cleanup of same session", () => { + const mockProcess = { + killed: false, + kill: () => { + mockProcess.killed = true; + }, + stdout: null, + stderr: null, + removeAllListeners: () => {}, + }; + + sessions.set("test-1", { + process: mockProcess, + output: [], + isBackground: true, + startTime: Date.now(), + }); + + cleanupSession("test-1"); + expect(sessions.has("test-1")).toBe(false); + + cleanupSession("test-1"); + expect(sessions.has("test-1")).toBe(false); + }); + + it("should handle concurrent cleanup attempts gracefully", () => { + const mockProcess = { + killed: false, + kill: () => { + mockProcess.killed = true; + }, + stdout: null, + stderr: null, + removeAllListeners: () => {}, + }; + + sessions.set("test-2", { + process: mockProcess, + output: [], + isBackground: true, + startTime: Date.now(), + }); + + cleanupSession("test-2"); + cleanupSession("test-2"); + cleanupSession("test-2"); + + expect(sessions.has("test-2")).toBe(false); + expect(mockProcess.killed).toBe(true); + }); + + it("should mark session as cleaning up immediately", () => { + const mockProcess = { + killed: false, + kill: () => { + mockProcess.killed = true; + }, + stdout: null, + stderr: null, + removeAllListeners: () => {}, + }; + + sessions.set("test-3", { + process: mockProcess, + output: [], + isBackground: true, + startTime: Date.now(), + }); + + const session = sessions.get("test-3")!; + expect(session.isCleaningUp).toBeUndefined(); + + cleanupSession("test-3"); + expect(sessions.has("test-3")).toBe(false); + }); + + it("should clear timeout before deleting session", () => { + let timeoutCleared = false; + const mockTimeout = setTimeout(() => {}, 1000) as NodeJS.Timeout; + + const originalClearTimeout = global.clearTimeout; + global.clearTimeout = ((id: NodeJS.Timeout) => { + if (id === mockTimeout) { + timeoutCleared = true; + } + originalClearTimeout(id); + }) as typeof clearTimeout; + + const mockProcess = { + killed: false, + kill: () => { + mockProcess.killed = true; + }, + stdout: null, + stderr: null, + removeAllListeners: () => {}, + }; + + sessions.set("test-4", { + process: mockProcess, + output: [], + isBackground: true, + startTime: Date.now(), + timeout: mockTimeout, + }); + + cleanupSession("test-4"); + + expect(timeoutCleared).toBe(true); + expect(sessions.has("test-4")).toBe(false); + + global.clearTimeout = originalClearTimeout; + }); + + it("should cleanup all sessions without errors", () => { + for (let i = 0; i < 5; i++) { + const mockProcess = { + killed: false, + kill: () => { + mockProcess.killed = true; + }, + stdout: null, + stderr: null, + removeAllListeners: () => {}, + }; + + sessions.set(`test-${i}`, { + process: mockProcess, + output: [], + isBackground: true, + startTime: Date.now(), + }); + } + + expect(sessions.size).toBe(5); + + for (const sessionId of sessions.keys()) { + cleanupSession(sessionId); + } + + expect(sessions.size).toBe(0); + }); +}); diff --git a/tests/agent/core/circuitBreaker.test.ts b/tests/agent/core/circuitBreaker.test.ts new file mode 100644 index 0000000..27a43ad --- /dev/null +++ b/tests/agent/core/circuitBreaker.test.ts @@ -0,0 +1,389 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { + CircuitBreaker, + CircuitBreakerState, + getCircuitBreaker, + resetAllCircuitBreakers, +} from "@/agent/core/circuitBreaker.js"; + +describe("Circuit Breaker Pattern", () => { + beforeEach(() => { + resetAllCircuitBreakers(); + }); + + describe("Basic Functionality", () => { + it("should start in CLOSED state", () => { + const breaker = new CircuitBreaker("test", { + failureThreshold: 3, + successThreshold: 2, + timeout: 5000, + resetTimeout: 5000, + }); + + expect(breaker.getState()).toBe(CircuitBreakerState.CLOSED); + }); + + it("should allow requests to pass through when CLOSED", async () => { + const breaker = new CircuitBreaker("test", { + failureThreshold: 3, + successThreshold: 2, + timeout: 5000, + resetTimeout: 5000, + }); + + const mockFn = vi.fn().mockResolvedValue("success"); + const result = await breaker.execute(mockFn); + + expect(result).toBe("success"); + expect(mockFn).toHaveBeenCalledTimes(1); + }); + + it("should transition to OPEN after failure threshold is reached", async () => { + const breaker = new CircuitBreaker("test", { + failureThreshold: 3, + successThreshold: 2, + timeout: 5000, + resetTimeout: 5000, + }); + + const mockFn = vi.fn().mockRejectedValue(new Error("Service unavailable")); + + for (let i = 0; i < 3; i++) { + try { + await breaker.execute(mockFn); + } catch (error) {} + } + + expect(breaker.getState()).toBe(CircuitBreakerState.OPEN); + }); + + it("should reject requests immediately when OPEN", async () => { + const breaker = new CircuitBreaker("test", { + failureThreshold: 3, + successThreshold: 2, + timeout: 5000, + resetTimeout: 1000, + }); + + const mockFn = vi.fn().mockRejectedValue(new Error("Service error")); + + for (let i = 0; i < 3; i++) { + try { + await breaker.execute(mockFn); + } catch (error) {} + } + + const stats = breaker.getStats(); + expect(stats.state).toBe(CircuitBreakerState.OPEN); + + await expect(breaker.execute(mockFn)).rejects.toThrow("Circuit breaker is OPEN"); + }); + }); + + describe("State Transitions", () => { + it("should transition from OPEN to HALF_OPEN after reset timeout", async () => { + const breaker = new CircuitBreaker("test", { + failureThreshold: 2, + successThreshold: 2, + timeout: 5000, + resetTimeout: 100, + }); + + const mockFn = vi.fn().mockRejectedValue(new Error("Error")); + + for (let i = 0; i < 2; i++) { + try { + await breaker.execute(mockFn); + } catch (error) {} + } + + expect(breaker.getState()).toBe(CircuitBreakerState.OPEN); + + await new Promise((resolve) => setTimeout(resolve, 150)); + + mockFn.mockResolvedValue("success"); + await breaker.execute(mockFn); + + expect(breaker.getState()).toBe(CircuitBreakerState.HALF_OPEN); + }); + + it("should transition from HALF_OPEN to CLOSED after success threshold", async () => { + const breaker = new CircuitBreaker("test", { + failureThreshold: 2, + successThreshold: 2, + timeout: 5000, + resetTimeout: 100, + }); + + const mockFn = vi.fn().mockRejectedValue(new Error("Error")); + + for (let i = 0; i < 2; i++) { + try { + await breaker.execute(mockFn); + } catch (error) {} + } + + await new Promise((resolve) => setTimeout(resolve, 150)); + + mockFn.mockResolvedValue("success"); + await breaker.execute(mockFn); + expect(breaker.getState()).toBe(CircuitBreakerState.HALF_OPEN); + + await breaker.execute(mockFn); + expect(breaker.getState()).toBe(CircuitBreakerState.CLOSED); + }); + + it("should transition from HALF_OPEN back to OPEN on failure", async () => { + const breaker = new CircuitBreaker("test", { + failureThreshold: 2, + successThreshold: 2, + timeout: 5000, + resetTimeout: 100, + }); + + const mockFn = vi.fn().mockRejectedValue(new Error("Error")); + + for (let i = 0; i < 2; i++) { + try { + await breaker.execute(mockFn); + } catch (error) {} + } + + await new Promise((resolve) => setTimeout(resolve, 150)); + + try { + await breaker.execute(mockFn); + } catch (error) {} + + expect(breaker.getState()).toBe(CircuitBreakerState.OPEN); + }); + }); + + describe("Timeout Handling", () => { + it("should timeout long-running operations", async () => { + const breaker = new CircuitBreaker("test", { + failureThreshold: 3, + successThreshold: 2, + timeout: 100, + resetTimeout: 5000, + }); + + const slowFn = vi + .fn() + .mockImplementation( + () => new Promise((resolve) => setTimeout(() => resolve("done"), 200)), + ); + + await expect(breaker.execute(slowFn)).rejects.toThrow("Circuit breaker timeout"); + }); + + it("should count timeouts as failures", async () => { + const breaker = new CircuitBreaker("test", { + failureThreshold: 2, + successThreshold: 2, + timeout: 50, + resetTimeout: 5000, + }); + + const slowFn = vi + .fn() + .mockImplementation( + () => new Promise((resolve) => setTimeout(() => resolve("done"), 100)), + ); + + for (let i = 0; i < 2; i++) { + try { + await breaker.execute(slowFn); + } catch (error) {} + } + + expect(breaker.getState()).toBe(CircuitBreakerState.OPEN); + }); + }); + + describe("Statistics and Monitoring", () => { + it("should track failure count", async () => { + const breaker = new CircuitBreaker("test", { + failureThreshold: 5, + successThreshold: 2, + timeout: 5000, + resetTimeout: 5000, + }); + + const mockFn = vi.fn().mockRejectedValue(new Error("Error")); + + for (let i = 0; i < 3; i++) { + try { + await breaker.execute(mockFn); + } catch (error) {} + } + + const stats = breaker.getStats(); + expect(stats.failureCount).toBe(3); + }); + + it("should reset failure count on success", async () => { + const breaker = new CircuitBreaker("test", { + failureThreshold: 5, + successThreshold: 2, + timeout: 5000, + resetTimeout: 5000, + }); + + const mockFn = vi + .fn() + .mockRejectedValueOnce(new Error("Error")) + .mockRejectedValueOnce(new Error("Error")) + .mockResolvedValue("success"); + + try { + await breaker.execute(mockFn); + } catch (error) {} + try { + await breaker.execute(mockFn); + } catch (error) {} + + await breaker.execute(mockFn); + + const stats = breaker.getStats(); + expect(stats.failureCount).toBe(0); + }); + + it("should track last failure time", async () => { + const breaker = new CircuitBreaker("test", { + failureThreshold: 5, + successThreshold: 2, + timeout: 5000, + resetTimeout: 5000, + }); + + const beforeTime = Date.now(); + const mockFn = vi.fn().mockRejectedValue(new Error("Error")); + + try { + await breaker.execute(mockFn); + } catch (error) {} + + const stats = breaker.getStats(); + expect(stats.lastFailureTime).toBeGreaterThanOrEqual(beforeTime); + }); + }); + + describe("Global Circuit Breaker Management", () => { + it("should create and retrieve circuit breakers by name", () => { + const breaker1 = getCircuitBreaker("service-1"); + const breaker2 = getCircuitBreaker("service-1"); + + expect(breaker1).toBe(breaker2); + }); + + it("should create different breakers for different names", () => { + const breaker1 = getCircuitBreaker("service-1"); + const breaker2 = getCircuitBreaker("service-2"); + + expect(breaker1).not.toBe(breaker2); + }); + + it("should apply custom config when creating breaker", async () => { + const breaker = getCircuitBreaker("custom", { + failureThreshold: 10, + successThreshold: 5, + }); + + const mockFn = vi.fn().mockRejectedValue(new Error("Error")); + + for (let i = 0; i < 9; i++) { + try { + await breaker.execute(mockFn); + } catch (error) {} + } + + expect(breaker.getState()).toBe(CircuitBreakerState.CLOSED); + }); + + it("should reset all circuit breakers", async () => { + const breaker1 = getCircuitBreaker("test-1"); + const breaker2 = getCircuitBreaker("test-2"); + + const mockFn = vi.fn().mockRejectedValue(new Error("Error")); + + for (let i = 0; i < 5; i++) { + try { + await breaker1.execute(mockFn); + } catch (error) {} + try { + await breaker2.execute(mockFn); + } catch (error) {} + } + + resetAllCircuitBreakers(); + + expect(breaker1.getState()).toBe(CircuitBreakerState.CLOSED); + expect(breaker2.getState()).toBe(CircuitBreakerState.CLOSED); + }); + }); + + describe("Real-world Scenarios", () => { + it("should handle intermittent failures gracefully", async () => { + const breaker = new CircuitBreaker("test", { + failureThreshold: 3, + successThreshold: 2, + timeout: 5000, + resetTimeout: 100, + }); + + let callCount = 0; + const intermittentFn = vi.fn().mockImplementation(() => { + callCount++; + if (callCount <= 3) { + return Promise.reject(new Error("Temporary failure")); + } + return Promise.resolve("success"); + }); + + for (let i = 0; i < 3; i++) { + try { + await breaker.execute(intermittentFn); + } catch (error) {} + } + + expect(breaker.getState()).toBe(CircuitBreakerState.OPEN); + + await new Promise((resolve) => setTimeout(resolve, 150)); + + await breaker.execute(intermittentFn); + await breaker.execute(intermittentFn); + + expect(breaker.getState()).toBe(CircuitBreakerState.CLOSED); + }); + + it("should protect against cascading failures", async () => { + const breaker = new CircuitBreaker("downstream", { + failureThreshold: 3, + successThreshold: 2, + timeout: 5000, + resetTimeout: 1000, + }); + + const failingService = vi.fn().mockRejectedValue(new Error("Service down")); + + for (let i = 0; i < 3; i++) { + try { + await breaker.execute(failingService); + } catch (error) {} + } + + let rejectedCount = 0; + for (let i = 0; i < 10; i++) { + try { + await breaker.execute(failingService); + } catch (error) { + rejectedCount++; + } + } + + expect(rejectedCount).toBe(10); + expect(failingService).toHaveBeenCalledTimes(3); + }); + }); +}); diff --git a/tests/agent/llm/circuitBreakerIntegration.test.ts b/tests/agent/llm/circuitBreakerIntegration.test.ts new file mode 100644 index 0000000..6e2b2f7 --- /dev/null +++ b/tests/agent/llm/circuitBreakerIntegration.test.ts @@ -0,0 +1,196 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { + CircuitBreakerState, + getCircuitBreaker, + resetAllCircuitBreakers, +} from "@/agent/core/circuitBreaker.js"; + +describe("Circuit Breaker - LLM Provider Integration", () => { + beforeEach(() => { + resetAllCircuitBreakers(); + }); + + it("should protect LLM provider calls with circuit breaker", async () => { + const breaker = getCircuitBreaker("llm-openai", { + failureThreshold: 3, + successThreshold: 2, + timeout: 5000, + resetTimeout: 1000, + }); + + const mockLLMCall = vi.fn().mockRejectedValue(new Error("API Error 500")); + + for (let i = 0; i < 3; i++) { + try { + await breaker.execute(mockLLMCall); + } catch (error) {} + } + + expect(breaker.getState()).toBe(CircuitBreakerState.OPEN); + expect(mockLLMCall).toHaveBeenCalledTimes(3); + + await expect(breaker.execute(mockLLMCall)).rejects.toThrow("Circuit breaker is OPEN"); + expect(mockLLMCall).toHaveBeenCalledTimes(3); + }); + + it("should allow recovery after provider becomes available", async () => { + const breaker = getCircuitBreaker("llm-anthropic", { + failureThreshold: 2, + successThreshold: 2, + timeout: 5000, + resetTimeout: 100, + }); + + const mockLLMCall = vi + .fn() + .mockRejectedValueOnce(new Error("503 Service Unavailable")) + .mockRejectedValueOnce(new Error("503 Service Unavailable")) + .mockResolvedValue({ response: "Success" }); + + for (let i = 0; i < 2; i++) { + try { + await breaker.execute(mockLLMCall); + } catch (error) {} + } + + expect(breaker.getState()).toBe(CircuitBreakerState.OPEN); + + await new Promise((resolve) => setTimeout(resolve, 150)); + + const result1 = await breaker.execute(mockLLMCall); + expect(breaker.getState()).toBe(CircuitBreakerState.HALF_OPEN); + + const result2 = await breaker.execute(mockLLMCall); + expect(breaker.getState()).toBe(CircuitBreakerState.CLOSED); + expect(result2).toEqual({ response: "Success" }); + }); + + it("should handle rate limit errors appropriately", async () => { + const breaker = getCircuitBreaker("llm-google", { + failureThreshold: 3, + successThreshold: 2, + timeout: 5000, + resetTimeout: 2000, + }); + + const rateLimitError = new Error("Rate limit exceeded"); + (rateLimitError as any).status = 429; + + const mockLLMCall = vi.fn().mockRejectedValue(rateLimitError); + + for (let i = 0; i < 3; i++) { + try { + await breaker.execute(mockLLMCall); + } catch (error) {} + } + + expect(breaker.getState()).toBe(CircuitBreakerState.OPEN); + + const stats = breaker.getStats(); + expect(stats.failureCount).toBe(3); + }); + + it("should provide separate circuit breakers for different providers", async () => { + const openaiBreaker = getCircuitBreaker("llm-openai"); + const anthropicBreaker = getCircuitBreaker("llm-anthropic"); + + expect(openaiBreaker).not.toBe(anthropicBreaker); + + const mockFailure = vi.fn().mockRejectedValue(new Error("Error")); + + for (let i = 0; i < 5; i++) { + try { + await openaiBreaker.execute(mockFailure); + } catch (error) {} + } + + expect(openaiBreaker.getState()).toBe(CircuitBreakerState.OPEN); + expect(anthropicBreaker.getState()).toBe(CircuitBreakerState.CLOSED); + }); + + it("should handle timeouts from slow LLM responses", async () => { + const breaker = getCircuitBreaker("llm-slow", { + failureThreshold: 2, + successThreshold: 2, + timeout: 100, + resetTimeout: 1000, + }); + + const slowLLMCall = vi + .fn() + .mockImplementation( + () => new Promise((resolve) => setTimeout(() => resolve("response"), 200)), + ); + + for (let i = 0; i < 2; i++) { + try { + await breaker.execute(slowLLMCall); + } catch (error) { + expect(error).toBeInstanceOf(Error); + expect((error as Error).message).toContain("timeout"); + } + } + + expect(breaker.getState()).toBe(CircuitBreakerState.OPEN); + }); + + it("should track statistics for monitoring", async () => { + const breaker = getCircuitBreaker("llm-monitored", { + failureThreshold: 5, + successThreshold: 2, + timeout: 5000, + resetTimeout: 1000, + }); + + const mockLLMCall = vi + .fn() + .mockRejectedValueOnce(new Error("Error 1")) + .mockRejectedValueOnce(new Error("Error 2")) + .mockResolvedValueOnce("Success 1") + .mockResolvedValueOnce("Success 2"); + + try { + await breaker.execute(mockLLMCall); + } catch (error) {} + try { + await breaker.execute(mockLLMCall); + } catch (error) {} + + await breaker.execute(mockLLMCall); + await breaker.execute(mockLLMCall); + + const stats = breaker.getStats(); + expect(stats.state).toBe(CircuitBreakerState.CLOSED); + expect(stats.failureCount).toBe(0); + }); + + it("should prevent cascading failures across multiple API calls", async () => { + const breaker = getCircuitBreaker("llm-cascade", { + failureThreshold: 3, + successThreshold: 2, + timeout: 5000, + resetTimeout: 500, + }); + + let apiCallCount = 0; + const cascadingFailure = vi.fn().mockImplementation(() => { + apiCallCount++; + return Promise.reject(new Error("Service degraded")); + }); + + for (let i = 0; i < 3; i++) { + try { + await breaker.execute(cascadingFailure); + } catch (error) {} + } + + for (let i = 0; i < 10; i++) { + try { + await breaker.execute(cascadingFailure); + } catch (error) {} + } + + expect(apiCallCount).toBe(3); + expect(breaker.getState()).toBe(CircuitBreakerState.OPEN); + }); +}); diff --git a/tests/agent/performance/historyMemoryProfiling.test.ts b/tests/agent/performance/historyMemoryProfiling.test.ts new file mode 100644 index 0000000..9df2a43 --- /dev/null +++ b/tests/agent/performance/historyMemoryProfiling.test.ts @@ -0,0 +1,100 @@ +import { describe, expect, it } from "vitest"; +import { HistoryItem } from "@/agent/context/history.js"; + +describe("Memory Profiling - History and Context", () => { + const getMemoryUsage = () => process.memoryUsage().heapUsed; + + const forceGC = () => { + if (global.gc) { + global.gc(); + } + }; + + it("should not leak memory with large conversation history", () => { + const history: HistoryItem[] = []; + + forceGC(); + const startMemory = getMemoryUsage(); + + for (let i = 0; i < 1000; i++) { + history.push({ + id: `msg-${i}`, + role: "user", + content: "This is a test message that simulates a real conversation. ".repeat(10), + }); + history.push({ + id: `resp-${i}`, + role: "assistant", + content: "This is a response from the assistant with detailed information. ".repeat( + 20, + ), + }); + } + + const midMemory = getMemoryUsage(); + const memoryUsedDuringHistory = midMemory - startMemory; + + history.length = 0; + + forceGC(); + const endMemory = getMemoryUsage(); + const memoryAfterClear = endMemory - startMemory; + + expect(history.length).toBe(0); + expect(memoryUsedDuringHistory).toBeGreaterThan(0); + expect(endMemory).toBeLessThan(midMemory + 500000); + }); + + it("should handle context window trimming without memory leaks", () => { + const largeHistory: HistoryItem[] = []; + + for (let i = 0; i < 500; i++) { + largeHistory.push({ + id: `item-${i}`, + role: "user", + content: "x".repeat(1000), + }); + } + + forceGC(); + const beforeTrim = getMemoryUsage(); + + const trimmedHistory = largeHistory.slice(-100); + + forceGC(); + const afterTrim = getMemoryUsage(); + + expect(trimmedHistory.length).toBe(100); + + const memoryIncrease = afterTrim - beforeTrim; + expect(memoryIncrease).toBeLessThan(5 * 1024 * 1024); + }); + + it("should not retain large tool outputs after processing", () => { + const toolResults: HistoryItem[] = []; + + forceGC(); + const startMemory = getMemoryUsage(); + + for (let i = 0; i < 50; i++) { + toolResults.push({ + id: `tool-${i}`, + role: "tool-result", + toolCallId: `call-${i}`, + toolName: "test_tool", + output: "Large output data ".repeat(1000), + }); + } + + const peakMemory = getMemoryUsage(); + + toolResults.length = 0; + + forceGC(); + const afterClear = getMemoryUsage(); + + expect(toolResults.length).toBe(0); + expect(peakMemory).toBeGreaterThan(startMemory); + expect(afterClear).toBeLessThan(peakMemory + 1024 * 1024); + }); +}); diff --git a/tests/agent/performance/memoryProfiling.test.ts b/tests/agent/performance/memoryProfiling.test.ts new file mode 100644 index 0000000..03790bc --- /dev/null +++ b/tests/agent/performance/memoryProfiling.test.ts @@ -0,0 +1,208 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { fileTracker } from "@/agent/core/fileTracker.js"; +import fs from "fs/promises"; +import path from "path"; +import os from "os"; + +describe("Memory Profiling Tests", () => { + let testDir: string; + + beforeEach(async () => { + testDir = path.join(os.tmpdir(), `binharic-memory-test-${Date.now()}`); + await fs.mkdir(testDir, { recursive: true }); + }); + + afterEach(async () => { + try { + await fs.rm(testDir, { recursive: true, force: true }); + } catch (error) {} + fileTracker.clearTracking(); + }); + + const getMemoryUsage = () => { + const usage = process.memoryUsage(); + return { + heapUsed: usage.heapUsed, + heapTotal: usage.heapTotal, + external: usage.external, + rss: usage.rss, + }; + }; + + const forceGC = () => { + if (global.gc) { + global.gc(); + } + }; + + it("should not leak memory when reading multiple files", async () => { + const fileCount = 50; + const fileSize = 10 * 1024; + + for (let i = 0; i < fileCount; i++) { + const filePath = path.join(testDir, `file-${i}.txt`); + await fs.writeFile(filePath, "x".repeat(fileSize)); + } + + forceGC(); + const beforeMemory = getMemoryUsage(); + + for (let i = 0; i < fileCount; i++) { + const filePath = path.join(testDir, `file-${i}.txt`); + await fileTracker.read(filePath); + } + + forceGC(); + const afterMemory = getMemoryUsage(); + + const memoryIncrease = afterMemory.heapUsed - beforeMemory.heapUsed; + const expectedMaxIncrease = fileSize * fileCount * 2; + + expect(memoryIncrease).toBeLessThan(expectedMaxIncrease); + }); + + it("should not leak memory when writing multiple files", async () => { + const fileCount = 50; + const fileSize = 10 * 1024; + const content = "x".repeat(fileSize); + + forceGC(); + const beforeMemory = getMemoryUsage(); + + for (let i = 0; i < fileCount; i++) { + const filePath = path.join(testDir, `write-${i}.txt`); + await fileTracker.write(filePath, content); + } + + forceGC(); + const afterMemory = getMemoryUsage(); + + const memoryIncrease = afterMemory.heapUsed - beforeMemory.heapUsed; + const maxAllowedIncrease = 10 * 1024 * 1024; + + expect(memoryIncrease).toBeLessThan(maxAllowedIncrease); + }); + + it("should release memory when file tracker is cleared", async () => { + const fileCount = 100; + const fileSize = 5 * 1024; + + for (let i = 0; i < fileCount; i++) { + const filePath = path.join(testDir, `tracked-${i}.txt`); + await fs.writeFile(filePath, "x".repeat(fileSize)); + await fileTracker.read(filePath); + } + + forceGC(); + const beforeClear = getMemoryUsage(); + + fileTracker.clearTracking(); + + forceGC(); + await new Promise((resolve) => setTimeout(resolve, 100)); + forceGC(); + + const afterClear = getMemoryUsage(); + + expect(fileTracker.getTrackedFileCount()).toBe(0); + }); + + it("should enforce file tracking limit without excessive memory growth", async () => { + const maxFiles = 1000; + const fileSize = 5 * 1024; + + forceGC(); + const startMemory = getMemoryUsage(); + + for (let i = 0; i < maxFiles + 100; i++) { + const filePath = path.join(testDir, `limit-${i}.txt`); + await fs.writeFile(filePath, "x".repeat(fileSize)); + await fileTracker.read(filePath); + } + + forceGC(); + const endMemory = getMemoryUsage(); + + const trackedCount = fileTracker.getTrackedFileCount(); + expect(trackedCount).toBeLessThanOrEqual(1000); + + const memoryIncrease = endMemory.heapUsed - startMemory.heapUsed; + const maxExpectedIncrease = fileSize * 1000 * 3; + + expect(memoryIncrease).toBeLessThan(maxExpectedIncrease); + }); + + it("should handle large file operations without memory spikes", async () => { + const largeFileSize = 1024 * 1024; + const filePath = path.join(testDir, "large-file.txt"); + const content = "x".repeat(largeFileSize); + + await fs.writeFile(filePath, content); + + forceGC(); + const beforeRead = getMemoryUsage(); + + const readContent = await fileTracker.read(filePath); + + const duringRead = getMemoryUsage(); + + expect(readContent.length).toBe(largeFileSize); + + const memoryIncrease = duringRead.heapUsed - beforeRead.heapUsed; + const maxExpectedIncrease = largeFileSize * 5; + + expect(memoryIncrease).toBeLessThan(maxExpectedIncrease); + }); + + it("should track memory usage over repeated operations", async () => { + const iterations = 10; + const memorySnapshots: number[] = []; + + for (let i = 0; i < iterations; i++) { + const filePath = path.join(testDir, `iteration-${i}.txt`); + await fs.writeFile(filePath, "test content"); + await fileTracker.read(filePath); + + forceGC(); + const snapshot = getMemoryUsage(); + memorySnapshots.push(snapshot.heapUsed); + } + + const memoryGrowthPerIteration = memorySnapshots.map((snapshot, index) => { + if (index === 0) return 0; + return snapshot - memorySnapshots[index - 1]; + }); + + const avgGrowth = + memoryGrowthPerIteration.slice(1).reduce((a, b) => a + b, 0) / (iterations - 1); + + const maxAcceptableGrowth = 100 * 1024; + expect(avgGrowth).toBeLessThan(maxAcceptableGrowth); + }); + + it("should not retain references after file operations complete", async () => { + const filePath = path.join(testDir, "reference-test.txt"); + const content = "x".repeat(100 * 1024); + + await fs.writeFile(filePath, content); + + forceGC(); + const beforeOp = getMemoryUsage(); + + { + const data = await fileTracker.read(filePath); + expect(data.length).toBe(content.length); + } + + forceGC(); + await new Promise((resolve) => setTimeout(resolve, 100)); + forceGC(); + + const afterOp = getMemoryUsage(); + + const memoryRetained = afterOp.heapUsed - beforeOp.heapUsed; + const maxAcceptableRetention = content.length * 2; + + expect(memoryRetained).toBeLessThan(maxAcceptableRetention); + }); +}); diff --git a/tests/agent/tools/definitions/fetch.test.ts b/tests/agent/tools/definitions/fetch.test.ts index e90768c..4a6a6e1 100644 --- a/tests/agent/tools/definitions/fetch.test.ts +++ b/tests/agent/tools/definitions/fetch.test.ts @@ -31,7 +31,12 @@ describe("fetch tool", () => { const result = await fetchTool.execute!({ url: "https://example.com" }, {} as any); expect(result).toBe("Hello"); - expect(mockFetch).toHaveBeenCalledWith("https://example.com"); + expect(mockFetch).toHaveBeenCalledWith( + "https://example.com", + expect.objectContaining({ + signal: expect.any(AbortSignal), + }), + ); }); it("should return raw HTML when stripMarkup is false", async () => {