sjnims · sjnims · Jan 18, 2026 · Jan 18, 2026
@@ -62,6 +62,52 @@ cc-plugin-eval resume -r <run-id>        # Resume interrupted run
 cc-plugin-eval run -p ./plugin --fast    # Re-run failed scenarios only
 ```
 
+## Public API
+
+This package exposes a programmatic API through the `exports` field in `package.json`.
+
+### Entry Points
+
+| Subpath                | Description                                 |
+| ---------------------- | ------------------------------------------- |
+| `cc-plugin-eval`       | Main entry: stage runners + config loader   |
+| `cc-plugin-eval/types` | Type definitions (types-only, zero runtime) |
+
+### Exported Functions
+
+These are exported from the main entry (`cc-plugin-eval`):
+
+| Export                    | Description                                              |
+| ------------------------- | -------------------------------------------------------- |
+| `runAnalysis`             | Stage 1: Parse plugin structure and extract triggers     |
+| `runGeneration`           | Stage 2: Generate test scenarios for components          |
+| `runExecution`            | Stage 3: Execute scenarios and capture tool interactions |
+| `runEvaluation`           | Stage 4: Evaluate results and calculate metrics          |
+| `loadConfigWithOverrides` | Load configuration with CLI-style overrides              |
+| `consoleProgress`         | Default progress reporter for execution/evaluation       |
+| `CLIOptions` (type)       | Type for CLI override options                            |
+
+### Usage Example
+
+```typescript
+import {
+  runAnalysis,
+  runGeneration,
+  loadConfigWithOverrides,
+} from "cc-plugin-eval";
+import type { EvalConfig, TestScenario } from "cc-plugin-eval/types";
+
+const config = loadConfigWithOverrides("config.yaml", {
+  plugin: "./my-plugin",
+});
+const analysis = await runAnalysis(config);
+const { scenarios } = await runGeneration(analysis, config);
+```
+
+### Internal vs Public
+
+Declarations in `src/index.ts` marked with `@internal` JSDoc (functions, types, and constants) are CLI-only helpers and are not intended for external use. These include resume handlers, option extractors, and output formatters.
+
 ## Architecture
 
 ### 4-Stage Pipeline

@@ -167,6 +167,90 @@ cc-plugin-eval report -r <run-id> --output junit-xml
 | `--reps <n>`          | Repetitions per scenario                          |
 | `--output <format>`   | Output format: `json`, `yaml`, `junit-xml`, `tap` |
 
+## Programmatic Usage
+
+In addition to the CLI, cc-plugin-eval exports a programmatic API for integration into build systems, test frameworks, and custom tooling.
+
+### Installation
+
+```bash
+npm install cc-plugin-eval
+```
+
+### Basic Usage
+
+```typescript
+import {
+  runAnalysis,
+  runGeneration,
+  runExecution,
+  runEvaluation,
+  loadConfigWithOverrides,
+  consoleProgress,
+} from "cc-plugin-eval";
+import type {
+  EvalConfig,
+  AnalysisOutput,
+  TestScenario,
+} from "cc-plugin-eval/types";
+
+// Load configuration
+const config = loadConfigWithOverrides("config.yaml", {
+  plugin: "./path/to/plugin",
+});
+
+// Stage 1: Analyze plugin structure
+const analysis = await runAnalysis(config);
+
+// Stage 2: Generate test scenarios
+const generation = await runGeneration(analysis, config);
+
+// Stage 3: Execute scenarios (captures tool interactions)
+const execution = await runExecution(
+  analysis,
+  generation.scenarios,
+  config,
+  consoleProgress, // or provide custom progress callbacks
+);
+
+// Stage 4: Evaluate results
+const evaluation = await runEvaluation(
+  analysis.plugin_name,
+  generation.scenarios,
+  execution.results,
+  config,
+  consoleProgress,
+);
+
+console.log(`Accuracy: ${(evaluation.metrics.accuracy * 100).toFixed(1)}%`);
+```
+
+### Public API Exports
+
+| Export                    | Description                                              |
+| ------------------------- | -------------------------------------------------------- |
+| `runAnalysis`             | Stage 1: Parse plugin structure and extract triggers     |
+| `runGeneration`           | Stage 2: Generate test scenarios for components          |
+| `runExecution`            | Stage 3: Execute scenarios and capture tool interactions |
+| `runEvaluation`           | Stage 4: Evaluate results and calculate metrics          |
+| `loadConfigWithOverrides` | Load configuration with CLI-style overrides              |
+| `consoleProgress`         | Default progress reporter (console output)               |
+| `CLIOptions` (type)       | Type for CLI override options                            |
+
+### Types
+
+Import types via the `cc-plugin-eval/types` subpath:
+
+```typescript
+import type {
+  EvalConfig,
+  AnalysisOutput,
+  TestScenario,
+  ExecutionResult,
+  EvaluationResult,
+  EvalMetrics,
+} from "cc-plugin-eval/types";
+```
+
 ## Configuration
 
 Configuration is managed via `config.yaml`. Here's a quick reference:

@@ -5,6 +5,16 @@
   "type": "module",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "import": "./dist/index.js"
+    },
+    "./types": {
+      "types": "./dist/types/index.d.ts",
+      "import": "./dist/types/index.js"
+    }
+  },
   "bin": {
     "cc-plugin-eval": "./dist/index.js"
   },

@@ -7,6 +7,11 @@
  */
 import "./env.js";
 
+// =============================================================================
+// CLI Implementation
+// =============================================================================
+// Local imports for CLI use (the stage runners and config loader imported here are also re-exported below as the public API)
+
 import { existsSync, readdirSync } from "node:fs";
 import { createRequire } from "node:module";
 import { join } from "node:path";
@@ -15,6 +20,7 @@ import chalk from "chalk";
 import { Command } from "commander";
 import YAML from "yaml";
 
+// Import stage runners locally for CLI implementation
 import { loadConfigWithOverrides, type CLIOptions } from "./config/index.js";
 import { runAnalysis } from "./stages/1-analysis/index.js";
 import {
@@ -49,6 +55,27 @@ import {
 
 import type { EvalMetrics } from "./types/index.js";
 
+// =============================================================================
+// Public API Exports
+// =============================================================================
+// These exports form the public programmatic API of cc-plugin-eval.
+// Import them via: import { runAnalysis } from 'cc-plugin-eval';
+
+/** Stage 1: Analyze plugin structure and extract component triggers */
+export { runAnalysis } from "./stages/1-analysis/index.js";
+
+/** Stage 2: Generate test scenarios for components */
+export { runGeneration } from "./stages/2-generation/index.js";
+
+/** Stage 3: Execute scenarios and capture tool interactions */
+export { runExecution, consoleProgress } from "./stages/3-execution/index.js";
+
+/** Stage 4: Evaluate results and calculate metrics */
+export { runEvaluation } from "./stages/4-evaluation/index.js";
+
+/** Configuration loading with CLI overrides */
+export { loadConfigWithOverrides, type CLIOptions } from "./config/index.js";
+
 // =============================================================================
 // Package Version
 // =============================================================================
@@ -63,6 +90,7 @@ const packageJson = require("../package.json") as { version: string };
 /**
  * Find plugin name by searching results directories for a run ID.
  *
+ * @internal CLI helper - not part of public API
  * @param runId - Run ID to search for
  * @returns Plugin name if found, null otherwise
  */
@@ -85,6 +113,7 @@ function findPluginByRunId(runId: string): string | null {
 /**
  * Find and load pipeline state.
  *
+ * @internal CLI helper - not part of public API
  * @param pluginName - Optional plugin name hint
  * @param runId - Run ID to load
  * @returns Loaded state or null if not found
@@ -116,6 +145,8 @@ function findAndLoadState(
 
 /**
  * Resume from analysis stage (run full pipeline).
+ *
+ * @internal CLI helper - not part of public API
  */
 async function resumeFromAnalysis(
   initialState: NonNullable<ReturnType<typeof loadState>>,
@@ -161,6 +192,8 @@ async function resumeFromAnalysis(
 
 /**
  * Resume from generation stage.
+ *
+ * @internal CLI helper - not part of public API
  */
 async function resumeFromGeneration(
   initialState: NonNullable<ReturnType<typeof loadState>>,
@@ -210,6 +243,8 @@ async function resumeFromGeneration(
 
 /**
  * Resume from execution stage.
+ *
+ * @internal CLI helper - not part of public API
  */
 async function resumeFromExecution(
   initialState: NonNullable<ReturnType<typeof loadState>>,
@@ -255,6 +290,8 @@ async function resumeFromExecution(
 
 /**
  * Resume from evaluation stage.
+ *
+ * @internal CLI helper - not part of public API
  */
 async function resumeFromEvaluation(
   initialState: NonNullable<ReturnType<typeof loadState>>,
@@ -296,6 +333,8 @@ async function resumeFromEvaluation(
 
 /**
  * Stage handler type for resume operations.
+ *
+ * @internal CLI type - not part of public API
  */
 type ResumeHandler = (
   state: NonNullable<ReturnType<typeof loadState>>,
@@ -305,6 +344,8 @@ type ResumeHandler = (
 
 /**
  * Map of stages to their resume handlers.
+ *
+ * @internal CLI constant - not part of public API
  */
 const resumeHandlers: Record<PipelineStage, ResumeHandler> = {
   pending: resumeFromAnalysis,
@@ -335,6 +376,8 @@ program
 
 /**
  * Extract CLI options from commander options object.
+ *
+ * @internal CLI helper - not part of public API
  */
 function extractCLIOptions(
   options: Record<string, unknown>,
@@ -392,6 +435,8 @@ function extractCLIOptions(
 
 /**
  * Valid pipeline stages for resume command.
+ *
+ * @internal CLI constant - not part of public API
  */
 const VALID_STAGES = [
   "analysis",
@@ -402,6 +447,8 @@ const VALID_STAGES = [
 
 /**
  * Extract and validate resume command options.
+ *
+ * @internal CLI helper - not part of public API
  */
 function extractResumeOptions(options: Record<string, unknown>): {
   pluginName: string | undefined;
@@ -441,6 +488,8 @@ function extractResumeOptions(options: Record<string, unknown>): {
 
 /**
  * Extract and validate report command options.
+ *
+ * @internal CLI helper - not part of public API
  */
 function extractReportOptions(options: Record<string, unknown>): {
   pluginName: string | undefined;
@@ -463,6 +512,8 @@ function extractReportOptions(options: Record<string, unknown>): {
 
 /**
  * Evaluation file structure for report command.
+ *
+ * @internal CLI type - not part of public API
  */
 interface EvaluationFile {
   plugin_name: string;
@@ -473,6 +524,8 @@ interface EvaluationFile {
 /**
  * Load and validate evaluation file.
  * Returns null if validation fails.
+ *
+ * @internal CLI helper - not part of public API
  */
 function loadEvaluationFile(evaluationPath: string): EvaluationFile | null {
   const rawEvaluation = readJson(evaluationPath);
@@ -1009,6 +1062,8 @@ program
 
 /**
  * Output CLI summary of evaluation results.
+ *
+ * @internal CLI helper - not part of public API
  */
 function outputCLISummary(evaluation: {
   plugin_name: string;
@@ -1047,6 +1102,8 @@ function outputCLISummary(evaluation: {
 
 /**
  * Output JUnit XML format.
+ *
+ * @internal CLI helper - not part of public API
  */
 function outputJUnitXML(
   pluginName: string,
@@ -1091,6 +1148,8 @@ function outputJUnitXML(
 
 /**
  * Output TAP format.
+ *
+ * @internal CLI helper - not part of public API
  */
 function outputTAP(results: Record<string, unknown>[]): void {
   console.log(`TAP version 14`);
@@ -1126,6 +1185,8 @@ function outputTAP(results: Record<string, unknown>[]): void {
 
 /**
  * Output final summary of evaluation.
+ *
+ * @internal CLI helper - not part of public API
  */
 function outputFinalSummary(resultsDir: string, metrics: EvalMetrics): void {
   const m = metrics;