diff --git a/README.md b/README.md index 7115a4659..9f3bc74a2 100644 --- a/README.md +++ b/README.md @@ -141,6 +141,7 @@ A multi-agent pipeline scans your project, extracts every file, function, class, On the **first run** in a project — when you don't pass `--language` and no language is stored yet — `/understand` detects the language you're conversing in. If it isn't English, it asks you to confirm (or override) before generating; English conversations are unaffected. Your choice is saved to `.understand-anything/config.json` and reused on every later run. The `--language` parameter affects: + - Node summaries and descriptions in the knowledge graph - Dashboard UI labels, buttons, and tooltips - Guided tour explanations @@ -188,6 +189,9 @@ An interactive web dashboard opens with your codebase visualized as a graph — ## 🌐 Multi-Platform Installation +> **Prerequisites:** [Node.js ≥ 22](https://nodejs.org) and [pnpm ≥ 10](https://pnpm.io/installation). +> The installers below will check for pnpm and install it automatically if it's missing. + Understand-Anything works across multiple AI coding platforms. ### Claude Code (Native) @@ -202,6 +206,7 @@ Understand-Anything works across multiple AI coding platforms. 
**macOS / Linux:** + ```bash curl -fsSL https://raw.githubusercontent.com/Egonex-AI/Understand-Anything/main/install.sh | bash # or skip the prompt by passing the platform: @@ -209,6 +214,7 @@ curl -fsSL https://raw.githubusercontent.com/Egonex-AI/Understand-Anything/main/ ``` **Windows (PowerShell):** + ```powershell iwr -useb https://raw.githubusercontent.com/Egonex-AI/Understand-Anything/main/install.ps1 | iex ``` @@ -251,26 +257,25 @@ For personal skills (available across all projects), run the `install.sh` above ### Platform Compatibility -| Platform | Status | Install Method | -|----------|--------|----------------| -| Claude Code | ✅ Native | Plugin marketplace | -| Cursor | ✅ Supported | Auto-discovery | -| VS Code + GitHub Copilot | ✅ Supported | Auto-discovery | -| Copilot CLI | ✅ Supported | Plugin install | -| Codex | ✅ Supported | `install.sh codex` | -| OpenCode | ✅ Supported | `install.sh opencode` | -| OpenClaw | ✅ Supported | `install.sh openclaw` | -| Antigravity | ✅ Supported | `install.sh antigravity` | -| Gemini CLI | ✅ Supported | `install.sh gemini` | -| Pi Agent | ✅ Supported | `install.sh pi` | -| Vibe CLI | ✅ Supported | `install.sh vibe` | -| Hermes | ✅ Supported | `install.sh hermes` | -| Cline | ✅ Supported | `install.sh cline` | -| KIMI CLI | ✅ Supported | `install.sh kimi` | -| Trae | ✅ Supported | `install.sh trae` | -| Nanobot | ✅ Supported | `install.sh nanobot` | -| Kiro CLI / IDE | ✅ Supported | `install.sh kiro` | - +| Platform | Status | Install Method | +| ------------------------ | ------------ | ------------------------ | +| Claude Code | ✅ Native | Plugin marketplace | +| Cursor | ✅ Supported | Auto-discovery | +| VS Code + GitHub Copilot | ✅ Supported | Auto-discovery | +| Copilot CLI | ✅ Supported | Plugin install | +| Codex | ✅ Supported | `install.sh codex` | +| OpenCode | ✅ Supported | `install.sh opencode` | +| OpenClaw | ✅ Supported | `install.sh openclaw` | +| Antigravity | ✅ Supported | `install.sh antigravity` | 
+| Gemini CLI | ✅ Supported | `install.sh gemini` | +| Pi Agent | ✅ Supported | `install.sh pi` | +| Vibe CLI | ✅ Supported | `install.sh vibe` | +| Hermes | ✅ Supported | `install.sh hermes` | +| Cline | ✅ Supported | `install.sh cline` | +| KIMI CLI | ✅ Supported | `install.sh kimi` | +| Trae | ✅ Supported | `install.sh trae` | +| Nanobot | ✅ Supported | `install.sh nanobot` | +| Kiro CLI / IDE | ✅ Supported | `install.sh kiro` | --- @@ -280,7 +285,7 @@ The graph is just JSON — **commit it once, and teammates skip the pipeline**. > **Example:** [GoogleCloudPlatform/microservices-demo](https://github.com/GoogleCloudPlatform/microservices-demo) — Go / Java / Python / Node reference with a committed graph. -**What to commit:** everything in `.understand-anything/` *except* `intermediate/` and `diff-overlay.json` (those are local scratch). +**What to commit:** everything in `.understand-anything/` _except_ `intermediate/` and `diff-overlay.json` (those are local scratch). ```gitignore .understand-anything/intermediate/ @@ -308,21 +313,21 @@ Static analysis and LLMs do what each does best: - **Tree-sitter (deterministic)** — parses source into a concrete syntax tree and extracts structural facts: imports, exports, function/class definitions, call sites, inheritance. Pre-resolved into an `importMap` during the scan phase and passed to file-analyzers so they don't re-derive imports from source. Same input → same output, every run. Also powers fingerprint-based change detection for incremental updates. - **LLM (semantic)** — reads the parsed structure alongside the original source to produce what parsers can't: plain-English summaries, tags, architectural layer assignments, business-domain mapping, guided tours, language concept callouts. -This split is why the graph is reproducible on the structural side (the same code always yields the same edges) while still capturing intent on the semantic side (what a file is *for*, not just what it imports). 
+This split is why the graph is reproducible on the structural side (the same code always yields the same edges) while still capturing intent on the semantic side (what a file is _for_, not just what it imports). ### Multi-Agent Pipeline The `/understand` command orchestrates 5 specialized agents, and `/understand-domain` adds a 6th: -| Agent | Role | -|-------|------| -| `project-scanner` | Discover files, detect languages and frameworks | -| `file-analyzer` | Extract functions, classes, imports; produce graph nodes and edges | -| `architecture-analyzer` | Identify architectural layers | -| `tour-builder` | Generate guided learning tours | -| `graph-reviewer` | Validate graph completeness and referential integrity (runs inline by default; use `--review` for full LLM review) | -| `domain-analyzer` | Extract business domains, flows, and process steps (used by `/understand-domain`) | -| `article-analyzer` | Extract entities, claims, and implicit relationships from wiki articles (used by `/understand-knowledge`) | +| Agent | Role | +| ----------------------- | ------------------------------------------------------------------------------------------------------------------ | +| `project-scanner` | Discover files, detect languages and frameworks | +| `file-analyzer` | Extract functions, classes, imports; produce graph nodes and edges | +| `architecture-analyzer` | Identify architectural layers | +| `tour-builder` | Generate guided learning tours | +| `graph-reviewer` | Validate graph completeness and referential integrity (runs inline by default; use `--review` for full LLM review) | +| `domain-analyzer` | Extract business domains, flows, and process steps (used by `/understand-domain`) | +| `article-analyzer` | Extract entities, claims, and implicit relationships from wiki articles (used by `/understand-knowledge`) | File analyzers run in parallel (up to 5 concurrent, 20-30 files per batch). 
Supports incremental updates — only re-analyzes files that changed since the last run. diff --git a/install.ps1 b/install.ps1 index 5476887b6..bed278f65 100644 --- a/install.ps1 +++ b/install.ps1 @@ -85,6 +85,22 @@ function Prompt-Platform { function Get-SkillsRoot { Join-Path $RepoDir 'understand-anything-plugin\skills' } +function Ensure-Pnpm { + if (Get-Command pnpm -ErrorAction SilentlyContinue) { return } + Write-Host '→ pnpm not found — installing...' + if (Get-Command corepack -ErrorAction SilentlyContinue) { + corepack enable pnpm + } elseif (Get-Command npm -ErrorAction SilentlyContinue) { + npm install -g pnpm + } else { + Write-Error "Node.js (and npm or corepack) is required but not found.`nInstall Node.js >= 22 from https://nodejs.org, then re-run this script." + } + if (-not (Get-Command pnpm -ErrorAction SilentlyContinue)) { + Write-Error "pnpm installation failed. Install it manually: https://pnpm.io/installation" + } + Write-Host '✓ pnpm installed.' +} + function Clone-Or-Update { if (Test-Path (Join-Path $RepoDir '.git')) { Write-Host "→ Updating existing checkout at $RepoDir" @@ -95,6 +111,17 @@ function Clone-Or-Update { if (-not (Test-Path $parent)) { New-Item -ItemType Directory -Path $parent | Out-Null } git clone $RepoUrl $RepoDir } + # If this script is running from a local git checkout, install that same branch. + # Falls back gracefully when run via curl/iwr (no git context). 
+ $scriptBranch = '' + if ($PSScriptRoot -and (Test-Path (Join-Path $PSScriptRoot '.git'))) { + $scriptBranch = git -C $PSScriptRoot branch --show-current 2>$null + } + if ($scriptBranch -and $scriptBranch -ne 'main') { + Write-Host "→ Checking out branch: $scriptBranch" + git -C $RepoDir fetch origin + git -C $RepoDir checkout $scriptBranch + } } function Get-SkillNames { @@ -200,6 +227,7 @@ function ConvertTo-FileUri([string]$Path) { function Cmd-Install([string]$Id) { $cfg = Resolve-Platform $Id + Ensure-Pnpm Clone-Or-Update Write-Host "→ Linking skills for $Id ($($cfg.Style) → $($cfg.Target))" Link-Skills $cfg.Target $cfg.Style diff --git a/install.sh b/install.sh index 1c3423119..02098c14f 100755 --- a/install.sh +++ b/install.sh @@ -90,6 +90,27 @@ prompt_platform() { printf '%s\n' "${ids[$((choice-1))]}" } +ensure_pnpm() { + if command -v pnpm >/dev/null 2>&1; then + return 0 + fi + printf -- '→ pnpm not found — installing...\n' + if command -v corepack >/dev/null 2>&1; then + corepack enable pnpm || true + elif command -v npm >/dev/null 2>&1; then + npm install -g pnpm || true + else + printf 'Error: Node.js (and npm or corepack) is required but not found.\n' >&2 + printf 'Install Node.js ≥ 22 from https://nodejs.org, then re-run this script.\n' >&2 + exit 1 + fi + if ! command -v pnpm >/dev/null 2>&1; then + printf 'Error: pnpm installation failed. Install it manually: https://pnpm.io/installation\n' >&2 + exit 1 + fi + printf '✓ pnpm installed.\n' +} + clone_or_update() { if [[ -d "$REPO_DIR/.git" ]]; then printf -- '→ Updating existing checkout at %s\n' "$REPO_DIR" @@ -99,6 +120,18 @@ clone_or_update() { mkdir -p "$(dirname "$REPO_DIR")" git clone "$REPO_URL" "$REPO_DIR" fi + # If this script is running from a local git checkout, install that same branch. + # Falls back gracefully when piped from curl (no git context).
+ local script_dir script_branch="" + script_dir="$([[ -f "${BASH_SOURCE[0]:-}" ]] && cd "$(dirname "${BASH_SOURCE[0]}")" 2>/dev/null && pwd || true)" + if [[ -n "$script_dir" ]] && git -C "$script_dir" rev-parse --git-dir >/dev/null 2>&1; then + script_branch="$(git -C "$script_dir" branch --show-current 2>/dev/null || true)" + fi + if [[ -n "$script_branch" && "$script_branch" != "main" ]]; then + printf -- '→ Checking out branch: %s\n' "$script_branch" + git -C "$REPO_DIR" fetch origin + git -C "$REPO_DIR" checkout "$script_branch" + fi } skills_root() { printf '%s\n' "$REPO_DIR/understand-anything-plugin/skills"; } @@ -185,6 +218,7 @@ cmd_install() { target="$(printf '%s\n' "$row" | cut -d'|' -f2)" style="$(printf '%s\n' "$row" | cut -d'|' -f3)" + ensure_pnpm clone_or_update printf -- '→ Linking skills for %s (%s → %s)\n' "$id" "$style" "$target" link_skills "$target" "$style" diff --git a/scripts/build-pascal-wasm.ps1 b/scripts/build-pascal-wasm.ps1 new file mode 100644 index 000000000..0b09e0729 --- /dev/null +++ b/scripts/build-pascal-wasm.ps1 @@ -0,0 +1,41 @@ +<# +.SYNOPSIS + Build tree-sitter-pascal.wasm using Docker + Emscripten. + +.DESCRIPTION + The resulting WASM is placed inside the installed package so web-tree-sitter + can load it via require.resolve(). + +.NOTES + Prerequisites: Docker daemon running with Emscripten image available. + Run 'pnpm install' inside understand-anything-plugin/ before this script. +#> + +$ErrorActionPreference = 'Stop' + +$ScriptDir = $PSScriptRoot +$PluginDir = Join-Path $ScriptDir '..\understand-anything-plugin' +$GrammarDir = Join-Path $PluginDir 'node_modules\tree-sitter-pascal' + +if (-not (Test-Path $GrammarDir)) { + Write-Error "tree-sitter-pascal not found at $GrammarDir`nRun 'pnpm install' inside understand-anything-plugin/ first." +} + +$GrammarDirAbs = (Resolve-Path $GrammarDir).Path +$OutFile = Join-Path $GrammarDirAbs 'tree-sitter-pascal.wasm' + +Write-Host "→ Building tree-sitter-pascal.wasm..."
+docker run --rm ` + -v "${GrammarDirAbs}:/src" ` + -w /src ` + emscripten/emsdk ` + emcc src/parser.c ` + -o tree-sitter-pascal.wasm ` + -Os ` + -s WASM=1 ` + -s SIDE_MODULE=1 ` + "-s EXPORTED_FUNCTIONS=['_tree_sitter_pascal']" ` + -fvisibility=hidden ` + -I./src + +Write-Host "✓ Built: $OutFile" diff --git a/scripts/build-pascal-wasm.sh b/scripts/build-pascal-wasm.sh new file mode 100644 index 000000000..2384385b1 --- /dev/null +++ b/scripts/build-pascal-wasm.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# Build tree-sitter-pascal.wasm using Docker + Emscripten. +# The resulting WASM is placed inside the installed package so web-tree-sitter +# can load it via require.resolve(). +# +# Prerequisites: Docker daemon running with Emscripten image available. +# Usage: bash scripts/build-pascal-wasm.sh + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PLUGIN_DIR="$SCRIPT_DIR/../understand-anything-plugin" +GRAMMAR_DIR="$PLUGIN_DIR/node_modules/tree-sitter-pascal" + +if [[ ! -d "$GRAMMAR_DIR" ]]; then + echo "Error: tree-sitter-pascal not found at $GRAMMAR_DIR" + echo "Run 'pnpm install' inside understand-anything-plugin/ first." + exit 1 +fi + +OUT_FILE="$GRAMMAR_DIR/tree-sitter-pascal.wasm" + +echo "→ Building tree-sitter-pascal.wasm..." 
+docker run --rm \ + -v "$GRAMMAR_DIR:/src" \ + -w /src \ + emscripten/emsdk \ + emcc src/parser.c \ + -o tree-sitter-pascal.wasm \ + -Os \ + -s WASM=1 \ + -s SIDE_MODULE=1 \ + -s "EXPORTED_FUNCTIONS=['_tree_sitter_pascal']" \ + -fvisibility=hidden \ + -I./src + +echo "✓ Built: $OUT_FILE" diff --git a/tests/skill/understand/test_extract_import_map.test.mjs b/tests/skill/understand/test_extract_import_map.test.mjs index f9706b638..e6c7606d8 100644 --- a/tests/skill/understand/test_extract_import_map.test.mjs +++ b/tests/skill/understand/test_extract_import_map.test.mjs @@ -1672,7 +1672,9 @@ describe('extract-import-map.mjs — Rust crate root missing', () => { }); }); -describe('extract-import-map.mjs — tree-sitter init graceful failure', () => { +// ESM loader hooks (--import) do not reliably intercept native module resolution +// on Windows, so the synthetic tree-sitter failure cannot be injected there. +describe.skipIf(process.platform === 'win32')('extract-import-map.mjs — tree-sitter init graceful failure', () => { let projectRoot; afterEach(() => { diff --git a/tests/skill/understand/test_scan_project.test.mjs b/tests/skill/understand/test_scan_project.test.mjs index 65d96c8c1..eac1d3215 100644 --- a/tests/skill/understand/test_scan_project.test.mjs +++ b/tests/skill/understand/test_scan_project.test.mjs @@ -439,12 +439,16 @@ describe('scan-project.mjs — .understandignore handling', () => { // specific file with `!keep.log`. After the override, keep.log MUST // appear in the output. It is NOT counted in filteredByIgnore (it // was re-included, not additionally filtered). + // + // gitInit:false — use the recursive walker so global gitignore rules + // (which may include *.log on some developer machines) don't shadow + // the .understandignore negation before it can take effect. 
projectRoot = setupTree({ '.understandignore': '!keep.log\n', 'src/index.ts': 'export const x = 1;\n', 'keep.log': 'important diagnostic\n', 'drop.log': 'noise\n', - }); + }, { gitInit: false }); const r = runScript(projectRoot); expect(r.status).toBe(0); expect(byPath(r.output, 'keep.log')).toBeDefined(); diff --git a/understand-anything-plugin/packages/core/package.json b/understand-anything-plugin/packages/core/package.json index e54ce7285..45f0f4b65 100644 --- a/understand-anything-plugin/packages/core/package.json +++ b/understand-anything-plugin/packages/core/package.json @@ -39,6 +39,7 @@ "dependencies": { "@tree-sitter-grammars/tree-sitter-kotlin": "1.1.0", "@understand-anything/tree-sitter-dart-wasm": "workspace:*", + "@understand-anything/tree-sitter-pascal-wasm": "workspace:*", "fuse.js": "^7.1.0", "ignore": "^7.0.5", "tree-sitter-c-sharp": "^0.23.1", diff --git a/understand-anything-plugin/packages/core/src/languages/configs/index.ts b/understand-anything-plugin/packages/core/src/languages/configs/index.ts index 6a949e89d..7a9032e35 100644 --- a/understand-anything-plugin/packages/core/src/languages/configs/index.ts +++ b/understand-anything-plugin/packages/core/src/languages/configs/index.ts @@ -14,6 +14,7 @@ import { cppConfig } from "./cpp.js"; import { dartConfig } from "./dart.js"; import { csharpConfig } from "./csharp.js"; import { luaConfig } from "./lua.js"; +import { pascalConfig } from "./pascal.js"; // Non-code language configs import { markdownConfig } from "./markdown.js"; import { yamlConfig } from "./yaml.js"; @@ -55,6 +56,7 @@ export const builtinLanguageConfigs: LanguageConfig[] = [ swiftConfig, kotlinConfig, luaConfig, + pascalConfig, cConfig, cppConfig, dartConfig, @@ -101,6 +103,7 @@ export { swiftConfig, kotlinConfig, luaConfig, + pascalConfig, cConfig, cppConfig, dartConfig, diff --git a/understand-anything-plugin/packages/core/src/languages/configs/pascal.ts 
b/understand-anything-plugin/packages/core/src/languages/configs/pascal.ts new file mode 100644 index 000000000..e289dfad0 --- /dev/null +++ b/understand-anything-plugin/packages/core/src/languages/configs/pascal.ts @@ -0,0 +1,29 @@ +import type { LanguageConfig } from "../types.js"; + +export const pascalConfig = { + id: "pascal", + displayName: "Pascal", + extensions: [".pas", ".dpr", ".lpr", ".pp"], + treeSitter: { + wasmPackage: "@understand-anything/tree-sitter-pascal-wasm", + wasmFile: "tree-sitter-pascal.wasm", + }, + concepts: [ + "units and interfaces", + "classes and records", + "properties and RTTI", + "generics", + "interfaces (COM-compatible)", + "anonymous methods", + "operator overloading", + "inline variables", + "attributes", + "message handling", + ], + filePatterns: { + entryPoints: ["*.dpr", "*.lpr"], + barrels: [], + tests: ["*Test.pas", "*Tests.pas", "*_test.pas"], + config: ["*.dproj", "*.lpi", "*.cfg", "*.ini"], + }, +} satisfies LanguageConfig; diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/__tests__/pascal-extractor.test.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/__tests__/pascal-extractor.test.ts new file mode 100644 index 000000000..5380c3aa6 --- /dev/null +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/__tests__/pascal-extractor.test.ts @@ -0,0 +1,421 @@ +import { describe, it, expect, beforeAll } from "vitest"; +import { createRequire } from "node:module"; +import { PascalExtractor } from "../pascal-extractor.js"; + +const require = createRequire(import.meta.url); + +let Parser: any; +let Language: any; +let pascalLang: any; + +beforeAll(async () => { + const mod = await import("web-tree-sitter"); + Parser = mod.Parser; + Language = mod.Language; + await Parser.init(); + const wasmPath = require.resolve("tree-sitter-pascal/tree-sitter-pascal.wasm"); + pascalLang = await Language.load(wasmPath); +}); + +function parse(code: string) { + const parser = new 
Parser(); + parser.setLanguage(pascalLang); + const tree = parser.parse(code); + const root = tree.rootNode; + return { tree, parser, root }; +} + +describe("PascalExtractor", () => { + const extractor = new PascalExtractor(); + + it("has correct languageIds", () => { + expect(extractor.languageIds).toEqual(["pascal"]); + }); + + // ---- Functions ---- + + describe("extractStructure - functions", () => { + it("extracts a procedure with params", () => { + const { tree, parser, root } = parse(` +unit MyUnit; +interface +implementation +procedure DoSomething(AValue: Integer; AName: string); +begin +end; +end. +`); + const result = extractor.extractStructure(root); + + const fn = result.functions.find((f) => f.name === "DoSomething"); + expect(fn).toBeDefined(); + expect(fn!.params).toEqual(["AValue", "AName"]); + expect(fn!.returnType).toBeUndefined(); + + tree.delete(); + parser.delete(); + }); + + it("extracts a function with return type", () => { + const { tree, parser, root } = parse(` +unit MyUnit; +interface +implementation +function Add(A, B: Integer): Integer; +begin + Result := A + B; +end; +end. +`); + const result = extractor.extractStructure(root); + + const fn = result.functions.find((f) => f.name === "Add"); + expect(fn).toBeDefined(); + expect(fn!.params).toEqual(["A", "B"]); + expect(fn!.returnType).toBeDefined(); + + tree.delete(); + parser.delete(); + }); + + it("extracts a parameterless procedure", () => { + const { tree, parser, root } = parse(` +unit MyUnit; +interface +implementation +procedure Run; +begin +end; +end. +`); + const result = extractor.extractStructure(root); + + const fn = result.functions.find((f) => f.name === "Run"); + expect(fn).toBeDefined(); + expect(fn!.params).toEqual([]); + + tree.delete(); + parser.delete(); + }); + + it("reports correct line range", () => { + const { tree, parser, root } = parse(`unit MyUnit; +interface +implementation +procedure Greet; +begin +end; +end. 
+`); + const result = extractor.extractStructure(root); + + const fn = result.functions.find((f) => f.name === "Greet"); + expect(fn).toBeDefined(); + expect(fn!.lineRange[0]).toBeGreaterThanOrEqual(4); + + tree.delete(); + parser.delete(); + }); + }); + + // ---- Classes ---- + + describe("extractStructure - classes", () => { + it("extracts a class with methods and properties", () => { + const { tree, parser, root } = parse(` +unit MyUnit; +interface +type + TFoo = class + FValue: Integer; + procedure SetValue(V: Integer); + function GetValue: Integer; + property Value: Integer read GetValue write SetValue; + end; +implementation +end. +`); + const result = extractor.extractStructure(root); + + const cls = result.classes.find((c) => c.name === "TFoo"); + expect(cls).toBeDefined(); + expect(cls!.methods).toContain("SetValue"); + expect(cls!.methods).toContain("GetValue"); + expect(cls!.properties).toContain("Value"); + + tree.delete(); + parser.delete(); + }); + + it("extracts an empty class", () => { + const { tree, parser, root } = parse(` +unit MyUnit; +interface +type + TEmpty = class + end; +implementation +end. +`); + const result = extractor.extractStructure(root); + + const cls = result.classes.find((c) => c.name === "TEmpty"); + expect(cls).toBeDefined(); + expect(cls!.methods).toEqual([]); + + tree.delete(); + parser.delete(); + }); + + it("extracts an interface type", () => { + const { tree, parser, root } = parse(` +unit MyUnit; +interface +type + IFoo = interface + procedure DoIt; + end; +implementation +end. 
+`); + const result = extractor.extractStructure(root); + + const cls = result.classes.find((c) => c.name === "IFoo"); + expect(cls).toBeDefined(); + expect(cls!.methods).toContain("DoIt"); + + tree.delete(); + parser.delete(); + }); + }); + + // ---- Imports ---- + + describe("extractStructure - imports", () => { + it("extracts uses clause modules", () => { + const { tree, parser, root } = parse(` +unit MyUnit; +interface +uses + SysUtils, Classes; +implementation +end. +`); + const result = extractor.extractStructure(root); + + const sources = result.imports.map((i) => i.source); + expect(sources).toContain("SysUtils"); + expect(sources).toContain("Classes"); + + tree.delete(); + parser.delete(); + }); + + it("extracts dotted module names", () => { + const { tree, parser, root } = parse(` +unit MyUnit; +interface +uses + System.SysUtils; +implementation +end. +`); + const result = extractor.extractStructure(root); + + const imp = result.imports.find((i) => i.source === "System.SysUtils"); + expect(imp).toBeDefined(); + expect(imp!.specifiers).toEqual(["SysUtils"]); + + tree.delete(); + parser.delete(); + }); + }); + + // ---- Exports ---- + + describe("extractStructure - exports (interface section)", () => { + it("exports types declared in the interface section", () => { + const { tree, parser, root } = parse(` +unit MyUnit; +interface +type + TFoo = class + procedure Run; + end; +implementation +end. +`); + const result = extractor.extractStructure(root); + + const exportNames = result.exports.map((e) => e.name); + expect(exportNames).toContain("TFoo"); + + tree.delete(); + parser.delete(); + }); + + it("does not export types declared only in implementation", () => { + const { tree, parser, root } = parse(` +unit MyUnit; +interface +implementation +type + TInternal = class + end; +procedure Helper; +begin +end; +end. 
+`); + const result = extractor.extractStructure(root); + + const exportNames = result.exports.map((e) => e.name); + expect(exportNames).not.toContain("TInternal"); + expect(exportNames).not.toContain("Helper"); + + tree.delete(); + parser.delete(); + }); + }); + + // ---- Call Graph ---- + + describe("extractCallGraph", () => { + it("extracts procedure calls", () => { + const { tree, parser, root } = parse(` +unit MyUnit; +interface +implementation +procedure Caller; +begin + Foo; + Bar; +end; +end. +`); + const result = extractor.extractCallGraph(root); + + const callerEntries = result.filter((e) => e.caller === "Caller"); + const callees = callerEntries.map((e) => e.callee); + expect(callees).toContain("Foo"); + expect(callees).toContain("Bar"); + + tree.delete(); + parser.delete(); + }); + + it("reports correct line numbers for calls", () => { + const { tree, parser, root } = parse(`unit MyUnit; +interface +implementation +procedure P; +begin + Foo; +end; +end. +`); + const result = extractor.extractCallGraph(root); + const entry = result.find((e) => e.callee === "Foo"); + expect(entry).toBeDefined(); + expect(entry!.lineNumber).toBe(6); + + tree.delete(); + parser.delete(); + }); + + it("tracks caller correctly for multiple functions", () => { + const { tree, parser, root } = parse(` +unit MyUnit; +interface +implementation +procedure Alpha; +begin + Beta; +end; +procedure Beta; +begin + Gamma; +end; +end. 
+`); + const result = extractor.extractCallGraph(root); + + const alphaCalls = result.filter((e) => e.caller === "Alpha"); + expect(alphaCalls.map((e) => e.callee)).toContain("Beta"); + + const betaCalls = result.filter((e) => e.caller === "Beta"); + expect(betaCalls.map((e) => e.callee)).toContain("Gamma"); + + tree.delete(); + parser.delete(); + }); + }); + + // ---- Comprehensive ---- + + describe("comprehensive Pascal unit", () => { + it("handles a realistic unit", () => { + const { tree, parser, root } = parse(` +unit Calculator; +interface +uses + SysUtils, Math; +type + TCalculator = class + FValue: Double; + procedure SetValue(V: Double); + function GetValue: Double; + function Add(A, B: Double): Double; + property Value: Double read GetValue write SetValue; + end; +procedure GlobalReset; +implementation +procedure TCalculator.SetValue(V: Double); +begin + FValue := V; +end; +function TCalculator.GetValue: Double; +begin + Result := FValue; +end; +function TCalculator.Add(A, B: Double): Double; +begin + Result := A + B; + SetValue(Result); +end; +procedure GlobalReset; +begin + SysUtils.FreeAndNil(nil); +end; +end. 
+`); + const result = extractor.extractStructure(root); + + // Classes + const cls = result.classes.find((c) => c.name === "TCalculator"); + expect(cls).toBeDefined(); + expect(cls!.methods).toContain("SetValue"); + expect(cls!.methods).toContain("GetValue"); + expect(cls!.methods).toContain("Add"); + expect(cls!.properties).toContain("Value"); + + // Imports + const sources = result.imports.map((i) => i.source); + expect(sources).toContain("SysUtils"); + expect(sources).toContain("Math"); + + // Exports (interface section) + const exportNames = result.exports.map((e) => e.name); + expect(exportNames).toContain("TCalculator"); + expect(exportNames).toContain("GlobalReset"); + + // Call graph + const calls = extractor.extractCallGraph(root); + const addCalls = calls.filter((e) => e.caller.includes("Add")); + expect(addCalls.some((e) => e.callee.includes("SetValue"))).toBe(true); + + tree.delete(); + parser.delete(); + }); + }); +}); diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/cpp-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/cpp-extractor.ts index 8523d6f20..e322e865e 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/cpp-extractor.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/cpp-extractor.ts @@ -327,6 +327,26 @@ export class CppExtractor implements LanguageExtractor { const className = nameNode.text; const methods: string[] = []; const properties: string[] = []; + // C++ inheritance: `class Foo : public Bar, protected Baz { ... }` + // The base_class_clause sits as a child of the class_specifier; its + // children are pairs of (access_specifier?, type_identifier|template_type). + // C++ has no syntactic interface concept — surface every base in `parents`. 
+ const parents: string[] = []; + for (let i = 0; i < node.childCount; i++) { + const c = node.child(i); + if (c && c.type === "base_class_clause") { + for (let j = 0; j < c.childCount; j++) { + const b = c.child(j); + if (!b) continue; + if (b.type === "type_identifier" || b.type === "qualified_identifier") { + parents.push(b.text); + } else if (b.type === "template_type") { + const inner = b.childForFieldName("name") ?? findChild(b, "type_identifier"); + parents.push(inner ? inner.text : b.text); + } + } + } + } const body = node.childForFieldName("body"); if (body && body.type === "field_declaration_list") { @@ -409,6 +429,7 @@ export class CppExtractor implements LanguageExtractor { ], methods, properties, + ...(parents.length ? { parents } : {}), }); // The class/struct name itself is an export (non-anonymous types are always exported in C/C++ headers) diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/csharp-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/csharp-extractor.ts index 19b77b5b9..65181b4c8 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/csharp-extractor.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/csharp-extractor.ts @@ -2,6 +2,47 @@ import type { StructuralAnalysis, CallGraphEntry } from "../../types.js"; import type { LanguageExtractor, TreeSitterNode } from "./types.js"; import { findChild, findChildren } from "./base-extractor.js"; +/** + * Pull type names out of a C# `base_list` node (the colon-list after a + * class or interface declaration). Handles plain `identifier`, + * `generic_name` (e.g. `IList`), and `qualified_name` + * (`System.IDisposable`). 
+ */ +function extractBaseListRefs(node: TreeSitterNode | null): string[] { + if (!node) return []; + const refs: string[] = []; + for (let i = 0; i < node.childCount; i++) { + const c = node.child(i); + if (!c) continue; + if (c.type === "identifier" || c.type === "qualified_name" || c.type === "predefined_type") { + refs.push(c.text); + } else if (c.type === "generic_name") { + const inner = findChild(c, "identifier"); + refs.push(inner ? inner.text : c.text); + } + } + return refs; +} + +/** + * Apply the C# I-prefix convention to split a base list into class parent + * vs implemented interfaces. `forceAllParents=true` is used for interface + * declarations where every base is itself an interface parent. + */ +function splitCSharpBaseRefs( + refs: string[], + forceAllParents: boolean, +): { parents: string[]; interfaces: string[] } { + if (forceAllParents) return { parents: [...refs], interfaces: [] }; + if (refs.length === 0) return { parents: [], interfaces: [] }; + const bareName = (s: string) => s.replace(/<.*$/, "").split(".").pop() ?? ""; + const looksLikeInterface = (s: string) => /^I[A-Z]/.test(bareName(s)); + if (looksLikeInterface(refs[0])) { + return { parents: [], interfaces: [...refs] }; + } + return { parents: [refs[0]], interfaces: refs.slice(1) }; +} + /** * Extract parameter names from a C# `parameter_list` node. * @@ -304,6 +345,11 @@ export class CSharpExtractor implements LanguageExtractor { const methods: string[] = []; const properties: string[] = []; + // C# `class Foo : Bar, IFoo, IBar` — apply I-prefix convention to split + // the base_list into a class parent and implemented interfaces. 
+ const baseRefs = extractBaseListRefs(findChild(node, "base_list")); + const { parents, interfaces } = splitCSharpBaseRefs(baseRefs, false); + const body = node.childForFieldName("body"); if (body) { this.extractClassBodyMembers(body, methods, properties, functions, exports); @@ -317,6 +363,8 @@ export class CSharpExtractor implements LanguageExtractor { ], methods, properties, + ...(parents.length ? { parents } : {}), + ...(interfaces.length ? { interfaces } : {}), }); if (hasModifier(node, "public")) { @@ -339,6 +387,10 @@ export class CSharpExtractor implements LanguageExtractor { const methods: string[] = []; const properties: string[] = []; + // For interface declarations every base is itself an interface parent. + const baseRefs = extractBaseListRefs(findChild(node, "base_list")); + const { parents } = splitCSharpBaseRefs(baseRefs, true); + const body = node.childForFieldName("body"); if (body) { // Interface body contains method_declaration nodes (signatures without bodies) @@ -368,6 +420,7 @@ export class CSharpExtractor implements LanguageExtractor { ], methods, properties, + ...(parents.length ? { parents } : {}), }); if (hasModifier(node, "public")) { diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/go-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/go-extractor.ts index 53e3e95aa..b8b03fe28 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/go-extractor.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/go-extractor.ts @@ -283,12 +283,31 @@ export class GoExtractor implements LanguageExtractor { exports: StructuralAnalysis["exports"], ): void { const properties: string[] = []; + // Go has no inheritance, but embedded fields promote the embedded type's + // methods — surface those in `parents` so method-promotion relationships + // are visible in the graph. 
+ const parents: string[] = []; const fieldList = findChild(structNode, "field_declaration_list"); if (fieldList) { const fields = findChildren(fieldList, "field_declaration"); for (const field of fields) { - // A field_declaration can have multiple names: `X, Y int` + // Detect embedded fields: no field_identifier child, type is the name. + const hasName = findChild(field, "field_identifier") !== null; + if (!hasName) { + const embeddedType = + field.childForFieldName("type") ?? + findChild(field, "type_identifier") ?? + findChild(field, "qualified_type") ?? + findChild(field, "pointer_type"); + if (embeddedType) { + let txt = embeddedType.text; + if (embeddedType.type === "pointer_type") txt = txt.replace(/^\*\s*/, ""); + parents.push(txt); + } + continue; + } + // Regular (named) fields contribute to properties. for (let i = 0; i < field.childCount; i++) { const child = field.child(i); if (child && child.type === "field_identifier") { @@ -306,6 +325,7 @@ export class GoExtractor implements LanguageExtractor { ], methods: [], // Methods are attached later from methodsByReceiver properties, + ...(parents.length ? 
{ parents } : {}), }); if (isExported(nameNode.text)) { diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/index.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/index.ts index 8fbe73608..6df6b4f19 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/index.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/index.ts @@ -11,6 +11,7 @@ export { CppExtractor } from "./cpp-extractor.js"; export { CSharpExtractor } from "./csharp-extractor.js"; export { DartExtractor } from "./dart-extractor.js"; export { KotlinExtractor } from "./kotlin-extractor.js"; +export { PascalExtractor } from "./pascal-extractor.js"; import type { LanguageExtractor } from "./types.js"; import { TypeScriptExtractor } from "./typescript-extractor.js"; @@ -24,6 +25,7 @@ import { CppExtractor } from "./cpp-extractor.js"; import { CSharpExtractor } from "./csharp-extractor.js"; import { DartExtractor } from "./dart-extractor.js"; import { KotlinExtractor } from "./kotlin-extractor.js"; +import { PascalExtractor } from "./pascal-extractor.js"; export const builtinExtractors: LanguageExtractor[] = [ new TypeScriptExtractor(), @@ -37,4 +39,5 @@ export const builtinExtractors: LanguageExtractor[] = [ new CSharpExtractor(), new DartExtractor(), new KotlinExtractor(), + new PascalExtractor(), ]; diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/java-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/java-extractor.ts index 4ac3a4f3a..ace4df15e 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/java-extractor.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/java-extractor.ts @@ -2,6 +2,37 @@ import type { StructuralAnalysis, CallGraphEntry } from "../../types.js"; import type { LanguageExtractor, TreeSitterNode } from "./types.js"; import { findChild, findChildren } from "./base-extractor.js"; +/** + * Walk a Java 
`superclass` / `super_interfaces` / `extends_interfaces` node + * and return the type names it references. Handles `type_identifier`, + * `generic_type` (e.g. `List`), and nested type_list wrappers. + */ +function extractTypeRefs(node: TreeSitterNode | null): string[] { + if (!node) return []; + const refs: string[] = []; + const collect = (n: TreeSitterNode) => { + for (let i = 0; i < n.childCount; i++) { + const c = n.child(i); + if (!c) continue; + if (c.type === "type_identifier" || c.type === "scoped_type_identifier") { + refs.push(c.text); + } else if (c.type === "generic_type") { + const inner = findChild(c, "type_identifier"); + refs.push(inner ? inner.text : c.text); + } else if ( + c.type === "type_list" || + c.type === "interface_type_list" || + c.type === "extends_interfaces" || + c.type === "super_interfaces" + ) { + collect(c); + } + } + }; + collect(node); + return refs; +} + /** * Extract parameter names from a Java `formal_parameters` node. * @@ -248,6 +279,10 @@ export class JavaExtractor implements LanguageExtractor { const methods: string[] = []; const properties: string[] = []; + // `class Foo extends Bar implements I1, I2 { ... }` + const parents = extractTypeRefs(node.childForFieldName("superclass")); + const interfaces = extractTypeRefs(node.childForFieldName("interfaces")); + const body = node.childForFieldName("body"); if (body) { this.extractClassBodyMembers( @@ -267,6 +302,8 @@ export class JavaExtractor implements LanguageExtractor { ], methods, properties, + ...(parents.length ? { parents } : {}), + ...(interfaces.length ? { interfaces } : {}), }); if (hasModifier(node, "public")) { @@ -289,6 +326,9 @@ export class JavaExtractor implements LanguageExtractor { const methods: string[] = []; const properties: string[] = []; + // `interface IExtended extends IBase1, IBase2` — interface inheritance lands in `parents`. 
+ const parents = extractTypeRefs(findChild(node, "extends_interfaces")); + const body = node.childForFieldName("body"); if (body) { // Interface body contains method_declaration nodes (signatures without bodies) @@ -321,6 +361,7 @@ export class JavaExtractor implements LanguageExtractor { ], methods, properties, + ...(parents.length ? { parents } : {}), }); if (hasModifier(node, "public")) { diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/pascal-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/pascal-extractor.ts new file mode 100644 index 000000000..cf29555f1 --- /dev/null +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/pascal-extractor.ts @@ -0,0 +1,446 @@ +import type { StructuralAnalysis, CallGraphEntry } from "../../types.js"; +import type { LanguageExtractor, TreeSitterNode } from "./types.js"; +import { findChild, findChildren } from "./base-extractor.js"; + +// grammar node: declProc — a procedure or function heading (forward or inside a class). +// Structure: (kProcedure|kFunction|kConstructor|kDestructor) identifier [declArgs] [typeref] +// grammar node: defProc — a full definition: (declProc) (block) +// grammar node: declClass — class/record/object body inside a declType +// grammar node: declIntf — interface body inside a declType +// grammar node: declType — type alias: (identifier) kEq (declClass|declIntf|type|...) 
+// grammar node: declUses — uses clause: kUses (moduleName)+ +// grammar node: moduleName — dotted module name: identifier [kDot identifier]* +// grammar node: declArg — parameter group: [kVar|kConst|kOut] identifier+ (type) +// grammar node: declProp — property declaration inside a class +// grammar node: declField / declVar — field/variable declarations inside a class + +function isProcKeyword(node: TreeSitterNode): boolean { + return ( + node.type === "kProcedure" || + node.type === "kFunction" || + node.type === "kConstructor" || + node.type === "kDestructor" || + node.type === "kOperator" + ); +} + +function isFunctionKeyword(node: TreeSitterNode): boolean { + return node.type === "kFunction"; +} + +/** + * Extract parameter names from a declArgs node. + * Each declArg child has one or more identifier children followed by a type node. + * Modifiers (kVar, kConst, kOut, kConstref) appear before the identifiers. + */ +function extractParams(argsNode: TreeSitterNode | null): string[] { + if (!argsNode) return []; + const params: string[] = []; + const argNodes = findChildren(argsNode, "declArg"); + for (const arg of argNodes) { + // Collect all identifier children (skip keywords and type nodes) + for (let i = 0; i < arg.childCount; i++) { + const child = arg.child(i); + if (child && child.type === "identifier") { + params.push(child.text); + } + } + } + return params; +} + +/** + * Extract the name identifier from a declProc node. + * The heading is: (kProcedure|kFunction|...) [kClass] identifier ... 
+ */ +function extractProcName(node: TreeSitterNode): string | null { + let seenKeyword = false; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if (isProcKeyword(child) || child.type === "kClass") { + seenKeyword = true; + continue; + } + if (seenKeyword && child.type === "identifier") { + return child.text; + } + // Qualified names like TFoo.Bar — take the full text of the compound node + if ( + seenKeyword && + (child.type === "operatorDot" || child.type === "genericDot") + ) { + return child.text; + } + } + return null; +} + +/** + * Extract return type text from a declProc that uses kFunction. + * The typeref is a direct child after the declArgs (or after the identifier if no args). + */ +function extractReturnType(node: TreeSitterNode): string | undefined { + let seenArgs = false; + let seenName = false; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if (child.type === "declArgs") { + seenArgs = true; + continue; + } + if (child.type === "identifier" && !seenName) { + seenName = true; + continue; + } + if ((seenArgs || seenName) && (child.type === "typeref" || child.type === "type")) { + return child.text; + } + } + return undefined; +} + +/** + * Extract a dotted module name from a moduleName node. + * e.g. (moduleName (identifier "System") (kDot) (identifier "SysUtils")) → "System.SysUtils" + */ +function extractModuleName(node: TreeSitterNode): string { + const parts: string[] = []; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && child.type === "identifier") { + parts.push(child.text); + } + } + return parts.join("."); +} + +/** + * Extract all procedure/function definitions from a root node, recursing into + * unit interface/implementation sections. 
+ */ +function collectDefProcs( + root: TreeSitterNode, + out: { node: TreeSitterNode; declProc: TreeSitterNode }[], +): void { + function walk(node: TreeSitterNode): void { + if (node.type === "defProc") { + const decl = findChild(node, "declProc"); + if (decl) { + out.push({ node, declProc: decl }); + } + } + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) walk(child); + } + } + walk(root); +} + +/** + * Extract all declType nodes from a root, recursing into interface/implementation sections. + */ +function collectDeclTypes(root: TreeSitterNode, out: TreeSitterNode[]): void { + function walk(node: TreeSitterNode): void { + if (node.type === "declType") { + out.push(node); + } + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) walk(child); + } + } + walk(root); +} + +/** + * Collect standalone declProc nodes that are direct children of the interface + * section (forward declarations of procedures/functions exported from the unit). + * These have no defProc wrapper — the body lives in the implementation section. + */ +function collectInterfaceDeclProcs(root: TreeSitterNode, out: TreeSitterNode[]): void { + function walk(node: TreeSitterNode): void { + if (node.type === "interface") { + // Only look one level deep inside the interface section + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && child.type === "declProc") { + out.push(child); + } + } + return; // don't recurse further into interface — we only want direct children + } + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) walk(child); + } + } + walk(root); +} + +/** + * Extract all declUses nodes from a root, tagged with their section. 
+ */ +function collectDeclUses( + root: TreeSitterNode, + out: { node: TreeSitterNode; section: "interface" | "implementation" | undefined }[], +): void { + function walk(node: TreeSitterNode): void { + if (node.type === "declUses") { + out.push({ node, section: getDeclSection(node) }); + } + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) walk(child); + } + } + walk(root); +} + +/** + * Determine which Pascal section (interface/implementation) a node lives in. + */ +function getDeclSection(node: TreeSitterNode): "interface" | "implementation" | undefined { + let n: TreeSitterNode | null = node.parent; + while (n) { + if (n.type === "interface") return "interface"; + if (n.type === "implementation") return "implementation"; + n = n.parent; + } + return undefined; +} + +/** + * Determine whether a declType or its contents lives inside an `interface` section. + * Nodes in the interface section are publicly exported from a Pascal unit. + */ +function isInInterfaceSection(node: TreeSitterNode): boolean { + let n: TreeSitterNode | null = node.parent; + while (n) { + if (n.type === "interface") return true; + if (n.type === "implementation") return false; + n = n.parent; + } + // top-level (program/library) — treat as public + return true; +} + +/** + * Pascal extractor for tree-sitter structural analysis. + * + * Supports: Object Pascal (Delphi), Free Pascal, and related dialects. + * + * Mapping decisions: + * - procedure/function/constructor/destructor definitions (defProc) → functions array. + * - class, record, object, and interface type declarations → classes array. + * - Class methods (declProc inside declClass/declIntf) → included in class.methods. + * - uses clause (declUses) → imports array. Each moduleName becomes one import. + * - Declarations in the `interface` section of a unit → exports array (publicly visible). + * - Properties (declProp) and fields (declVar/declField) → class.properties. 
+ */ +export class PascalExtractor implements LanguageExtractor { + readonly languageIds = ["pascal"]; + + extractStructure(rootNode: TreeSitterNode): StructuralAnalysis { + const functions: StructuralAnalysis["functions"] = []; + const classes: StructuralAnalysis["classes"] = []; + const imports: StructuralAnalysis["imports"] = []; + const exports: StructuralAnalysis["exports"] = []; + + // -- Functions: defProc nodes -- + const defProcs: { node: TreeSitterNode; declProc: TreeSitterNode }[] = []; + collectDefProcs(rootNode, defProcs); + + for (const { node, declProc } of defProcs) { + const name = extractProcName(declProc); + if (!name) continue; + + const isFunc = isFunctionKeyword(findChild(declProc, "kFunction") ?? declProc); + const argsNode = findChild(declProc, "declArgs"); + const params = extractParams(argsNode); + const returnType = isFunc ? extractReturnType(declProc) : undefined; + + functions.push({ + name, + lineRange: [node.startPosition.row + 1, node.endPosition.row + 1], + params, + ...(returnType !== undefined ? { returnType } : {}), + }); + + if (isInInterfaceSection(node)) { + exports.push({ name, lineNumber: node.startPosition.row + 1 }); + } + } + + // -- Classes: declType nodes containing declClass or declIntf -- + const declTypes: TreeSitterNode[] = []; + collectDeclTypes(rootNode, declTypes); + + for (const declType of declTypes) { + const nameNode = findChild(declType, "identifier"); + if (!nameNode) continue; + const className = nameNode.text; + + const classBody = findChild(declType, "declClass") ?? findChild(declType, "declIntf"); + if (!classBody) continue; + const isInterfaceDecl = classBody.type === "declIntf"; + + const methods: string[] = []; + const properties: string[] = []; + const ancestorRefs: string[] = []; + + // Ancestor typerefs appear as direct children of the class/interface body + // before any member declarations. 
Convention: for declClass, the first + // typeref is the parent class and remaining are implemented interfaces; + // for declIntf, all typerefs are parent interfaces. + for (let i = 0; i < classBody.childCount; i++) { + const m = classBody.child(i); + if (!m) continue; + if (m.type === "typeref") { + const id = findChild(m, "identifier"); + if (id) ancestorRefs.push(id.text); + } + } + + // Methods: declProc nodes inside the class body + const methodDecls = findChildren(classBody, "declProc"); + for (const m of methodDecls) { + const mName = extractProcName(m); + if (mName) methods.push(mName); + } + + // Properties: declProp nodes + const propDecls = findChildren(classBody, "declProp"); + for (const p of propDecls) { + const pName = findChild(p, "identifier"); + if (pName) properties.push(pName.text); + } + + // Fields: declVar/declField/declVars children + const varSections = [ + ...findChildren(classBody, "declVars"), + ...findChildren(classBody, "declField"), + ]; + for (const vs of varSections) { + const varNodes = findChildren(vs, "declVar"); + for (const v of varNodes) { + for (let i = 0; i < v.childCount; i++) { + const c = v.child(i); + if (c && c.type === "identifier") properties.push(c.text); + } + } + } + + const parents: string[] = isInterfaceDecl ? ancestorRefs : ancestorRefs.slice(0, 1); + const interfaces: string[] = isInterfaceDecl ? [] : ancestorRefs.slice(1); + + classes.push({ + name: className, + lineRange: [declType.startPosition.row + 1, declType.endPosition.row + 1], + methods, + properties, + ...(parents.length ? { parents } : {}), + ...(interfaces.length ? 
{ interfaces } : {}), + }); + + if (isInInterfaceSection(declType)) { + exports.push({ name: className, lineNumber: declType.startPosition.row + 1 }); + } + } + + // -- Exports: forward-declared procedures/functions in the interface section -- + // (defProc nodes inside the interface section are already handled above; this + // catches standalone declProc forward declarations whose body is in implementation.) + const ifaceDeclProcs: TreeSitterNode[] = []; + collectInterfaceDeclProcs(rootNode, ifaceDeclProcs); + for (const declProc of ifaceDeclProcs) { + const name = extractProcName(declProc); + if (!name) continue; + // Avoid duplicating an entry that was already exported via defProc + if (!exports.some((e) => e.name === name)) { + exports.push({ name, lineNumber: declProc.startPosition.row + 1 }); + } + } + + // -- Imports: declUses nodes -- + const usesNodes: { node: TreeSitterNode; section: "interface" | "implementation" | undefined }[] = []; + collectDeclUses(rootNode, usesNodes); + + for (const { node: usesNode, section } of usesNodes) { + const moduleNames = findChildren(usesNode, "moduleName"); + for (const mod of moduleNames) { + const fullName = extractModuleName(mod); + if (!fullName) continue; + const parts = fullName.split("."); + imports.push({ + source: fullName, + specifiers: [parts[parts.length - 1]], + lineNumber: mod.startPosition.row + 1, + ...(section ? 
{ section } : {}), + }); + } + } + + return { functions, classes, imports, exports }; + } + + extractCallGraph(rootNode: TreeSitterNode): CallGraphEntry[] { + const entries: CallGraphEntry[] = []; + const callerStack: string[] = []; + + const walk = (node: TreeSitterNode) => { + let pushed = false; + + // Track entering a procedure/function definition + if (node.type === "defProc") { + const decl = findChild(node, "declProc"); + if (decl) { + const name = extractProcName(decl); + if (name) { + callerStack.push(name); + pushed = true; + } + } + } + + // Capture call expressions with arguments: exprCall → (callee args) + if (node.type === "exprCall" && callerStack.length > 0) { + const callee = node.child(0); + if (callee) { + entries.push({ + caller: callerStack[callerStack.length - 1], + callee: callee.text, + lineNumber: node.startPosition.row + 1, + }); + } + } + + // Capture bare procedure calls: statement containing only an identifier (no args). + // e.g. `Foo;` parses as statement > identifier (+ anonymous `;`), not as exprCall. 
+ if (node.type === "statement" && callerStack.length > 0) { + if (node.namedChildCount === 1) { + const child = node.child(0); + if (child && child.type === "identifier") { + entries.push({ + caller: callerStack[callerStack.length - 1], + callee: child.text, + lineNumber: node.startPosition.row + 1, + }); + } + } + } + + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) walk(child); + } + + if (pushed) callerStack.pop(); + }; + + walk(rootNode); + return entries; + } +} diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/php-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/php-extractor.ts index 700e2074b..7f5f7daff 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/php-extractor.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/php-extractor.ts @@ -2,6 +2,24 @@ import type { StructuralAnalysis, CallGraphEntry } from "../../types.js"; import type { LanguageExtractor, TreeSitterNode } from "./types.js"; import { findChild, findChildren } from "./base-extractor.js"; +/** + * Pull type names out of a PHP `base_clause` (`extends X`) or + * `class_interface_clause` (`implements I1, I2`). Each name is a `name` + * node or `qualified_name`. + */ +function extractPhpTypeRefs(node: TreeSitterNode | null): string[] { + if (!node) return []; + const refs: string[] = []; + for (let i = 0; i < node.childCount; i++) { + const c = node.child(i); + if (!c) continue; + if (c.type === "name" || c.type === "qualified_name") { + refs.push(c.text); + } + } + return refs; +} + /** * Extract parameter names from a PHP `formal_parameters` node. 
* @@ -313,6 +331,10 @@ export class PhpExtractor implements LanguageExtractor { const methods: string[] = []; const properties: string[] = []; + // PHP `class Foo extends Bar implements I1, I2` + const parents = extractPhpTypeRefs(findChild(node, "base_clause")); + const interfaces = extractPhpTypeRefs(findChild(node, "class_interface_clause")); + const declList = findChild(node, "declaration_list"); if (declList) { this.extractDeclarationList(declList, methods, properties, functions); @@ -323,6 +345,8 @@ export class PhpExtractor implements LanguageExtractor { lineRange: [node.startPosition.row + 1, node.endPosition.row + 1], methods, properties, + ...(parents.length ? { parents } : {}), + ...(interfaces.length ? { interfaces } : {}), }); } @@ -336,6 +360,9 @@ export class PhpExtractor implements LanguageExtractor { const methods: string[] = []; const properties: string[] = []; + // `interface IExtended extends IBase1, IBase2` — interface inheritance in `parents`. + const parents = extractPhpTypeRefs(findChild(node, "base_clause")); + const declList = findChild(node, "declaration_list"); if (declList) { // Interface methods are method_declaration nodes (no bodies, just signatures) @@ -353,6 +380,7 @@ export class PhpExtractor implements LanguageExtractor { lineRange: [node.startPosition.row + 1, node.endPosition.row + 1], methods, properties, + ...(parents.length ? 
{ parents } : {}), }); } diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/python-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/python-extractor.ts index 83ae76cb0..5170b08b9 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/python-extractor.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/python-extractor.ts @@ -222,6 +222,21 @@ export class PythonExtractor implements LanguageExtractor { const methods: string[] = []; const properties: string[] = []; + // Python `class X(Y, Z, metaclass=Meta):` — base classes live in `superclasses` + // (an argument_list). Keyword args like `metaclass=` are skipped. Python doesn't + // distinguish classes from protocols/interfaces, so everything goes in `parents`. + const parents: string[] = []; + const supers = node.childForFieldName("superclasses"); + if (supers) { + for (let i = 0; i < supers.childCount; i++) { + const c = supers.child(i); + if (!c) continue; + if (c.type === "identifier" || c.type === "dotted_name" || c.type === "attribute") { + parents.push(c.text); + } + } + } + const body = node.childForFieldName("body"); if (body) { for (let i = 0; i < body.childCount; i++) { @@ -259,6 +274,7 @@ export class PythonExtractor implements LanguageExtractor { ], methods, properties, + ...(parents.length ? { parents } : {}), }); } diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/ruby-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/ruby-extractor.ts index 5b6f7bd6d..e448a2915 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/ruby-extractor.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/ruby-extractor.ts @@ -312,9 +312,41 @@ export class RubyExtractor implements LanguageExtractor { const methods: string[] = []; const properties: string[] = []; + // Ruby `class Foo < Bar` — superclass child holds the parent. 
+ // Module mixins (`include Mod`, `prepend Mod`, `extend Mod`) inside the + // body promote methods at runtime, analogous to interfaces. + const parents: string[] = []; + const interfaces: string[] = []; + const superclassNode = node.childForFieldName("superclass"); + if (superclassNode) { + const ref = + findChild(superclassNode, "constant") ?? + findChild(superclassNode, "scope_resolution") ?? + superclassNode; + const txt = ref.text.replace(/^<\s*/, ""); + if (txt && txt !== "<") parents.push(txt); + } + const body = node.childForFieldName("body"); if (body) { this.extractClassBody(body, methods, properties, functions); + for (let i = 0; i < body.childCount; i++) { + const stmt = body.child(i); + if (!stmt) continue; + if (stmt.type !== "call" && stmt.type !== "method_call") continue; + const receiver = stmt.childForFieldName("method"); + const name2 = receiver?.text; + if (name2 !== "include" && name2 !== "prepend" && name2 !== "extend") continue; + const args = stmt.childForFieldName("arguments"); + if (!args) continue; + for (let j = 0; j < args.childCount; j++) { + const a = args.child(j); + if (!a) continue; + if (a.type === "constant" || a.type === "scope_resolution") { + interfaces.push(a.text); + } + } + } } classes.push({ @@ -325,6 +357,8 @@ export class RubyExtractor implements LanguageExtractor { ], methods, properties, + ...(parents.length ? { parents } : {}), + ...(interfaces.length ? 
{ interfaces } : {}), }); } diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/rust-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/rust-extractor.ts index 98ab38ff9..7377b2181 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/rust-extractor.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/rust-extractor.ts @@ -100,6 +100,8 @@ export class RustExtractor implements LanguageExtractor { // Track methods per impl type so we can attach them to structs/enums const methodsByType = new Map(); + // Track trait implementations per impl target so we can emit `interfaces`. + const traitsByType = new Map>(); for (let i = 0; i < rootNode.childCount; i++) { const node = rootNode.child(i); @@ -123,7 +125,7 @@ export class RustExtractor implements LanguageExtractor { break; case "impl_item": - this.extractImpl(node, functions, exports, methodsByType); + this.extractImpl(node, functions, exports, methodsByType, traitsByType); break; case "use_declaration": @@ -132,12 +134,17 @@ export class RustExtractor implements LanguageExtractor { } } - // Attach collected methods to their corresponding structs/enums/traits + // Attach collected methods and trait implementations to their corresponding structs/enums/traits for (const cls of classes) { const methods = methodsByType.get(cls.name); if (methods) { cls.methods.push(...methods); } + const traits = traitsByType.get(cls.name); + if (traits && traits.size > 0) { + const existing = cls.interfaces ?? []; + cls.interfaces = [...new Set([...existing, ...traits])]; + } } return { functions, classes, imports, exports }; @@ -339,6 +346,23 @@ export class RustExtractor implements LanguageExtractor { if (!nameNode) return; const methods: string[] = []; + // Supertraits: `trait Foo: Bar + Baz` — `bounds` field holds trait_bounds. 
+ const parents: string[] = []; + const boundsNode = node.childForFieldName("bounds"); + if (boundsNode) { + for (let i = 0; i < boundsNode.childCount; i++) { + const b = boundsNode.child(i); + if (!b) continue; + if ( + b.type === "type_identifier" || + b.type === "scoped_type_identifier" || + b.type === "generic_type" + ) { + parents.push(b.text); + } + } + } + const body = findChild(node, "declaration_list"); if (body) { // Trait bodies contain function_signature_item for method declarations @@ -367,6 +391,7 @@ export class RustExtractor implements LanguageExtractor { ], methods, properties: [], + ...(parents.length ? { parents } : {}), }); if (isPublic(node)) { @@ -382,10 +407,19 @@ export class RustExtractor implements LanguageExtractor { functions: StructuralAnalysis["functions"], exports: StructuralAnalysis["exports"], methodsByType: Map, + traitsByType: Map>, ): void { const typeNode = node.childForFieldName("type"); const typeName = typeNode ? typeNode.text : null; + // `impl Trait for Type` — record the trait so the outer loop can pin it + // onto the type's `interfaces` array. 
+ const traitNode = node.childForFieldName("trait"); + if (traitNode && typeName) { + if (!traitsByType.has(typeName)) traitsByType.set(typeName, new Set()); + traitsByType.get(typeName)!.add(traitNode.text); + } + const body = node.childForFieldName("body"); if (!body) return; diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/typescript-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/typescript-extractor.ts index f8dd4810f..724c84eb4 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/typescript-extractor.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/typescript-extractor.ts @@ -276,6 +276,45 @@ export class TypeScriptExtractor implements LanguageExtractor { const methods: string[] = []; const properties: string[] = []; + // TypeScript `class X extends Y implements I1, I2` — extract from class_heritage. + const parents: string[] = []; + const interfaces: string[] = []; + const heritage = node.children.find((c) => c.type === "class_heritage"); + if (heritage) { + for (let i = 0; i < heritage.childCount; i++) { + const clause = heritage.child(i); + if (!clause) continue; + if (clause.type === "extends_clause") { + for (let j = 0; j < clause.childCount; j++) { + const t = clause.child(j); + if (!t) continue; + if ( + t.type === "identifier" || + t.type === "type_identifier" || + t.type === "generic_type" || + t.type === "member_expression" || + t.type === "nested_type_identifier" + ) { + parents.push(t.text); + } + } + } else if (clause.type === "implements_clause") { + for (let j = 0; j < clause.childCount; j++) { + const t = clause.child(j); + if (!t) continue; + if ( + t.type === "type_identifier" || + t.type === "identifier" || + t.type === "generic_type" || + t.type === "nested_type_identifier" + ) { + interfaces.push(t.text); + } + } + } + } + } + const classBody = node.children.find( (c) => c.type === "class_body", ); @@ -309,6 +348,8 @@ export class 
TypeScriptExtractor implements LanguageExtractor { ], methods, properties, + ...(parents.length ? { parents } : {}), + ...(interfaces.length ? { interfaces } : {}), }); } diff --git a/understand-anything-plugin/packages/core/src/types.ts b/understand-anything-plugin/packages/core/src/types.ts index b7a0fa6e4..0d8c21fd1 100644 --- a/understand-anything-plugin/packages/core/src/types.ts +++ b/understand-anything-plugin/packages/core/src/types.ts @@ -168,8 +168,23 @@ export interface ReferenceResolution { // Plugin interfaces export interface StructuralAnalysis { functions: Array<{ name: string; lineRange: [number, number]; params: string[]; returnType?: string }>; - classes: Array<{ name: string; lineRange: [number, number]; methods: string[]; properties: string[] }>; - imports: Array<{ source: string; specifiers: string[]; lineNumber: number }>; + classes: Array<{ + name: string; + lineRange: [number, number]; + methods: string[]; + properties: string[]; + /** Ancestor class names (e.g. Pascal `class(TParent)`, Java `extends X`). Optional for backward compat. */ + parents?: string[]; + /** Implemented interface names (e.g. Pascal extra ancestor typerefs, Java `implements X, Y`). Optional. */ + interfaces?: string[]; + }>; + imports: Array<{ + source: string; + specifiers: string[]; + lineNumber: number; + /** For languages with section-scoped imports (Pascal interface/implementation). Optional, ignored by other languages. 
*/ + section?: "interface" | "implementation"; + }>; exports: Array<{ name: string; lineNumber: number; isDefault?: boolean }>; // Non-code structural data (all optional for backward compat) sections?: SectionInfo[]; diff --git a/understand-anything-plugin/packages/tree-sitter-pascal-wasm/package.json b/understand-anything-plugin/packages/tree-sitter-pascal-wasm/package.json new file mode 100644 index 000000000..91d4b6d86 --- /dev/null +++ b/understand-anything-plugin/packages/tree-sitter-pascal-wasm/package.json @@ -0,0 +1,9 @@ +{ + "name": "@understand-anything/tree-sitter-pascal-wasm", + "version": "0.11.0", + "type": "module", + "description": "Vendored tree-sitter-pascal WASM grammar (v0.11.0) for use with web-tree-sitter@^0.26.", + "main": "tree-sitter-pascal.wasm", + "files": ["tree-sitter-pascal.wasm"], + "license": "MIT" +} diff --git a/understand-anything-plugin/packages/tree-sitter-pascal-wasm/tree-sitter-pascal.wasm b/understand-anything-plugin/packages/tree-sitter-pascal-wasm/tree-sitter-pascal.wasm new file mode 100644 index 000000000..c9f3d7384 Binary files /dev/null and b/understand-anything-plugin/packages/tree-sitter-pascal-wasm/tree-sitter-pascal.wasm differ diff --git a/understand-anything-plugin/pnpm-lock.yaml b/understand-anything-plugin/pnpm-lock.yaml index e885e212a..40eca05a7 100644 --- a/understand-anything-plugin/pnpm-lock.yaml +++ b/understand-anything-plugin/pnpm-lock.yaml @@ -36,6 +36,9 @@ importers: '@understand-anything/tree-sitter-dart-wasm': specifier: workspace:* version: link:../tree-sitter-dart-wasm + '@understand-anything/tree-sitter-pascal-wasm': + specifier: workspace:* + version: link:../tree-sitter-pascal-wasm fuse.js: specifier: ^7.1.0 version: 7.1.0 @@ -176,6 +179,8 @@ importers: packages/tree-sitter-dart-wasm: {} + packages/tree-sitter-pascal-wasm: {} + packages: '@ampproject/remapping@2.3.0': @@ -496,79 +501,66 @@ packages: resolution: {integrity: 
sha512-RzeBwv0B3qtVBWtcuABtSuCzToo2IEAIQrcyB/b2zMvBWVbjo8bZDjACUpnaafaxhTw2W+imQbP2BD1usasK4g==} cpu: [arm] os: [linux] - libc: [glibc] '@rollup/rollup-linux-arm-musleabihf@4.60.0': resolution: {integrity: sha512-Sf7zusNI2CIU1HLzuu9Tc5YGAHEZs5Lu7N1ssJG4Tkw6e0MEsN7NdjUDDfGNHy2IU+ENyWT+L2obgWiguWibWQ==} cpu: [arm] os: [linux] - libc: [musl] '@rollup/rollup-linux-arm64-gnu@4.60.0': resolution: {integrity: sha512-DX2x7CMcrJzsE91q7/O02IJQ5/aLkVtYFryqCjduJhUfGKG6yJV8hxaw8pZa93lLEpPTP/ohdN4wFz7yp/ry9A==} cpu: [arm64] os: [linux] - libc: [glibc] '@rollup/rollup-linux-arm64-musl@4.60.0': resolution: {integrity: sha512-09EL+yFVbJZlhcQfShpswwRZ0Rg+z/CsSELFCnPt3iK+iqwGsI4zht3secj5vLEs957QvFFXnzAT0FFPIxSrkQ==} cpu: [arm64] os: [linux] - libc: [musl] '@rollup/rollup-linux-loong64-gnu@4.60.0': resolution: {integrity: sha512-i9IcCMPr3EXm8EQg5jnja0Zyc1iFxJjZWlb4wr7U2Wx/GrddOuEafxRdMPRYVaXjgbhvqalp6np07hN1w9kAKw==} cpu: [loong64] os: [linux] - libc: [glibc] '@rollup/rollup-linux-loong64-musl@4.60.0': resolution: {integrity: sha512-DGzdJK9kyJ+B78MCkWeGnpXJ91tK/iKA6HwHxF4TAlPIY7GXEvMe8hBFRgdrR9Ly4qebR/7gfUs9y2IoaVEyog==} cpu: [loong64] os: [linux] - libc: [musl] '@rollup/rollup-linux-ppc64-gnu@4.60.0': resolution: {integrity: sha512-RwpnLsqC8qbS8z1H1AxBA1H6qknR4YpPR9w2XX0vo2Sz10miu57PkNcnHVaZkbqyw/kUWfKMI73jhmfi9BRMUQ==} cpu: [ppc64] os: [linux] - libc: [glibc] '@rollup/rollup-linux-ppc64-musl@4.60.0': resolution: {integrity: sha512-Z8pPf54Ly3aqtdWC3G4rFigZgNvd+qJlOE52fmko3KST9SoGfAdSRCwyoyG05q1HrrAblLbk1/PSIV+80/pxLg==} cpu: [ppc64] os: [linux] - libc: [musl] '@rollup/rollup-linux-riscv64-gnu@4.60.0': resolution: {integrity: sha512-3a3qQustp3COCGvnP4SvrMHnPQ9d1vzCakQVRTliaz8cIp/wULGjiGpbcqrkv0WrHTEp8bQD/B3HBjzujVWLOA==} cpu: [riscv64] os: [linux] - libc: [glibc] '@rollup/rollup-linux-riscv64-musl@4.60.0': resolution: {integrity: sha512-pjZDsVH/1VsghMJ2/kAaxt6dL0psT6ZexQVrijczOf+PeP2BUqTHYejk3l6TlPRydggINOeNRhvpLa0AYpCWSQ==} cpu: [riscv64] os: [linux] - libc: [musl] 
'@rollup/rollup-linux-s390x-gnu@4.60.0': resolution: {integrity: sha512-3ObQs0BhvPgiUVZrN7gqCSvmFuMWvWvsjG5ayJ3Lraqv+2KhOsp+pUbigqbeWqueGIsnn+09HBw27rJ+gYK4VQ==} cpu: [s390x] os: [linux] - libc: [glibc] '@rollup/rollup-linux-x64-gnu@4.60.0': resolution: {integrity: sha512-EtylprDtQPdS5rXvAayrNDYoJhIz1/vzN2fEubo3yLE7tfAw+948dO0g4M0vkTVFhKojnF+n6C8bDNe+gDRdTg==} cpu: [x64] os: [linux] - libc: [glibc] '@rollup/rollup-linux-x64-musl@4.60.0': resolution: {integrity: sha512-k09oiRCi/bHU9UVFqD17r3eJR9bn03TyKraCrlz5ULFJGdJGi7VOmm9jl44vOJvRJ6P7WuBi/s2A97LxxHGIdw==} cpu: [x64] os: [linux] - libc: [musl] '@rollup/rollup-openbsd-x64@4.60.0': resolution: {integrity: sha512-1o/0/pIhozoSaDJoDcec+IVLbnRtQmHwPV730+AOD29lHEEo4F5BEUB24H0OBdhbBBDwIOSuf7vgg0Ywxdfiiw==} @@ -638,28 +630,24 @@ packages: engines: {node: '>= 20'} cpu: [arm64] os: [linux] - libc: [glibc] '@tailwindcss/oxide-linux-arm64-musl@4.2.2': resolution: {integrity: sha512-oCfG/mS+/+XRlwNjnsNLVwnMWYH7tn/kYPsNPh+JSOMlnt93mYNCKHYzylRhI51X+TbR+ufNhhKKzm6QkqX8ag==} engines: {node: '>= 20'} cpu: [arm64] os: [linux] - libc: [musl] '@tailwindcss/oxide-linux-x64-gnu@4.2.2': resolution: {integrity: sha512-rTAGAkDgqbXHNp/xW0iugLVmX62wOp2PoE39BTCGKjv3Iocf6AFbRP/wZT/kuCxC9QBh9Pu8XPkv/zCZB2mcMg==} engines: {node: '>= 20'} cpu: [x64] os: [linux] - libc: [glibc] '@tailwindcss/oxide-linux-x64-musl@4.2.2': resolution: {integrity: sha512-XW3t3qwbIwiSyRCggeO2zxe3KWaEbM0/kW9e8+0XpBgyKU4ATYzcVSMKteZJ1iukJ3HgHBjbg9P5YPRCVUxlnQ==} engines: {node: '>= 20'} cpu: [x64] os: [linux] - libc: [musl] '@tailwindcss/oxide-wasm32-wasi@4.2.2': resolution: {integrity: sha512-eKSztKsmEsn1O5lJ4ZAfyn41NfG7vzCg496YiGtMDV86jz1q/irhms5O0VrY6ZwTUkFy/EKG3RfWgxSI3VbZ8Q==} @@ -1251,28 +1239,24 @@ packages: engines: {node: '>= 12.0.0'} cpu: [arm64] os: [linux] - libc: [glibc] lightningcss-linux-arm64-musl@1.32.0: resolution: {integrity: sha512-UpQkoenr4UJEzgVIYpI80lDFvRmPVg6oqboNHfoH4CQIfNA+HOrZ7Mo7KZP02dC6LjghPQJeBsvXhJod/wnIBg==} engines: {node: '>= 12.0.0'} cpu: 
[arm64] os: [linux] - libc: [musl] lightningcss-linux-x64-gnu@1.32.0: resolution: {integrity: sha512-V7Qr52IhZmdKPVr+Vtw8o+WLsQJYCTd8loIfpDaMRWGUZfBOYEJeyJIkqGIDMZPwPx24pUMfwSxxI8phr/MbOA==} engines: {node: '>= 12.0.0'} cpu: [x64] os: [linux] - libc: [glibc] lightningcss-linux-x64-musl@1.32.0: resolution: {integrity: sha512-bYcLp+Vb0awsiXg/80uCRezCYHNg1/l3mt0gzHnWV9XP1W5sKa5/TCdGWaR/zBM2PeF/HbsQv/j2URNOiVuxWg==} engines: {node: '>= 12.0.0'} cpu: [x64] os: [linux] - libc: [musl] lightningcss-win32-arm64-msvc@1.32.0: resolution: {integrity: sha512-8SbC8BR40pS6baCM8sbtYDSwEVQd4JlFTOlaD3gWGHfThTcABnNDBda6eTZeqbofalIJhFx0qKzgHJmcPTnGdw==} @@ -2479,6 +2463,14 @@ snapshots: optionalDependencies: vite: 6.4.1(@types/node@22.19.15)(jiti@2.6.1)(lightningcss@1.32.0)(yaml@2.8.3) + '@vitest/mocker@3.2.4(vite@6.4.1(@types/node@25.5.0)(jiti@2.6.1)(lightningcss@1.32.0)(yaml@2.8.3))': + dependencies: + '@vitest/spy': 3.2.4 + estree-walker: 3.0.3 + magic-string: 0.30.21 + optionalDependencies: + vite: 6.4.1(@types/node@25.5.0)(jiti@2.6.1)(lightningcss@1.32.0)(yaml@2.8.3) + '@vitest/pretty-format@3.2.4': dependencies: tinyrainbow: 2.0.0 @@ -3692,7 +3684,7 @@ snapshots: dependencies: '@types/chai': 5.2.3 '@vitest/expect': 3.2.4 - '@vitest/mocker': 3.2.4(vite@6.4.1(@types/node@22.19.15)(jiti@2.6.1)(lightningcss@1.32.0)(yaml@2.8.3)) + '@vitest/mocker': 3.2.4(vite@6.4.1(@types/node@25.5.0)(jiti@2.6.1)(lightningcss@1.32.0)(yaml@2.8.3)) '@vitest/pretty-format': 3.2.4 '@vitest/runner': 3.2.4 '@vitest/snapshot': 3.2.4 diff --git a/understand-anything-plugin/pnpm-workspace.yaml b/understand-anything-plugin/pnpm-workspace.yaml index dee51e928..889afbc7c 100644 --- a/understand-anything-plugin/pnpm-workspace.yaml +++ b/understand-anything-plugin/pnpm-workspace.yaml @@ -1,2 +1,16 @@ packages: - "packages/*" +allowBuilds: + esbuild: true + tree-sitter-c: true + tree-sitter-c-sharp: true + tree-sitter-cpp: true + tree-sitter-go: true + tree-sitter-java: true + tree-sitter-javascript: true + 
tree-sitter-php: true + tree-sitter-python: true + tree-sitter-ruby: true + tree-sitter-rust: true + tree-sitter-typescript: true + '@tree-sitter-grammars/tree-sitter-kotlin': true diff --git a/understand-anything-plugin/skills/understand/emit-dfm-pairs.mjs b/understand-anything-plugin/skills/understand/emit-dfm-pairs.mjs new file mode 100644 index 000000000..74eb3c190 --- /dev/null +++ b/understand-anything-plugin/skills/understand/emit-dfm-pairs.mjs @@ -0,0 +1,65 @@ +#!/usr/bin/env node +/** + * emit-dfm-pairs.mjs — post-merge step for Pascal/Delphi projects. + * + * Pascal forms come in paired .pas + .dfm files (form source + form definition). + * The .dfm carries the design-time component tree; the .pas carries the class + * methods. They are conceptually one artifact and should be linked in the + * knowledge graph with a `related` edge. + * + * This script reads an existing knowledge-graph.json (or assembled-graph.json), + * scans for `file:*.pas` nodes whose filePath is matched by a `file:*.dfm` + * sibling node, and emits `related` edges between them. Idempotent — + * skips pairs that already have an edge. + * + * Usage: + * node emit-dfm-pairs.mjs + */ +import { readFileSync, writeFileSync } from "node:fs"; + +const [, , inputPath, outputPath] = process.argv; +if (!inputPath || !outputPath) { + process.stderr.write("Usage: node emit-dfm-pairs.mjs \n"); + process.exit(1); +} + +const graph = JSON.parse(readFileSync(inputPath, "utf8")); + +// Index nodes by basename (without extension) — case-insensitive, since +// Delphi conventionally uses different casing across .pas and .dfm. +const byBase = new Map(); // base.toLowerCase() -> {pas?: id, dfm?: id} +for (const node of graph.nodes) { + if (node.type !== "file" && node.type !== "config" && node.type !== "document") continue; + const path = node.filePath ?? 
node.id.replace(/^file:/, ""); + const m = path.match(/^(.+?)\.(pas|dfm)$/i); + if (!m) continue; + const base = m[1].toLowerCase(); + const ext = m[2].toLowerCase(); + if (!byBase.has(base)) byBase.set(base, {}); + byBase.get(base)[ext] = node.id; +} + +// Track existing edges so we don't double-emit. +const existing = new Set(); +for (const e of graph.edges) existing.add(`${e.source}|${e.target}|${e.type}`); + +let emitted = 0; +for (const [, pair] of byBase) { + if (!pair.pas || !pair.dfm) continue; + const key1 = `${pair.pas}|${pair.dfm}|related`; + const key2 = `${pair.dfm}|${pair.pas}|related`; + if (existing.has(key1) || existing.has(key2)) continue; + graph.edges.push({ + source: pair.pas, + target: pair.dfm, + type: "related", + direction: "bidirectional", + description: "Pascal unit + DFM form-definition pair (design-time component tree).", + weight: 0.7, + }); + existing.add(key1); + emitted++; +} + +writeFileSync(outputPath, JSON.stringify(graph, null, 2)); +console.log(`Emitted ${emitted} new .pas↔.dfm pair edges. Graph now has ${graph.edges.length} edges total.`); diff --git a/understand-anything-plugin/skills/understand/extract-structure.mjs b/understand-anything-plugin/skills/understand/extract-structure.mjs index 9f08169a2..f24e1d76b 100644 --- a/understand-anything-plugin/skills/understand/extract-structure.mjs +++ b/understand-anything-plugin/skills/understand/extract-structure.mjs @@ -168,7 +168,8 @@ export function buildResult(file, totalLines, nonEmptyLines, analysis, callGraph })); } - // Classes (code files) + // Classes (code files) — include parents/interfaces when present so the + // file-analyzer can emit deterministic inherits/implements edges. 
if (analysis.classes && analysis.classes.length > 0) { base.classes = analysis.classes.map(cls => ({ name: cls.name, @@ -176,6 +177,17 @@ export function buildResult(file, totalLines, nonEmptyLines, analysis, callGraph endLine: cls.lineRange[1], methods: cls.methods || [], properties: cls.properties || [], + ...(cls.parents && cls.parents.length > 0 ? { parents: cls.parents } : {}), + ...(cls.interfaces && cls.interfaces.length > 0 ? { interfaces: cls.interfaces } : {}), + })); + } + + // Imports with optional section tag (Pascal interface vs implementation uses) + if (analysis.imports && analysis.imports.length > 0) { + base.imports = analysis.imports.map(imp => ({ + source: imp.source, + line: imp.lineNumber, + ...(imp.section ? { section: imp.section } : {}), })); } diff --git a/understand-anything-plugin/skills/understand/languages/pascal.md b/understand-anything-plugin/skills/understand/languages/pascal.md new file mode 100644 index 000000000..4e7c0bfae --- /dev/null +++ b/understand-anything-plugin/skills/understand/languages/pascal.md @@ -0,0 +1,56 @@ +# Pascal / Delphi Language Prompt Snippet + +## Key Concepts + +- **Units**: The primary module unit (`unit Foo;`) — file-level container, paired 1:1 with a `.pas` file +- **Interface vs Implementation Sections**: `interface` declares the public API; `implementation` holds the private bodies. Only items declared in `interface` are visible to other units that `uses` this one. +- **Uses Clauses**: `uses A, B, C;` imports other units. There can be one in the interface section (transitively visible) and one in the implementation section (private). Treat both as imports. +- **Classes**: `type TFoo = class(TAncestor) ... end;` — Delphi has single inheritance plus interface implementation. The class declaration appears in a type block. +- **Interfaces**: `type IFoo = interface(IAncestor) ['{GUID}'] ... end;` — abstract contracts, often identified by GUID. 
+- **Published Properties**: Properties in the `published` visibility section get RTTI generated and are persisted in the paired `.dfm` form file. This is the foundation of Delphi's visual form-design + streaming model. +- **Data Modules**: Special form-like containers that group non-visual components (database connections, datasets, providers). Named `dm*` by convention (e.g. `dmCW2.pas` + `dmCW2.dfm`). +- **Form/DFM Pairing**: Every `.pas` unit that declares a `TForm`/`TFrame`/`TDataModule` descendant has a matching `.dfm` text file with the same base name, which declares the design-time component tree. Treat the pair as one logical artifact. +- **RTTI Attributes**: Attributes such as `[MyAttr(42)]` decorate types and methods, similar to .NET attributes or Java annotations. +- **Method Pointers and Anonymous Methods**: `procedure of object` declares a method pointer (code address plus object instance); `reference to procedure` declares an anonymous-method type whose values are written inline as `procedure begin ... end` (Delphi 2009+). +- **Initialization / Finalization**: Unit-scoped setup/teardown blocks; `initialization` sections run at program startup before the main `begin ... end` block, and `finalization` sections run after it ends. +- **With Statement**: `with foo do begin ... end` — opens a scope where `foo`'s members are unqualified. Common in legacy Delphi, often obscures call targets. 
+ +## Import Patterns + +- `uses A, B, C;` — units listed by bare name; the compiler resolves them via search path (current dir, project search paths, library path) +- `uses A.B.C;` — namespaced unit (modern Delphi 2007+) +- Interface-section `uses` is the unit's public dependency +- Implementation-section `uses` is the private dependency +- A `.dpr` (program) file's `uses` lists every unit linked into the executable — the dependency root + +## File Patterns + +- `*.pas` — Pascal source unit (one unit per file) +- `*.dfm` — Form definition (paired with `.pas`); structured text declaring design-time object tree +- `*.dpr` — Program (project) entry point — like `main.c` for the executable +- `*.dpk` — Package source (DLL-equivalent) +- `*.dproj` / `*.bpg` / `*.groupproj` — IDE project / project-group files +- `*.inc` — Include file (preprocessor-style text inclusion via `{$I file.inc}`) +- `dm*.pas` / `dm*.dfm` — Data modules +- `f*.pas` / `f*.dfm` — Form units (legacy convention; modern code often uses `u*Form.pas`) + +## Common Frameworks + +- **VCL** (Visual Component Library) — the canonical Delphi UI framework; `Forms`, `Controls`, `Graphics` units +- **FireMonkey (FMX)** — Cross-platform UI framework, an alternative to VCL +- **DataSnap** — Multi-tier middleware +- **dbExpress / FireDAC** — Database access layers +- **Indy** / **Synapse** — Networking +- **RX / JEDI / RAID** — Third-party component suites + +## Example Language Notes + +> Implements a Delphi data module (`TdmCW2 = class(TDataModule)`) that owns the global ADO +> connection plus dozens of TADOQuery / TADOStoredProc components. Form-streaming in the +> paired `.dfm` configures connection strings, parameter lists, and field definitions at +> design time; runtime code typically just opens the dataset. + +> The `with FOrderItems do begin … end` block on lines 412–478 obscures the call target — +> every bare identifier resolves against `FOrderItems`'s members first. 
When tracing +> calls through this file, treat any unresolved identifier inside a `with` block as a +> potential method call on the `with` target object. diff --git a/understand-anything-plugin/skills/understand/resolve-external-class-refs.mjs b/understand-anything-plugin/skills/understand/resolve-external-class-refs.mjs new file mode 100644 index 000000000..490118edb --- /dev/null +++ b/understand-anything-plugin/skills/understand/resolve-external-class-refs.mjs @@ -0,0 +1,97 @@ +#!/usr/bin/env node +/** + * resolve-external-class-refs.mjs + * + * Post-merge fix for inherits/implements edges that target `class:external:` + * IDs which the file-analyzer agent emits when it can't tell which file declares + * the parent (because the parent lives in a different batch). The merge step + * drops these as "dangling target". This script: + * + * 1. Reads all batch-*.json files in /.understand-anything/intermediate/ + * to recover the original inherits/implements edges (which the merge dropped) + * 2. Reads assembled-graph.json + * 3. Builds a name → node ID map from all `class:*` nodes + * 4. For every batch edge whose target is `class:external:`, if + * matches a class node, rewrite the edge target to that class's actual ID + * and re-add the edge to the assembled graph + * 5. Genuinely-external classes (TForm, IInvokable, TXMLNode, etc.) stay dropped + * + * Usage: node resolve-external-class-refs.mjs + */ +import { readdirSync, readFileSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; + +const projectRoot = process.argv[2]; +if (!projectRoot) { + process.stderr.write("Usage: node resolve-external-class-refs.mjs \n"); + process.exit(1); +} + +const intermediate = join(projectRoot, ".understand-anything", "intermediate"); +const assembledPath = join(intermediate, "assembled-graph.json"); +const graph = JSON.parse(readFileSync(assembledPath, "utf8")); + +// Build name → ID index from class nodes. +// If multiple class nodes share a name (e.g. 
helper records with same name in +// different files), all candidate IDs are kept and the edge is skipped as ambiguous. +const classNodes = graph.nodes.filter((n) => n.type === "class"); +const nameToIds = new Map(); +for (const n of classNodes) { + // Skip placeholder `class:external:` stubs — those exist only because + // some agents emitted them as nodes alongside the edge. They'd create false + // multi-match ambiguity when we look up by name. + if (n.id.startsWith("class:external:")) continue; + // n.id is like `class:fCW2Report.pas:TfmCW2Report` — extract the name suffix. + const m = n.id.match(/^class:[^:]+:(.+)$/); + if (!m) continue; + const name = m[1]; + if (!nameToIds.has(name)) nameToIds.set(name, []); + nameToIds.get(name).push(n.id); +} + +// Walk batch files for the original edges. +const batchFiles = readdirSync(intermediate) + .filter((f) => /^batch-\d+\.json$/.test(f)) + .sort(); + +const existingEdgeKeys = new Set(graph.edges.map((e) => `${e.source}|${e.target}|${e.type}`)); + +let recovered = 0, ambiguousSkipped = 0, stillExternal = 0; +for (const bf of batchFiles) { + const batch = JSON.parse(readFileSync(join(intermediate, bf), "utf8")); + for (const e of batch.edges ?? []) { + if (e.type !== "inherits" && e.type !== "implements") continue; + const m = String(e.target).match(/^class:external:(.+)$/); + if (!m) continue; + const name = m[1]; + const candidates = nameToIds.get(name); + if (!candidates || candidates.length === 0) { + stillExternal++; + continue; + } + // Pick the single candidate; if multiple, skip to avoid wrong wiring. + if (candidates.length > 1) { + ambiguousSkipped++; + continue; + } + const resolvedTarget = candidates[0]; + const key = `${e.source}|${resolvedTarget}|${e.type}`; + if (existingEdgeKeys.has(key)) continue; + graph.edges.push({ + source: e.source, + target: resolvedTarget, + type: e.type, + direction: "forward", + description: e.description ?? `${e.type} edge resolved cross-batch by class name`, + weight: e.weight ?? 
0.9, + }); + existingEdgeKeys.add(key); + recovered++; + } +} + +writeFileSync(assembledPath, JSON.stringify(graph, null, 2)); +console.log( + `Recovered ${recovered} cross-batch inherits/implements edges. ` + + `Skipped: ${ambiguousSkipped} ambiguous, ${stillExternal} genuinely external.`, +); diff --git a/understand-anything-plugin/src/__tests__/merge-recover-imports.test.mjs b/understand-anything-plugin/src/__tests__/merge-recover-imports.test.mjs index a98f0803a..1e2ddb6db 100644 --- a/understand-anything-plugin/src/__tests__/merge-recover-imports.test.mjs +++ b/understand-anything-plugin/src/__tests__/merge-recover-imports.test.mjs @@ -1,4 +1,7 @@ import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { spawnSync as _spawnSyncCheck } from "node:child_process"; + +const python3Available = _spawnSyncCheck("python3", ["--version"], { encoding: "utf-8" }).status === 0; import { spawnSync } from "node:child_process"; import { mkdirSync, mkdtempSync, rmSync, writeFileSync, readFileSync } from "node:fs"; import { tmpdir } from "node:os"; @@ -57,7 +60,7 @@ afterEach(() => { rmSync(projectRoot, { recursive: true, force: true }); }); -describe("merge-batch-graphs.py imports recovery", () => { +describe.skipIf(!python3Available)("merge-batch-graphs.py imports recovery", () => { it("recovers imports edges that batches dropped despite importMap having them", () => { // Batch contains all the file nodes but only emits ONE of three imports edges. 
writeFileSync( diff --git a/understand-anything-plugin/src/__tests__/worktree-redirect.test.mjs b/understand-anything-plugin/src/__tests__/worktree-redirect.test.mjs index f0ffcf31c..e1e64e778 100644 --- a/understand-anything-plugin/src/__tests__/worktree-redirect.test.mjs +++ b/understand-anything-plugin/src/__tests__/worktree-redirect.test.mjs @@ -62,7 +62,9 @@ afterAll(() => { if (tmpRoot) rmSync(tmpRoot, { recursive: true, force: true }); }); -describe("worktree-redirect snippet (issue #133)", () => { +// The snippet runs bash which resolves paths through MSYS on Windows, producing +// /tmp/... paths while Node.js uses C:\...; skip rather than paper over it. +describe.skipIf(process.platform === "win32")("worktree-redirect snippet (issue #133)", () => { it("leaves PROJECT_ROOT alone in a normal checkout", () => { expect(runResolve(mainRepo)).toBe(mainRepo); }); diff --git a/vitest.config.ts b/vitest.config.ts index e009ea317..6a4e7266e 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -1,4 +1,24 @@ import { defineConfig } from 'vitest/config'; +import type { Plugin } from 'vite'; +import { readFileSync, existsSync } from 'node:fs'; + +// Strip shebangs from .mjs files so vitest can import CLI scripts that start +// with `#!/usr/bin/env node` without a SyntaxError. Uses the `load` hook so +// the shebang is removed before Vite ever attempts to parse the file as JS. +function stripShebang(): Plugin { + return { + name: 'strip-shebang', + enforce: 'pre', + load(id: string) { + if (!id.endsWith('.mjs') || !existsSync(id)) return null; + const code = readFileSync(id, 'utf-8'); + if (code.startsWith('#!')) { + return { code: code.replace(/^#![^\r\n]*\r?\n/, '') }; + } + return null; + }, + }; +} // Single-config aggregation for the whole monorepo. 
Picks up: // - tests/** — relocated skill tests (out-of-plugin so they @@ -10,6 +30,7 @@ import { defineConfig } from 'vitest/config'; // invoked separately via `pnpm --filter @understand-anything/core test`; its // files are excluded here to avoid double-counting. export default defineConfig({ + plugins: [stripShebang()], test: { include: [ 'tests/**/*.test.{js,mjs,ts}',