From bfe10eed8d1853705a73c59290cd142e0074690a Mon Sep 17 00:00:00 2001 From: JOhnsonKC201 Date: Wed, 24 Jun 2026 17:56:17 -0700 Subject: [PATCH 1/4] fix: preserve real prefix when middle ID segment is a reserved word stripToValidPrefix collapsed any ID whose second segment was a valid prefix, treating e.g. "endpoint:service:x" as a double-prefix and returning "service:x". This dropped the real outer prefix, corrupting the node type and breaking edge references and idempotency. Only collapse a true same-prefix duplicate (e.g. "file:file:...") by requiring the inner segment to equal the outer prefix. A different reserved word in the middle is a legitimate path segment and is kept. Adds regression tests covering the middle-reserved-word case and idempotency. --- .../core/src/__tests__/normalize-graph.test.ts | 16 ++++++++++++++++ .../core/src/analyzer/normalize-graph.ts | 7 +++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts b/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts index fa150c282..46c4e4c4b 100644 --- a/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts +++ b/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts @@ -31,6 +31,22 @@ describe("normalizeNodeId", () => { ).toBe("file:src/foo.ts"); }); + it("keeps a real prefix when a different reserved word is a middle segment", () => { + // Regression: "endpoint:service:x" is a valid prefix followed by a real + // path segment that happens to be a reserved word. The outer "endpoint" + // prefix must be preserved, not dropped in favour of "service". 
+ expect( + normalizeNodeId("endpoint:service:getUser", { type: "endpoint" }), + ).toBe("endpoint:service:getUser"); + }); + + it("is idempotent for IDs whose middle segment is a reserved word", () => { + const once = normalizeNodeId("endpoint:service:getUser", { + type: "endpoint", + }); + expect(normalizeNodeId(once, { type: "endpoint" })).toBe(once); + }); + it("strips project-name prefix when valid prefix follows", () => { expect( normalizeNodeId("my-project:file:src/foo.ts", { type: "file" }), diff --git a/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts b/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts index 594042846..44910b2a7 100644 --- a/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts +++ b/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts @@ -38,10 +38,13 @@ function stripToValidPrefix(id: string): { prefix: string | null; path: string } const segment = remaining.slice(0, colonIdx); if (VALID_PREFIXES.has(segment)) { - // Check for double valid prefix (e.g., "file:file:src/foo.ts") + // Check for a true duplicate prefix (e.g., "file:file:src/foo.ts"). + // Only collapse when the next segment is the SAME prefix — a different + // reserved word in the middle (e.g. "endpoint:service:x") is a real + // path segment, not a duplicate, and must not be stripped. 
const rest = remaining.slice(colonIdx + 1); const innerColonIdx = rest.indexOf(":"); - if (innerColonIdx > 0 && VALID_PREFIXES.has(rest.slice(0, innerColonIdx))) { + if (innerColonIdx > 0 && rest.slice(0, innerColonIdx) === segment) { // Double-prefixed — skip the outer, recurse on inner remaining = rest; continue; From 6d54111a8badf53cdfd277b3d2b745122bff183f Mon Sep 17 00:00:00 2001 From: JOhnsonKC201 Date: Sat, 27 Jun 2026 08:59:23 -0700 Subject: [PATCH 2/4] fix: use expected prefix when collapsing reserved-word prefixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fix collapsed a duplicate prefix only when the inner segment matched the outer one (`inner === segment`). That dropped a spurious project-name prefix only when it was an exact duplicate, so an ID like `service:file:src/foo.ts` for a `file` node — where the project name happens to collide with the reserved word `service` — kept the outer `service:` prefix instead of resolving to the canonical `file:src/foo.ts`. Edges referencing `file:src/foo.ts` then dangled. Thread the node's expected prefix into stripToValidPrefix and collapse when the inner segment is either the same reserved word (true duplicate) or the expected prefix (spurious reserved-word project prefix). A different reserved word that is not the expected prefix (`endpoint:service:x`) is still preserved as a real path segment. Adds two regression tests covering the strip + idempotency cases. 
--- .../src/__tests__/normalize-graph.test.ts | 16 +++++++++ .../core/src/analyzer/normalize-graph.ts | 33 ++++++++++++++----- 2 files changed, 41 insertions(+), 8 deletions(-) diff --git a/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts b/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts index 46c4e4c4b..089d1552a 100644 --- a/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts +++ b/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts @@ -47,6 +47,22 @@ describe("normalizeNodeId", () => { expect(normalizeNodeId(once, { type: "endpoint" })).toBe(once); }); + it("strips a project-name prefix that collides with a reserved word", () => { + // Regression: when the project name is itself a reserved word ("service") + // and the node's real prefix follows ("file"), the spurious outer prefix + // must be dropped so the canonical "file:src/foo.ts" form is used — not + // left as "service:file:src/foo.ts", which would dangle edges that + // reference the canonical ID. 
+ expect( + normalizeNodeId("service:file:src/foo.ts", { type: "file" }), + ).toBe("file:src/foo.ts"); + }); + + it("is idempotent when a reserved-word project prefix is stripped", () => { + const once = normalizeNodeId("service:file:src/foo.ts", { type: "file" }); + expect(normalizeNodeId(once, { type: "file" })).toBe(once); + }); + it("strips project-name prefix when valid prefix follows", () => { expect( normalizeNodeId("my-project:file:src/foo.ts", { type: "file" }), diff --git a/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts b/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts index 44910b2a7..8e2f30cfe 100644 --- a/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts +++ b/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts @@ -27,8 +27,17 @@ const TYPE_TO_PREFIX: Record = { /** * Strips all non-valid prefixes from an ID, returning the bare path * and the first valid prefix found (if any). + * + * `expectedPrefix` is the canonical prefix for the node's declared type + * (e.g. "file" for a file node). It disambiguates a reserved word that + * appears before the expected prefix — a spurious project-name prefix that + * happens to collide with a reserved word — from a reserved word that is a + * legitimate middle path segment. */ -function stripToValidPrefix(id: string): { prefix: string | null; path: string } { +function stripToValidPrefix( + id: string, + expectedPrefix?: string, +): { prefix: string | null; path: string } { let remaining = id; // Peel off colon-separated segments until we find a valid prefix or run out @@ -38,14 +47,22 @@ function stripToValidPrefix(id: string): { prefix: string | null; path: string } const segment = remaining.slice(0, colonIdx); if (VALID_PREFIXES.has(segment)) { - // Check for a true duplicate prefix (e.g., "file:file:src/foo.ts"). 
- // Only collapse when the next segment is the SAME prefix — a different - // reserved word in the middle (e.g. "endpoint:service:x") is a real - // path segment, not a duplicate, and must not be stripped. + // Collapse the outer prefix only when the next segment is either: + // - the SAME reserved word — a true duplicate ("file:file:src/foo.ts"), or + // - the node's expected prefix — a spurious project-name prefix that + // collides with a reserved word ("service:file:src/foo.ts" for a file + // node), which must resolve to the canonical "file:src/foo.ts". + // A different reserved word that is NOT the expected prefix + // ("endpoint:service:x" for an endpoint node) is a real path segment and + // must be preserved. const rest = remaining.slice(colonIdx + 1); const innerColonIdx = rest.indexOf(":"); - if (innerColonIdx > 0 && rest.slice(0, innerColonIdx) === segment) { - // Double-prefixed — skip the outer, recurse on inner + const innerSegment = innerColonIdx > 0 ? rest.slice(0, innerColonIdx) : ""; + if ( + innerColonIdx > 0 && + (innerSegment === segment || innerSegment === expectedPrefix) + ) { + // Skip the outer prefix, recurse on the inner one remaining = rest; continue; } @@ -72,7 +89,7 @@ export function normalizeNodeId( if (!trimmed) return trimmed; const expectedPrefix = TYPE_TO_PREFIX[node.type]; - const { prefix, path } = stripToValidPrefix(trimmed); + const { prefix, path } = stripToValidPrefix(trimmed, expectedPrefix); if (prefix) { // For step nodes with filePath, reconstruct as step:flowSlug:filePath:stepSlug. From 0a0ffd233c8f5a2044b3c91940b21291bee3ce57 Mon Sep 17 00:00:00 2001 From: JOhnsonKC201 Date: Sat, 27 Jun 2026 20:42:13 -0700 Subject: [PATCH 3/4] fix: resolve reserved-word project prefixes in edge endpoint repair MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The node-level fix did not cover normalizeBatchOutput's edge-repair fallback. 
When an edge endpoint isn't found in idMap, the fallback infers its type from the endpoint's own prefix via inferTypeFromId. For a reserved-word project prefix (e.g. edge `service:file:src/foo.ts` pointing at node `file:src/foo.ts`), that yields type `service`, so normalizeNodeId keeps the spurious `service:` segment, the endpoint never matches the canonical node, and the edge is dropped as dangling. Add resolveEdgeEndpoint: try the prefix-inferred type first (unchanged common case), then each subsequent leading reserved-word segment as a candidate type, returning the first that resolves to an existing node — and the original id when none do. Add a regression test asserting the edge is repaired rather than dropped. 758/758 core tests pass. --- .../src/__tests__/normalize-graph.test.ts | 42 ++++++++++++++++ .../core/src/analyzer/normalize-graph.ts | 48 +++++++++++++++---- 2 files changed, 81 insertions(+), 9 deletions(-) diff --git a/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts b/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts index 089d1552a..bffbcecc7 100644 --- a/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts +++ b/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts @@ -470,6 +470,48 @@ describe("normalizeBatchOutput", () => { expect(result.edges[0].source).toBe("file:src/bare.ts"); expect(result.edges[0].target).toBe("file:src/target.ts"); }); + + it("repairs an edge endpoint whose project prefix collides with a reserved word", () => { + // Regression: an edge endpoint "service:file:src/foo.ts" refers to the + // canonical node "file:src/foo.ts", but inferTypeFromId reads the spurious + // reserved-word project prefix "service" as the type. The fallback must + // still resolve it to the existing node rather than drop the edge. 
+ const result = normalizeBatchOutput({ + nodes: [ + { + id: "file:src/foo.ts", + type: "file", + name: "foo.ts", + filePath: "src/foo.ts", + summary: "Target", + tags: [], + complexity: "simple", + }, + { + id: "file:src/bar.ts", + type: "file", + name: "bar.ts", + filePath: "src/bar.ts", + summary: "Source", + tags: [], + complexity: "simple", + }, + ], + edges: [ + { + source: "file:src/bar.ts", + target: "service:file:src/foo.ts", + type: "imports", + direction: "forward", + weight: 0.7, + }, + ], + }); + + expect(result.edges).toHaveLength(1); + expect(result.edges[0].target).toBe("file:src/foo.ts"); + expect(result.stats.danglingEdgesDropped).toBe(0); + }); }); describe("normalizeBatchOutput integration", () => { diff --git a/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts b/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts index 8e2f30cfe..afc9b6065 100644 --- a/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts +++ b/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts @@ -211,6 +211,40 @@ function inferTypeFromId(id: string): string { return "file"; } +/** + * Best-effort repair of an edge endpoint that matches no node ID. + * + * Tries the prefix-inferred type first (preserving the common case), then + * each subsequent leading reserved-word segment as a candidate type. This + * recovers a reserved-word project prefix — e.g. an edge endpoint + * `service:file:src/foo.ts` pointing at the canonical node `file:src/foo.ts`, + * where `inferTypeFromId` would treat the spurious `service` as the type and + * fail to strip it, leaving the edge dangling. Returns the original id + * unchanged when nothing resolves to an existing node. 
+ */ +function resolveEdgeEndpoint(id: string, validNodeIds: Set<string>): string { + const candidateTypes: string[] = [inferTypeFromId(id)]; + + // Add each leading valid-prefix segment's type as an additional candidate, + // so a spurious outer reserved word can be skipped in favour of the real one. + let rest = id; + while (true) { + const colonIdx = rest.indexOf(":"); + if (colonIdx <= 0) break; + const segment = rest.slice(0, colonIdx); + if (!(segment in PREFIX_TO_TYPE)) break; + const type = PREFIX_TO_TYPE[segment]; + if (!candidateTypes.includes(type)) candidateTypes.push(type); + rest = rest.slice(colonIdx + 1); + } + + for (const type of candidateTypes) { + const normalized = normalizeNodeId(id, { type }); + if (validNodeIds.has(normalized)) return normalized; + } + return id; +} + /** * Normalizes a merged batch output: fixes node IDs and numeric complexity, * rewrites edge references, deduplicates nodes and edges, and drops dangling edges. @@ -300,18 +334,14 @@ export function normalizeBatchOutput(data: { let newSource = idMap.get(oldSource) ?? oldSource; let newTarget = idMap.get(oldTarget) ?? oldTarget; - // Fallback: if endpoint not found in idMap, normalize it directly - // (handles cross-variant malformed IDs between nodes and edges). - // Try the edge's implied type first (from prefix), then fall back to "file". + // Fallback: if an endpoint isn't found in idMap, repair it directly + // (handles cross-variant malformed IDs between nodes and edges, including + // reserved-word project prefixes that inferTypeFromId alone can't resolve). 
if (!validNodeIds.has(newSource)) { - const inferredType = inferTypeFromId(newSource); - const normalized = normalizeNodeId(newSource, { type: inferredType }); - if (validNodeIds.has(normalized)) newSource = normalized; + newSource = resolveEdgeEndpoint(newSource, validNodeIds); } if (!validNodeIds.has(newTarget)) { - const inferredType = inferTypeFromId(newTarget); - const normalized = normalizeNodeId(newTarget, { type: inferredType }); - if (validNodeIds.has(normalized)) newTarget = normalized; + newTarget = resolveEdgeEndpoint(newTarget, validNodeIds); } if (newSource !== oldSource || newTarget !== oldTarget) { From 67c6f19707f0463b00076b894de914e2aa97a080 Mon Sep 17 00:00:00 2001 From: Johnson K C Date: Tue, 30 Jun 2026 11:05:54 -0700 Subject: [PATCH 4/4] fix: normalize from candidate prefix segment in edge endpoint repair MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit resolveEdgeEndpoint tried every candidate type against the full edge endpoint id. For a chain of reserved prefixes before the real one (e.g. `service:endpoint:file:src/foo.ts` pointing at node `file:src/foo.ts`), stripToValidPrefix can't collapse the run for the `file` candidate — `service` then `endpoint` are preserved — so no candidate resolves and the edge is dropped as dangling. Pair each candidate type with the id suffix that begins at its prefix segment, and normalize from that suffix instead of the full id. The inferred-type/full-id attempt is kept first so the common and single reserved-word prefix cases are unchanged. Add a regression test for the multi-prefix chain. 759/759 core tests pass. 
--- .../src/__tests__/normalize-graph.test.ts | 43 +++++++++++++++++++ .../core/src/analyzer/normalize-graph.ts | 37 ++++++++++------ 2 files changed, 66 insertions(+), 14 deletions(-) diff --git a/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts b/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts index bffbcecc7..6e45a45ea 100644 --- a/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts +++ b/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts @@ -512,6 +512,49 @@ describe("normalizeBatchOutput", () => { expect(result.edges[0].target).toBe("file:src/foo.ts"); expect(result.stats.danglingEdgesDropped).toBe(0); }); + + it("repairs an edge endpoint with a chain of reserved-word prefixes", () => { + // Regression: an edge endpoint "service:endpoint:file:src/foo.ts" carries + // more than one reserved prefix before the real "file" prefix. Normalizing + // the full id for each candidate type can't collapse the run, so the repair + // must normalize from the candidate prefix segment to resolve it to the + // canonical node "file:src/foo.ts" rather than drop the edge. 
+ const result = normalizeBatchOutput({ + nodes: [ + { + id: "file:src/foo.ts", + type: "file", + name: "foo.ts", + filePath: "src/foo.ts", + summary: "Target", + tags: [], + complexity: "simple", + }, + { + id: "file:src/bar.ts", + type: "file", + name: "bar.ts", + filePath: "src/bar.ts", + summary: "Source", + tags: [], + complexity: "simple", + }, + ], + edges: [ + { + source: "file:src/bar.ts", + target: "service:endpoint:file:src/foo.ts", + type: "imports", + direction: "forward", + weight: 0.7, + }, + ], + }); + + expect(result.edges).toHaveLength(1); + expect(result.edges[0].target).toBe("file:src/foo.ts"); + expect(result.stats.danglingEdgesDropped).toBe(0); + }); }); describe("normalizeBatchOutput integration", () => { diff --git a/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts b/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts index afc9b6065..3a2ea7f64 100644 --- a/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts +++ b/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts @@ -214,32 +214,41 @@ function inferTypeFromId(id: string): string { /** * Best-effort repair of an edge endpoint that matches no node ID. * - * Tries the prefix-inferred type first (preserving the common case), then - * each subsequent leading reserved-word segment as a candidate type. This - * recovers a reserved-word project prefix — e.g. an edge endpoint - * `service:file:src/foo.ts` pointing at the canonical node `file:src/foo.ts`, - * where `inferTypeFromId` would treat the spurious `service` as the type and - * fail to strip it, leaving the edge dangling. Returns the original id - * unchanged when nothing resolves to an existing node. + * Tries the prefix-inferred type against the full id first (preserving the + * common case), then peels each leading reserved-word segment and normalizes + * from the suffix that begins at it. This recovers a reserved-word project + * prefix — e.g. 
an edge endpoint `service:file:src/foo.ts` pointing at the + * canonical node `file:src/foo.ts`, where `inferTypeFromId` would treat the + * spurious `service` as the type and fail to strip it. Normalizing from the + * candidate segment (rather than always from the full id) also handles a + * *chain* of reserved prefixes, e.g. `service:endpoint:file:src/foo.ts`, where + * `stripToValidPrefix` can't collapse the run for the `file` candidate and + * every full-id attempt would otherwise leave the edge dangling. Returns the + * original id unchanged when nothing resolves to an existing node. */ function resolveEdgeEndpoint(id: string, validNodeIds: Set<string>): string { - const candidateTypes: string[] = [inferTypeFromId(id)]; + // Each candidate pairs a node type with the id suffix to normalize from. + // The first preserves the common case (inferred type, full id); each peeled + // segment then offers its real prefix and the substring that starts there. + const candidates: { type: string; fromId: string }[] = [ + { type: inferTypeFromId(id), fromId: id }, + ]; - // Add each leading valid-prefix segment's type as an additional candidate, - // so a spurious outer reserved word can be skipped in favour of the real one. let rest = id; while (true) { const colonIdx = rest.indexOf(":"); if (colonIdx <= 0) break; const segment = rest.slice(0, colonIdx); if (!(segment in PREFIX_TO_TYPE)) break; - const type = PREFIX_TO_TYPE[segment]; - if (!candidateTypes.includes(type)) candidateTypes.push(type); rest = rest.slice(colonIdx + 1); + const type = PREFIX_TO_TYPE[segment]; + if (!candidates.some((c) => c.type === type && c.fromId === rest)) { + candidates.push({ type, fromId: rest }); + } } - for (const type of candidateTypes) { - const normalized = normalizeNodeId(id, { type }); + for (const { type, fromId } of candidates) { + const normalized = normalizeNodeId(fromId, { type }); if (validNodeIds.has(normalized)) return normalized; } return id;