From 3dd23bf7aebab3889369b6b48854cbe277627afb Mon Sep 17 00:00:00 2001 From: Jan Carbonell Date: Sat, 4 Apr 2026 17:14:56 -0600 Subject: [PATCH] feat(scripts): enhance find-unused.sh with deeper cross-referencing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends the unused file detection script with sibling repo CI/CD cross-referencing, expanded infra coverage (build configs, tsconfig, docker-compose, .opencode skills), progress indicator, two-bucket output display, and smart generic filename filtering. Handles standalone openwork without factory layout gracefully — auto-detects whether _repos/ exists and skips sibling checks if not. --- scripts/find-unused.README.md | 29 ++- scripts/find-unused.sh | 390 ++++++++++++++++++++++++++-------- 2 files changed, 327 insertions(+), 92 deletions(-) diff --git a/scripts/find-unused.README.md b/scripts/find-unused.README.md index b6c48b20c..9a42188bd 100644 --- a/scripts/find-unused.README.md +++ b/scripts/find-unused.README.md @@ -1,6 +1,6 @@ # find-unused.sh -Wrapper around [knip](https://knip.dev) that detects unused files and cross-references them against CI configs, package.json scripts, convention-based usage, and file-based routing directories to reduce false positives. +Wrapper around [knip](https://knip.dev) that detects unused files and cross-references them against CI configs, build configs, package.json scripts, tsconfig files, convention-based usage, file-based routing directories, and sibling repo CI/CD pipelines to reduce false positives. ## Usage @@ -10,17 +10,30 @@ bash scripts/find-unused.sh Requires `npx` (knip is fetched automatically). A fake `DATABASE_URL` is injected so config resolution doesn't fail. +The script auto-detects whether it's running inside a factory layout (`../../.._repos/`). When inside a factory, it cross-references sibling repos. When standalone, it skips sibling checks gracefully and still runs all internal checks. + ## What it does 1. **Runs `knip --include files`** to get a list of unused files across the monorepo. -2. **Cross-references** each file against: - - **CI / infra configs** — GitHub workflows, Dockerfiles, Vercel configs, Turbo config, Tauri config - - **package.json scripts** — all workspace `package.json` files +2. **Indexes infra files** — collects all build/config/CI files into a single searchable set: + - GitHub workflow YAMLs + - Dockerfiles and docker-compose files + - Deployment configs (Vercel, Tauri) + - Build tool configs (vite, tailwind, postcss, next, drizzle, tsup, playwright) + - Build scripts (`.mjs`, `.ts`, `.sh` across all workspaces) + - `.opencode` skill scripts + - All `package.json` files (for script references) + - All `tsconfig*.json` files (for path aliases and includes) +3. **Cross-references** each file against: + - **Internal infra** — all indexed config/build files, searching by filename and relative path - **Convention patterns** — filenames like `postinstall`, `drizzle.config`, Tauri hooks - - **File-based routing dirs** — Nuxt/Next server routes and API routes that are entry points by convention -3. **Outputs a sorted list** (oldest first) with two categories: - - `✗` **Likely safe to remove** — no references found anywhere - - `⚠` **Review before removing** — referenced in CI, infra, convention, or routing + - **File-based routing dirs** — Next.js server routes, app routes, and API routes that are entry points by convention + - **Sibling repo CI/CD** — workflows, Dockerfiles, and build scripts in sibling repos and factory-level CI, with smart filtering to avoid false positives on generic filenames (e.g., `index`, `utils`, `config`) +4. **Displays results in two buckets** (oldest first within each): + - `✗` **Safe to remove** — no references found anywhere (red) + - `⚠` **Review before removing** — referenced in infra/CI (yellow) or sibling CI (cyan) + +A progress indicator shows the current file being checked during cross-referencing. Certain paths are ignored entirely (scripts, dev tools) — see the `IGNORE_PREFIXES` array in the script. diff --git a/scripts/find-unused.sh b/scripts/find-unused.sh index 5b68aa1ad..ac25d6a0f 100755 --- a/scripts/find-unused.sh +++ b/scripts/find-unused.sh @@ -2,20 +2,79 @@ set -euo pipefail # ── Config ────────────────────────────────────────────────────────────────── -INFRA_PATHS=( - .github/workflows/ - packaging/docker/ - turbo.json - apps/app/vercel.json - apps/share/vercel.json - ee/apps/den-web/vercel.json - apps/desktop/src-tauri/tauri.conf.json -) +OPENWORK_ROOT="$(cd "$(dirname "$0")/.." && pwd)" + +# Detect whether we're inside a factory layout (../../.. has _repos/) +FACTORY_CANDIDATE="$(cd "$OPENWORK_ROOT/../../.." 2>/dev/null && pwd)" +if [ -d "$FACTORY_CANDIDATE/_repos" ]; then + FACTORY_ROOT="$FACTORY_CANDIDATE" +else + FACTORY_ROOT="" +fi + +# All sibling repos to cross-reference against (empty when outside factory) +SIBLING_REPOS=() +if [ -n "$FACTORY_ROOT" ]; then + for d in "$FACTORY_ROOT"/_repos/*/; do + [ -d "$d" ] || continue + [ "$(cd "$d" && pwd)" = "$OPENWORK_ROOT" ] && continue + SIBLING_REPOS+=("$d") + done +fi -# Globs for package.json scripts across all workspaces -PACKAGE_JSONS=$(find . -name package.json -not -path '*/node_modules/*' -not -path '*/.git/*') +# ── Internal infra: all build/config/CI files that may reference source ──── +# These are files WITHIN openwork that knip can't trace but that use source files +# by convention, config, or build step. +INFRA_GLOBS=( + # CI/CD + ".github/workflows/*.yml" + # Docker + "packaging/docker/Dockerfile*" + "packaging/docker/docker-compose*.yml" + "ee/apps/den-worker-runtime/Dockerfile*" + # Deployment + "apps/app/vercel.json" + "apps/share/vercel.json" + "ee/apps/den-web/vercel.json" + # Tauri + "apps/desktop/src-tauri/tauri.conf.json" + # Monorepo orchestration + "turbo.json" + # Build configs + "apps/app/vite.config.ts" + "apps/app/tailwind.config.ts" + "apps/story-book/vite.config.ts" + "apps/ui-demo/vite.config.ts" + "apps/share/next.config.ts" + "apps/share/playwright.config.ts" + "ee/apps/den-web/next.config.js" + "ee/apps/den-web/postcss.config.js" + "ee/apps/den-web/tailwind.config.js" + "ee/apps/landing/next.config.js" + "ee/apps/landing/postcss.config.js" + "ee/apps/landing/tailwind.config.js" + "ee/packages/den-db/drizzle.config.ts" + "ee/packages/den-db/tsup.config.ts" + "ee/packages/utils/tsup.config.ts" + "packages/ui/tsup.config.ts" + # Build scripts (all) + "scripts/*.mjs" + "scripts/*.ts" + "scripts/*.sh" + "scripts/**/*.mjs" + "scripts/**/*.ts" + "scripts/**/*.sh" + "apps/*/scripts/*.mjs" + "apps/*/scripts/*.ts" + "apps/*/scripts/*.sh" + "ee/apps/*/scripts/*.mjs" + "ee/apps/*/scripts/*.sh" + # .opencode skills/commands that may invoke source + ".opencode/skills/*/scripts/*.sh" + ".opencode/skills/*/*.sh" +) -# Files that are used by convention (framework/tool magic), not imports +# Files used by convention (framework/tool magic), not imports CONVENTION_PATTERNS=( "postinstall" "drizzle.config" @@ -26,11 +85,12 @@ CONVENTION_PATTERNS=( # File-based routing directories — files here are entry points by convention ROUTING_DIRS=( "apps/share/server/" + "apps/share/app/" "ee/apps/den-web/app/" - "ee/apps/landing/app/api/" + "ee/apps/landing/app/" ) -# Paths to ignore entirely (scripts, dev tools, etc.) +# Paths to ignore entirely IGNORE_PREFIXES=( "apps/app/scripts/" "apps/desktop/scripts/" @@ -42,11 +102,109 @@ IGNORE_PREFIXES=( RED='\033[0;31m' YELLOW='\033[0;33m' GREEN='\033[0;32m' +CYAN='\033[0;36m' DIM='\033[2m' BOLD='\033[1m' RESET='\033[0m' +# ── Helpers ───────────────────────────────────────────────────────────────── + +# Collect all internal infra files once +collect_infra_files() { + local files="" + for glob_pattern in "${INFRA_GLOBS[@]}"; do + # Use find-based expansion to handle globs + local matched + matched=$(find "$OPENWORK_ROOT" -path "$OPENWORK_ROOT/$glob_pattern" 2>/dev/null || true) + if [ -n "$matched" ]; then + files="${files}${matched}"$'\n' + fi + done + # Also add all package.json files (for script references) + local pkg_jsons + pkg_jsons=$(find "$OPENWORK_ROOT" -name package.json -not -path '*/node_modules/*' -not -path '*/.git/*') + files="${files}${pkg_jsons}"$'\n' + # And all tsconfig*.json files (for path aliases / includes) + local tsconfigs + tsconfigs=$(find "$OPENWORK_ROOT" -name 'tsconfig*.json' -not -path '*/node_modules/*' -not -path '*/.git/*') + files="${files}${tsconfigs}"$'\n' + echo "$files" | sed '/^$/d' | sort -u +} + +# Search infra files for a pattern +search_infra() { + local pattern="$1" + echo "$INFRA_FILES" | xargs grep -l "$pattern" 2>/dev/null || true +} + +# Search sibling repo CI/CD and build scripts for an openwork-relative path. +# Only checks infra files (workflows, Dockerfiles, build scripts), NOT source code, +# since no sibling repo has an npm dependency on openwork packages. +search_sibling_ci() { + local pattern="$1" + local hits="" + for repo in "${SIBLING_REPOS[@]}"; do + for ci_dir in ".github/workflows" "packaging" "containers" "infra" ".circleci" "script" "scripts"; do + [ -d "${repo}${ci_dir}" ] || continue + local ci_hits + ci_hits=$(grep -rl "$pattern" "${repo}${ci_dir}" 2>/dev/null || true) + if [ -n "$ci_hits" ]; then + hits="${hits}${ci_hits}"$'\n' + fi + done + # Dockerfiles anywhere in the repo + local docker_hits + docker_hits=$(find "$repo" -name 'Dockerfile*' -not -path '*/node_modules/*' -not -path '*/.git/*' \ + -exec grep -l "$pattern" {} + 2>/dev/null || true) + if [ -n "$docker_hits" ]; then + hits="${hits}${docker_hits}"$'\n' + fi + done + # Factory-level CI + [ -n "$FACTORY_ROOT" ] && [ -d "$FACTORY_ROOT/.github" ] && { + local factory_ci + factory_ci=$(grep -rl "$pattern" "$FACTORY_ROOT/.github" 2>/dev/null || true) + [ -n "$factory_ci" ] && hits="${hits}${factory_ci}"$'\n' + } + echo "$hits" | sed '/^$/d' +} + +# Format refs: show short paths, max 3 entries +format_refs() { + local refs="$1" + local formatted="" + local count=0 + local total + total=$(echo "$refs" | grep -c '.' || true) + + while IFS= read -r ref; do + [ -z "$ref" ] && continue + count=$((count + 1)) + [ $count -gt 3 ] && continue + + local short="$ref" + if [[ "$ref" == "$OPENWORK_ROOT/"* ]]; then + short="${ref#"$OPENWORK_ROOT/"}" + elif [[ "$ref" == *"/_repos/"* ]]; then + short=$(echo "$ref" | sed "s|.*/_repos/||") + elif [[ "$ref" == "$FACTORY_ROOT/"* ]]; then + short="${ref#"$FACTORY_ROOT/"}" + fi + + [ -n "$formatted" ] && formatted="${formatted}, " + formatted="${formatted}${short}" + done <<< "$refs" + + local remaining=$((total - 3)) + if [ "$remaining" -gt 0 ] 2>/dev/null; then + formatted="${formatted} (+${remaining} more)" + fi + + echo "$formatted" +} + # ── Step 1: Run knip ─────────────────────────────────────────────────────── +cd "$OPENWORK_ROOT" echo -e "${BOLD}Running knip to detect unused files...${RESET}" KNIP_OUTPUT=$(DATABASE_URL=mysql://fake:fake@localhost/fake npx knip --include files --no-progress --no-config-hints 2>&1 || true) @@ -74,39 +232,46 @@ fi echo -e "Found ${BOLD}${#UNUSED_FILES[@]}${RESET} unused files. Cross-referencing...\n" -# ── Step 2: Cross-reference each file ────────────────────────────────────── -declare -A FILE_STATUS # "safe" | "ci" | "convention" | "routing" +# ── Step 2: Build infra file list once ───────────────────────────────────── +echo -e "${DIM} Indexing infra files...${RESET}" >&2 +INFRA_FILES=$(collect_infra_files) +INFRA_FILE_COUNT=$(echo "$INFRA_FILES" | wc -l | tr -d ' ') +echo -e "${DIM} Found ${INFRA_FILE_COUNT} infra/config files to check against.${RESET}" >&2 +if [ -n "$FACTORY_ROOT" ]; then + echo -e "${DIM} Checking ${#SIBLING_REPOS[@]} sibling repos CI/CD pipelines...${RESET}\n" >&2 +else + echo -e "${DIM} No factory layout detected — skipping sibling repo checks.${RESET}\n" >&2 +fi + +# ── Step 3: Cross-reference each file ────────────────────────────────────── +declare -A FILE_STATUS # "safe" | "infra" | "convention" | "routing" | "sibling_ci" declare -A FILE_REFS declare -A FILE_DATES +total=${#UNUSED_FILES[@]} +i=0 + for filepath in "${UNUSED_FILES[@]}"; do + i=$((i + 1)) + printf "\r${DIM} [%d/%d] %s${RESET}%*s" "$i" "$total" "$filepath" 20 "" >&2 + name=$(basename "$filepath") + stem="${name%.*}" status="safe" refs="" - # Check CI/infra configs - existing_paths=() - for p in "${INFRA_PATHS[@]}"; do - [ -e "$p" ] && existing_paths+=("$p") - done - if [ ${#existing_paths[@]} -gt 0 ]; then - ci_hits=$(grep -rl "$name" "${existing_paths[@]}" 2>/dev/null || true) - if [ -n "$ci_hits" ]; then - status="ci" - refs="$ci_hits" - fi + # ── Check 1: Internal infra (CI, Docker, build scripts, configs, tsconfigs, package.jsons) ── + infra_hits=$(search_infra "$name") + # Also try the relative path for more specific matches in CI workflows + if [ -z "$infra_hits" ]; then + infra_hits=$(search_infra "$filepath") fi - - # Check package.json scripts - if [ "$status" = "safe" ]; then - pkg_hits=$(echo "$PACKAGE_JSONS" | xargs grep -l "$name" 2>/dev/null || true) - if [ -n "$pkg_hits" ]; then - status="ci" - refs="$pkg_hits" - fi + if [ -n "$infra_hits" ]; then + status="infra" + refs="$infra_hits" fi - # Check convention-based usage + # ── Check 2: Convention-based usage ── if [ "$status" = "safe" ]; then for pat in "${CONVENTION_PATTERNS[@]}"; do if [[ "$name" == *"$pat"* ]]; then @@ -117,7 +282,7 @@ for filepath in "${UNUSED_FILES[@]}"; do done fi - # Check file-based routing dirs + # ── Check 3: File-based routing dirs ── if [ "$status" = "safe" ]; then for dir in "${ROUTING_DIRS[@]}"; do if [[ "$filepath" == "$dir"* ]]; then @@ -128,6 +293,32 @@ for filepath in "${UNUSED_FILES[@]}"; do done fi + # ── Check 4: Sibling repo CI/CD references ── + # Only search by the openwork-relative path (precise) or unique filename. + # Since no sibling repo has npm deps on openwork packages, we only check + # CI/CD and build scripts for direct path references. + if [ "$status" = "safe" ]; then + sibling_hits=$(search_sibling_ci "$filepath") + if [ -z "$sibling_hits" ]; then + # Try filename only if it's specific enough (not a generic name) + case "$stem" in + index|utils|helpers|types|config|constants|schema|paths|extensions|sessions|\ + system|context|sync|state|card|server|client|app|lib|store|api|auth|data|\ + error|events|hooks|theme|styles|test|main|shared|common|base|core|model|\ + service|provider|route|router|handler|middleware|plugin|loader|init|setup|\ + health|status|log|logger|build|dev|start|run|layout|page|screen) + ;; # skip generic names + *) + sibling_hits=$(search_sibling_ci "$name") + ;; + esac + fi + if [ -n "$sibling_hits" ]; then + status="sibling_ci" + refs="$sibling_hits" + fi + fi + # Get last commit date last_date=$(git log -1 --format="%aI" -- "$filepath" 2>/dev/null || echo "unknown") @@ -136,57 +327,88 @@ for filepath in "${UNUSED_FILES[@]}"; do FILE_DATES["$filepath"]="$last_date" done -# ── Step 3: Sort by date and display ─────────────────────────────────────── -sorted_files=() -while IFS= read -r line; do - sorted_files+=("$line") -done < <( - for filepath in "${UNUSED_FILES[@]}"; do - echo "${FILE_DATES[$filepath]}|$filepath" - done | sort -) +printf "\r%*s\r" 120 "" >&2 + +# ── Step 4: Split into two buckets, sort by date ────────────────────────── +safe_entries=() +flagged_entries=() -safe_count=0 -flagged_count=0 +for filepath in "${UNUSED_FILES[@]}"; do + entry="${FILE_DATES[$filepath]}|$filepath" + if [ "${FILE_STATUS[$filepath]}" = "safe" ]; then + safe_entries+=("$entry") + else + flagged_entries+=("$entry") + fi +done + +IFS=$'\n' safe_sorted=($(printf '%s\n' "${safe_entries[@]}" 2>/dev/null | sort)); unset IFS +IFS=$'\n' flagged_sorted=($(printf '%s\n' "${flagged_entries[@]}" 2>/dev/null | sort)); unset IFS + +safe_count=${#safe_sorted[@]} +flagged_count=${#flagged_sorted[@]} + +# ── Step 5: Display ─────────────────────────────────────────────────────── echo -e "${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}" -echo -e "${BOLD} UNUSED FILES (oldest first)${RESET}" +echo -e "${RED}${BOLD} UNUSED — safe to remove (${safe_count})${RESET}" echo -e "${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}" +echo -e "${DIM} No imports in openwork source, no references in CI/CD, build${RESET}" +echo -e "${DIM} scripts, configs, conventions, routing, or sibling repo pipelines.${RESET}" +echo "" -for entry in "${sorted_files[@]}"; do - date="${entry%%|*}" - filepath="${entry#*|}" - status="${FILE_STATUS[$filepath]}" - refs="${FILE_REFS[$filepath]}" - short_date="${date%%T*}" - - case "$status" in - safe) - echo -e "${RED} ✗ ${RESET}${DIM}${short_date}${RESET} ./$filepath:1" - safe_count=$((safe_count + 1)) - ;; - ci) - echo -e "${YELLOW} ⚠ ${RESET}${DIM}${short_date}${RESET} ./$filepath:1" - echo -e " ${DIM}→ referenced in: $(echo "$refs" | tr '\n' ', ' | sed 's/,$//')${RESET}" - flagged_count=$((flagged_count + 1)) - ;; - convention) - echo -e "${YELLOW} ⚠ ${RESET}${DIM}${short_date}${RESET} ./$filepath:1" - echo -e " ${DIM}→ $refs${RESET}" - flagged_count=$((flagged_count + 1)) - ;; - routing) - echo -e "${YELLOW} ⚠ ${RESET}${DIM}${short_date}${RESET} ./$filepath:1" - echo -e " ${DIM}→ $refs${RESET}" - flagged_count=$((flagged_count + 1)) - ;; - esac -done +if [ "$safe_count" -eq 0 ]; then + echo -e " ${GREEN}None — all files have references somewhere.${RESET}" +else + for entry in "${safe_sorted[@]}"; do + date="${entry%%|*}" + filepath="${entry#*|}" + short_date="${date%%T*}" + echo -e " ${RED}✗${RESET} ${DIM}${short_date}${RESET} ./$filepath:1" + done +fi + +echo "" +echo -e "${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}" +echo -e "${YELLOW}${BOLD} REVIEW — referenced in infra/CI (${flagged_count})${RESET}" +echo -e "${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}" +echo -e "${DIM} Not imported in source, but referenced by build/CI/config/routing.${RESET}" +echo -e "${DIM} Verify the reference is real before removing.${RESET}" +echo "" +if [ "$flagged_count" -eq 0 ]; then + echo -e " ${GREEN}None.${RESET}" +else + for entry in "${flagged_sorted[@]}"; do + date="${entry%%|*}" + filepath="${entry#*|}" + status="${FILE_STATUS[$filepath]}" + refs="${FILE_REFS[$filepath]}" + short_date="${date%%T*}" + formatted_refs=$(format_refs "$refs") + + case "$status" in + infra) + echo -e " ${YELLOW}⚠${RESET} ${DIM}${short_date}${RESET} ./$filepath:1" + echo -e " ${DIM}↳ ${formatted_refs}${RESET}" + ;; + convention) + echo -e " ${YELLOW}⚠${RESET} ${DIM}${short_date}${RESET} ./$filepath:1" + echo -e " ${DIM}↳ ${refs}${RESET}" + ;; + routing) + echo -e " ${YELLOW}⚠${RESET} ${DIM}${short_date}${RESET} ./$filepath:1" + echo -e " ${DIM}↳ ${refs}${RESET}" + ;; + sibling_ci) + echo -e " ${CYAN}⚠${RESET} ${DIM}${short_date}${RESET} ./$filepath:1" + echo -e " ${DIM}↳ sibling CI: ${formatted_refs}${RESET}" + ;; + esac + done +fi + +echo "" +echo -e "${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}" +echo -e "${BOLD}Summary:${RESET} ${RED}${safe_count} removable${RESET} │ ${YELLOW}${flagged_count} need review${RESET} │ ${#UNUSED_FILES[@]} total" echo -e "${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}" -echo -e "" -echo -e "${BOLD}Legend:${RESET}" -echo -e " ${RED}✗${RESET} Likely safe to remove (no references found)" -echo -e " ${YELLOW}⚠${RESET} Review before removing (referenced in CI/infra/convention/routing)" -echo -e "" -echo -e "${BOLD}Summary:${RESET} ${RED}${safe_count} likely removable${RESET} │ ${YELLOW}${flagged_count} need review${RESET} │ ${#UNUSED_FILES[@]} total"