diff --git a/.github/workflows/self-improvement.yml b/.github/workflows/self-improvement.yml index 73bfc20..4d4a796 100644 --- a/.github/workflows/self-improvement.yml +++ b/.github/workflows/self-improvement.yml @@ -42,7 +42,7 @@ jobs: echo "AI Pattern Count:" >> metrics-baseline.txt grep -c -i "stands as\|testament to\|crucial\|pivotal\|vibrant\|showcasing" SKILL.md SKILL_PROFESSIONAL.md QWEN.md >> metrics-baseline.txt || echo "0" >> metrics-baseline.txt - - name: Gather repository intelligence + - name: Gather repository intelligence and decision support run: | node scripts/gather-repo-data.js edithatogo/humanizer-next blader/humanizer node scripts/render-self-improvement-issue.js @@ -65,6 +65,7 @@ jobs: path: | conductor/tracks/repo-self-improvement_20260303/repo-data.json .github/generated/self-improvement-issue.md + .github/generated/self-improvement-decisions.md - name: Create Analysis Issue uses: peter-evans/create-issue-from-file@v6 @@ -78,6 +79,6 @@ jobs: - name: Output Instructions run: | - echo "::notice::Self-improvement cycle initiated. See issue created above for detailed analysis tasks." + echo "::notice::Self-improvement cycle initiated. See issue created above for detailed analysis tasks and Adopt/Reject/Defer suggestions." echo "::notice::Branch created: self-improvement-$(date +%Y-%m-%d)" - echo "::notice::Generated issue body and repo-data.json are attached as workflow artifacts." + echo "::notice::Generated issue body, decision log, and repo-data.json are attached as workflow artifacts." 
diff --git a/conductor/tracks/repo-self-improvement_20260303/plan.md b/conductor/tracks/repo-self-improvement_20260303/plan.md index 0a35218..35352e7 100644 --- a/conductor/tracks/repo-self-improvement_20260303/plan.md +++ b/conductor/tracks/repo-self-improvement_20260303/plan.md @@ -46,10 +46,12 @@ Decision: the citation manager has been moved to `experiments/citation_ref_manag #### Task R4: Strengthen self-improvement automation -- [ ] Make the weekly workflow consume refreshed upstream data rather than only creating a placeholder issue -- [ ] Add decision criteria for adopting new "AI tells": evidence quality, overlap, false-positive risk, adapter impact +- [x] Make the weekly workflow consume refreshed upstream data rather than only creating a placeholder issue +- [x] Add decision criteria for adopting new "AI tells": evidence quality, overlap, false-positive risk, adapter impact - [ ] Record explicit Adopt / Reject / Defer outcomes for high-signal upstream PRs +Current state: the scheduled workflow now generates decision-oriented issue content plus a standalone decision-log artifact. Maintainers still need to convert suggested Adopt / Reject / Defer outcomes into explicit track decisions. + --- ## Phase 1: Dependency Updates & Security Baseline [P0] diff --git a/conductor/tracks/repo-self-improvement_20260303/spec.md b/conductor/tracks/repo-self-improvement_20260303/spec.md index 333f4c9..f855496 100644 --- a/conductor/tracks/repo-self-improvement_20260303/spec.md +++ b/conductor/tracks/repo-self-improvement_20260303/spec.md @@ -41,7 +41,7 @@ Fresh data was gathered on **2026-03-13** via `scripts/gather-repo-data.js` and 1. `humanizer-next` should remain a **skill-source repository**, not a publishable npm library. 2. `.github/workflows/release.yml` is currently **misaligned** with that goal because it still assumes a Changesets + npm publish lifecycle. -3. 
`.github/workflows/self-improvement.yml` is useful as a scheduler, but it is too shallow to be considered a closed-loop improvement system. It creates an issue and gathers baseline metrics, but it does not make high-quality adoption decisions. +3. `.github/workflows/self-improvement.yml` now gathers baseline metrics, live repository data, and decision-oriented issue content. It is stronger than the original placeholder workflow, but it is still not fully closed-loop because maintainers must finalize the Adopt / Reject / Defer outcomes. 4. The citation reference manager was a **scope outlier** relative to the repo's core purpose. It has now been moved behind an explicit experimental boundary at `experiments/citation_ref_manager/`, with the decision documented in `docs/citation-manager-boundary.md`. Follow-on extraction into a separate repo or skill remains a valid option if it graduates from experimentation. 5. The highest-value maintenance work is now: - reviewing and merging the 6 current Dependabot PRs, diff --git a/scripts/gather-repo-data.js b/scripts/gather-repo-data.js index d4a111e..94399bf 100644 --- a/scripts/gather-repo-data.js +++ b/scripts/gather-repo-data.js @@ -19,6 +19,15 @@ const OUTPUT_DIR = './conductor/tracks/repo-self-improvement_20260303'; // GitHub API base URL const GITHUB_API = 'https://api.github.com'; +const SECURITY_POLICY_CANDIDATES = ['SECURITY.md', '.github/SECURITY.md', 'docs/SECURITY.md']; + +function getGitHubHeaders() { + return { + Accept: 'application/vnd.github.v3+json', + 'User-Agent': 'humanizer-self-improvement-bot', + ...(process.env.GITHUB_TOKEN ? 
/**
 * Check whether a file exists in a repository.
 *
 * Issues a HEAD request against the GitHub contents endpoint, so only the
 * response status is inspected. A 404 is treated as "file absent". Any other
 * failure (an exhausted rate limit, a non-OK status, or a rejected fetch) is
 * retried with a linearly growing delay (2s, 4s, ...) and the last error is
 * rethrown once the attempts are used up.
 *
 * @param {string} repo - Repository in `owner/name` form.
 * @param {string} filePath - Repository-relative path, e.g. `.github/SECURITY.md`.
 * @param {number} [retries=3] - Maximum number of attempts; at least one attempt is always made.
 * @returns {Promise<boolean>} `true` when the request succeeds, `false` on a 404.
 * @throws {Error} When every attempt fails for a reason other than a 404.
 */
async function repoFileExists(repo, filePath, retries = 3) {
  // Guard against 0, negative, or non-integer values so the loop can never
  // fall through and resolve to `undefined`.
  const maxAttempts = Number.isInteger(retries) && retries > 0 ? retries : 1;

  // Encode each segment separately: slashes must survive as path separators,
  // but spaces, `#`, `?` and similar characters must not corrupt the URL.
  const encodedPath = String(filePath)
    .split('/')
    .map((segment) => encodeURIComponent(segment))
    .join('/');
  const url = `${GITHUB_API}/repos/${repo}/contents/${encodedPath}`;

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    try {
      const response = await fetch(url, {
        method: 'HEAD',
        headers: getGitHubHeaders(),
      });

      if (response.status === 404) {
        return false;
      }

      if (response.status === 403 && response.headers.get('X-RateLimit-Remaining') === '0') {
        // The reset header is a Unix timestamp in seconds; Date expects milliseconds.
        const resetSeconds = Number(response.headers.get('X-RateLimit-Reset'));
        const resetTime = new Date(resetSeconds * 1000);
        console.log(`Rate limited while checking ${filePath}. Reset at: ${resetTime}`);
        throw new Error('Rate limited');
      }

      if (!response.ok) {
        throw new Error(`Failed to check ${filePath} in ${repo}: ${response.status}`);
      }

      return true;
    } catch (error) {
      const isLastAttempt = attempt === maxAttempts - 1;
      if (isLastAttempt) {
        throw error;
      }
      // Linear backoff before the next attempt: 2s, 4s, 6s, ...
      await new Promise((resolve) => setTimeout(resolve, 2000 * (attempt + 1)));
    }
  }

  // Unreachable: the final attempt either returns or rethrows above.
  return false;
}
/**
 * Render decision records as a Markdown bullet list.
 *
 * Each record becomes one bullet (`- <scope> #<number>: <title>`) followed by
 * a `Decision:` line with the decision upper-cased and a `Why:` line with the
 * reason. Records are joined with single newlines.
 *
 * @param {Array<{scope: string, number: number, title: string, decision: string, reason: string}>} items
 *   Decision records to render.
 * @returns {string} The Markdown list, or `- None` when there is nothing to render.
 */
function formatDecisionItems(items) {
  // Treat a missing or malformed list like an empty one so that report
  // generation does not abort with a TypeError.
  if (!Array.isArray(items) || items.length === 0) {
    return '- None';
  }

  return items
    .map((item) => {
      // A record without a decision is shown as DEFER rather than throwing on
      // `undefined.toUpperCase()`.
      const decision = String(item.decision ?? 'defer').toUpperCase();
      return `- ${item.scope} #${item.number}: ${item.title}\n Decision: ${decision}\n Why: ${item.reason}`;
    })
    .join('\n');
}
/**
 * Suggest an Adopt / Reject / Defer decision for a pull request.
 *
 * The first rule with a keyword that occurs in the PR title (compared
 * case-insensitively) wins. When no rule matches, the PR is deferred with a
 * scope-specific "review manually" reason.
 *
 * @param {{number: number, title: string}} pr - Pull request to classify.
 * @param {string} scope - Label copied into the result; `'local'` selects the local fallback reason.
 * @param {Array<{keywords: string[], decision: string, reason: string}>} [rules=[]]
 *   Ordered rule list; earlier rules take precedence.
 * @returns {{scope: string, number: number, title: string, decision: string, reason: string}}
 *   The suggested decision record.
 */
function classifyDecision(pr, scope, rules = []) {
  // A missing title is treated as an empty string so one malformed record
  // cannot abort the whole report.
  const title = pr.title ?? '';
  const lowerTitle = String(title).toLowerCase();

  // Lower-case the keyword as well: the title is already lower-cased, so a
  // rule keyword containing capitals could otherwise never match.
  const matchedRule = rules.find((rule) =>
    rule.keywords.some((keyword) => lowerTitle.includes(String(keyword).toLowerCase()))
  );

  const fallbackReason =
    scope === 'local'
      ? 'No repo-specific automation rule exists for this PR yet. Review manually.'
      : 'No automation rule matched. Review manually.';

  return {
    scope,
    number: pr.number,
    title,
    decision: matchedRule ? matchedRule.decision : 'defer',
    reason: matchedRule ? matchedRule.reason : fallbackReason,
  };
}
+- False-positive risk: reject changes that are likely to flatten legitimate human style or technical writing. +- Adapter impact: prefer improvements that do not increase sync complexity or runtime dependencies across supported adapters. + +## Local Decision Support + +${formatDecisionItems(localDecisions)} + +## Upstream Decision Support + +${formatDecisionItems(upstreamDecisions)} + ## Recommended Actions 1. Review and merge the current Dependabot backlog if validation passes. -2. Record explicit Adopt / Reject / Defer decisions for the highest-signal upstream PRs. +2. Convert the automated Adopt / Reject / Defer suggestions above into explicit maintainer decisions on the active conductor track. 3. Keep the repo skill-focused: validate adapter sync and distribution first, not npm publishing. 4. Keep experimental subsystems outside the maintained skill surface; the citation manager now lives under \`experiments/citation_ref_manager/\`. +`; + + const decisionsBody = `# Self-Improvement Decision Log + +Generated from \`scripts/gather-repo-data.js\` on ${data.gathered_at}. + +## Local Decisions + +${formatDecisionItems(localDecisions)} + +## Upstream Decisions + +${formatDecisionItems(upstreamDecisions)} `; fs.mkdirSync(path.dirname(outputPath), { recursive: true }); fs.writeFileSync(outputPath, body, 'utf8'); + fs.writeFileSync(decisionsPath, decisionsBody, 'utf8'); console.log(`Wrote self-improvement issue body to ${outputPath}`); + console.log(`Wrote self-improvement decision log to ${decisionsPath}`); } main();