fixup title extraction across our use-cases (#62 improves #25)

nedtwigg · web-flow · commit f30d18585d31 · 2025-09-18T12:59:45.000-07:00
diff --git a/.claude/commands/corpus-loop.md b/.claude/commands/corpus-loop.md
@@ -0,0 +1,57 @@
+---
+argument-hint: [corpus_slug]
+description: uses Playwright MCP and the `corpus:view` to parse page elements
+---
+
+- using Playwright MCP, navigate to `http://localhost:3001/corpus/$1/gitcasso`
+- the page will have a div with id `gitcasso-comment-spots`, wait 500ms for it to settle
+- inside the `gitcasso-comment-spots` div you will see something like this:
+
+```json
+{
+ "url": "https://github.com/diffplug/selfie/issues/523",
+ "allTextAreas": [
+  {
+   "textarea": "id='feedback' name='feedback' className='form-control width-full mb-2'",
+   "spot": "NO_SPOT"
+  },
+  {
+   "textarea": "id=':rn:' name='' className='prc-Textarea-TextArea-13q4j overtype-input'",
+   "spot": {
+    "domain": "github.com",
+    "number": 523,
+    "slug": "diffplug/selfie",
+    "title": "TODO_TITLE",
+    "type": "GH_ISSUE_ADD_COMMENT",
+    "unique_key": "github.com:diffplug/selfie:523"
+   }
+  }
+ ]
+}
+```
+
+- this output means that this page is simulating the url `https://github.com/diffplug/selfie/issues/523`
+- every textarea on the page is represented
+- `NO_SPOT` means that the spot was not enhanced
+- `type: GH_ISSUE_ADD_COMMENT` means that it was enhanced by whichever implementation of `CommentEnhancer` returns the spot type `GH_ISSUE_ADD_COMMENT`
+- if you search for that string in `src/lib/enhancers` you will find the correct one
+- the `tryToEnhance` method returned a `CommentSpot`, and that whole data is splatted out above
+
+If you make a change to the code of the enhancer, you can click the button with id `gitcasso-rebuild-btn`. It will trigger a rebuild of the browser extension, and then refresh the page. You'll be able to see the effects of your change in the `gitcasso-comment-spots` div described above.
+
+## Common extraction workflow
+
+If you see `"title": "TODO_TITLE"` or similar hardcoded `TODO` values in the JSON output, this indicates the enhancer needs some kind of extraction implemented:
+
+1. **Find the enhancer**: Search for the `type` value (e.g., `GH_ISSUE_ADD_COMMENT`) in `src/lib/enhancers/`
+2. **Implement extraction**: Replace hardcoded title with DOM extraction:
+   ```javascript
+   const title = document.querySelector('main h1')!.textContent.replace(/\s*#\d+$/, '').trim()
+   ```
+3. **Test with rebuild**: Click the 🔄 button to rebuild and verify the title appears correctly in the JSON
+
+## Extraction code style
+
+- Don't hedge your bets by writing lots of fallback code or strings of `?.`. Have a specific piece of data you want to get, and use non-null `!` assertions where necessary to be clear about exactly what you expect to get.
+- If a field is empty, represent it with an empty string. Don't use placeholders when extracting data.
+- The pages we are scraping are going to change over time, and it's easier to fix broken ones if we know exactly what used to work. If the code has lots of branching paths, it's harder to tell what it was doing.
diff --git a/.gitignore b/.gitignore
@@ -17,9 +17,5 @@ dist/
 .DS_Store
 Thumbs.db
 
-# playright
+# playwright
 .playwright-mcp/
-browser-extension/dist-playground/
-browser-extension/playwright-report/
-browser-extension/playwright/
-browser-extension/test-results/
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -1,3 +1,3 @@
-Please refer to `CONTRIBUTING.md` and `README.md`.
+Refer to `CONTRIBUTING.md` for the project's architecture and useful commands.
 
 Whenever you complete a task, if you wish some info had been provided to you ahead of time instead of figuring it out from scratch, you have permission to edit this `CLAUDE.md` to add any helpful context.
diff --git a/src/lib/enhancers/github/githubIssueAddComment.tsx b/src/lib/enhancers/github/githubIssueAddComment.tsx
@@ -42,7 +42,10 @@ export class GitHubIssueAddCommentEnhancer implements CommentEnhancer<GitHubIssu
     const slug = `${owner}/${repo}`
     const number = parseInt(numberStr!, 10)
     const unique_key = `github.com:${slug}:${number}`
-    const title = 'TODO_TITLE'
+    const title = document
+      .querySelector('main h1')!
+      .textContent.replace(/\s*#\d+$/, '')
+      .trim()
     return {
       domain: location.host,
       number,
@@ -77,7 +80,7 @@ export class GitHubIssueAddCommentEnhancer implements CommentEnhancer<GitHubIssu
     )
   }
 
-  tableTitle(_spot: GitHubIssueAddCommentSpot): string {
-    return 'TITLE_TODO'
+  tableTitle(spot: GitHubIssueAddCommentSpot): string {
+    return spot.title
   }
 }
diff --git a/src/lib/enhancers/github/githubIssueNewComment.tsx b/src/lib/enhancers/github/githubIssueNewComment.tsx
@@ -9,6 +9,7 @@ interface GitHubIssueNewCommentSpot extends CommentSpot {
   type: 'GH_ISSUE_NEW_COMMENT'
   domain: string
   slug: string // owner/repo
+  title: string
 }
 
 export class GitHubIssueNewCommentEnhancer implements CommentEnhancer<GitHubIssueNewCommentSpot> {
@@ -17,9 +18,12 @@ export class GitHubIssueNewCommentEnhancer implements CommentEnhancer<GitHubIssu
   }
 
   tryToEnhance(
-    _textarea: HTMLTextAreaElement,
+    textarea: HTMLTextAreaElement,
     location: StrippedLocation,
   ): GitHubIssueNewCommentSpot | null {
+    if (textarea.id === 'feedback') {
+      return null
+    }
     if (location.host !== 'github.com') {
       return null
     }
@@ -34,9 +38,12 @@ export class GitHubIssueNewCommentEnhancer implements CommentEnhancer<GitHubIssu
     const [, owner, repo] = match
     const slug = `${owner}/${repo}`
     const unique_key = `github.com:${slug}:new`
+    const titleInput = document.querySelector('input[placeholder="Title"]') as HTMLInputElement
+    const title = titleInput?.value || ''
     return {
       domain: location.host,
       slug,
+      title,
       type: 'GH_ISSUE_NEW_COMMENT',
       unique_key,
     }
@@ -62,8 +69,8 @@ export class GitHubIssueNewCommentEnhancer implements CommentEnhancer<GitHubIssu
     )
   }
 
-  tableTitle(_spot: GitHubIssueNewCommentSpot): string {
-    return 'New Issue'
+  tableTitle(spot: GitHubIssueNewCommentSpot): string {
+    return spot.title || 'New Issue'
   }
 
   buildUrl(spot: GitHubIssueNewCommentSpot): string {
diff --git a/src/lib/enhancers/github/githubPRAddComment.tsx b/src/lib/enhancers/github/githubPRAddComment.tsx
@@ -38,7 +38,10 @@ export class GitHubPRAddCommentEnhancer implements CommentEnhancer<GitHubPRAddCo
     const slug = `${owner}/${repo}`
     const number = parseInt(numberStr!, 10)
     const unique_key = `github.com:${slug}:${number}`
-    const title = 'TODO_TITLE'
+    const title = document
+      .querySelector('main h1')!
+      .textContent.replace(/\s*#\d+$/, '')
+      .trim()
     return {
       domain: location.host,
       number,
@@ -70,7 +73,7 @@ export class GitHubPRAddCommentEnhancer implements CommentEnhancer<GitHubPRAddCo
     )
   }
 
-  tableTitle(_spot: GitHubPRAddCommentSpot): string {
-    return 'TITLE_TODO'
+  tableTitle(spot: GitHubPRAddCommentSpot): string {
+    return spot.title
   }
 }
diff --git a/src/lib/enhancers/github/githubPRNewComment.tsx b/src/lib/enhancers/github/githubPRNewComment.tsx
@@ -8,7 +8,10 @@ import { prepareGitHubHighlighter } from './githubHighlighter'
 interface GitHubPRNewCommentSpot extends CommentSpot {
   type: 'GH_PR_NEW_COMMENT'
   domain: string
-  slug: string // owner/repo/base-branch/compare-branch
+  slug: string // owner/repo
+  title: string
+  head: string // `user:repo:branch` where changes are implemented
+  base: string // branch you want changes pulled into
 }
 
 export class GitHubPRNewCommentEnhancer implements CommentEnhancer<GitHubPRNewCommentSpot> {
@@ -38,13 +41,19 @@ export class GitHubPRNewCommentEnhancer implements CommentEnhancer<GitHubPRNewCo
 
     if (!match) return null
     const [, owner, repo, baseBranch, compareBranch] = match
-    const slug = baseBranch
-      ? `${owner}/${repo}/${baseBranch}...${compareBranch}`
-      : `${owner}/${repo}/${compareBranch}`
-    const unique_key = `github.com:${slug}`
+    const slug = `${owner}/${repo}`
+    const base = baseBranch || 'main'
+    const head = compareBranch!
+    const unique_key = `github.com:${slug}:${base}...${head}`
+    const titleInput = document.querySelector('input[placeholder="Title"]') as HTMLInputElement
+    const title = titleInput!.value
+
     return {
+      base,
       domain: location.host,
+      head,
       slug,
+      title,
       type: 'GH_PR_NEW_COMMENT',
       unique_key,
     }
@@ -70,8 +79,8 @@ export class GitHubPRNewCommentEnhancer implements CommentEnhancer<GitHubPRNewCo
     )
   }
 
-  tableTitle(_spot: GitHubPRNewCommentSpot): string {
-    return 'TITLE_TODO'
+  tableTitle(spot: GitHubPRNewCommentSpot): string {
+    return spot.title || 'New Pull Request'
   }
 
   buildUrl(spot: GitHubPRNewCommentSpot): string {
diff --git a/tests/corpus-view.ts b/tests/corpus-view.ts
@@ -477,28 +477,28 @@ function createCommentSpotDisplayScript(urlParts: ReturnType<typeof getUrlParts>
 
     function updateCommentSpotDisplay() {
       const textareas = document.querySelectorAll('textarea');
-      const spotsFound = [];
+      const allTextAreas = [];
 
       for (const textarea of textareas) {
-        const forValue = 'id=' + textarea.id + ' name=' + textarea.name + ' className=' + textarea.className;
+        const forValue = "id='" + textarea.id + "' name='" + textarea.name + "' className='" + textarea.className + "'";
         const enhancedItem = window.gitcassoTextareaRegistry ? window.gitcassoTextareaRegistry.get(textarea) : undefined;
         if (enhancedItem) {
-          spotsFound.push({
-            for: forValue,
+          allTextAreas.push({
+            textarea: forValue,
             spot: enhancedItem.spot,
-            title: enhancedItem.enhancer.tableTitle(enhancedItem.spot),
           });
         } else {
-          spotsFound.push({
-            for: forValue,
+          allTextAreas.push({
+            textarea: forValue,
             spot: 'NO_SPOT',
           });
         }
       }
-
-      console.log('Enhanced textareas:', spotsFound.filter(s => s.spot !== 'NO_SPOT').length);
-      console.log('All textareas on page:', textareas.length);
-      commentSpotDisplay.innerHTML = '<div style="' + styles.header + '"><pre>${urlParts.href}\\n' + JSON.stringify(spotsFound, null, 2) + '</pre></div>';
+      const harness = {
+        url: '${urlParts.href}',
+        allTextAreas: allTextAreas
+      }
+      commentSpotDisplay.innerHTML = '<div style="' + styles.header + '"><pre>' + JSON.stringify(harness, null, 1) + '</pre></div>';
     }
 
     // Initial update
@@ -508,9 +508,6 @@ function createCommentSpotDisplayScript(urlParts: ReturnType<typeof getUrlParts>
     setTimeout(updateCommentSpotDisplay, 400);
     setTimeout(updateCommentSpotDisplay, 800);
 
-    // Update display periodically
-    setInterval(updateCommentSpotDisplay, 2000);
-
     document.body.appendChild(commentSpotDisplay);
   `
 }
diff --git a/tests/corpus/_corpus-index.ts b/tests/corpus/_corpus-index.ts
@@ -17,6 +17,11 @@ export const CORPUS: Record<string, CorpusEntry> = {
     type: 'html',
     url: 'https://github.com/diffplug/gitcasso/issues/56',
   },
+  gh_issue_new_populated: {
+    description: 'a new issue wiht some fields filled out',
+    type: 'html',
+    url: 'https://github.com/diffplug/gitcasso/issues/new',
+  },
   gh_issue_populated_comment: {
     description: 'comment text box has some text',
     type: 'html',
diff --git a/tests/corpus/gh_issue_new_populated.html b/tests/corpus/gh_issue_new_populated.html
diff --git a/tests/lib/enhancers/github.test.ts b/tests/lib/enhancers/github.test.ts

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,3 @@`
`1`		-Please refer to `CONTRIBUTING.md` and `README.md`.
	`1`	+Refer to `CONTRIBUTING.md` for the project's architecture and useful commands.
`2`	`2`
`3`	`3`	Whenever you complete a task, if you wish some info had been provided to you ahead of time instead of figuring it out from scratch, you have permission to edit this `CLAUDE.md` to add any helpful context.
Original file line number	Diff line number	Diff line change
`@@ -42,7 +42,10 @@ export class GitHubIssueAddCommentEnhancer implements CommentEnhancer<GitHubIssu`
`42`	`42`	const slug = `${owner}/${repo}`
`43`	`43`	`const number = parseInt(numberStr!, 10)`
`44`	`44`	const unique_key = `github.com:${slug}:${number}`
`45`		`- const title = 'TODO_TITLE'`
	`45`	`+ const title = document`
	`46`	`+ .querySelector('main h1')!`
	`47`	`+ .textContent.replace(/\s*#\d+$/, '')`
	`48`	`+ .trim()`
`46`	`49`	`return {`
`47`	`50`	`domain: location.host,`
`48`	`51`	`number,`
`@@ -77,7 +80,7 @@ export class GitHubIssueAddCommentEnhancer implements CommentEnhancer<GitHubIssu`
`77`	`80`	`)`
`78`	`81`	`}`
`79`	`82`
`80`		`- tableTitle(_spot: GitHubIssueAddCommentSpot): string {`
`81`		`- return 'TITLE_TODO'`
	`83`	`+ tableTitle(spot: GitHubIssueAddCommentSpot): string {`
	`84`	`+ return spot.title`
`82`	`85`	`}`
`83`	`86`	`}`
Original file line number	Diff line number	Diff line change
`@@ -9,6 +9,7 @@ interface GitHubIssueNewCommentSpot extends CommentSpot {`
`9`	`9`	`type: 'GH_ISSUE_NEW_COMMENT'`
`10`	`10`	`domain: string`
`11`	`11`	`slug: string // owner/repo`
	`12`	`+ title: string`
`12`	`13`	`}`
`13`	`14`
`14`	`15`	`export class GitHubIssueNewCommentEnhancer implements CommentEnhancer<GitHubIssueNewCommentSpot> {`
`@@ -17,9 +18,12 @@ export class GitHubIssueNewCommentEnhancer implements CommentEnhancer<GitHubIssu`
`17`	`18`	`}`
`18`	`19`
`19`	`20`	`tryToEnhance(`
`20`		`- _textarea: HTMLTextAreaElement,`
	`21`	`+ textarea: HTMLTextAreaElement,`
`21`	`22`	`location: StrippedLocation,`
`22`	`23`	`): GitHubIssueNewCommentSpot \| null {`
	`24`	`+ if (textarea.id === 'feedback') {`
	`25`	`+ return null`
	`26`	`+ }`
`23`	`27`	`if (location.host !== 'github.com') {`
`24`	`28`	`return null`
`25`	`29`	`}`
`@@ -34,9 +38,12 @@ export class GitHubIssueNewCommentEnhancer implements CommentEnhancer<GitHubIssu`
`34`	`38`	`const [, owner, repo] = match`
`35`	`39`	const slug = `${owner}/${repo}`
`36`	`40`	const unique_key = `github.com:${slug}:new`
	`41`	`+ const titleInput = document.querySelector('input[placeholder="Title"]') as HTMLInputElement`
	`42`	`+ const title = titleInput?.value \|\| ''`
`37`	`43`	`return {`
`38`	`44`	`domain: location.host,`
`39`	`45`	`slug,`
	`46`	`+ title,`
`40`	`47`	`type: 'GH_ISSUE_NEW_COMMENT',`
`41`	`48`	`unique_key,`
`42`	`49`	`}`
`@@ -62,8 +69,8 @@ export class GitHubIssueNewCommentEnhancer implements CommentEnhancer<GitHubIssu`
`62`	`69`	`)`
`63`	`70`	`}`
`64`	`71`
`65`		`- tableTitle(_spot: GitHubIssueNewCommentSpot): string {`
`66`		`- return 'New Issue'`
	`72`	`+ tableTitle(spot: GitHubIssueNewCommentSpot): string {`
	`73`	`+ return spot.title \|\| 'New Issue'`
`67`	`74`	`}`
`68`	`75`
`69`	`76`	`buildUrl(spot: GitHubIssueNewCommentSpot): string {`
Original file line number	Diff line number	Diff line change
`@@ -38,7 +38,10 @@ export class GitHubPRAddCommentEnhancer implements CommentEnhancer<GitHubPRAddCo`
`38`	`38`	const slug = `${owner}/${repo}`
`39`	`39`	`const number = parseInt(numberStr!, 10)`
`40`	`40`	const unique_key = `github.com:${slug}:${number}`
`41`		`- const title = 'TODO_TITLE'`
	`41`	`+ const title = document`
	`42`	`+ .querySelector('main h1')!`
	`43`	`+ .textContent.replace(/\s*#\d+$/, '')`
	`44`	`+ .trim()`
`42`	`45`	`return {`
`43`	`46`	`domain: location.host,`
`44`	`47`	`number,`
`@@ -70,7 +73,7 @@ export class GitHubPRAddCommentEnhancer implements CommentEnhancer<GitHubPRAddCo`
`70`	`73`	`)`
`71`	`74`	`}`
`72`	`75`
`73`		`- tableTitle(_spot: GitHubPRAddCommentSpot): string {`
`74`		`- return 'TITLE_TODO'`
	`76`	`+ tableTitle(spot: GitHubPRAddCommentSpot): string {`
	`77`	`+ return spot.title`
`75`	`78`	`}`
`76`	`79`	`}`