Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ This is a **LinkedIn MCP (Model Context Protocol) Server** that enables AI assis
| `get_company_posts` | Get recent posts from company feed |
| `get_job_details` | Get job posting details |
| `search_jobs` | Search jobs by keywords and location |
| `get_saved_jobs` | Get saved/bookmarked jobs from the job tracker (paginated, optional `max_pages`) |
| `close_session` | Close browser session and clean up resources |
| `search_people` | Search for people by keywords and location |

Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ What has Anthropic been posting about recently? https://www.linkedin.com/company
| `search_jobs` | Search for jobs with keywords and location filters | Working |
| `search_people` | Search for people by keywords and location | Working |
| `get_job_details` | Get detailed information about a specific job posting | Working |
| `get_saved_jobs` | Get saved/bookmarked jobs from your LinkedIn job tracker | Working |
| `close_session` | Close browser session and clean up resources | Working |

> [!IMPORTANT]
Expand Down
1 change: 1 addition & 0 deletions docs/docker-hub.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ A Model Context Protocol (MCP) server that connects AI assistants to LinkedIn. A
- **Company Profiles**: Extract comprehensive company data
- **Job Details**: Retrieve job posting information
- **Job Search**: Search for jobs with keywords and location filters
- **Saved Jobs**: Get saved/bookmarked jobs from your LinkedIn job tracker
- **People Search**: Search for people by keywords and location
- **Company Posts**: Get recent posts from a company's LinkedIn feed

Expand Down
136 changes: 136 additions & 0 deletions linkedin_mcp_server/scraping/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import asyncio
import logging
import re
from collections.abc import Awaitable, Callable
from typing import Any
from urllib.parse import quote_plus

Expand Down Expand Up @@ -384,6 +385,141 @@ async def search_jobs(
"sections_requested": ["search_results"],
}

_EXTRACT_JOB_IDS_JS = """() => {
const seen = new Set();
const ids = [];
document.querySelectorAll('a[href*="/jobs/view/"]').forEach(a => {
const match = a.href.match(/\\/jobs\\/view\\/(\\d+)/);
if (match && !seen.has(match[1])) { seen.add(match[1]); ids.push(match[1]); }
});
return ids;
}"""

_EXTRACT_MAIN_TEXT_JS = """() => {
const main = document.querySelector('main');
return main ? main.innerText : document.body.innerText;
}"""

async def scrape_saved_jobs(
    self,
    max_pages: int = 10,
    on_progress: Callable[[int, int, str], Awaitable[None]] | None = None,
) -> dict[str, Any]:
    """Scrape the user's saved/bookmarked jobs from the jobs tracker page.

    Automatically paginates through all pages by clicking the numbered
    page buttons on the tracker UI. Job IDs are extracted from link hrefs
    (``/jobs/view/<id>/``) since they are not present in the page's
    innerText.

    Args:
        max_pages: Safety cap on pages to scrape (default 10). Pagination
            stops earlier if no next-page button exists or a page yields
            no new job IDs.
        on_progress: Optional async callback ``(page, total, message)``
            invoked after each page is scraped.

    Returns:
        {url, sections: {name: text}, pages_visited, sections_requested,
        job_ids: list[str]}
    """
    url = "https://www.linkedin.com/jobs-tracker/"
    # Navigate to the tracker and grab page-1 text.
    # NOTE(review): assumes extract_page returns cleaned page text (or a
    # falsy value on failure) — confirm against its definition.
    text = await self.extract_page(url)

    all_text_parts: list[str] = []
    all_job_ids: list[str] = []

    if text:
        all_text_parts.append(text)

    # Collect job IDs from page 1.
    page_ids: list[str] = await self._page.evaluate(self._EXTRACT_JOB_IDS_JS)
    all_job_ids.extend(page_ids)
    logger.info("Page 1: found %d job IDs", len(page_ids))

    # Determine total pages from pagination buttons (10 jobs per page).
    # The count is clamped to [1, max_pages]; it is only used for progress
    # reporting — the loop below terminates on its own conditions.
    page_buttons = self._page.locator('button[aria-label^="Page "]')
    total_pages = min(max(await page_buttons.count(), 1), max_pages)
    logger.info("Total pages detected: %d", total_pages)

    if on_progress:
        await on_progress(1, total_pages, "Fetched saved jobs page 1")

    # Paginate through remaining pages using numbered page buttons.
    for page_num in range(2, max_pages + 1):
        # Re-query each iteration: buttons beyond the initial window may
        # only appear after navigating forward.
        page_btn = self._page.locator(f'button[aria-label="Page {page_num}"]')
        if not await page_btn.count():
            logger.info(
                "No page %d button found — stopping at page %d",
                page_num,
                page_num - 1,
            )
            break

        logger.info("Navigating to saved jobs page %d", page_num)
        # Snapshot IDs seen so far; a genuinely new page must surface at
        # least one href not in this set.
        prev_ids = set(all_job_ids)
        await page_btn.scroll_into_view_if_needed()
        await page_btn.click()
        # NOTE(review): _NAV_DELAY is defined elsewhere in this module —
        # presumably a fixed post-click settle delay in seconds.
        await asyncio.sleep(_NAV_DELAY)

        # Wait for the DOM to reflect new job links (the click swaps
        # content in place rather than doing a full navigation).
        try:
            await self._page.wait_for_function(
                """(prevIds) => {
                    const prev = new Set(prevIds);
                    const links = document.querySelectorAll('a[href*="/jobs/view/"]');
                    for (const a of links) {
                        const match = a.href.match(/\\/jobs\\/view\\/(\\d+)/);
                        if (match && !prev.has(match[1])) return true;
                    }
                    return false;
                }""",
                arg=list(prev_ids),
                timeout=15000,
            )
        except PlaywrightTimeoutError:
            # Timeout means the page content never changed — treat as end
            # of pagination rather than an error.
            logger.info("No new job IDs appeared on page %d — stopping", page_num)
            break

        # Trigger lazy-loaded cards before reading text/hrefs.
        await scroll_to_bottom(self._page, pause_time=0.5, max_scrolls=3)

        raw = await self._page.evaluate(self._EXTRACT_MAIN_TEXT_JS)
        if raw:
            cleaned = strip_linkedin_noise(raw)
            if cleaned:
                all_text_parts.append(cleaned)

        page_ids = await self._page.evaluate(self._EXTRACT_JOB_IDS_JS)
        new_ids = [jid for jid in page_ids if jid not in prev_ids]
        logger.info("Page %d: found %d new job IDs", page_num, len(new_ids))
        if not new_ids:
            # Defensive second check: wait_for_function said new IDs
            # existed, but re-extraction found none — stop to avoid looping.
            break
        all_job_ids.extend(new_ids)

        if on_progress:
            await on_progress(
                page_num, total_pages, f"Fetched saved jobs page {page_num}"
            )

    # Append a summary of job IDs so they are always visible in the text.
    id_summary = "\n".join(
        f"- Job ID: {jid} (https://www.linkedin.com/jobs/view/{jid}/)"
        for jid in all_job_ids
    )
    if id_summary:
        all_text_parts.append(f"--- Saved Job IDs ---\n{id_summary}")

    sections: dict[str, str] = {}
    if all_text_parts:
        sections["saved_jobs"] = "\n\n".join(all_text_parts)

    logger.info("Total saved jobs found: %d across all pages", len(all_job_ids))

    # pages_visited lists only the tracker URL: pagination happens via
    # in-page button clicks, so the browser URL never changes.
    return {
        "url": url,
        "sections": sections,
        "pages_visited": [url],
        "sections_requested": ["saved_jobs"],
        "job_ids": all_job_ids,
    }

async def search_people(
self,
keywords: str,
Expand Down
47 changes: 47 additions & 0 deletions linkedin_mcp_server/tools/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,53 @@ async def get_job_details(job_id: str, ctx: Context) -> dict[str, Any]:
except Exception as e:
return handle_tool_error(e, "get_job_details")

@mcp.tool(
    annotations=ToolAnnotations(
        title="Get Saved Jobs",
        readOnlyHint=True,
        destructiveHint=False,
        openWorldHint=True,
    )
)
async def get_saved_jobs(ctx: Context, max_pages: int = 10) -> dict[str, Any]:
    """
    Retrieve the user's saved/bookmarked jobs from LinkedIn's job tracker.

    Args:
        max_pages: Maximum number of pages to scrape (default 10, ~10 jobs/page).

    Returns:
        Dict with url, sections (name -> raw text), pages_visited, sections_requested,
        and job_ids (list of LinkedIn job ID strings).
        The LLM should parse the raw text to extract saved job listings.
    """
    try:
        await ensure_authenticated()

        logger.info("Scraping saved jobs (max_pages=%d)", max_pages)

        # Per-page progress forwarded from the scraper, capped at 99% so
        # only the final report shows 100%.
        async def _forward_progress(page: int, total: int, msg: str) -> None:
            denom = max(total, 1)
            percent = min(int(page / denom * 100), 99)
            await ctx.report_progress(progress=percent, total=100, message=msg)

        session = await get_or_create_browser()
        extractor = LinkedInExtractor(session.page)

        await ctx.report_progress(
            progress=0, total=100, message="Fetching saved jobs"
        )

        scraped = await extractor.scrape_saved_jobs(
            max_pages=max_pages, on_progress=_forward_progress
        )

        await ctx.report_progress(progress=100, total=100, message="Complete")

        return scraped

    except Exception as e:
        return handle_tool_error(e, "get_saved_jobs")

@mcp.tool(
annotations=ToolAnnotations(
title="Search Jobs",
Expand Down
Loading