Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ What has Anthropic been posting about recently? https://www.linkedin.com/company
| `search_jobs` | Search for jobs with keywords and location filters | Working |
| `search_people` | Search for people by keywords and location | Working |
| `get_job_details` | Get detailed information about a specific job posting | Working |
| `get_inbox` | List recent conversations from messaging inbox | Working |
| `get_conversation` | Read a specific messaging conversation | Working |
| `search_conversations` | Search messages by keyword | Working |
| `send_message` | Send a message to a LinkedIn user | Working |
| `close_session` | Close browser session and clean up resources | Working |

Tool responses keep readable `sections` text and may also include a compact `references` map keyed by section. Each reference includes a typed target, a relative LinkedIn path (or absolute external URL), and a short label/context when available.
Expand Down
259 changes: 259 additions & 0 deletions linkedin_mcp_server/scraping/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1048,6 +1048,265 @@ async def search_people(
result["section_errors"] = section_errors
return result

# ------------------------------------------------------------------
# Messaging
# ------------------------------------------------------------------

async def get_inbox(self, limit: int = 20) -> dict[str, Any]:
    """Scrape the messaging inbox page and return its visible text.

    ``limit`` only decides how many scroll passes are made over the
    conversation list (one pass per ten requested entries, never fewer
    than one); this method does not itself trim the result to ``limit``
    entries.

    Returns:
        {url, sections: {"inbox": text}, references?, section_errors?}
    """
    # Ready once <main> holds more than 100 characters of text.
    content_ready_js = """() => {
        const main = document.querySelector('main');
        if (!main) return false;
        return main.innerText.length > 100;
    }"""
    # Jump the conversation list (or <main> as fallback) to its bottom.
    scroll_to_bottom_js = """() => {
        const list = document.querySelector(
            '.msg-conversations-container__conversations-list'
        ) || document.querySelector('main');
        if (list) list.scrollTop = list.scrollHeight;
    }"""

    await self._goto_with_auth_checks("https://www.linkedin.com/messaging/")
    await detect_rate_limit(self._page)

    try:
        await self._page.wait_for_function(content_ready_js, timeout=10000)
    except PlaywrightTimeoutError:
        # Best effort: continue and extract whatever is on the page.
        logger.debug("Messaging inbox content did not appear")

    await handle_modal_close(self._page)

    # Scroll the conversation list to load more entries
    remaining_passes = max(limit // 10, 1)
    while remaining_passes > 0:
        await self._page.evaluate(scroll_to_bottom_js)
        await asyncio.sleep(0.5)
        remaining_passes -= 1

    extracted = await self._extract_root_content(["main"])
    raw_text = extracted["text"]
    inbox_text = strip_linkedin_noise(raw_text) if raw_text else ""

    sections: dict[str, str] = {}
    references: dict[str, list[Reference]] = {}
    section_errors: dict[str, dict[str, Any]] = {}
    if inbox_text:
        sections["inbox"] = inbox_text
        inbox_refs = build_references(extracted["references"], "inbox")
        if inbox_refs:
            references["inbox"] = inbox_refs

    payload: dict[str, Any] = {"url": self._page.url, "sections": sections}
    # Optional keys are attached only when they carry data.
    for key, value in (
        ("references", references),
        ("section_errors", section_errors),
    ):
        if value:
            payload[key] = value
    return payload

async def get_conversation(
self,
linkedin_username: str | None = None,
thread_id: str | None = None,
) -> dict[str, Any]:
"""Read a specific messaging conversation.

Provide either ``linkedin_username`` or ``thread_id``.

Returns:
{url, sections: {"conversation": text}, references?, section_errors?}
"""
if not linkedin_username and not thread_id:
raise ValueError("Provide at least one of linkedin_username or thread_id")

if thread_id:
url = f"https://www.linkedin.com/messaging/thread/{thread_id}/"
await self._goto_with_auth_checks(url)
elif linkedin_username:
# Navigate to messaging and search for the user
await self._goto_with_auth_checks("https://www.linkedin.com/messaging/")
try:
search_input = self._page.get_by_role("searchbox").first
await search_input.wait_for(timeout=5000)
await search_input.click()
await self._page.keyboard.type(linkedin_username, delay=30)
await asyncio.sleep(1.5)
# Click the first matching conversation
first_result = self._page.locator(".msg-conversation-listitem").first
await first_result.click(timeout=5000)
except PlaywrightTimeoutError:
logger.warning("Could not find conversation for %s", linkedin_username)

await detect_rate_limit(self._page)

try:
await self._page.wait_for_function(
"""() => {
const main = document.querySelector('main');
if (!main) return false;
return main.innerText.length > 100;
}""",
timeout=10000,
)
except PlaywrightTimeoutError:
logger.debug("Conversation content did not appear")

await handle_modal_close(self._page)

# Scroll up in the thread to load older messages
for _ in range(3):
await self._page.evaluate(
"""() => {
const thread = document.querySelector(
'.msg-s-message-list'
) || document.querySelector('main');
if (thread) thread.scrollTop = 0;
}"""
)
await asyncio.sleep(0.5)

raw_result = await self._extract_root_content(["main"])
raw = raw_result["text"]
cleaned = strip_linkedin_noise(raw) if raw else ""

sections: dict[str, str] = {}
references: dict[str, list[Reference]] = {}
section_errors: dict[str, dict[str, Any]] = {}
if cleaned:
sections["conversation"] = cleaned
refs = build_references(raw_result["references"], "conversation")
if refs:
references["conversation"] = refs

result: dict[str, Any] = {"url": self._page.url, "sections": sections}
if references:
result["references"] = references
if section_errors:
result["section_errors"] = section_errors
return result

async def search_conversations(self, keywords: str) -> dict[str, Any]:
    """Search messages by keyword.

    Opens the messaging page, types ``keywords`` into the first search box,
    presses Enter and returns the text of ``main`` afterwards.

    Args:
        keywords: Search text; must contain at least one non-whitespace
            character.

    Returns:
        {url, sections: {"search_results": text}, references?, section_errors?}

        If the search box cannot be used within the timeout, ``sections``
        is empty and ``section_errors["search_results"]`` describes the
        failure, so the unfiltered inbox is never returned as results.

    Raises:
        ValueError: If ``keywords`` is empty or only whitespace.
    """
    # A blank query would leave the list unfiltered and report the plain
    # inbox as "search results"; reject it before touching the browser.
    if not keywords or not keywords.strip():
        raise ValueError("keywords must be a non-empty search string")

    url = "https://www.linkedin.com/messaging/"
    await self._goto_with_auth_checks(url)
    await detect_rate_limit(self._page)

    try:
        search_input = self._page.get_by_role("searchbox").first
        await search_input.wait_for(timeout=5000)
        await search_input.click()
        await self._page.keyboard.type(keywords, delay=30)
        await asyncio.sleep(1.0)
        await self._page.keyboard.press("Enter")
        # Give the result list time to refresh after submitting.
        await asyncio.sleep(1.5)
    except PlaywrightTimeoutError:
        logger.warning("Messaging search input not found")
        # NOTE(review): payload keys are a minimal error shape; confirm
        # against the section_errors schema used by the other scrapers.
        return {
            "url": self._page.url,
            "sections": {},
            "section_errors": {
                "search_results": {
                    "error_type": "PlaywrightTimeoutError",
                    "error_message": "Messaging search input not found",
                }
            },
        }

    try:
        await self._page.wait_for_function(
            """() => {
                const main = document.querySelector('main');
                if (!main) return false;
                return main.innerText.length > 100;
            }""",
            timeout=10000,
        )
    except PlaywrightTimeoutError:
        # Best effort: continue and extract whatever is on the page.
        logger.debug("Search results content did not appear")

    raw_result = await self._extract_root_content(["main"])
    raw = raw_result["text"]
    cleaned = strip_linkedin_noise(raw) if raw else ""

    sections: dict[str, str] = {}
    references: dict[str, list[Reference]] = {}
    if cleaned:
        sections["search_results"] = cleaned
        refs = build_references(raw_result["references"], "search_results")
        if refs:
            references["search_results"] = refs

    result: dict[str, Any] = {"url": self._page.url, "sections": sections}
    if references:
        result["references"] = references
    return result

async def send_message(
    self, linkedin_username: str, message: str
) -> dict[str, Any]:
    """Send a message to a LinkedIn user.

    Navigates to the user's profile, opens the message compose box,
    types the message, and clicks send.

    Args:
        linkedin_username: Profile slug used in ``/in/<username>/``.
        message: Text to send; must contain at least one non-whitespace
            character.

    Returns:
        {url, sections: {"confirmation": text}}

        The confirmation only states that the send button was clicked;
        this method does not check that the message was delivered.

    Raises:
        ValueError: If ``linkedin_username`` or ``message`` is blank.
        LinkedInScraperException: If the Message button, the compose box,
            or the send button cannot be used within its timeout.
    """
    # Validate up front: a blank message would otherwise surface much later
    # as a misleading "Send button not found" error after the profile and
    # composer have already been opened.
    if not linkedin_username or not linkedin_username.strip():
        raise ValueError("linkedin_username must be a non-empty string")
    if not message or not message.strip():
        raise ValueError("message must be a non-empty string")

    profile_url = f"https://www.linkedin.com/in/{linkedin_username}/"
    await self._goto_with_auth_checks(profile_url)
    await detect_rate_limit(self._page)

    try:
        await self._page.wait_for_selector("main", timeout=5000)
    except PlaywrightTimeoutError:
        # Best effort: the Message button lookup below fails loudly if the
        # profile really did not load.
        logger.debug("Profile page did not load for %s", linkedin_username)

    await handle_modal_close(self._page)

    # Click the "Message" button on the profile
    message_button = self._page.get_by_role("button", name="Message")
    try:
        await message_button.click(timeout=5000)
    except PlaywrightTimeoutError as exc:
        raise LinkedInScraperException(
            f"Message button not found on {linkedin_username}'s profile. "
            "They may not be a 1st-degree connection."
        ) from exc

    # Wait for the compose box to appear
    compose_box = self._page.locator(
        'div[role="textbox"][contenteditable="true"]'
    ).last
    try:
        await compose_box.wait_for(timeout=5000)
    except PlaywrightTimeoutError as exc:
        raise LinkedInScraperException(
            "Message compose box did not appear."
        ) from exc

    # Focus the box and type through the keyboard so the contenteditable
    # element receives real key events.
    await compose_box.focus()
    await self._page.keyboard.type(message, delay=20)
    await asyncio.sleep(0.5)

    # Click the send button
    send_button = self._page.locator(
        'button[type="submit"], button[aria-label*="Send"], button[aria-label*="send"]'
    ).last
    try:
        await send_button.click(timeout=5000)
    except PlaywrightTimeoutError as exc:
        raise LinkedInScraperException(
            "Send button not found or not clickable."
        ) from exc

    # Brief pause after clicking send before reading the page URL.
    await asyncio.sleep(1.0)

    return {
        "url": self._page.url,
        "sections": {
            "confirmation": f"Message sent to {linkedin_username}: {message}"
        },
    }

async def _extract_root_content(
self,
selectors: list[str],
Expand Down
13 changes: 13 additions & 0 deletions linkedin_mcp_server/scraping/link_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"article",
"newsletter",
"school",
"messaging",
"external",
]

Expand Down Expand Up @@ -94,6 +95,8 @@ class RawReference(TypedDict, total=False):
"search_results": 15,
"job_posting": 8,
"contact_info": 8,
"inbox": 20,
"conversation": 12,
}

_URL_LIKE_RE = re.compile(r"^(?:https?://|/)\S+$", re.IGNORECASE)
Expand All @@ -107,6 +110,7 @@ class RawReference(TypedDict, total=False):
_NEWSLETTER_PATH_RE = re.compile(r"^/newsletters/([^/?#]+)")
_PULSE_PATH_RE = re.compile(r"^/pulse/([^/?#]+)")
_FEED_PATH_RE = re.compile(r"^/feed/update/([^/?#]+)")
_MESSAGING_PATH_RE = re.compile(r"^/messaging/thread/([^/?#]+)")
_MAX_REDIRECT_UNWRAP_DEPTH = 5


Expand Down Expand Up @@ -229,6 +233,9 @@ def classify_link(href: str) -> tuple[ReferenceKind, str] | None:
if match := _FEED_PATH_RE.match(path):
return "feed_post", f"/feed/update/{match.group(1)}/"

if match := _MESSAGING_PATH_RE.match(path):
return "messaging", f"/messaging/thread/{match.group(1)}/"

return None


Expand Down Expand Up @@ -320,6 +327,12 @@ def derive_context(
return "company post"
return "post attachment"

if section_name == "inbox":
return "conversation" if kind == "messaging" else "participant"

if section_name == "conversation":
return "participant" if kind == "person" else "message link"

if section_name in {"main_profile", "about"}:
if heading in _CONTEXT_LABELS:
return heading
Expand Down
2 changes: 2 additions & 0 deletions linkedin_mcp_server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
)
from linkedin_mcp_server.tools.company import register_company_tools
from linkedin_mcp_server.tools.job import register_job_tools
from linkedin_mcp_server.tools.messaging import register_messaging_tools
from linkedin_mcp_server.tools.person import register_person_tools

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -59,6 +60,7 @@ def create_mcp_server() -> FastMCP:
register_person_tools(mcp)
register_company_tools(mcp)
register_job_tools(mcp)
register_messaging_tools(mcp)

# Register session management tool
@mcp.tool(
Expand Down
Loading
Loading