Skip to content

Commit

Permalink
fix: retry logic for signing in
Browse files: browse the repository at this point in the history
  • Loading branch information
cullenwatson committed Jul 26, 2024
1 parent 1f492a3 commit 6493b01
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 7 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "staffspy"
version = "0.2.0"
version = "0.2.1"
description = "Staff scraper library for LinkedIn"
authors = ["Cullen Watson <[email protected]>"]
readme = "README.md"
Expand Down
2 changes: 1 addition & 1 deletion staffspy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,4 @@ def scrape_staff(
logger.info(
f"Scraped {len(staff_df)} staff members, with {len(linkedin_member_df)} hidedn LinkedIn Members."
)
return staff_df
return staff_df
4 changes: 2 additions & 2 deletions staffspy/capsolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def capsolver(blob_data: str, api_key: str):
resp = res.json()
task_id = resp.get("taskId")
if not task_id:
logger.info("Failed to create task:", res.text)
logger.info(f"Failed to create task: {res.text}")
return None
logger.info(f"Got captcha solver taskId: {task_id} / Getting result...")

Expand All @@ -41,5 +41,5 @@ def capsolver(blob_data: str, api_key: str):
if status == "ready":
return resp.get("solution", {}).get('token')
if status == "failed" or resp.get("errorId"):
logger.info("Captcha solve failed! response:", res.text)
logger.info(f"Captcha solve failed! response: {res.text}")
return None
3 changes: 3 additions & 0 deletions staffspy/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@ class BadCookies(RequestException):

class GeoUrnNotFound(RequestException):
"""Could not find geo urn for given location."""

class BlobException(Exception):
    """Could not find the blob needed to solve the captcha."""
15 changes: 12 additions & 3 deletions staffspy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
import requests
import tldextract
from bs4 import BeautifulSoup
from tenacity import stop_after_attempt, retry_if_exception_type, retry, RetryError

from staffspy.capsolver import capsolver
from staffspy.exceptions import BlobException

logger = logging.getLogger("StaffSpy")
logger.propagate = False
Expand Down Expand Up @@ -70,13 +72,16 @@ def solve_captcha(self, session,data,payload):

code_tag = soup.find('code', id='securedDataExchange')

logger.info('Searching for capcha blob in linkedin to begin captcha solving')
if code_tag:
comment = code_tag.contents[0]
extracted_code = str(comment).strip("<!--\"\"-->").strip()
logger.debug("Extracted captcha blob:", extracted_code)
else:
raise Exception('blob to solve captcha not found')
raise BlobException('blob to solve captcha not found - rerunning the program usually solves this')

if not self.capsolver_api_key:
raise Exception('captcha hit - provide CapSolver API key to solve or switch to the browser-based login with `pip install staffspy[browser]`')
token = capsolver(extracted_code,self.capsolver_api_key)
if not token:
raise Exception('failed to solve captcha after 10 attempts')
Expand Down Expand Up @@ -121,11 +126,11 @@ def solve_captcha(self, session,data,payload):
encoded_payload = {key: f'{quote(str(value), "")}' for key, value in payload.items()}
query_string = '&'.join([f'{key}={value}' for key, value in encoded_payload.items()])
response=session.post("https://www.linkedin.com/checkpoint/challenge/verify", data=query_string)
pass

if not response.ok:
raise Exception(f'verify captcha failed {response.text[:200]}')

@retry(stop=stop_after_attempt(5), retry=retry_if_exception_type(BlobException))
def login_requests(self):

url = "https://www.linkedin.com/uas/authenticate"
Expand Down Expand Up @@ -208,10 +213,14 @@ def save_session(session, session_file):


def load_session(session_file, username: str, password: str, capsolver_api_key: str):
session=None
login_obj=Login(username,password,capsolver_api_key)
if not session_file or not os.path.exists(session_file):
if username and password:
session = login_obj.login_requests()
try:
session = login_obj.login_requests()
except RetryError as retry_err:
retry_err.reraise()
else:
session = login_obj.login_browser()
if not session:
Expand Down

0 comments on commit 6493b01

Please sign in to comment.