Skip to content

Commit

Permalink
fix:sign in again
Browse files Browse the repository at this point in the history
  • Loading branch information
cullenwatson committed Aug 21, 2024
1 parent 2a5b94f commit b126f1a
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 27 deletions.
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
### Installation

```
pip install -U staffspy
pip install -U staffspy[browser]
```

_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_
Expand All @@ -26,11 +26,11 @@ from staffspy import LinkedInAccount, SolverType

session_file = Path(__file__).resolve().parent / "session.pkl"
account = LinkedInAccount(
# credentials - remove these to sign in with browser
username="[email protected]",
password="mypassword",
solver_api_key="CAP-6D6A8CE981803A309A0D531F8B4790BC", # optional but needed if hit with captcha
solver_service=SolverType.CAPSOLVER,
# credentials are commented out because the captcha-solving services are currently unreliable; sign in with the browser instead
# username="[email protected]",
# password="mypassword",
# solver_api_key="CAP-6D6A8CE981803A309A0D531F8B4790BC", # optional but needed if hit with captcha
# solver_service=SolverType.CAPSOLVER,

session_file=str(session_file), # save login cookies to only log in once (lasts a week or so)
log_level=1, # 0 for no logs
Expand Down Expand Up @@ -58,7 +58,7 @@ If you would rather use a browser to log in, install the browser add-on for StaffSpy.

`pip install staffspy[browser]`

Do not pass the `username` & `password` params, then a browser will open to sign in to LinkedIn on the first sign-in. Press enter after signing in to begin scraping.
If you do not pass the `username` and `password` params, a browser will open on the first sign-in so you can log in to LinkedIn. Press Enter after signing in to begin scraping.

### Output

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "staffspy"
version = "0.2.10"
version = "0.2.11"
description = "Staff scraper library for LinkedIn"
authors = ["Cullen Watson <[email protected]>"]
readme = "README.md"
Expand Down
24 changes: 18 additions & 6 deletions staffspy/linkedin/experiences.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,16 @@ def fetch_experiences(self, staff):
ep = self.endpoint.format(employee_id=staff.id)
res = self.session.get(ep)
logger.debug(f"exps, status code - {res.status_code}")
if res.status_code == 429:
if res.reason == "INKApi Error":
raise Exception(
"Delete session file and log in again",
res.status_code,
res.text[:200],
res.reason,
)
elif res.status_code == 429:
return TooManyRequests("429 Too Many Requests")
if not res.ok:
elif not res.ok:
logger.debug(res.text[:200])
return False
try:
Expand Down Expand Up @@ -54,10 +61,15 @@ def parse_experiences(self, elements):
continue

sub_components = entity.get("subComponents")
if (sub_components is None or
len(sub_components.get("components", [])) == 0 or
sub_components["components"][0].get("components") is None or
sub_components["components"][0]["components"].get("pagedListComponent") is None):
if (
sub_components is None
or len(sub_components.get("components", [])) == 0
or sub_components["components"][0].get("components") is None
or sub_components["components"][0]["components"].get(
"pagedListComponent"
)
is None
):

emp_type = start_date = end_date = None

Expand Down
34 changes: 21 additions & 13 deletions staffspy/solvers/two_captcha.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,36 @@
from tenacity import retry_if_exception_type, stop_after_attempt, retry
from twocaptcha import TwoCaptcha, TimeoutException, ApiException
from twocaptcha import TwoCaptcha, TimeoutException, ApiException, NetworkException

from staffspy.solvers.solver import Solver


class TwoCaptchaSolver(Solver):
""" https://2captcha.com/ """
"""https://2captcha.com/"""

attempt = 1

@retry(stop=stop_after_attempt(5), retry=retry_if_exception_type((TimeoutException, ApiException)))
def solve(self, blob_data: str, page_url:str=None):
@retry(
stop=stop_after_attempt(5),
retry=retry_if_exception_type(
(TimeoutException, ApiException, NetworkException)
),
)
def solve(self, blob_data: str, page_url: str = None):
super().solve(blob_data, page_url)
from staffspy.utils.utils import logger

logger.info(f'Waiting on 2Captcha to solve captcha attempt {self.attempt} / 5 ...')
self.attempt+=1
logger.info(
f"Waiting on 2Captcha to solve captcha attempt {self.attempt} / 5 ..."
)
self.attempt += 1

solver = TwoCaptcha(self.solver_api_key)

result = solver.funcaptcha(sitekey=self.public_key,
url=page_url,
**{'data[blob]': blob_data},
surl="https://iframe.arkoselabs.com"
)
logger.info(f'2Captcha finished solving captcha')
return result['code']
result = solver.funcaptcha(
sitekey=self.public_key,
url=page_url,
**{"data[blob]": blob_data},
surl="https://iframe.arkoselabs.com",
)
logger.info(f"2Captcha finished solving captcha")
return result["code"]

0 comments on commit b126f1a

Please sign in to comment.