Skip to content

Commit 2d1f337

Browse files
authored
fix:sign in again (#40)
1 parent 2a5b94f commit 2d1f337

File tree

4 files changed

+47
-27
lines changed

4 files changed

+47
-27
lines changed

README.md

+7-7
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
### Installation
1414

1515
```
16-
pip install -U staffspy
16+
pip install -U staffspy[browser]
1717
```
1818

1919
_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_
@@ -26,11 +26,11 @@ from staffspy import LinkedInAccount, SolverType
2626

2727
session_file = Path(__file__).resolve().parent / "session.pkl"
2828
account = LinkedInAccount(
29-
# credentials - remove these to sign in with browser
30-
username="[email protected]",
31-
password="mypassword",
32-
solver_api_key="CAP-6D6A8CE981803A309A0D531F8B4790BC", # optional but needed if hit with captcha
33-
solver_service=SolverType.CAPSOLVER,
29+
# these are commented out because the captcha-solving services are currently unreliable; sign in with the browser instead
30+
# username="[email protected]",
31+
# password="mypassword",
32+
# solver_api_key="CAP-6D6A8CE981803A309A0D531F8B4790BC", # optional but needed if hit with captcha
33+
# solver_service=SolverType.CAPSOLVER,
3434

3535
session_file=str(session_file), # save login cookies to only log in once (lasts a week or so)
3636
log_level=1, # 0 for no logs
@@ -58,7 +58,7 @@ If you rather use a browser to log in, install the browser add-on to StaffSpy .
5858

5959
`pip install staffspy[browser]`
6060

61-
Do not pass the `username` & `password` params, then a browser will open to sign in to LinkedIn on the first sign-in. Press enter after signing in to begin scraping.
61+
If you do not pass the `username` & `password` params, a browser window will open on the first run so you can sign in to LinkedIn. Press enter after signing in to begin scraping.
6262

6363
### Output
6464

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "staffspy"
3-
version = "0.2.10"
3+
version = "0.2.11"
44
description = "Staff scraper library for LinkedIn"
55
authors = ["Cullen Watson <[email protected]>"]
66
readme = "README.md"

staffspy/linkedin/experiences.py

+18-6
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,16 @@ def fetch_experiences(self, staff):
1717
ep = self.endpoint.format(employee_id=staff.id)
1818
res = self.session.get(ep)
1919
logger.debug(f"exps, status code - {res.status_code}")
20-
if res.status_code == 429:
20+
if res.reason == "INKApi Error":
21+
raise Exception(
22+
"Delete session file and log in again",
23+
res.status_code,
24+
res.text[:200],
25+
res.reason,
26+
)
27+
elif res.status_code == 429:
2128
return TooManyRequests("429 Too Many Requests")
22-
if not res.ok:
29+
elif not res.ok:
2330
logger.debug(res.text[:200])
2431
return False
2532
try:
@@ -54,10 +61,15 @@ def parse_experiences(self, elements):
5461
continue
5562

5663
sub_components = entity.get("subComponents")
57-
if (sub_components is None or
58-
len(sub_components.get("components", [])) == 0 or
59-
sub_components["components"][0].get("components") is None or
60-
sub_components["components"][0]["components"].get("pagedListComponent") is None):
64+
if (
65+
sub_components is None
66+
or len(sub_components.get("components", [])) == 0
67+
or sub_components["components"][0].get("components") is None
68+
or sub_components["components"][0]["components"].get(
69+
"pagedListComponent"
70+
)
71+
is None
72+
):
6173

6274
emp_type = start_date = end_date = None
6375

staffspy/solvers/two_captcha.py

+21-13
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,36 @@
11
from tenacity import retry_if_exception_type, stop_after_attempt, retry
2-
from twocaptcha import TwoCaptcha, TimeoutException, ApiException
2+
from twocaptcha import TwoCaptcha, TimeoutException, ApiException, NetworkException
33

44
from staffspy.solvers.solver import Solver
55

66

77
class TwoCaptchaSolver(Solver):
8-
""" https://2captcha.com/ """
8+
"""https://2captcha.com/"""
99

1010
attempt = 1
1111

12-
@retry(stop=stop_after_attempt(5), retry=retry_if_exception_type((TimeoutException, ApiException)))
13-
def solve(self, blob_data: str, page_url:str=None):
12+
@retry(
13+
stop=stop_after_attempt(5),
14+
retry=retry_if_exception_type(
15+
(TimeoutException, ApiException, NetworkException)
16+
),
17+
)
18+
def solve(self, blob_data: str, page_url: str = None):
1419
super().solve(blob_data, page_url)
1520
from staffspy.utils.utils import logger
1621

17-
logger.info(f'Waiting on 2Captcha to solve captcha attempt {self.attempt} / 5 ...')
18-
self.attempt+=1
22+
logger.info(
23+
f"Waiting on 2Captcha to solve captcha attempt {self.attempt} / 5 ..."
24+
)
25+
self.attempt += 1
1926

2027
solver = TwoCaptcha(self.solver_api_key)
2128

22-
result = solver.funcaptcha(sitekey=self.public_key,
23-
url=page_url,
24-
**{'data[blob]': blob_data},
25-
surl="https://iframe.arkoselabs.com"
26-
)
27-
logger.info(f'2Captcha finished solving captcha')
28-
return result['code']
29+
result = solver.funcaptcha(
30+
sitekey=self.public_key,
31+
url=page_url,
32+
**{"data[blob]": blob_data},
33+
surl="https://iframe.arkoselabs.com",
34+
)
35+
logger.info(f"2Captcha finished solving captcha")
36+
return result["code"]

0 commit comments

Comments
 (0)