-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
2a5b94f
commit b126f1a
Showing
4 changed files
with
47 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,7 @@ | |
### Installation | ||
|
||
``` | ||
pip install -U staffspy | ||
pip install -U staffspy[browser] | ||
``` | ||
|
||
_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_ | ||
|
@@ -26,11 +26,11 @@ from staffspy import LinkedInAccount, SolverType | |
|
||
session_file = Path(__file__).resolve().parent / "session.pkl" | ||
account = LinkedInAccount( | ||
# credentials - remove these to sign in with browser | ||
username="[email protected]", | ||
password="mypassword", | ||
solver_api_key="CAP-6D6A8CE981803A309A0D531F8B4790BC", # optional but needed if hit with captcha | ||
solver_service=SolverType.CAPSOLVER, | ||
# commenting these out because the captcha services are not reliable at the moment, so sign in with browser | ||
# username="[email protected]", | ||
# password="mypassword", | ||
# solver_api_key="CAP-6D6A8CE981803A309A0D531F8B4790BC", # optional but needed if hit with captcha | ||
# solver_service=SolverType.CAPSOLVER, | ||
|
||
session_file=str(session_file), # save login cookies to only log in once (lasts a week or so) | ||
log_level=1, # 0 for no logs | ||
|
@@ -58,7 +58,7 @@ If you would rather use a browser to log in, install the browser add-on for StaffSpy. | |
|
||
`pip install staffspy[browser]` | ||
|
||
Do not pass the `username` & `password` params, then a browser will open to sign in to LinkedIn on the first sign-in. Press enter after signing in to begin scraping. | ||
If you do not pass the `username` & `password` params, a browser will open on the first run so you can sign in to LinkedIn. Press enter after signing in to begin scraping. | |
|
||
### Output | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[tool.poetry] | ||
name = "staffspy" | ||
version = "0.2.10" | ||
version = "0.2.11" | ||
description = "Staff scraper library for LinkedIn" | ||
authors = ["Cullen Watson <[email protected]>"] | ||
readme = "README.md" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,36 @@ | ||
from tenacity import retry_if_exception_type, stop_after_attempt, retry | ||
from twocaptcha import TwoCaptcha, TimeoutException, ApiException | ||
from twocaptcha import TwoCaptcha, TimeoutException, ApiException, NetworkException | ||
|
||
from staffspy.solvers.solver import Solver | ||
|
||
|
||
class TwoCaptchaSolver(Solver): | ||
""" https://2captcha.com/ """ | ||
"""https://2captcha.com/""" | ||
|
||
attempt = 1 | ||
|
||
@retry(stop=stop_after_attempt(5), retry=retry_if_exception_type((TimeoutException, ApiException))) | ||
def solve(self, blob_data: str, page_url:str=None): | ||
@retry( | ||
stop=stop_after_attempt(5), | ||
retry=retry_if_exception_type( | ||
(TimeoutException, ApiException, NetworkException) | ||
), | ||
) | ||
def solve(self, blob_data: str, page_url: str = None): | ||
super().solve(blob_data, page_url) | ||
from staffspy.utils.utils import logger | ||
|
||
logger.info(f'Waiting on 2Captcha to solve captcha attempt {self.attempt} / 5 ...') | ||
self.attempt+=1 | ||
logger.info( | ||
f"Waiting on 2Captcha to solve captcha attempt {self.attempt} / 5 ..." | ||
) | ||
self.attempt += 1 | ||
|
||
solver = TwoCaptcha(self.solver_api_key) | ||
|
||
result = solver.funcaptcha(sitekey=self.public_key, | ||
url=page_url, | ||
**{'data[blob]': blob_data}, | ||
surl="https://iframe.arkoselabs.com" | ||
) | ||
logger.info(f'2Captcha finished solving captcha') | ||
return result['code'] | ||
result = solver.funcaptcha( | ||
sitekey=self.public_key, | ||
url=page_url, | ||
**{"data[blob]": blob_data}, | ||
surl="https://iframe.arkoselabs.com", | ||
) | ||
logger.info(f"2Captcha finished solving captcha") | ||
return result["code"] |