fix: sign in again

cullenwatson · Aug 21, 2024 · b126f1a
1 parent 2a5b94f
commit b126f1a
Show file tree

Hide file tree

Showing 4 changed files with 47 additions and 27 deletions.
diff --git a/README.md b/README.md
@@ -13,7 +13,7 @@
 ### Installation
 
 ```
-pip install -U staffspy
+pip install -U staffspy[browser]
 ```
 
 _Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_
@@ -26,11 +26,11 @@ from staffspy import LinkedInAccount, SolverType
 
 session_file = Path(__file__).resolve().parent / "session.pkl"
 account = LinkedInAccount(
-    # credentials - remove these to sign in with browser
-    username="[email protected]",
-    password="mypassword",
-    solver_api_key="CAP-6D6A8CE981803A309A0D531F8B4790BC", # optional but needed if hit with captcha
-    solver_service=SolverType.CAPSOLVER,
+    # commenting these out because the captcha services are not reliable at the moment, so sign in with browser
+    # username="[email protected]",
+    # password="mypassword",
+    # solver_api_key="CAP-6D6A8CE981803A309A0D531F8B4790BC", # optional but needed if hit with captcha
+    # solver_service=SolverType.CAPSOLVER,
 
     session_file=str(session_file), # save login cookies to only log in once (lasts a week or so)
     log_level=1, # 0 for no logs
@@ -58,7 +58,7 @@ If you rather use a browser to log in, install the browser add-on to StaffSpy .
 
 `pip install staffspy[browser]`
 
-Do not pass the `username` & `password` params, then a browser will open to sign in to LinkedIn on the first sign-in. Press enter after signing in to begin scraping.
+If you do not pass the `username` & `password` params, then a browser will open to sign in to LinkedIn on the first sign-in. Press enter after signing in to begin scraping.
 
 ### Output
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "staffspy"
-version = "0.2.10"
+version = "0.2.11"
 description = "Staff scraper library for LinkedIn"
 authors = ["Cullen Watson <[email protected]>"]
 readme = "README.md"

diff --git a/staffspy/linkedin/experiences.py b/staffspy/linkedin/experiences.py
@@ -17,9 +17,16 @@ def fetch_experiences(self, staff):
         ep = self.endpoint.format(employee_id=staff.id)
         res = self.session.get(ep)
         logger.debug(f"exps, status code - {res.status_code}")
-        if res.status_code == 429:
+        if res.reason == "INKApi Error":
+            raise Exception(
+                "Delete session file and log in again",
+                res.status_code,
+                res.text[:200],
+                res.reason,
+            )
+        elif res.status_code == 429:
             return TooManyRequests("429 Too Many Requests")
-        if not res.ok:
+        elif not res.ok:
             logger.debug(res.text[:200])
             return False
         try:
@@ -54,10 +61,15 @@ def parse_experiences(self, elements):
                     continue
 
                 sub_components = entity.get("subComponents")
-                if (sub_components is None or
-                        len(sub_components.get("components", [])) == 0 or
-                        sub_components["components"][0].get("components") is None or
-                        sub_components["components"][0]["components"].get("pagedListComponent") is None):
+                if (
+                    sub_components is None
+                    or len(sub_components.get("components", [])) == 0
+                    or sub_components["components"][0].get("components") is None
+                    or sub_components["components"][0]["components"].get(
+                        "pagedListComponent"
+                    )
+                    is None
+                ):
 
                     emp_type = start_date = end_date = None
 

diff --git a/staffspy/solvers/two_captcha.py b/staffspy/solvers/two_captcha.py
@@ -1,28 +1,36 @@
 from tenacity import retry_if_exception_type, stop_after_attempt, retry
-from twocaptcha import TwoCaptcha, TimeoutException, ApiException
+from twocaptcha import TwoCaptcha, TimeoutException, ApiException, NetworkException
 
 from staffspy.solvers.solver import Solver
 
 
 class TwoCaptchaSolver(Solver):
-    """ https://2captcha.com/ """
+    """https://2captcha.com/"""
 
     attempt = 1
 
-    @retry(stop=stop_after_attempt(5), retry=retry_if_exception_type((TimeoutException, ApiException)))
-    def solve(self, blob_data: str, page_url:str=None):
+    @retry(
+        stop=stop_after_attempt(5),
+        retry=retry_if_exception_type(
+            (TimeoutException, ApiException, NetworkException)
+        ),
+    )
+    def solve(self, blob_data: str, page_url: str = None):
         super().solve(blob_data, page_url)
         from staffspy.utils.utils import logger
 
-        logger.info(f'Waiting on 2Captcha to solve captcha attempt {self.attempt} / 5 ...')
-        self.attempt+=1
+        logger.info(
+            f"Waiting on 2Captcha to solve captcha attempt {self.attempt} / 5 ..."
+        )
+        self.attempt += 1
 
         solver = TwoCaptcha(self.solver_api_key)
 
-        result = solver.funcaptcha(sitekey=self.public_key,
-                                  url=page_url,
-                                   **{'data[blob]': blob_data},
-                                   surl="https://iframe.arkoselabs.com"
-                                  )
-        logger.info(f'2Captcha finished solving captcha')
-        return result['code']
+        result = solver.funcaptcha(
+            sitekey=self.public_key,
+            url=page_url,
+            **{"data[blob]": blob_data},
+            surl="https://iframe.arkoselabs.com",
+        )
+        logger.info(f"2Captcha finished solving captcha")
+        return result["code"]