Skip to content

Commit

Permalink
Company name (#36)
Browse files: browse the repository at this point in the history
  • Loading branch information
cullenwatson authored Aug 4, 2024
1 parent 86c5849 commit 4252853
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 13 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "staffspy"
version = "0.2.8"
version = "0.2.9"
description = "Staff scraper library for LinkedIn"
authors = ["Cullen Watson <[email protected]>"]
readme = "README.md"
Expand Down
24 changes: 12 additions & 12 deletions staffspy/linkedin/linkedin.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import json
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import quote
from urllib.parse import quote, unquote

import requests

Expand Down Expand Up @@ -64,17 +64,21 @@ def search_companies(self, company_name):
res.text[:200],
)
logger.debug(f"Searched companies {res.status_code}")
companies = res.json()['data']['searchDashClustersByAll']['elements']
if len(companies) < 2:
raise Exception(f'No companies found for name {company_name}, Response: {res.text[:200]}')
metadata, first_company = companies[:2]
try:
first_company = res.json()['data']['searchDashClustersByAll']['elements'][1]['items'][0]['item'][
'entityResult']
num_results = metadata['items'][0]['item']['simpleTextV2']['text']['text']
first_company = companies[1]['items'][0]['item']['entityResult']
company_link = first_company['navigationUrl']
company_name_id = re.search(r'/company/([^/]+)', company_link).group(1)
company_name_id = unquote(re.search(r'/company/([^/]+)', company_link).group(1))
company_name_new = first_company['title']['text']
except Exception as e:
raise Exception(f'Failed to load json in search_companies {str(e)}, Response: {res.text[:200]}')

logger.info(
f"Searched company {company_name} on LinkedIn and found company id - '{company_name_id}' with company name - '{company_name_new}'")
f"Searched company {company_name} on LinkedIn and were {num_results}, using first result with company name - '{company_name_new}' and company id - '{company_name_id}'")
return company_name_id

def fetch_or_search_company(self, company_name):
Expand Down Expand Up @@ -115,11 +119,7 @@ def get_company_id_and_staff_count(self, company_name: str):
self.domain = utils.extract_base_domain(company["companyPageUrl"]) if company.get('companyPageUrl') else None
staff_count = company["staffCount"]
company_id = company["trackingInfo"]["objectUrn"].split(":")[-1]

try:
company_name = company["universalName"]
except:
pass
company_name = company["universalName"]

logger.info(f"Found company '{company_name}' with {staff_count} staff")
return company_id, staff_count
Expand Down Expand Up @@ -205,7 +205,7 @@ def fetch_location_id(self):
try:
res_json = res.json()
except json.decoder.JSONDecodeError:
if res.reason=='INKApi Error':
if res.reason == 'INKApi Error':
raise Exception('Delete session file and log in again', res.status_code, res.text[:200], res.reason)
raise GeoUrnNotFound("Failed to send request to get geo id", res.status_code, res.text[:200], res.reason)

Expand Down Expand Up @@ -319,6 +319,6 @@ def fetch_user_profile_data_from_public_id(self, user_id: str, key: str):
return data
except (KeyError, TypeError, IndexError) as e:
logger.warning(f"Failed to find user_id {user_id}")
if key=='user_id':
if key == 'user_id':
return ''
raise Exception(f"Failed to fetch '{key}' for user_id {user_id}: {e}")

0 comments on commit 4252853

Please sign in to comment.