Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Company name #36

Merged
merged 3 commits on
Aug 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "staffspy"
version = "0.2.8"
version = "0.2.9"
description = "Staff scraper library for LinkedIn"
authors = ["Cullen Watson <[email protected]>"]
readme = "README.md"
Expand Down
24 changes: 12 additions & 12 deletions staffspy/linkedin/linkedin.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import json
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import quote
from urllib.parse import quote, unquote

import requests

Expand Down Expand Up @@ -64,17 +64,21 @@ def search_companies(self, company_name):
res.text[:200],
)
logger.debug(f"Searched companies {res.status_code}")
companies = res.json()['data']['searchDashClustersByAll']['elements']
if len(companies) < 2:
raise Exception(f'No companies found for name {company_name}, Response: {res.text[:200]}')
metadata, first_company = companies[:2]
try:
first_company = res.json()['data']['searchDashClustersByAll']['elements'][1]['items'][0]['item'][
'entityResult']
num_results = metadata['items'][0]['item']['simpleTextV2']['text']['text']
first_company = companies[1]['items'][0]['item']['entityResult']
company_link = first_company['navigationUrl']
company_name_id = re.search(r'/company/([^/]+)', company_link).group(1)
company_name_id = unquote(re.search(r'/company/([^/]+)', company_link).group(1))
company_name_new = first_company['title']['text']
except Exception as e:
raise Exception(f'Failed to load json in search_companies {str(e)}, Response: {res.text[:200]}')

logger.info(
f"Searched company {company_name} on LinkedIn and found company id - '{company_name_id}' with company name - '{company_name_new}'")
f"Searched company {company_name} on LinkedIn and were {num_results}, using first result with company name - '{company_name_new}' and company id - '{company_name_id}'")
return company_name_id

def fetch_or_search_company(self, company_name):
Expand Down Expand Up @@ -115,11 +119,7 @@ def get_company_id_and_staff_count(self, company_name: str):
self.domain = utils.extract_base_domain(company["companyPageUrl"]) if company.get('companyPageUrl') else None
staff_count = company["staffCount"]
company_id = company["trackingInfo"]["objectUrn"].split(":")[-1]

try:
company_name = company["universalName"]
except:
pass
company_name = company["universalName"]

logger.info(f"Found company '{company_name}' with {staff_count} staff")
return company_id, staff_count
Expand Down Expand Up @@ -205,7 +205,7 @@ def fetch_location_id(self):
try:
res_json = res.json()
except json.decoder.JSONDecodeError:
if res.reason=='INKApi Error':
if res.reason == 'INKApi Error':
raise Exception('Delete session file and log in again', res.status_code, res.text[:200], res.reason)
raise GeoUrnNotFound("Failed to send request to get geo id", res.status_code, res.text[:200], res.reason)

Expand Down Expand Up @@ -319,6 +319,6 @@ def fetch_user_profile_data_from_public_id(self, user_id: str, key: str):
return data
except (KeyError, TypeError, IndexError) as e:
logger.warning(f"Failed to find user_id {user_id}")
if key=='user_id':
if key == 'user_id':
return ''
raise Exception(f"Failed to fetch '{key}' for user_id {user_id}: {e}")
Loading