Skip to content

Commit

Permalink
banner photo (#39)
Browse files Browse the repository at this point in the history
  • Loading branch information
cullenwatson authored Aug 7, 2024
1 parent 4252853 commit 97fb722
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 21 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "staffspy"
version = "0.2.9"
version = "0.2.10"
description = "Staff scraper library for LinkedIn"
authors = ["Cullen Watson <[email protected]>"]
readme = "README.md"
Expand Down
2 changes: 1 addition & 1 deletion staffspy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def scrape_staff(
linkedin_member_df = staff_df[staff_df["name"] == "LinkedIn Member"]
non_linkedin_member_df = staff_df[staff_df["name"] != "LinkedIn Member"]
staff_df = pd.concat([non_linkedin_member_df, linkedin_member_df])
logger.info(f"Scraped {len(staff_df)} staff members from {company_name}")
logger.info(f"Scraped {len(staff_df)} staff members from {company_name}, with {len(linkedin_member_df)} hidden LinkedIn users")
return staff_df

def scrape_users(
Expand Down
22 changes: 12 additions & 10 deletions staffspy/linkedin/employee.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,18 @@ def fetch_employee(self, base_staff, domain):

def parse_emp(self, emp: Staff, emp_dict: dict):
"""Parse the employee data from the employee profile."""
try:
photo_data = emp_dict["profilePicture"]["displayImageReference"][
"vectorImage"
]
photo_base_url = photo_data["rootUrl"]
photo_ext_url = photo_data["artifacts"][-1]["fileIdentifyingUrlPathSegment"]
profile_photo = f"{photo_base_url}{photo_ext_url}"
except (KeyError, TypeError, IndexError, ValueError) as e:
profile_photo = None

def get_photo_url(emp_dict: dict, key: str):
    """Return the image URL stored under ``key`` in a profile dict, or None.

    The URL is the ``rootUrl`` of the entry's vector image followed by the
    ``fileIdentifyingUrlPathSegment`` of the last listed artifact.

    Args:
        emp_dict: Profile payload to read from.
        key: Top-level entry to inspect; the caller passes
            ``"profilePicture"`` and ``"backgroundPicture"``.

    Returns:
        The concatenated URL, or None when any expected level of the
        payload is missing, is not subscriptable, or has no artifacts.
    """
    try:
        vector_image = emp_dict[key]["displayImageReference"]["vectorImage"]
        root = vector_image["rootUrl"]
        # Only the final artifact is used.
        # NOTE(review): presumably the highest-resolution rendition - confirm.
        last_artifact = vector_image["artifacts"][-1]
        segment = last_artifact["fileIdentifyingUrlPathSegment"]
        return f"{root}{segment}"
    except (KeyError, TypeError, IndexError, ValueError):
        # Best-effort lookup: an absent or malformed image is not an error.
        return None

emp.profile_photo = get_photo_url(emp_dict, "profilePicture")
emp.banner_photo = get_photo_url(emp_dict, "backgroundPicture")
emp.profile_id = emp_dict["publicIdentifier"]
try:
emp.headline = emp_dict.get('headline')
Expand All @@ -62,10 +64,10 @@ def parse_emp(self, emp: Staff, emp_dict: dict):
pass
emp.is_connection = next(iter(emp_dict['memberRelationship']['memberRelationshipUnion'])) == 'connection'
emp.open_to_work = emp_dict['profilePicture'].get('frameType')=='OPEN_TO_WORK'
emp.is_hiring = emp_dict['profilePicture'].get('frameType')=='HIRING'

emp.profile_link = f'https://www.linkedin.com/in/{emp_dict["publicIdentifier"]}'

emp.profile_photo = profile_photo
emp.first_name = emp_dict["firstName"]
emp.last_name = emp_dict["lastName"].split(',')[0]
emp.potential_emails = utils.create_emails(
Expand Down
23 changes: 14 additions & 9 deletions staffspy/linkedin/skills.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,23 @@ def parse_skills(self, sections):
"components"
]["elements"]
for elem in elems:
passed_assessment,endorsements = None,0
entity = elem["components"]["entityComponent"]
name = entity["titleV2"]["text"]["text"]
if name in names:
continue
names.add(name)
try:
endorsements = int(
entity["subComponents"]["components"][0]["components"][
"insightComponent"
]["text"]["text"]["text"].replace(" endorsements", "")
)
except:
endorsements = 0
skills.append(Skill(name=name, endorsements=endorsements))
components = entity["subComponents"]["components"]
for component in components:

try:
candidate = component["components"]["insightComponent"]["text"]["text"]["text"]
if " endorsements" in candidate:
endorsements = int(candidate.replace(" endorsements", ""))
if "Passed LinkedIn Skill Assessment" in candidate:
passed_assessment = True
except:
pass

skills.append(Skill(name=name, endorsements=endorsements, passed_assessment=passed_assessment))
return skills
6 changes: 6 additions & 0 deletions staffspy/utils/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@ def to_dict(self):
class Skill(BaseModel):
    """A single skill entry parsed from a profile."""

    # Display name of the skill.
    name: str | None = None
    # Endorsement count; reported as 0 by to_dict() when unset or zero.
    endorsements: int | None = None
    # True when the skill is marked as a passed assessment; None otherwise.
    passed_assessment: bool | None = None

    def to_dict(self):
        """Return the skill as a plain dict, with falsy endorsements as 0."""
        endorsement_count = self.endorsements or 0
        return {
            "name": self.name,
            "endorsements": endorsement_count,
            "passed_assessment": self.passed_assessment,
        }


Expand Down Expand Up @@ -94,7 +96,9 @@ class Staff(BaseModel):
creator: bool | None = None
premium: bool | None = None
open_to_work: bool | None = None
is_hiring: bool | None = None
profile_photo: str | None = None
banner_photo: str | None = None
skills: list[Skill] | None = None
experiences: list[Experience] | None = None
certifications: list[Certification] | None = None
Expand Down Expand Up @@ -156,6 +160,7 @@ def to_dict(self):
"creator": self.creator,
"influencer": self.influencer,
"open_to_work": self.open_to_work,
"is_hiring": self.is_hiring,
"current_position":self.current_position,
"current_company": top_three_companies[0],
"past_company_1": top_three_companies[1],
Expand Down Expand Up @@ -186,6 +191,7 @@ def to_dict(self):
"potential_emails": ', '.join(self.potential_emails) if self.potential_emails else None,
"profile_link": self.profile_link,
"profile_photo": self.profile_photo,
"banner_photo": self.banner_photo,
}

def estimate_age_based_on_education(self):
Expand Down

0 comments on commit 97fb722

Please sign in to comment.