Skip to content

Commit

Permalink
enh:email variations
Browse files Browse the repository at this point in the history
  • Loading branch information
cullenwatson committed Sep 13, 2024
1 parent 61ffaaa commit 8e59cee
Show file tree
Hide file tree
Showing 3 changed files with 158 additions and 98 deletions.
22 changes: 13 additions & 9 deletions staffspy/solvers/capsolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,31 @@ def is_none(value):


class CapSolver(Solver):
""" https://www.capsolver.com/ """
"""https://www.capsolver.com/"""

@retry(stop=stop_after_attempt(10), retry=retry_if_result(is_none))
def solve(self, blob_data: str, page_url: str=None):
def solve(self, blob_data: str, page_url: str = None):
from staffspy.utils.utils import logger
logger.info(f'Waiting on CapSolver to solve captcha...')

logger.info(f"Waiting on CapSolver to solve captcha...")

payload = {
"clientKey": self.solver_api_key,
"task": {
"type": 'FunCaptchaTaskProxyLess',
"type": "FunCaptchaTaskProxyLess",
"websitePublicKey": self.public_key,
"websiteURL": self.page_url,
"data": json.dumps({"blob": blob_data}) if blob_data else ''
}
"data": json.dumps({"blob": blob_data}) if blob_data else "",
},
}
res = requests.post("https://api.capsolver.com/createTask", json=payload)
resp = res.json()
task_id = resp.get("taskId")
if not task_id:
raise Exception("CapSolver failed to create task, try another captcha solver like 2Captcha if this persists or use browser sign in `pip install staffspy[browser]` and then remove the username/password params to the scrape_staff()",res.text)
raise Exception(
"CapSolver failed to create task, try another captcha solver like 2Captcha if this persists or use browser sign in `pip install staffspy[browser]` and then remove the username/password params to the LinkedInAccount()",
res.text,
)
logger.info(f"Received captcha solver taskId: {task_id} / Getting result...")

while True:
Expand All @@ -42,8 +46,8 @@ def solve(self, blob_data: str, page_url: str=None):
resp = res.json()
status = resp.get("status")
if status == "ready":
logger.info(f'CapSolver finished solving captcha')
return resp.get("solution", {}).get('token')
logger.info(f"CapSolver finished solving captcha")
return resp.get("solution", {}).get("token")
if status == "failed" or resp.get("errorId"):
logger.info(f"Captcha solve failed! response: {res.text}")
return None
69 changes: 47 additions & 22 deletions staffspy/utils/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def to_dict(self):
return {
"name": self.name,
"endorsements": self.endorsements if self.endorsements else 0,
"passed_assessment": self.passed_assessment
"passed_assessment": self.passed_assessment,
}


Expand Down Expand Up @@ -107,25 +107,37 @@ class Staff(BaseModel):
def get_top_skills(self):
    """Return the names of the three most-endorsed skills.

    Skills are ranked by endorsement count, highest first. A skill whose
    ``endorsements`` is unset (``None``) is ranked as if it had zero.

    Returns:
        A list of exactly three entries: skill names in rank order, padded
        with ``None`` when fewer than three skills are available.
    """
    top_three_skills = []
    if self.skills:
        # An unset endorsement count would make sorted() compare None with
        # int and raise TypeError, so fall back to 0 for ranking purposes.
        sorted_skills = sorted(
            self.skills, key=lambda x: x.endorsements or 0, reverse=True
        )
        top_three_skills = [skill.name for skill in sorted_skills[:3]]
    # Always hand back three slots so callers can index [0], [1], [2] safely.
    top_three_skills += [None] * (3 - len(top_three_skills))
    return top_three_skills

def to_dict(self):
sorted_schools = sorted(
self.schools, key=lambda x: (x.end_date is None, x.end_date), reverse=True
) if self.schools else []
sorted_schools = (
sorted(
self.schools,
key=lambda x: (x.end_date is None, x.end_date),
reverse=True,
)
if self.schools
else []
)

top_three_school_names = [school.school for school in sorted_schools[:3]]
top_three_school_names += [None] * (3 - len(top_three_school_names))
estimated_age = self.estimate_age_based_on_education()

sorted_experiences = sorted(
self.experiences,
key=lambda x: (x.end_date is None, x.end_date),
reverse=True
) if self.experiences else []
sorted_experiences = (
sorted(
self.experiences,
key=lambda x: (x.end_date is None, x.end_date),
reverse=True,
)
if self.experiences
else []
)

top_three_companies = []
seen_companies = set()
Expand All @@ -137,16 +149,20 @@ def to_dict(self):
break

top_three_companies += [None] * (3 - len(top_three_companies))
top_three_skills=self.get_top_skills()
top_three_skills = self.get_top_skills()
name = filter(None, [self.first_name, self.last_name])

self.emails_in_bio=extract_emails_from_text(self.bio) if self.bio else None
self.current_position = sorted_experiences[0].title if len(sorted_experiences) > 0 and sorted_experiences[0].end_date is None else None
self.emails_in_bio = extract_emails_from_text(self.bio) if self.bio else None
self.current_position = (
sorted_experiences[0].title
if len(sorted_experiences) > 0 and sorted_experiences[0].end_date is None
else None
)
return {
"search_term": self.search_term,
"id": self.id,
"profile_id": self.profile_id,
"name": self.name if self.name else ' '.join(name) if name else None,
"name": self.name if self.name else " ".join(name) if name else None,
"first_name": self.first_name,
"last_name": self.last_name,
"location": self.location,
Expand All @@ -161,7 +177,7 @@ def to_dict(self):
"influencer": self.influencer,
"open_to_work": self.open_to_work,
"is_hiring": self.is_hiring,
"current_position":self.current_position,
"current_position": self.current_position,
"current_company": top_three_companies[0],
"past_company_1": top_three_companies[1],
"past_company_2": top_three_companies[2],
Expand All @@ -187,8 +203,10 @@ def to_dict(self):
if self.certifications
else None
),
"emails_in_bio": ', '.join(self.emails_in_bio) if self.emails_in_bio else None,
"potential_emails": ', '.join(self.potential_emails) if self.potential_emails else None,
"emails_in_bio": (
", ".join(self.emails_in_bio) if self.emails_in_bio else None
),
"potential_emails": self.potential_emails,
"profile_link": self.profile_link,
"profile_photo": self.profile_photo,
"banner_photo": self.banner_photo,
Expand All @@ -198,14 +216,21 @@ def estimate_age_based_on_education(self):
"""Adds 18 to their first college start date"""
college_words = ["uni", "college"]

sorted_schools = sorted(
[school for school in self.schools if school.start_date],
key=lambda x: x.start_date,
) if self.schools else []
sorted_schools = (
sorted(
[school for school in self.schools if school.start_date],
key=lambda x: x.start_date,
)
if self.schools
else []
)

current_date = datetime.now().date()
for school in sorted_schools:
if any(word in school.school.lower() for word in college_words) or school.degree:
if (
any(word in school.school.lower() for word in college_words)
or school.degree
):
if school.start_date:
years_in_education = (current_date - school.start_date).days // 365
return int(18 + years_in_education)
Expand Down
Loading

0 comments on commit 8e59cee

Please sign in to comment.