Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat comments #48

Merged
merged 4 commits on Oct 20, 2024 (source and target branch names were not captured in this export)
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "staffspy"
-version = "0.2.17"
+version = "0.2.18"
description = "Staff scraper library for LinkedIn"
authors = ["Cullen Watson <[email protected]>"]
readme = "README.md"
Expand Down
2 changes: 1 addition & 1 deletion staffspy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,4 +138,4 @@ def scrape_comments(self, post_ids: list[str]) -> pd.DataFrame:
comment_dict = [comment.to_dict() for comment in all_comments]
comment_df = pd.DataFrame(comment_dict)

-return comment_df
+return comment_df.sort_values(by="created_at", ascending=False)
8 changes: 6 additions & 2 deletions staffspy/linkedin/comments.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ class CommentFetcher:

def __init__(self, session):
self.session = session
-self.endpoint = "https://www.linkedin.com/voyager/api/graphql?queryId=voyagerSocialDashComments.200c8ad7e1ad32ba4e5cc827ab5c3193&queryName=SocialDashCommentsBySocialDetail&variables=(origins:List(),sortOrder:REVERSE_CHRONOLOGICAL,count:100,socialDetailUrn:urn%3Ali%3Afsd_socialDetail%3A%28urn%3Ali%3Aactivity%3A{post_id}%2Curn%3Ali%3Aactivity%3A{post_id}%2Curn%3Ali%3AhighlightedReply%3A-%29,start:{start})"
+self.endpoint = "https://www.linkedin.com/voyager/api/graphql?queryId=voyagerSocialDashComments.200c8ad7e1ad32ba4e5cc827ab5c3193&queryName=SocialDashCommentsBySocialDetail&variables=(origins:List(),sortOrder:RELEVANCE,count:100,socialDetailUrn:urn%3Ali%3Afsd_socialDetail%3A%28urn%3Ali%3Aactivity%3A{post_id}%2Curn%3Ali%3Aactivity%3A{post_id}%2Curn%3Ali%3AhighlightedReply%3A-%29,start:{start})"
self.post_id = None
self.num_commments = 100

def fetch_comments(self, post_id: str):
all_comments = []
self.post_id = post_id

-for i in range(0, 100_000, self.num_commments):
+for i in range(0, 200_000, self.num_commments):
logger.info(f"Fetching comments for post {post_id}, start {i}")

ep = self.endpoint.format(post_id=post_id, start=i)
Expand Down Expand Up @@ -61,12 +61,16 @@ def parse_comments(self, comments_json: dict):
linkedin_id = linkedin_id_match.group(1) if linkedin_id_match else None

commentary = element.get("commentary", {}).get("text", "")
+comment_id = element["urn"].split(",")[-1].rstrip(")")
+num_likes = element["socialDetail"]["totalSocialActivityCounts"]["numLikes"]
comment = Comment(
post_id=self.post_id,
+comment_id=comment_id,
internal_profile_id=internal_profile_id,
public_profile_id=linkedin_id,
name=name,
text=commentary,
+num_likes=num_likes,
created_at=dt.utcfromtimestamp(element["createdAt"] / 1000),
)
comments.append(comment)
Expand Down
4 changes: 4 additions & 0 deletions staffspy/utils/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,23 @@

class Comment(BaseModel):
post_id: str
+comment_id: str | None = None
internal_profile_id: str | None = None
public_profile_id: str | None = None
name: str | None = None
text: str | None = None
+num_likes: int | None = None
created_at: dt | None = None

def to_dict(self):
return {
"post_id": self.post_id,
+"comment_id": self.comment_id,
"internal_profile_id": self.internal_profile_id,
"public_profile_id": self.public_profile_id,
"name": self.name,
"text": self.text,
+"num_likes": self.num_likes,
"created_at": self.created_at,
}

Expand Down
Loading