Skip to content

Commit

Permalink
feat: fetch comments (#48)
Browse files Browse the repository at this point in the history
  • Loading branch information
cullenwatson authored Oct 20, 2024
1 parent 5802188 commit d89bed5
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 4 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "staffspy"
version = "0.2.17"
version = "0.2.18"
description = "Staff scraper library for LinkedIn"
authors = ["Cullen Watson <[email protected]>"]
readme = "README.md"
Expand Down
2 changes: 1 addition & 1 deletion staffspy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,4 +138,4 @@ def scrape_comments(self, post_ids: list[str]) -> pd.DataFrame:
comment_dict = [comment.to_dict() for comment in all_comments]
comment_df = pd.DataFrame(comment_dict)

return comment_df
return comment_df.sort_values(by="created_at", ascending=False)
8 changes: 6 additions & 2 deletions staffspy/linkedin/comments.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ class CommentFetcher:

def __init__(self, session):
self.session = session
self.endpoint = "https://www.linkedin.com/voyager/api/graphql?queryId=voyagerSocialDashComments.200c8ad7e1ad32ba4e5cc827ab5c3193&queryName=SocialDashCommentsBySocialDetail&variables=(origins:List(),sortOrder:REVERSE_CHRONOLOGICAL,count:100,socialDetailUrn:urn%3Ali%3Afsd_socialDetail%3A%28urn%3Ali%3Aactivity%3A{post_id}%2Curn%3Ali%3Aactivity%3A{post_id}%2Curn%3Ali%3AhighlightedReply%3A-%29,start:{start})"
self.endpoint = "https://www.linkedin.com/voyager/api/graphql?queryId=voyagerSocialDashComments.200c8ad7e1ad32ba4e5cc827ab5c3193&queryName=SocialDashCommentsBySocialDetail&variables=(origins:List(),sortOrder:RELEVANCE,count:100,socialDetailUrn:urn%3Ali%3Afsd_socialDetail%3A%28urn%3Ali%3Aactivity%3A{post_id}%2Curn%3Ali%3Aactivity%3A{post_id}%2Curn%3Ali%3AhighlightedReply%3A-%29,start:{start})"
self.post_id = None
self.num_commments = 100

def fetch_comments(self, post_id: str):
all_comments = []
self.post_id = post_id

for i in range(0, 100_000, self.num_commments):
for i in range(0, 200_000, self.num_commments):
logger.info(f"Fetching comments for post {post_id}, start {i}")

ep = self.endpoint.format(post_id=post_id, start=i)
Expand Down Expand Up @@ -61,12 +61,16 @@ def parse_comments(self, comments_json: dict):
linkedin_id = linkedin_id_match.group(1) if linkedin_id_match else None

commentary = element.get("commentary", {}).get("text", "")
comment_id = element["urn"].split(",")[-1].rstrip(")")
num_likes = element["socialDetail"]["totalSocialActivityCounts"]["numLikes"]
comment = Comment(
post_id=self.post_id,
comment_id=comment_id,
internal_profile_id=internal_profile_id,
public_profile_id=linkedin_id,
name=name,
text=commentary,
num_likes=num_likes,
created_at=dt.utcfromtimestamp(element["createdAt"] / 1000),
)
comments.append(comment)
Expand Down
4 changes: 4 additions & 0 deletions staffspy/utils/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,23 @@

class Comment(BaseModel):
    """Record describing one comment attached to a post.

    Only ``post_id`` is required; every other field defaults to ``None``.
    """

    post_id: str
    comment_id: str | None = None
    internal_profile_id: str | None = None
    public_profile_id: str | None = None
    name: str | None = None
    text: str | None = None
    num_likes: int | None = None
    created_at: dt | None = None

    def to_dict(self):
        """Return the comment's fields as a plain dict.

        Keys appear in the same order as the fields are declared above.
        """
        # Explicit, ordered list of exported fields so the resulting dict
        # (and any DataFrame built from it) has a stable column order.
        exported_fields = (
            "post_id",
            "comment_id",
            "internal_profile_id",
            "public_profile_id",
            "name",
            "text",
            "num_likes",
            "created_at",
        )
        return {field_name: getattr(self, field_name) for field_name in exported_fields}

Expand Down

0 comments on commit d89bed5

Please sign in to comment.