Skip to content

Commit 5bab240

Browse files
committed
fix: pagination
1 parent 543d353 commit 5bab240

File tree

3 files changed, with 11 additions and 3 deletions (+11 -3 lines changed)

staffspy/__init__.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -138,4 +138,4 @@ def scrape_comments(self, post_ids: list[str]) -> pd.DataFrame:
138138
comment_dict = [comment.to_dict() for comment in all_comments]
139139
comment_df = pd.DataFrame(comment_dict)
140140

141-
return comment_df
141+
return comment_df.sort_values(by="created_at", ascending=False)

staffspy/linkedin/comments.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -12,15 +12,15 @@ class CommentFetcher:
1212

1313
def __init__(self, session):
1414
self.session = session
15-
self.endpoint = "https://www.linkedin.com/voyager/api/graphql?queryId=voyagerSocialDashComments.200c8ad7e1ad32ba4e5cc827ab5c3193&queryName=SocialDashCommentsBySocialDetail&variables=(origins:List(),sortOrder:REVERSE_CHRONOLOGICAL,count:100,socialDetailUrn:urn%3Ali%3Afsd_socialDetail%3A%28urn%3Ali%3Aactivity%3A{post_id}%2Curn%3Ali%3Aactivity%3A{post_id}%2Curn%3Ali%3AhighlightedReply%3A-%29,start:{start})"
15+
self.endpoint = "https://www.linkedin.com/voyager/api/graphql?queryId=voyagerSocialDashComments.200c8ad7e1ad32ba4e5cc827ab5c3193&queryName=SocialDashCommentsBySocialDetail&variables=(origins:List(),sortOrder:RELEVANCE,count:100,socialDetailUrn:urn%3Ali%3Afsd_socialDetail%3A%28urn%3Ali%3Aactivity%3A{post_id}%2Curn%3Ali%3Aactivity%3A{post_id}%2Curn%3Ali%3AhighlightedReply%3A-%29,start:{start})"
1616
self.post_id = None
1717
self.num_commments = 100
1818

1919
def fetch_comments(self, post_id: str):
2020
all_comments = []
2121
self.post_id = post_id
2222

23-
for i in range(0, 100_000, self.num_commments):
23+
for i in range(0, 200_000, self.num_commments):
2424
logger.info(f"Fetching comments for post {post_id}, start {i}")
2525

2626
ep = self.endpoint.format(post_id=post_id, start=i)
@@ -61,12 +61,16 @@ def parse_comments(self, comments_json: dict):
6161
linkedin_id = linkedin_id_match.group(1) if linkedin_id_match else None
6262

6363
commentary = element.get("commentary", {}).get("text", "")
64+
comment_id = element["urn"].split(",")[-1].rstrip(")")
65+
num_likes = element["socialDetail"]["totalSocialActivityCounts"]["numLikes"]
6466
comment = Comment(
6567
post_id=self.post_id,
68+
comment_id=comment_id,
6669
internal_profile_id=internal_profile_id,
6770
public_profile_id=linkedin_id,
6871
name=name,
6972
text=commentary,
73+
num_likes=num_likes,
7074
created_at=dt.utcfromtimestamp(element["createdAt"] / 1000),
7175
)
7276
comments.append(comment)

staffspy/utils/models.py

+4
Original file line number | Diff line number | Diff line change
@@ -8,19 +8,23 @@
88

99
class Comment(BaseModel):
1010
post_id: str
11+
comment_id: str | None = None
1112
internal_profile_id: str | None = None
1213
public_profile_id: str | None = None
1314
name: str | None = None
1415
text: str | None = None
16+
num_likes: int | None = None
1517
created_at: dt | None = None
1618

1719
def to_dict(self):
1820
return {
1921
"post_id": self.post_id,
22+
"comment_id": self.comment_id,
2023
"internal_profile_id": self.internal_profile_id,
2124
"public_profile_id": self.public_profile_id,
2225
"name": self.name,
2326
"text": self.text,
27+
"num_likes": self.num_likes,
2428
"created_at": self.created_at,
2529
}
2630

0 commit comments

Comments
 (0)