Skip to content

Commit

Permalink
Porting the facebook submodule to tabular records
Browse files Browse the repository at this point in the history
  • Loading branch information
Yomguithereal committed Dec 12, 2023
1 parent 719aa4f commit 99e8fcd
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 99 deletions.
4 changes: 2 additions & 2 deletions minet/cli/facebook/comments.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
from minet.cli.utils import with_enricher_and_loading_bar
from minet.cli.facebook.utils import with_facebook_fatal_errors
from minet.facebook import FacebookMobileScraper
from minet.facebook.constants import FACEBOOK_COMMENT_CSV_HEADERS
from minet.facebook.types import MobileFacebookComment
from minet.facebook.exceptions import FacebookInvalidTargetError


@with_facebook_fatal_errors
@with_enricher_and_loading_bar(
headers=FACEBOOK_COMMENT_CSV_HEADERS,
headers=MobileFacebookComment,
title="Scraping comments",
unit="posts",
nested=True,
Expand Down
6 changes: 2 additions & 4 deletions minet/cli/facebook/post.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@
print_translation_warning_if_needed,
)
from minet.facebook import FacebookMobileScraper
from minet.facebook.constants import (
FACEBOOK_POST_WITH_REACTIONS_CSV_HEADERS,
)
from minet.facebook.types import MobileFacebookPostWithReactions
from minet.facebook.exceptions import (
FacebookInvalidTargetError,
FacebookNotPostError,
Expand All @@ -28,7 +26,7 @@

@with_facebook_fatal_errors
@with_enricher_and_loading_bar(
headers=FACEBOOK_POST_WITH_REACTIONS_CSV_HEADERS,
headers=MobileFacebookPostWithReactions,
title="Scraping posts",
unit="posts",
)
Expand Down
4 changes: 2 additions & 2 deletions minet/cli/facebook/post_authors.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
from minet.cli.utils import with_enricher_and_loading_bar
from minet.cli.facebook.utils import with_facebook_fatal_errors
from minet.facebook import FacebookMobileScraper
from minet.facebook.constants import FACEBOOK_USER_CSV_HEADERS
from minet.facebook.types import MobileFacebookUser
from minet.facebook.exceptions import FacebookInvalidTargetError


@with_facebook_fatal_errors
@with_enricher_and_loading_bar(
headers=FACEBOOK_USER_CSV_HEADERS, title="Finding authors", unit="posts"
headers=MobileFacebookUser, title="Finding authors", unit="posts"
)
def action(cli_args, enricher, loading_bar):
scraper = FacebookMobileScraper(cli_args.cookie, throttle=cli_args.throttle)
Expand Down
4 changes: 2 additions & 2 deletions minet/cli/facebook/posts.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
print_translation_warning_if_needed,
)
from minet.facebook import FacebookMobileScraper
from minet.facebook.constants import FACEBOOK_POST_CSV_HEADERS
from minet.facebook.types import MobileFacebookPost
from minet.facebook.exceptions import FacebookInvalidTargetError


@with_facebook_fatal_errors
@with_enricher_and_loading_bar(
headers=FACEBOOK_POST_CSV_HEADERS,
headers=MobileFacebookPost,
title="Scraping group posts",
unit="groups",
nested=True,
Expand Down
54 changes: 0 additions & 54 deletions minet/facebook/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,56 +19,6 @@
)
FACEBOOK_WEB_RATE_LIMITER_STATE = RateLimiterState(1, FACEBOOK_WEB_DEFAULT_THROTTLE)

FACEBOOK_USER_CSV_HEADERS = ["user_label", "user_id", "user_handle", "user_url"]

FACEBOOK_COMMENT_CSV_HEADERS = [
"post_id",
"id",
"user_id",
"user_handle",
"user_url",
"user_label",
"text",
"html",
"formatted_date",
"date",
"reactions",
"replies",
"in_reply_to",
]

FACEBOOK_POST_CSV_HEADERS = [
"url",
"user_id",
"user_handle",
"user_url",
"user_label",
"text",
"html",
"translated_text",
"translated_html",
"translated_from",
"formatted_date",
"date",
"reactions",
"comments",
]

FACEBOOK_POST_STATS_CSV_HEADERS = [
"error",
"canonical",
"account_name",
"timestamp",
"time",
"link",
"aria_label",
"text",
"share_count",
"comment_count",
"reaction_count",
"video_view_count",
]

FACEBOOK_REACTION_KEYS = OrderedDict(
{
1: "like",
Expand All @@ -82,7 +32,3 @@
16: "care",
}
)

FACEBOOK_POST_WITH_REACTIONS_CSV_HEADERS = FACEBOOK_POST_CSV_HEADERS + [
"reactions_types"
]
21 changes: 0 additions & 21 deletions minet/facebook/formatters.py

This file was deleted.

28 changes: 14 additions & 14 deletions minet/facebook/mobile_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@
from minet.scrape.utils import BeautifulSoupWithoutXHTMLWarnings
from minet.scrape.std import get_display_text
from minet.facebook.utils import grab_facebook_cookie
from minet.facebook.formatters import (
FacebookComment,
FacebookPost,
FacebookUser,
FacebookPostWithReaction,
from minet.facebook.types import (
MobileFacebookComment,
MobileFacebookUser,
MobileFacebookPost,
MobileFacebookPostWithReactions,
)
from minet.facebook.exceptions import (
FacebookInvalidCookieError,
Expand Down Expand Up @@ -80,7 +80,7 @@ def resolve_relative_url(url):
return urljoin(FACEBOOK_MOBILE_URL, url)


def scrape_comments(html, direction=None, in_reply_to=None):
def scrape_comments(html, direction=None, in_reply_to=None) -> MobileFacebookComment:
soup = BeautifulSoupWithoutXHTMLWarnings(html, "lxml")

data = {
Expand Down Expand Up @@ -139,7 +139,7 @@ def scrape_comments(html, direction=None, in_reply_to=None):
)

for item in valid_items:
item_id = item.get("id")
item_id = item["id"]

# Skipping comment if same as commented
if item_id == in_reply_to:
Expand Down Expand Up @@ -206,7 +206,7 @@ def scrape_comments(html, direction=None, in_reply_to=None):
data["replies"].append((resolve_relative_url(replies_url), item_id))

data["comments"].append(
FacebookComment(
MobileFacebookComment(
post_id=post_id,
id=item_id,
user_id=getattr(user, "id", ""),
Expand Down Expand Up @@ -302,7 +302,7 @@ def scrape_posts(html):
else None
)

post = FacebookPost(
post = MobileFacebookPost(
url=post_url,
user_id=getattr(user, "id", ""),
user_handle=getattr(user, "handle", ""),
Expand Down Expand Up @@ -401,7 +401,7 @@ def scrape_video(soup):
else None
)

post = FacebookPostWithReaction(
post = MobileFacebookPostWithReactions(
url=video_url,
user_id=getattr(user, "id", ""),
user_handle=getattr(user, "handle", ""),
Expand Down Expand Up @@ -492,7 +492,7 @@ def scrape_photo(soup):
else None
)

post = FacebookPostWithReaction(
post = MobileFacebookPostWithReactions(
url=photo_url,
user_id=getattr(user, "id", ""),
user_handle=getattr(user, "handle", ""),
Expand Down Expand Up @@ -597,7 +597,7 @@ def scrape_post(html):
else None
)

post = FacebookPostWithReaction(
post = MobileFacebookPostWithReactions(
url=post_url,
user_id=getattr(user, "id", ""),
user_handle=getattr(user, "handle", ""),
Expand Down Expand Up @@ -765,8 +765,8 @@ def post_author(self, url):
user_label = user_item.get_text().strip()

if isinstance(parsed, ParsedFacebookHandle):
return FacebookUser(user_label, None, parsed.handle, parsed.url)
return MobileFacebookUser(user_label, None, parsed.handle, parsed.url)
elif isinstance(parsed, ParsedFacebookUser):
return FacebookUser(user_label, parsed.id, parsed.handle, parsed.url)
return MobileFacebookUser(user_label, parsed.id, parsed.handle, parsed.url)
else:
raise TypeError
48 changes: 48 additions & 0 deletions minet/facebook/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,51 @@ def walk(entries: Iterable[Tuple["FacebookComment", List["FacebookComment"]]]):
walk(filter(lambda entry: entry[0].depth == 0, index.values()))

return sorted_comments


@dataclass
class MobileFacebookComment(TabularRecord):
    """Tabular record for one comment produced by the mobile scraper.

    Instances are created by ``scrape_comments`` in
    ``minet.facebook.mobile_scraper``, and the class itself is handed to the
    ``comments`` CLI command as its ``headers`` argument.

    The fields are declared in the same order as the columns of the
    ``FACEBOOK_COMMENT_CSV_HEADERS`` list this record replaces, so keep the
    declaration order stable.
    """

    # Identifier of the post the comment belongs to.
    post_id: str
    # Identifier of the comment itself.
    id: str
    # Author details; the scraper falls back to "" for user_id and
    # user_handle when no user could be resolved.
    user_id: str
    user_handle: str
    user_url: str
    user_label: str
    # Comment body, as text and as HTML.
    text: str
    html: str
    formatted_date: str
    date: Optional[str]
    reactions: str
    replies: str
    # Identifier of the parent comment when this comment is a reply.
    in_reply_to: Optional[str]


@dataclass
class MobileFacebookUser(TabularRecord):
    """Tabular record describing a Facebook user found by the mobile scraper.

    ``FacebookMobileScraper.post_author`` builds instances positionally as
    ``(label, id, handle, url)``, so the declaration order below must not be
    changed.

    NOTE(review): the header list this record replaces used the column names
    ``user_label``, ``user_id``, ``user_handle`` and ``user_url``. If
    ``TabularRecord`` derives column names from the field names, the
    ``post-authors`` CSV header changes accordingly -- confirm this is
    intended.
    """

    label: str
    # None when the profile URL only exposes a handle.
    id: Optional[str]
    handle: Optional[str]
    url: str


@dataclass
class MobileFacebookPost(TabularRecord):
    """Tabular record for one post produced by the mobile scraper.

    Instances are created by ``scrape_posts`` in
    ``minet.facebook.mobile_scraper``, and the class itself is handed to the
    ``posts`` CLI command as its ``headers`` argument.

    The fields are declared in the same order as the columns of the
    ``FACEBOOK_POST_CSV_HEADERS`` list this record replaces, so keep the
    declaration order stable.
    """

    url: str
    # Author details; the scraper falls back to "" for user_id and
    # user_handle when no user could be resolved.
    user_id: str
    user_handle: str
    user_url: str
    user_label: str
    # Post body, as text and as HTML.
    text: str
    html: str
    # Translation details; optional.
    translated_text: Optional[str]
    translated_html: Optional[str]
    translated_from: Optional[str]
    formatted_date: str
    date: Optional[str]
    reactions: str
    comments: str


@dataclass
class MobileFacebookPostWithReactions(MobileFacebookPost):
    """A ``MobileFacebookPost`` extended with a ``reactions_types`` field.

    Instances are created by ``scrape_video``, ``scrape_photo`` and
    ``scrape_post`` in ``minet.facebook.mobile_scraper``, and the class is
    handed to the ``post`` CLI command as its ``headers`` argument. The extra
    field comes after all inherited ones, like the ``"reactions_types"``
    column appended by the ``FACEBOOK_POST_WITH_REACTIONS_CSV_HEADERS`` list
    this record replaces.
    """

    reactions_types: Optional[str]

0 comments on commit 99e8fcd

Please sign in to comment.