Skip to content

Commit

Permalink
added scraping of FB users places lived
Browse files Browse the repository at this point in the history
  • Loading branch information
camillechanial committed Feb 22, 2024
1 parent da98a7a commit bdef6dc
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 0 deletions.
32 changes: 32 additions & 0 deletions ftest/facebook_user_places_lived.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import csv
import sys
from tqdm import tqdm

from minet.facebook import FacebookMobileScraper

# Functional test: fetch and print the "places lived" record of a handful of
# Facebook profiles using the mobile scraper.
scraper = FacebookMobileScraper(cookie="firefox")

# Profiles queried by this test run.
USERS_URL = [
    'https://www.facebook.com/cyr.esseh',
    'https://www.facebook.com/akim.malonga',
    'https://www.facebook.com/sarah.matoko',
    'https://www.facebook.com/chrismonick',
    'https://www.facebook.com/mavie.coeurmbeye',
    'https://www.facebook.com/clement.tsith',
    # Additional profiles, currently disabled:
    # 'https://www.facebook.com/johnny.levey.5',
    # 'https://www.facebook.com/profile.php?id=100004392515502',
    # 'https://www.facebook.com/blaidynson',
    # 'https://www.facebook.com/fabien.Cydel',
    # 'https://www.facebook.com/xavierdestaing.baboueya',
    # 'https://www.facebook.com/beaugarel.malonga',
    # 'https://www.facebook.com/cedric.mabiala.714',
    # 'https://www.facebook.com/gracedaisy.londa',
    # 'https://www.facebook.com/aichath.tidjani.35',
    # 'https://www.facebook.com/profile.php?id=100022219963045',
    # 'https://www.facebook.com/zadkiel.esuszico',
    # 'https://www.facebook.com/claude.bikoulou.7',
    # 'https://www.facebook.com/sheila.mabiala.1'
]

# One request per profile; the resulting record is dumped as-is on stdout.
for profile_url in USERS_URL:
    places_lived = scraper.user_places_lived_info(profile_url)
    print(places_lived)
19 changes: 19 additions & 0 deletions minet/cli/facebook/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,24 @@
$ minet fb url-likes url -i url.csv > url_likes.csv
""",
variadic_input={"dummy_column": "url", "item_label": "url"},

)

# Declaration of the `minet fb user-places-lived` subcommand. The action itself
# lives in `minet.cli.facebook.user_places_lived`; user urls are read from the
# `user_url` column (or given directly on the command line).
FACEBOOK_USER_PLACES_LIVED_SUBCOMMAND = command(
    "user-places-lived",
    "minet.cli.facebook.user_places_lived",
    title="Minet Facebook User Places Lived Command",
    description="""
        Retrieve the hometown and current city of a given Facebook user.
    """,
    epilog="""
        Examples:

        . Fetching places lived of a series of users in a CSV file:
            $ minet fb user-places-lived user_url -i fb-users.csv > places-lived.csv
    """,
    variadic_input={"dummy_column": "user_url", "item_label": "user"},
    arguments=[*MOBILE_ARGUMENTS],
)

FACEBOOK_COMMAND = command(
Expand All @@ -265,5 +283,6 @@
FACEBOOK_POST_SUBCOMMAND,
FACEBOOK_POSTS_SUBCOMMAND,
FACEBOOK_URL_LIKES_SUBCOMMAND,
FACEBOOK_USER_PLACES_LIVED_SUBCOMMAND
],
)
27 changes: 27 additions & 0 deletions minet/cli/facebook/user_places_lived.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# =============================================================================
# Minet Facebook User Places Lived CLI Action
# =============================================================================
#
# Logic of the `fb user-places-lived` action.
#
from minet.cli.utils import with_enricher_and_loading_bar
from minet.cli.facebook.utils import with_facebook_fatal_errors
from minet.facebook import FacebookMobileScraper
from minet.facebook.types import MobileFacebookUserPlacesLived
from minet.facebook.exceptions import FacebookInvalidTargetError


@with_facebook_fatal_errors
@with_enricher_and_loading_bar(
    headers=MobileFacebookUserPlacesLived, title="Finding places lived", unit="users"
)
def action(cli_args, enricher, loading_bar):
    """Enrich each input row with the places lived by the targeted user.

    Args:
        cli_args: Parsed CLI arguments. This function reads `cookie`,
            `throttle` and `column` from it.
        enricher: Enricher supplied by the decorator; used both to iterate
            over the cells of the target column and to write output rows.
        loading_bar: Loading bar supplied by the decorator; advanced by one
            step per processed user.
    """
    scraper = FacebookMobileScraper(cli_args.cookie, throttle=cli_args.throttle)

    for _, row, user_url in enricher.enumerate_cells(
        cli_args.column, with_rows=True, start=1
    ):
        with loading_bar.step():
            places_lived = scraper.user_places_lived_info(user_url)

            # NOTE: nothing must be printed here. The command's result is
            # meant to be redirected from stdout into a CSV file, so any
            # stray print would end up inside the output.
            enricher.writerow(
                row, places_lived.as_csv_row() if places_lived is not None else None
            )
29 changes: 29 additions & 0 deletions minet/facebook/mobile_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
MobileFacebookUser,
MobileFacebookPost,
MobileFacebookPostWithReactions,
MobileFacebookUserPlacesLived
)
from minet.facebook.exceptions import (
FacebookInvalidCookieError,
Expand Down Expand Up @@ -770,3 +771,31 @@ def post_author(self, url):
return MobileFacebookUser(user_label, parsed.id, parsed.handle, parsed.url)
else:
raise TypeError

def user_places_lived_info(self, url):
    """Scrape the hometown and current city shown on a user's page.

    The given url is converted to its mobile counterpart, fetched through
    `self.request_page` and parsed. Each place is looked up through the
    `span` holding its label ('Hometown' / 'Current city'): the value is
    the text of the next sibling of the label's grandparent element.

    Args:
        url: Url of the Facebook user.

    Returns:
        A `MobileFacebookUserPlacesLived(hometown, current_city)` record.
        Either field is `None` when its label, or the element expected to
        hold its value, cannot be found in the page.
    """
    url = convert_url_to_mobile(url)

    html = self.request_page(url)
    soup = BeautifulSoupWithoutXHTMLWarnings(html, "lxml")

    def find_place(label):
        # Return the text displayed next to the given label, or None.
        label_element = soup.find("span", string=label)

        if label_element is None:
            return None

        container = label_element.parent.parent

        if container is None:
            return None

        value_element = container.next_sibling

        # The label may be present without any value next to it: report a
        # missing place rather than failing on the whole user.
        if value_element is None:
            return None

        return value_element.text

    hometown = find_place("Hometown")
    current_city = find_place("Current city")

    return MobileFacebookUserPlacesLived(hometown, current_city)







4 changes: 4 additions & 0 deletions minet/facebook/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,10 @@ class MobileFacebookUser(TabularRecord):
handle: Optional[str]
url: str

# Record holding the places lived by a Facebook user, as returned by
# `FacebookMobileScraper.user_places_lived_info` and used as CSV headers by
# the `fb user-places-lived` CLI action.
@dataclass
class MobileFacebookUserPlacesLived(TabularRecord):
    # Text found next to the 'Hometown' label, None when not found.
    hometown: Optional[str]
    # Text found next to the 'Current city' label, None when not found.
    current_city: Optional[str]

@dataclass
class MobileFacebookPost(TabularRecord):
Expand Down

0 comments on commit bdef6dc

Please sign in to comment.