Skip to content

Commit

Permalink
added scraping of FB users profile info
Browse files Browse the repository at this point in the history
  • Loading branch information
camillechanial committed Feb 22, 2024
1 parent da98a7a commit a5df01f
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 0 deletions.
13 changes: 13 additions & 0 deletions ftest/facebook_user_infos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import csv
import sys
from tqdm import tqdm
import time
from minet.facebook import FacebookMobileScraper

# Functional test: scrape the profile infos of a hand-picked list of users
# through the mobile scraper, authenticating with the "firefox" cookie source.
scraper = FacebookMobileScraper(cookie="firefox")

# Profile urls to test; intentionally left empty in the repository.
USERS_URL = []

for url in USERS_URL:
    infos = scraper.user_infos(url)
    print(infos)
19 changes: 19 additions & 0 deletions minet/cli/facebook/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,24 @@
$ minet fb url-likes url -i url.csv > url_likes.csv
""",
variadic_input={"dummy_column": "url", "item_label": "url"},

)

# Declaration of the `minet fb user-infos` subcommand. The action itself lives
# in `minet.cli.facebook.user_infos`; it reads user urls either from the
# command line or from a CSV column and accepts the shared mobile arguments.
FACEBOOK_USER_INFOS_SUBCOMMAND = command(
    "user-infos",
    "minet.cli.facebook.user_infos",
    title="Minet Facebook User Infos Command",
    description="""
        Retrieve the name, hometown, current city and gender of a given Facebook user.
    """,
    epilog="""
        Examples:
        . Fetching user infos of a series of users in a CSV file:
            $ minet fb user-infos user_url -i fb-users.csv > user-infos.csv
    """,
    variadic_input={"dummy_column": "user_url", "item_label": "user"},
    arguments=[*MOBILE_ARGUMENTS],
)

FACEBOOK_COMMAND = command(
Expand All @@ -265,5 +283,6 @@
FACEBOOK_POST_SUBCOMMAND,
FACEBOOK_POSTS_SUBCOMMAND,
FACEBOOK_URL_LIKES_SUBCOMMAND,
FACEBOOK_USER_INFOS_SUBCOMMAND
],
)
25 changes: 25 additions & 0 deletions minet/cli/facebook/user_infos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# =============================================================================
# Minet Facebook User Infos CLI Action
# =============================================================================
#
# Logic of the `fb user-infos` action.
#
from minet.cli.utils import with_enricher_and_loading_bar
from minet.cli.facebook.utils import with_facebook_fatal_errors
from minet.facebook import FacebookMobileScraper
from minet.facebook.types import MobileFacebookUserInfo

@with_facebook_fatal_errors
@with_enricher_and_loading_bar(
    headers=MobileFacebookUserInfo, title="Finding user profile infos", unit="users"
)
def action(cli_args, enricher, loading_bar):
    """Enrich each input row with the profile infos of the user it points to.

    For every cell of the `cli_args.column` column, the user url is scraped
    with a `FacebookMobileScraper` built from `cli_args.cookie` and
    `cli_args.throttle`, and the resulting `MobileFacebookUserInfo` is
    appended to the row. If the scraper returns `None`, the row is written
    with no addendum.
    """
    scraper = FacebookMobileScraper(cli_args.cookie, throttle=cli_args.throttle)

    # The row index yielded by `enumerate_cells` is not needed here.
    for _, row, user_url in enricher.enumerate_cells(
        cli_args.column, with_rows=True, start=1
    ):
        with loading_bar.step():
            user_infos = scraper.user_infos(user_url)

            # NOTE: nothing must be printed here. The documented usage
            # redirects stdout to the resulting CSV file, so a stray
            # `print(row)` would end up interleaved with the output.
            enricher.writerow(
                row, user_infos.as_csv_row() if user_infos is not None else None
            )
39 changes: 39 additions & 0 deletions minet/facebook/mobile_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
MobileFacebookUser,
MobileFacebookPost,
MobileFacebookPostWithReactions,
MobileFacebookUserInfo,
)
from minet.facebook.exceptions import (
FacebookInvalidCookieError,
Expand Down Expand Up @@ -770,3 +771,41 @@ def post_author(self, url):
return MobileFacebookUser(user_label, parsed.id, parsed.handle, parsed.url)
else:
raise TypeError

def user_infos(self, url):
    """Scrape basic profile infos from a Facebook user page.

    Args:
        url: url of the user profile. It is converted to its mobile
            counterpart before being requested.

    Returns:
        A `MobileFacebookUserInfo` holding the name, hometown, current city
        and gender found on the page. Any piece of information that could
        not be found is `None`.
    """
    url = convert_url_to_mobile(url)

    html = self.request_page(url)
    soup = BeautifulSoupWithoutXHTMLWarnings(html, "lxml")

    def field_value(label):
        # The value is read from the element following the grandparent of
        # the <span> holding the label. Each hop is guarded so that a page
        # with an unexpected layout yields None instead of raising.
        label_span = soup.find("span", string=label)
        if label_span is None:
            return None

        container = label_span.parent.parent
        if container is None:
            return None

        value_node = container.next_sibling
        if value_node is None:
            return None

        return value_node.text

    # The page title carries the user name, but a page may lack a <title>.
    title = soup.find("title")
    name = title.text if title is not None else None

    # Title of the page served when the profile cannot be displayed.
    if name == "Content Not Found":
        name = None

    return MobileFacebookUserInfo(
        name,
        field_value("Hometown"),
        field_value("Current city"),
        field_value("Gender"),
    )







6 changes: 6 additions & 0 deletions minet/facebook/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,12 @@ class MobileFacebookUser(TabularRecord):
handle: Optional[str]
url: str

# Profile infos of a Facebook user, as returned by the mobile scraper's
# `user_infos` method and used as the output headers of the `fb user-infos`
# CLI action. Every field is None when the matching piece of information
# could not be found on the profile page.
# NOTE(review): field order presumably drives the output column order through
# TabularRecord -- confirm before reordering.
@dataclass
class MobileFacebookUserInfo(TabularRecord):
# Text of the page <title>, or None for a "Content Not Found" page.
name: Optional[str]
# Value displayed next to the "Hometown" label.
hometown: Optional[str]
# Value displayed next to the "Current city" label.
current_city: Optional[str]
# Value displayed next to the "Gender" label.
gender: Optional[str]

@dataclass
class MobileFacebookPost(TabularRecord):
Expand Down

0 comments on commit a5df01f

Please sign in to comment.