Skip to content

Commit

Permalink
added gender and name info and renamed command
Browse files Browse the repository at this point in the history
  • Loading branch information
camillechanial committed Feb 22, 2024
1 parent 2413a74 commit 005e7fa
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 55 deletions.
13 changes: 13 additions & 0 deletions ftest/facebook_user_infos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import csv
import sys
from tqdm import tqdm
import time
from minet.facebook import FacebookMobileScraper

# Manual functional test for `FacebookMobileScraper.user_infos`: scrape every
# profile listed in USERS_URL and print the resulting record.
# NOTE(review): cookie="firefox" presumably makes the scraper reuse the
# cookies of a local Firefox session — confirm against FacebookMobileScraper.
scraper = FacebookMobileScraper(cookie="firefox")

# Profile URLs to scrape. Deliberately committed empty: fill it in locally
# before running the script.
USERS_URL = [
]

for user_url in USERS_URL:
    print(scraper.user_infos(user_url))
32 changes: 0 additions & 32 deletions ftest/facebook_user_places_lived.py

This file was deleted.

16 changes: 8 additions & 8 deletions minet/cli/facebook/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,18 +250,18 @@

)

FACEBOOK_USER_PLACES_LIVED_SUBCOMMAND = command(
"user-places-lived",
"minet.cli.facebook.user_places_lived",
title="Minet Facebook User Places Lived Command",
FACEBOOK_USER_INFOS_SUBCOMMAND = command(
"user-infos",
"minet.cli.facebook.user_infos",
title="Minet Facebook User Infos Command",
description="""
Retrieve the hometown and current city of a given Facebook user..
Retrieve the name, hometown, current city and gender of a given Facebook user.
""",
epilog="""
Examples:
. Fetching placed lived of a series of users in a CSV file:
$ minet fb user-places-lived user_url -i fb-users.csv > placed-lived.csv
. Fetching user infos of a series of users in a CSV file:
$ minet fb user-infos user_url -i fb-users.csv > user-infos.csv
""",
variadic_input={"dummy_column": "user_url", "item_label": "user"},
arguments=[*MOBILE_ARGUMENTS],
Expand All @@ -283,6 +283,6 @@
FACEBOOK_POST_SUBCOMMAND,
FACEBOOK_POSTS_SUBCOMMAND,
FACEBOOK_URL_LIKES_SUBCOMMAND,
FACEBOOK_USER_PLACES_LIVED_SUBCOMMAND
FACEBOOK_USER_INFOS_SUBCOMMAND
],
)
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
from minet.cli.utils import with_enricher_and_loading_bar
from minet.cli.facebook.utils import with_facebook_fatal_errors
from minet.facebook import FacebookMobileScraper
from minet.facebook.types import MobileFacebookUserPlacesLived
from minet.facebook.types import MobileFacebookUserInfo

@with_facebook_fatal_errors
@with_enricher_and_loading_bar(
headers=MobileFacebookUserPlacesLived, title="Finding places lived", unit="users"
headers=MobileFacebookUserInfo, title="Finding user profile infos", unit="users"
)
def action(cli_args, enricher, loading_bar):
scraper = FacebookMobileScraper(cli_args.cookie, throttle=cli_args.throttle)
Expand All @@ -20,6 +20,6 @@ def action(cli_args, enricher, loading_bar):
cli_args.column, with_rows=True, start=1
):
with loading_bar.step():
places_lived = scraper.user_places_lived_info(user_url)
user_infos = scraper.user_infos(user_url)
print(row)
enricher.writerow(row, places_lived.as_csv_row() if places_lived is not None else None)
enricher.writerow(row, user_infos.as_csv_row() if user_infos is not None else None)
28 changes: 19 additions & 9 deletions minet/facebook/mobile_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
MobileFacebookUser,
MobileFacebookPost,
MobileFacebookPostWithReactions,
MobileFacebookUserPlacesLived
MobileFacebookUserInfo,
)
from minet.facebook.exceptions import (
FacebookInvalidCookieError,
Expand Down Expand Up @@ -772,26 +772,36 @@ def post_author(self, url):
else:
raise TypeError

def user_places_lived_info(self, url) :
def user_infos(self, url):
    """Scrape the name, hometown, current city and gender of a Facebook user.

    Args:
        url: URL of the user's profile. It is passed through
            `convert_url_to_mobile` before the page is requested.

    Returns:
        A `MobileFacebookUserInfo` built from
        (name, hometown, current_city, gender). Every piece of information
        that cannot be found in the page is set to None.
    """

    url = convert_url_to_mobile(url)

    html = self.request_page(url)
    soup = BeautifulSoupWithoutXHTMLWarnings(html, "lxml")

    def field_value(label):
        # Find the <span> whose text is exactly `label` and read the text of
        # the node that follows the span's grandparent.
        label_span = soup.find("span", string=label)
        if label_span is None:
            return None

        # Guard every hop: an unexpected page layout must yield None for
        # this field rather than abort the scrape with an AttributeError.
        wrapper = getattr(label_span.parent, "parent", None)
        value_node = getattr(wrapper, "next_sibling", None)

        return getattr(value_node, "text", None)

    # The user's name is read from the page title; a page without <title>
    # simply yields no name.
    title_tag = soup.find("title")
    name = title_tag.text if title_tag is not None else None

    # NOTE(review): presumably the title served for unavailable profiles —
    # in that case there is no name to report.
    if name == "Content Not Found":
        name = None

    return MobileFacebookUserInfo(
        name,
        field_value("Hometown"),
        field_value("Current city"),
        field_value("Gender"),
    )



Expand Down
6 changes: 4 additions & 2 deletions minet/facebook/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,11 @@ class MobileFacebookUser(TabularRecord):
url: str

@dataclass
class MobileFacebookUserInfo(TabularRecord):
    """Profile information scraped from a Facebook user page.

    Every field is optional: it is None when the corresponding information
    could not be found on the profile page.
    """

    # Name of the user, taken from the page title.
    name: Optional[str]
    # Value displayed next to the "Hometown" label.
    hometown: Optional[str]
    # Value displayed next to the "Current city" label.
    current_city: Optional[str]
    # Value displayed next to the "Gender" label.
    gender: Optional[str]

@dataclass
class MobileFacebookPost(TabularRecord):
Expand Down

0 comments on commit 005e7fa

Please sign in to comment.