-
Notifications
You must be signed in to change notification settings - Fork 5
/
descr_users_gold_ans.py
95 lines (79 loc) · 3.18 KB
/
descr_users_gold_ans.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
"""
CMSC 12300 / CAPP 30123
Task: Descriptive analysis (Exploring Users)
Main authors: Sanittawan (Nikki) and Dhruval
"""
import csv
import sys

from geopy.exc import GeocoderTimedOut
from geopy.geocoders import Nominatim
from mrjob.job import MRJob
class FindLocUsersGoldBadges(MRJob):
    """
    A class for finding the location of users with
    gold badge in answers, called illuminator.

    Every input line is a CSV record whose last column names the file
    it came from ("badges" or "users"). The mapper keys both kinds of
    record by user id; the reducer keeps the users for which both a
    badge record and a location record were seen.
    """

    # geopy >= 2.0 refuses to build a Nominatim geocoder with the
    # library's default user agent, so the job identifies itself.
    GEOCODER_USER_AGENT = "descr_users_gold_ans"
    # Seconds to wait for a geocoding response before giving up.
    GEOCODER_TIMEOUT = 3

    @staticmethod
    def _clean(field):
        """
        Normalises a CSV field: strips surrounding whitespace,
        lower-cases it and removes single quotes.
        """
        return field.strip().lower().replace("'", "")

    def _get_geolocator(self):
        """
        Returns the Nominatim geocoder for this job instance, creating
        it on first use so that it is not rebuilt for every row.
        """
        geolocator = getattr(self, "_geolocator", None)
        if geolocator is None:
            geolocator = Nominatim(user_agent=self.GEOCODER_USER_AGENT,
                                   timeout=self.GEOCODER_TIMEOUT)
            self._geolocator = geolocator
        return geolocator

    def _geocode(self, location):
        """
        Looks up coordinates and country for a free-text location.

        Inputs:
            location: (string) non-empty location text

        Returns: tuple (coord, country). coord is a
            (latitude, longitude) tuple or None; country is a string
            or None. A value is None when the lookup found nothing or
            timed out before that value was obtained.
        """
        coord = None
        country = None
        geolocator = self._get_geolocator()
        try:
            raw_location = geolocator.geocode(location)
            if raw_location:
                coord = (raw_location.latitude, raw_location.longitude)
                address = geolocator.reverse(
                    [raw_location.latitude, raw_location.longitude],
                    language='en')
                # reverse() may find nothing for the coordinates.
                if address and address.address:
                    # The address is a comma-separated string; its last
                    # component is taken as the country. Splitting on
                    # whitespace would cut multi-word names such as
                    # "United States" down to their last word.
                    country = address.address.split(",")[-1].strip()
        except GeocoderTimedOut as e:
            msg = ("Error: geocode failed on input {}"
                   " with message {}".format(location, e))
            # stdout carries the mapper's key/value output, so
            # diagnostics must go to stderr.
            print(msg, file=sys.stderr)
        return coord, country

    def mapper(self, _, line):
        """
        Maps User ID to badges and user id to user locations.

        Inputs:
            line: a single line in a CSV file; its last column is the
                source tag ("badges" or "users")

        Yields:
            for a badges row: user id (column 1) and the badge name
                (column 2), only when the badge is "illuminator"
            for a users row: user id (column 0) and a tuple
                (coord, country) derived from the location text in
                column 6; both are None when the location is empty,
                the user id is "-1", or geocoding found nothing
        """
        try:
            row = next(csv.reader([line]))
            source = row[-1].strip().lower()
            if source == "badges":
                badge_name = self._clean(row[2])
                user_id = self._clean(row[1])
                if badge_name == "illuminator":
                    yield user_id, badge_name
            elif source == "users":
                # Normalised like the badges branch so that the
                # join keys of both sources agree.
                user_id = self._clean(row[0])
                location = row[6].strip()
                coord = None
                country = None
                if location and user_id != "-1":
                    coord, country = self._geocode(location)
                yield user_id, (coord, country)
        except (IndexError, ValueError, csv.Error):
            # Blank, truncated or malformed lines are skipped.
            pass

    def reducer(self, user_id, vals):
        """
        Reduces to badges and location for a given userid

        Inputs:
            user_id: (string) User ID
            vals: iterable of the values emitted by the mapper for
                this user: badge names (strings) and/or
                (coord, country) location pairs

        Yields: User ID as key and, as value, a two-element list
            holding the badge and the location in the order they
            arrived. Nothing is yielded unless at least one badge and
            one location were seen.
        """
        badge = None
        location = None
        pair = []
        for val in vals:
            # Badge values are plain strings; location values are
            # sequences, so the type tells the two sources apart.
            if isinstance(val, str):
                if badge is None:
                    badge = val
                    pair.append(val)
            elif location is None:
                location = val
                pair.append(val)
        # Counting values alone cannot tell "badge + location" from
        # two rows of the same kind, so require one of each.
        if badge is not None and location is not None:
            yield user_id, pair
# Start the MapReduce job only when this file is executed as a script.
if __name__ == "__main__":
    FindLocUsersGoldBadges.run()