-
Notifications
You must be signed in to change notification settings - Fork 5
/
descr_gmap_users_gold_ans.py
92 lines (78 loc) · 2.92 KB
/
descr_gmap_users_gold_ans.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
"""
CMSC 12300 / CAPP 30123
Task: Descriptive analysis (Exploring Answer Providers)
Main author: Dhruval, Sanittawan (Nikki)
"""
import csv
import os

import googlemaps
from mrjob.job import MRJob
# Google Maps API key used for geocoding user locations.
# Supply it through the GOOGLE_API_KEY environment variable (for example
# exported in the shell before launching the job) or replace the empty
# default below with your own key. Keeping the key out of the source file
# avoids committing a secret by accident.
API_KEY = os.environ.get("GOOGLE_API_KEY", "")
class FindLocUsersGoldBadges(MRJob):
    """
    A class for finding the location of users with
    gold badge in answers, called illuminator.

    Every input line is a CSV record whose last column names the file it
    came from ("badges" or "users"). The mapper keys both kinds of record
    by user id and the reducer joins them.
    """

    # Geocoding client, created on first use and then reused for every
    # record handled by this task instead of being rebuilt per row.
    _gmaps = None

    def _geocoder(self):
        """
        Returns the Google Maps client, creating it on first use.
        """
        if self._gmaps is None:
            self._gmaps = googlemaps.Client(key=API_KEY)
        return self._gmaps

    @staticmethod
    def _clean(field):
        """
        Normalizes a CSV field: stripped, lower-cased, single quotes removed.
        """
        return str(field).strip().lower().replace("'", "")

    @staticmethod
    def _find_country(geocode_entry):
        """
        Picks the country name out of one geocoding result.

        Inputs:
            geocode_entry: (dict) a single entry of the geocode response
        Returns:
            long name of the address component whose types include
            "country", or None when the result has no such component
        """
        # The position of the country inside address_components is not
        # fixed (the last entry may be e.g. a postal code), so select the
        # component by its type rather than by index.
        for component in geocode_entry.get('address_components', []):
            if 'country' in component.get('types', []):
                return component.get('long_name')
        return None

    def mapper(self, _, line):
        """
        Maps User ID to badges and user id to user locations.

        Inputs:
            line: a single line in a CSV file
        Returns:
            user id, badge name          (rows from the badges file,
                                          only for the illuminator badge)
            user id, (coord, country)    (rows from the users file; both
                                          are None when the location is
                                          empty, cannot be geocoded, or
                                          the user id is "-1")
        """
        row = next(csv.reader([line]), None)
        if not row:
            # A blank line parses to an empty row; there is no source
            # column to inspect, so there is nothing to emit.
            return
        source = str(row[-1]).strip().lower()
        try:
            if source == "badges":
                badge_name = self._clean(row[2])
                user_id = self._clean(row[1])
                if badge_name == "illuminator":
                    yield user_id, badge_name
            elif source == "users":
                # NOTE(review): user ids from the users file are only
                # stripped, while badge-side ids are also lower-cased and
                # unquoted - confirm both files format ids identically.
                user_id = str(row[0]).strip()
                location = str(row[6]).strip()
                coord = None
                country = None
                if location and user_id != "-1":
                    results = self._geocoder().geocode(location)
                    if results:
                        best = results[0]
                        point = best['geometry']['location']
                        coord = (point['lat'], point['lng'])
                        country = self._find_country(best)
                yield user_id, (coord, country)
        except (IndexError, ValueError):
            # Rows with too few columns are skipped on purpose.
            # NOTE(review): a ValueError raised while creating or calling
            # the geocoding client is swallowed here as well - confirm
            # that is intended.
            pass

    def reducer(self, user_id, vals):
        """
        Reduces to badges and location for a given userid

        Inputs:
            user_id: (string) User ID
            vals: iterable of the values emitted by the mapper for this
                  user: badge names (strings) and/or locations
        Returns: User ID as key and a (location, badge) pair as value,
                 emitted only when the user has both a location record
                 and an illuminator badge record
        """
        location = None
        badge = None
        for val in vals:
            # The mapper's (coord, country) tuple is expected to arrive
            # here as a list (presumably after mrjob's serialization
            # between steps), while a badge is a plain string.
            if isinstance(val, (list, tuple)):
                if location is None:
                    location = val
            elif badge is None:
                badge = val
        # Identify each side explicitly so that duplicated badge or user
        # rows neither drop the user nor pair two values of the same kind.
        if location is not None and badge is not None:
            yield user_id, (location, badge)
# Start the MapReduce job only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    FindLocUsersGoldBadges.run()