-
Notifications
You must be signed in to change notification settings - Fork 1
/
gender.py
65 lines (54 loc) · 1.37 KB
/
gender.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import numpy as np
import pandas as pd
import re
from pandas import DataFrame, Series
import gender_guesser.detector as gender

# Load the listings table; row 0 of the CSV supplies the column names.
dataset = pd.read_csv("austin_listings.csv", header=0)

# Detector instance used to look up a gender label for each host name.
d = gender.Detector()
# One label per dataset row, filled in by the classification loop below.
genders = []

# Case-sensitive fragments searched for in a host name; a hit flags the
# host as a company in the classification loop. Stems such as "propert"
# and "manag" are deliberately partial words.
comp = [
    "rent", "Rent",
    "propert", "Propert",
    "vacation", "Vacation",
    "LLC", "llc",
    "estate", "Estate",
    "The",
    "manag", "Manag",
]

# Tokens that join two host names. The classification loop spells these
# out inline rather than reading this list.
compound = ["and", "And", "+", "&"]
# Classify every listing's host and append exactly one label per row to
# `genders`, so the list stays aligned with `dataset`.

# Detector results that are accepted as-is without further inspection.
_PERSON_LABELS = {"male", "female", "mostly_male", "mostly_female"}

# Two hosts sharing a listing: a standalone "and"/"And" word, or "&"/"+"
# anywhere in the name. The word boundaries keep names that merely contain
# those letters (e.g. "Andrew", "Sandy") from being labelled as couples.
_COUPLE_RE = re.compile(r"\b(?:and|And)\b|[&+]")

for row in dataset.itertuples():
    # str() so that non-string cells can still be split and searched.
    name = str(row.host_name)

    # Named `guess`, not `gender`, so the imported `gender` module alias
    # is not overwritten by a plain string.
    guess = d.get_gender(row.host_name)
    if guess in _PERSON_LABELS:
        genders.append(guess)
        continue

    # `name` is always a str here, so split() cannot fail; no try/except
    # is needed (a handler that appended and fell through would add two
    # labels for one row).
    words = name.split()
    has_company_keyword = any(re.search(keyword, name) for keyword in comp)

    if _COUPLE_RE.search(name):
        guess = "couple"
    elif len(words) >= 3 or has_company_keyword:
        # Three or more words, or a business keyword, is treated as a company.
        guess = "company"
    # Otherwise keep whatever label the detector returned.
    # NOTE(review): the `comp` fragments are still substring matches, so
    # "The" also hits names like "Theresa" — confirm whether that is wanted.
    genders.append(guess)
# Attach the computed labels to the full table, aligned on its row index.
host_labels = Series(genders, index=dataset.index)
dataset['host_gender'] = host_labels

# Export only the host name and its label (plus the index) to out1.csv.
result = dataset[['host_name']].copy()
result['host_gender'] = host_labels
result.to_csv('out1.csv')