-
Notifications
You must be signed in to change notification settings - Fork 1
/
amenities.py
59 lines (56 loc) · 1.34 KB
/
amenities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import numpy as np
import pandas as pd
import re
from pandas import DataFrame, Series
# Expand the ``amenities`` column of listings.csv into one 'yes'/'no' column
# per amenity and attach those columns to the frame loaded from data.csv.

# Raw amenity tokens containing any of these fragments are ignored.
# NOTE(review): the previous condition also contained `'ther' not in mem`,
# which tested the accumulator list instead of the token and was therefore
# always true. It may have been meant to drop "Other ..." tokens -- confirm
# before adding 'ther' here, since that would change the produced columns.
EXCLUDED_FRAGMENTS = ('_49', 'Stair g', ' hoist', 'corner guar', '_50')

# An amenity is kept only if at least this many listings have it.
MIN_LISTINGS = 750


def _split_amenities(raw):
    """Split one raw ``amenities`` cell such as ``{TV,"Air conditioning"}``.

    The cell is stringified first, so a missing value (NaN) yields the single
    token ``'nan'``; an empty cell ``{}`` yields ``['']``.
    """
    text = str(raw).strip('{').strip('}')
    return [token.strip('"') for token in text.split(',')]


def _is_wanted(token):
    """Return True for a non-empty raw token with no excluded fragment."""
    return token != '' and not any(part in token for part in EXCLUDED_FRAGMENTS)


def amenity_columns(cells, min_listings=MIN_LISTINGS):
    """Build 'yes'/'no' indicator columns from raw amenity cells.

    Args:
        cells: iterable of raw ``amenities`` values, one per listing.
        min_listings: minimum number of listings that must have an amenity
            for its column to be kept.

    Returns:
        A list of ``[name, flags]`` pairs in first-seen order, where ``name``
        is the amenity with spaces replaced by underscores and ``flags`` holds
        one ``'yes'`` or ``'no'`` per input cell.
    """
    # Parse every cell once; both passes below reuse the result.
    parsed = [_split_amenities(cell) for cell in cells]

    # Pass 1: vocabulary. De-duplicate on the *normalised* name; comparing the
    # raw token against stored normalised names never matches for amenities
    # that contain a space, which would add them again for every listing.
    names = []
    seen = set()
    for tokens in parsed:
        for token in tokens:
            name = token.replace(' ', '_')
            if _is_wanted(token) and name not in seen:
                seen.add(name)
                names.append(name)

    # Pass 2: one flag per listing for every amenity in the vocabulary.
    columns = [[name, []] for name in names]
    for tokens in parsed:
        present = {token.replace(' ', '_') for token in tokens}
        for name, flags in columns:
            flags.append('yes' if name in present else 'no')

    # Drop rare amenities. Every entry is checked, including the last one.
    return [column for column in columns
            if column[1].count('yes') >= min_listings]


# Run the CSV pipeline only when executed as a script, so the helpers above
# can be imported without listings.csv / data.csv being present.
if __name__ == "__main__":
    dataset = pd.read_csv("listings.csv", header=0)
    data = pd.read_csv('data.csv', header=0)

    mem = amenity_columns(dataset['amenities'])
    for m in mem:
        print(len(m[1]))
        # Series() raises ValueError if the flag count differs from the number
        # of rows in data. Assumes data.csv has one row per listings.csv row,
        # in the same order -- TODO confirm.
        data[m[0]] = Series(m[1], index=data.index)
    print(data.head())