-
Notifications
You must be signed in to change notification settings - Fork 1
/
test.py
60 lines (52 loc) · 1.87 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import csv
import pymongo
import mli_lib as mli
import operator
import ast
import lda
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation
import sklearn.cluster.mean_shift_ as ms
import simplejson as json
# Connection to the local MongoDB server; the script works against the
# "shops" collection of the "ml" database.
client = pymongo.MongoClient(host='localhost', port=27017)
db = client.get_database('ml')
shops_collection = db['shops']
# Running tally: term -> number of times it has been passed to add_dict().
occurance = {}


def add_dict(topics):
    """Count every term of every topic into the module-level ``occurance``.

    Args:
        topics: iterable of topics, where each topic is itself an iterable
            of hashable terms. Every term encountered bumps its counter by
            one, so a term appearing in several topics is counted several
            times.

    Returns:
        None. ``occurance`` is updated in place.
    """
    for topic in topics:
        for word in topic:
            # dict.get with a 0 default replaces the former -1 sentinel and
            # the separate "first time seen" branch.
            occurance[word] = occurance.get(word, 0) + 1
# Driver: for every city, walk the businesses returned by mli, run a
# single-topic LDA over each business's reviews, tally the resulting topic
# terms with add_dict(), then append the per-city term ranking to
# final_results.txt.
#
# NOTE(review): the indentation of this script was lost in the copy under
# review. The nesting below is reconstructed from the data flow: the report
# section uses ``city`` and ends by clearing ``occurance``, so it is taken to
# run once per city. Confirm against the original file.
total_review = 0  # reviews fed to LDA so far
test_size = 150000  # upper bound on the number of businesses analysed
k = 0  # businesses analysed so far (those with at least 5 reviews)
i = 0  # not referenced anywhere in the visible script
ngram = (2, 4)
cities = ["Las Vegas", "Phoenix"]
print(cities)
for city in cities:
    # The loop variable is named ``business`` rather than ``id`` so the
    # builtin id() is not shadowed at module level.
    for business in mli.get_business_id_list(20, city):
        print("Count ", business['count'])
        if k >= test_size:
            # Only leaves the business loop; the city loop carries on.
            break
        # presumably type="neg" selects negative reviews -- verify in mli_lib
        reviews = mli.get_reviews_business(business['_id'], type="neg")
        print("#", k, " ", business['_id'], len(reviews))
        if len(reviews) < 5:
            # Too few reviews to model; skip this business.
            print("Passed!")
            continue
        topics = mli.do_lda(reviews, n_features=2000, n_topics=1, maxdf=0.95,
                            n_top_words=25, range=ngram, isSplit=1)
        add_dict(topics[0])
        k += 1
        total_review += len(reviews)

    # Rank the tallied terms, most frequent first.
    sorted_x = sorted(occurance.items(), key=operator.itemgetter(1), reverse=True)
    print(sorted_x)
    # NOTE(review): ``total_review`` and ``k`` are never reset, so the figure
    # reported for a later city also includes the earlier cities' reviews,
    # while ``occurance`` is per city. Confirm whether that is intended.
    print("Total Review: ", total_review, "Ngram: ", ngram, "City: ", city)
    toWrite = ('\n\nTotal Review:' + str(total_review)
               + ' - Ngram: ' + str(ngram)
               + ' - City: ' + city
               + ' \n' + str(sorted_x))
    with open("final_results.txt", "a") as testfile:
        testfile.write(toWrite)
    # Start the next city with an empty tally.
    occurance.clear()
#mli.do_nmf(reviews[:1000],n_features=2000,n_topics=1,maxdf=0.95,n_top_words=10,range=(1,3),isSplit=1)