-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathitempredict.py
More file actions
96 lines (90 loc) · 2.78 KB
/
itempredict.py
File metadata and controls
96 lines (90 loc) · 2.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
from openpyxl import load_workbook
import csv
import time
import math
import pickle
import numpy as np
import pandas as pd
def cosineS(a, b):
    """Return the cosine similarity of two vectors over their co-rated entries.

    A position counts as co-rated only when neither vector holds NaN there;
    every other position is zeroed in both vectors before the dot product
    and the norms are computed.

    Args:
        a: 1-D array-like of numbers; NaN marks a missing value.
        b: 1-D array-like of the same length as ``a``.

    Returns:
        The cosine similarity of the masked vectors, or the integer ``0``
        when the product of their norms is zero (for example, when no
        position is co-rated).
    """
    # Diagnostic trace of the raw inputs.
    print(a)
    print(b)
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    # Build the co-rated mask explicitly. Deriving it from (a * b) / b and
    # (a * b) / a turns a genuine 0 value into 0/0 = NaN, which then wipes
    # out the other vector's entry and distorts its norm.
    rated_by_both = ~(np.isnan(a) | np.isnan(b))
    c = np.where(rated_by_both, a, 0.0)
    d = np.where(rated_by_both, b, 0.0)
    norm = np.linalg.norm(c) * np.linalg.norm(d)
    if not norm:
        return 0
    value = np.dot(c, d) / norm
    if not math.isnan(value):
        print('cosineS' + str(value))
    else:
        print('Nan')
    return value
def cosineMatrix(ratingMatrix):
    """Build the symmetric column-by-column similarity matrix of ratingMatrix.

    Every pair of columns is scored with ``cosineS``; only the upper triangle
    is computed and each score is mirrored into the lower triangle. The
    finished matrix is printed, pickled to ``tempsimilarity.pkl`` in the
    current working directory, and returned.

    Args:
        ratingMatrix: 2-D NumPy array; similarities are taken between columns.

    Returns:
        A square ``numpy.ndarray`` with one row and one column per column of
        ``ratingMatrix``.
    """
    print(ratingMatrix)
    dims = np.shape(ratingMatrix)
    item_count = dims[1]
    print(dims)
    sim = np.zeros((item_count, item_count))
    for row in range(item_count):
        left = ratingMatrix[:, row]
        # Start at the diagonal: the matrix is symmetric, so the lower
        # triangle is filled by mirroring.
        for col in range(row, item_count):
            score = cosineS(left, ratingMatrix[:, col])
            print(row, col)
            sim[row, col] = score
            sim[col, row] = score
    print(sim)
    sim.dump('tempsimilarity.pkl')
    return sim
# Script body: take a small slice of the u_id x m_id rating pivot table, blank
# out one known rating per row, re-estimate every missing cell from the
# column-to-column cosine similarities, and append both the original and the
# estimated rows to the workbook.
# NOTE(review): u_id / m_id are presumably user and movie ids - confirm
# against userrating.csv.
book = load_workbook('ratingitemp.xlsx')
sheet = book['premiss']
sheetoriginal = book['original1']

data = pd.read_csv('userrating.csv')
itemsim = data.pivot_table(index=['u_id'], columns=['m_id'], values='rating')
# Copy the slice so the in-place masking below acts on an independent frame
# instead of a view of the full pivot table.
itemsim = itemsim.iloc[27:31, 0:3].copy()
# Derive the loop bounds from the slice itself; it can be smaller than 4 x 3
# when the pivot table has fewer rows or columns.
row_count, col_count = itemsim.shape

# Record the untouched rows before anything is masked.
for i in range(row_count):
    sheetoriginal.append(itemsim.iloc[i].tolist())
print(itemsim)

# Hold out the first non-NaN rating of every row so it can be re-estimated.
for i in range(row_count):
    j = None  # stays None only when the slice has no columns
    for j in range(col_count):
        print(i, j)
        if not math.isnan(itemsim.iloc[i, j]):
            itemsim.iloc[i, j] = np.nan
            break
    print(i, j)
print(itemsim)

# reset_index() turns u_id into the first column; slicing it off again leaves
# only the rating values.
isim = itemsim.reset_index().values
isim = isim[:, 1:]
csim = cosineMatrix(isim)
cormatrix = pd.DataFrame(index=itemsim.columns, columns=itemsim.columns, data=csim)
print(cormatrix)

for k in range(len(itemsim.index)):
    newuser = itemsim.iloc[k]
    newratings = newuser.dropna()
    for label in newuser.index:
        print(label, end=' ')
    for label in newratings.index:
        print(label, end=' ')
    row_values = []
    for item in newuser.index:
        if item in newratings.index:
            # Already rated: pass the known value through unchanged.
            row_values.append(newuser.loc[item])
            continue
        # Similarity-weighted average of the ratings this row does have.
        cum = 0
        dcum = 0
        for rated in newratings.index:
            weight = cormatrix.at[rated, item]
            if not math.isnan(weight):
                cum = cum + weight * newratings.loc[rated]
                dcum = dcum + weight
        print(cum, dcum)
        if dcum == 0:
            # Sentinel: the similarity weights sum to zero, no estimate.
            row_values.append(-1)
        else:
            a = cum / dcum
            if not math.isnan(a):
                row_values.append(a)
            else:
                # Sentinel: the weighted average came out as NaN.
                row_values.append(-2)
    sheet.append(row_values)

# Raw string: the previous literal contained the invalid escape "\i". The
# doubled separator after "tfvector" is kept so the runtime path is unchanged.
book.save(r'D:\sai\pca\tfvector\\itemcolloborative\item_collobarative_movies_data\ratingitemp.xlsx')