-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathload_querie_TF.py
65 lines (50 loc) · 2.07 KB
/
load_querie_TF.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
'''
------------------------------------------------------------
Created on Nov 2015 by
Name = Gerardo Roa Dabike
University ID = acp15gr
Registration Number = 150105918
------------------------------------------------------------
'''
from nested_dictionary import NestedDict
import math
class LoadQuerie_TF(object):
    '''
    Score every document of a collection against every query.

    For each (query, document) pair the score is the sum, over the query's
    terms, of (document weight * query weight), divided by the product of
    the document's vector length and the query's vector length.  The weights
    themselves are taken as-is from the mappings handed to the constructor.
    All scores are computed eagerly in the constructor and exposed through
    getResultDict().
    '''

    def __init__(self, querieIndex, index, collectionVector):
        '''
        Constructor

        querieIndex      -- mapping: query id -> {term: weight}
        index            -- mapping: term -> {document id: weight}
        collectionVector -- mapping: document id -> divisor used to normalise
                            that document's score (presumably the Euclidean
                            length of the document vector; confirm with the
                            code that builds it)
        '''
        self.__indexDict = index
        self.__collectionVector = collectionVector
        self.__querieDict = querieIndex
        # Order matters: the scores need the query lengths computed first.
        self.__querieVector = self.__querieLengthVector()
        self.__resultDict = self.__retrievalTF()

    def __querieLengthVector(self):
        '''
        Return a mapping query id -> Euclidean length of the query's weight
        vector.  A query without terms gets length 0.0.
        '''
        querieVector = NestedDict()
        for q in self.__querieDict:
            terms = self.__querieDict[q]
            squares = sum(float(terms[w]) ** 2 for w in terms)
            querieVector[q] = math.sqrt(squares)
        return querieVector

    def __retrievalTF(self):
        '''
        Return a mapping query id -> {document id: normalised score}.

        Every document of the collection receives an entry for every query,
        including queries that have no terms.  Query terms missing from the
        index contribute nothing.  When the normalisation factor is zero
        (empty query or zero-length document) the score is 0.0.
        '''
        resultDict = NestedDict()
        for q in self.__querieVector:
            # Seed every document up front so that queries without any
            # matching term (or without terms at all) still get a full row.
            scores = NestedDict()
            for d in self.__collectionVector:
                scores[d] = 0

            queryTerms = self.__querieDict[q]
            for w in queryTerms:
                # Membership test instead of indexing: indexing an unknown
                # term would raise on a plain dict, and could insert an empty
                # entry into the caller's index if the mapping autovivifies.
                if w not in self.__indexDict:
                    continue
                postings = self.__indexDict[w]
                queryWeight = queryTerms[w]
                # Walk only the documents that contain the term; documents
                # outside the collection are ignored.
                for d in postings:
                    if d in scores:
                        scores[d] += postings[d] * queryWeight

            queryLength = self.__querieVector[q]
            for d in scores:
                norm = self.__collectionVector[d] * queryLength
                # A zero norm means an empty query or an empty document:
                # report no similarity instead of dividing by zero.
                scores[d] = scores[d] / norm if norm else 0.0
            resultDict[q] = scores
        return resultDict

    def getResultDict(self):
        '''
        Return the mapping query id -> {document id: score} computed at
        construction time.
        '''
        return self.__resultDict