-
Notifications
You must be signed in to change notification settings - Fork 2
/
get_distance.py
48 lines (42 loc) · 1.26 KB
/
get_distance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import gensim
import logging
import itertools
import math
import scipy.spatial
import numpy
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)

# Path of the saved Word2Vec model whose vocabulary is compared pairwise.
MODEL_PATH = '/scratch1/NOT_BACKED_UP/dbuchan/interpro/derived/word2vec_E.model'


def angular_distance(sim):
    """Convert a cosine similarity into the distance ``acos(sim) / pi``.

    Args:
        sim: Cosine similarity, nominally in [-1, 1]. Any real number that
            ``float()`` accepts (including NumPy scalars) is allowed.

    Returns:
        A float in [0, 1]: 0.0 for a similarity of 1, 0.5 for 0 and 1.0 for
        -1. NaN input yields NaN.
    """
    sim = float(sim)
    if math.isnan(sim):
        # Propagate an undefined similarity instead of letting the clamp
        # below silently turn it into a valid-looking distance.
        return sim
    # Floating-point rounding can push a similarity a hair outside [-1, 1],
    # where math.acos raises ValueError. Clamp rather than abort the run.
    if sim >= 1.0 or math.isclose(1.0, sim, rel_tol=1e-8):
        return 0.0
    if sim <= -1.0:
        return 1.0
    return math.acos(sim) / math.pi


def distance_rows(words, similarity):
    """Yield the lines of a comma-separated pairwise distance matrix.

    Args:
        words: Iterable of row/column labels, used in the order given.
        similarity: Callable ``similarity(word_a, word_b)`` returning the
            cosine similarity of the two labels.

    Yields:
        First the header line (an empty corner cell followed by the labels),
        then one line per label: the label followed by its distance to every
        label, each formatted with five decimal places.
    """
    words = list(words)
    # The leading "" produces the empty top-left corner cell.
    yield ",".join([""] + words)
    for word in words:
        cells = [word]
        for other in words:
            # A label is at distance 0 from itself by definition; do not let
            # rounding in the similarity computation leak into the diagonal.
            if word == other:
                dist = 0.0
            else:
                dist = angular_distance(similarity(word, other))
            cells.append("%.5f" % dist)
        yield ",".join(cells)


def main():
    """Load the model and print its vocabulary distance matrix to stdout."""
    model = gensim.models.Word2Vec.load(MODEL_PATH)
    # Sort once so rows and columns share a single, stable ordering.
    words = sorted(model.wv.vocab)
    for line in distance_rows(words, model.wv.similarity):
        print(line)


if __name__ == "__main__":
    main()
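# Disabled alternative: build the same matrix from SciPy's cosine distance
# (scipy.spatial.distance.cosine) between the raw word vectors.
# for word in sorted(model.wv.vocab):
#     line = word+","
#     for word2 in sorted(model.wv.vocab):
#         vec1 = model.wv[word]
#         vec2 = model.wv[word2]
#         cos_dist = scipy.spatial.distance.cosine(vec1, vec2)
#         line += "%.5f" % cos_dist + ","
#     line = line.rstrip(",")
#     print(line)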