-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathutils.py
103 lines (91 loc) · 3.46 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import pickle
import numpy as np
import scipy.sparse as sp
import tensorflow as tf
from sklearn import metrics
from Parameters import Parameters as pm
def read_mention2id_dict(path='data/tang.poetry.mention2id_dict'):
    """Load the pickled mention -> id mapping from disk.

    Args:
        path: location of the pickle file. Defaults to the project's
            ``data/tang.poetry.mention2id_dict`` so existing callers that
            pass no argument behave exactly as before.

    Returns:
        The object stored in the pickle (per the original note, ids in this
        mapping start from 1).

    Raises:
        OSError: if the file cannot be opened.
    """
    # SECURITY: pickle.load can execute arbitrary code while deserializing.
    # Only point this at files produced by a trusted source.
    with open(path, 'rb') as f:
        return pickle.load(f)
def read_category():
    """Return the fixed list of poem categories and a name -> index lookup.

    Returns:
        (categories, cat_to_id): the category names in their canonical order,
        and a dict mapping each name to its position in that list.
    """
    categories = ['战争', '送别', '闺怨', '咏物', '怀古', '思乡', '怀人', '田园', '山水']
    # Index of a category is simply its position in the list above.
    cat_to_id = {label: position for position, label in enumerate(categories)}
    return categories, cat_to_id
def compute_criterion(y_pred, y):
    """Score predictions against labels.

    Both arguments are reduced with ``argmax`` along axis 1, so each is
    expected to hold one row per sample with one column per class
    (scores/logits for ``y_pred``, presumably one-hot labels for ``y``).

    Args:
        y_pred: per-class scores for every sample.
        y: per-class label indicators for every sample.

    Returns:
        (acc, pre, rec, f1): accuracy plus macro-averaged precision, recall
        and F1 as computed by ``sklearn.metrics``.
    """
    predicted = np.argmax(y_pred, 1)
    expected = np.argmax(y, 1)

    # Accuracy is the fraction of samples whose predicted class matches.
    acc = np.average(np.equal(predicted, expected))

    scorers = (metrics.precision_score, metrics.recall_score, metrics.f1_score)
    pre, rec, f1 = (score(expected, predicted, average='macro') for score in scorers)
    return acc, pre, rec, f1
def evaluate(sess, data, model, name):
    """Run the model over the dev or test split and score its predictions.

    Args:
        sess: session whose ``run`` method is called with
            ``[model.loss, model.logits]`` and a feed dict.
        data: dataset holder exposing ``dev_ids``/``dev_label_ids``/``dev_masks``,
            the matching ``test_*`` attributes, and ``features``/``biases``.
        model: object exposing ``feed_data``, ``loss`` and ``logits``.
        name: which split to evaluate, ``'dev'`` or ``'test'``.

    Returns:
        (acc, pre, rec, f1) as returned by ``compute_criterion``.

    Raises:
        ValueError: if ``name`` is neither ``'dev'`` nor ``'test'``.
    """
    if name == 'dev':
        xs, ys, masks = data.dev_ids, data.dev_label_ids, data.dev_masks
    elif name == 'test':
        xs, ys, masks = data.test_ids, data.test_label_ids, data.test_masks
    else:
        # Previously this only printed a message and then crashed on the
        # unbound ``ys`` below; fail immediately with a clear error instead.
        raise ValueError("Error: wrong evaluate name, {}".format(name))

    sample_num = len(ys)
    y_preds = []
    # Step through the split one batch at a time; the final batch may be short.
    for start in range(0, sample_num, pm.batch_size):
        end = min(start + pm.batch_size, sample_num)
        # The two trailing 0.0 arguments are presumably dropout rates, turned
        # off for evaluation -- confirm against model.feed_data.
        feed_dict = model.feed_data(
            xs[start:end], ys[start:end], masks[start:end],
            data.features, data.biases, 0.0, 0.0)
        # The loss is still fetched (as before) but only the logits are used.
        _, y_pred = sess.run([model.loss, model.logits], feed_dict=feed_dict)
        y_preds.append(y_pred)

    y_preds = np.concatenate(y_preds, 0)
    return compute_criterion(y_preds, ys)
def preprocess_adj_bias(adj):
    """Add self-loops to a sparse adjacency matrix, binarize it, and unpack it.

    Args:
        adj: square scipy sparse adjacency matrix.

    Returns:
        (indices, values, shape): ``indices`` is an (nnz, 2) array of
        ``(col, row)`` pairs, ``values`` holds the float32 entries (every
        positive entry has been set to 1.0), and ``shape`` is the matrix
        shape. These are the pieces a sparse-tensor constructor would take.
    """
    with_loops = adj + sp.eye(adj.shape[0])  # self-loop
    positive = with_loops > 0.0
    with_loops[positive] = 1.0

    coo = with_loops if sp.isspmatrix_coo(with_loops) else with_loops.tocoo()
    coo = coo.astype(np.float32)

    # NOTE: the (col, row) order is intentional. The original author records
    # that using (row, col) here was a mistake.
    indices = np.vstack((coo.col, coo.row)).transpose()
    return indices, coo.data, coo.shape
def sparse_to_tuple(sparse_mx):
    """Convert a sparse matrix (or a list of them) to (coords, values, shape).

    Args:
        sparse_mx: a scipy sparse matrix, or a list of such matrices.

    Returns:
        For a single matrix, a ``(coords, values, shape)`` tuple where
        ``coords`` is an (nnz, 2) array of (row, col) pairs and ``values[i]``
        is the entry at ``coords[i]``. For a list, the same list object with
        every element replaced in place by its tuple.
    """
    def _as_triplet(matrix):
        coo = matrix if sp.isspmatrix_coo(matrix) else matrix.tocoo()
        # coords and values line up entry by entry; the original note gives
        # example sizes coords (49216, 2), values (49216,), shape (2708, 1433).
        return np.vstack((coo.row, coo.col)).transpose(), coo.data, coo.shape

    if not isinstance(sparse_mx, list):
        return _as_triplet(sparse_mx)

    # Lists are converted in place so callers holding a reference see tuples.
    for position, matrix in enumerate(sparse_mx):
        sparse_mx[position] = _as_triplet(matrix)
    return sparse_mx
def preprocess_features(features):
    """Row-normalize a feature matrix.

    Each row is scaled by the inverse of its sum, so for non-negative
    features every entry lies in [0, 1] and each non-empty row sums to 1.
    Rows whose sum is zero are left as all zeros.

    Args:
        features: feature matrix, one row per sample (a scipy sparse matrix
            in the original usage).

    Returns:
        The row-normalized matrix. Conversion to tuple representation is
        NOT performed here; call ``sparse_to_tuple`` on the result if needed.
    """
    # Cast to float32 so the negative power below is a float operation.
    rowsum = np.array(features.sum(1)).astype('float32')
    # All-zero rows give 1/0 = inf. Those entries are zeroed right after, so
    # the divide-by-zero RuntimeWarning would only be noise.
    with np.errstate(divide='ignore'):
        r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    # Left-multiplying by diag(1 / rowsum) scales every row by its inverse sum.
    r_mat_inv = sp.diags(r_inv)
    return r_mat_inv.dot(features)