eval.py (forked from dennybritz/cnn-text-classification-tf) · executable file · 136 lines (113 loc) · 5.65 KB
#! /usr/bin/env python
import tensorflow as tf
import numpy as np
import os
import data_helpers
from tensorflow.contrib import learn
import csv
from sklearn import metrics
import yaml
def softmax(x):
    """Compute softmax values for each set of scores in x."""
    if x.ndim == 1:
        x = x.reshape((1, -1))
    max_x = np.max(x, axis=1).reshape((-1, 1))
    exp_x = np.exp(x - max_x)
    return exp_x / np.sum(exp_x, axis=1).reshape((-1, 1))
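# Quick sanity check (illustrative values, not part of the original script):
#   softmax(np.array([[2.0, 1.0, 0.1]])) is approximately [[0.659, 0.242, 0.099]]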
with open("config.yml", 'r') as ymlfile:
    cfg = yaml.safe_load(ymlfile)  # safe_load also works on PyYAML 5+, where a bare yaml.load needs an explicit Loader
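# The config.yml keys read below look roughly like this (illustrative sketch; the actual
# file ships with the repository and may contain more options):
#
#   datasets:
#     default: mrpolarity
#     mrpolarity:
#       positive_data_file:
#         path: "data/rt-polaritydata/rt-polarity.pos"
#       negative_data_file:
#         path: "data/rt-polaritydata/rt-polarity.neg"
#     20newsgroup:
#       categories: ["alt.atheism", "comp.graphics", "sci.med", "soc.religion.christian"]
#       shuffle: True
#       random_state: 42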
# Parameters
# ==================================================
# Data Parameters
# Eval Parameters
tf.flags.DEFINE_integer("batch_size", 64, "Batch Size (default: 64)")
tf.flags.DEFINE_string("checkpoint_dir", "", "Checkpoint directory from training run")
tf.flags.DEFINE_boolean("eval_train", False, "Evaluate on all training data")
# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")
FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
print("{}={}".format(attr.upper(), value))
print("")
datasets = None
# CHANGE THIS: load your own evaluation data here
dataset_name = cfg["datasets"]["default"]
if FLAGS.eval_train:
    if dataset_name == "mrpolarity":
        datasets = data_helpers.get_datasets_mrpolarity(cfg["datasets"][dataset_name]["positive_data_file"]["path"],
                                                        cfg["datasets"][dataset_name]["negative_data_file"]["path"])
    elif dataset_name == "20newsgroup":
        datasets = data_helpers.get_datasets_20newsgroup(subset="test",
                                                         categories=cfg["datasets"][dataset_name]["categories"],
                                                         shuffle=cfg["datasets"][dataset_name]["shuffle"],
                                                         random_state=cfg["datasets"][dataset_name]["random_state"])
    x_raw, y_test = data_helpers.load_data_labels(datasets)
    y_test = np.argmax(y_test, axis=1)
    print("Total number of test examples: {}".format(len(y_test)))
else:
    if dataset_name == "mrpolarity":
        datasets = {"target_names": ['positive_examples', 'negative_examples']}
        x_raw = ["a masterpiece four years in the making", "everything is off."]
        y_test = [1, 0]
    else:
        datasets = {"target_names": ['alt.atheism', 'comp.graphics', 'sci.med', 'soc.religion.christian']}
        x_raw = ["The number of reported cases of gonorrhea in Colorado increased",
                 "I am in the market for a 24-bit graphics card for a PC"]
        y_test = [2, 1]
# Map data into vocabulary
vocab_path = os.path.join(FLAGS.checkpoint_dir, "..", "vocab")
vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path)
x_test = np.array(list(vocab_processor.transform(x_raw)))
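# Note: transform() tokenizes each raw sentence and maps tokens to the vocabulary ids
# learned at training time, padding/truncating to the stored max_document_length, so
# x_test has the shape the saved model expects.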
print("\nEvaluating...\n")
# Evaluation
# ==================================================
checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
graph = tf.Graph()
with graph.as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        # Load the saved meta graph and restore variables
        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)

        # Get the placeholders from the graph by name
        input_x = graph.get_operation_by_name("input_x").outputs[0]
        # input_y = graph.get_operation_by_name("input_y").outputs[0]
        dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

        # Tensors we want to evaluate
        scores = graph.get_operation_by_name("output/scores").outputs[0]
        predictions = graph.get_operation_by_name("output/predictions").outputs[0]

        # Generate batches for one epoch
        batches = data_helpers.batch_iter(list(x_test), FLAGS.batch_size, 1, shuffle=False)

        # Collect the predictions here
        all_predictions = []
        all_probabilities = None

        for x_test_batch in batches:
            # Dropout is disabled at evaluation time (keep probability 1.0)
            batch_predictions_scores = sess.run([predictions, scores],
                                                {input_x: x_test_batch, dropout_keep_prob: 1.0})
            all_predictions = np.concatenate([all_predictions, batch_predictions_scores[0]])
            # Convert the raw scores of each batch into per-class probabilities
            probabilities = softmax(batch_predictions_scores[1])
            if all_probabilities is not None:
                all_probabilities = np.concatenate([all_probabilities, probabilities])
            else:
                all_probabilities = probabilities
# Print accuracy if y_test is defined
if y_test is not None:
    correct_predictions = float(sum(all_predictions == y_test))
    print("Total number of test examples: {}".format(len(y_test)))
    print("Accuracy: {:g}".format(correct_predictions / float(len(y_test))))
    print(metrics.classification_report(y_test, all_predictions, target_names=datasets['target_names']))
    print(metrics.confusion_matrix(y_test, all_predictions))
# Save the evaluation to a csv
predictions_human_readable = np.column_stack((np.array(x_raw),
                                              [int(prediction) for prediction in all_predictions],
                                              ["{}".format(probability) for probability in all_probabilities]))
out_path = os.path.join(FLAGS.checkpoint_dir, "..", "prediction.csv")
print("Saving evaluation to {0}".format(out_path))
with open(out_path, 'w') as f:
    csv.writer(f).writerows(predictions_human_readable)
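# Each row of prediction.csv holds the raw input text, the predicted class index, and the
# stringified per-class probability vector, e.g. (illustrative values only):
#   "a masterpiece four years in the making",1,[0.08 0.92]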