-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmain.py
142 lines (114 loc) · 5.52 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import argparse
import numpy as np
import tensorflow as tf
import params
import model as model
FLAGS = None
def write_file(predict_results):
    """Write predicted labels and probabilities alongside the original rows.

    Reads every line of the file at ``FLAGS.test_origin`` and pairs the
    k-th prediction (1-based) with ``lines[k]``; ``lines[0]`` is never used
    (presumably the header row of the original file). For each prediction
    the first two tab-separated fields of the paired line are copied and
    followed by the prediction values, tab-separated:

    * ``FLAGS.pred_dir`` receives ``output['sentiment']``
    * ``FLAGS.prob_dir`` receives ``output['probability']``

    Both output files start with the same fixed header line.

    Args:
        predict_results: iterable of mappings holding 'probability' and
            'sentiment' entries, each supporting ``.astype(str)`` and
            iteration (e.g. NumPy arrays).

    Raises:
        IndexError: if there are more predictions than lines after the
            first line of the original file.
    """
    header = 'ID\tTweet\tanger\tanticipation\tdisgust\tfear\tjoy\tlove\toptimism\tpessimism\tsadness\tsurprise\ttrust\n'

    with open(FLAGS.test_origin) as f:
        lines = f.readlines()

    with open(FLAGS.pred_dir, 'w') as pred_file, open(FLAGS.prob_dir, 'w') as prob_file:
        pred_file.write(header)
        prob_file.write(header)
        # A plain for-loop drives the iterator protocol on both Python 2 and
        # Python 3; the former explicit ``.next()`` call exists only on
        # Python 2 iterators.
        for i, output in enumerate(predict_results, 1):
            # rstrip removes the trailing '\n' left on rows that have fewer
            # than three tab-separated fields
            prefix = '\t'.join(lines[i].split('\t')[:2]).rstrip()
            prob_file.write(prefix + '\t' + '\t'.join(output['probability'].astype(str)) + '\n')
            pred_file.write(prefix + '\t' + '\t'.join(output['sentiment'].astype(str)) + '\n')
def main(unused):
    """Build the estimator and run the action selected by ``FLAGS.mode``.

    ``'train'`` runs ``train_and_evaluate`` on the experiment, ``'eval'``
    runs a single evaluation, and any other value runs prediction on
    ``FLAGS.test_data`` and hands the results to ``write_file``. Training
    and evaluation arrays are loaded in every mode.

    Args:
        unused: not read by this function.
    """
    # Enable logging for tf.estimator
    tf.logging.set_verbosity(tf.logging.INFO)

    # Checkpointing configuration
    run_config = tf.contrib.learn.RunConfig(
        model_dir=FLAGS.model_dir,
        keep_checkpoint_max=500,
        save_checkpoints_steps=100)

    # Hyper-parameters: FLAGS.params names a callable in the params module
    hparams = getattr(params, FLAGS.params)().values()

    estimator = tf.estimator.Estimator(
        model_fn=model.attn_net,
        config=run_config,
        params=hparams)

    # Training arrays
    features = np.load(FLAGS.train_data)
    labels = np.load(FLAGS.train_label)

    # Lexicon arrays are loaded only when the parameter set enables them
    use_lexicon = hparams['lexicon_effect'] is not None
    train_lexicon = dev_lexicon = test_lexicon = None
    if use_lexicon:
        train_lexicon = np.load(FLAGS.lexicon_train)
        dev_lexicon = np.load(FLAGS.lexicon_dev)
        test_lexicon = np.load(FLAGS.lexicon_test)

    # Apply one shared random permutation to all training arrays
    order = np.random.permutation(len(labels))
    features = features[order]
    labels = labels[order]
    if use_lexicon:
        train_lexicon = train_lexicon[order]

    # When no lexicon is loaded, the main array is passed under the
    # 'lexicon' key as well, so the feature dict always has both keys.
    train_features = {
        "x": features,
        'lexicon': features if train_lexicon is None else train_lexicon,
    }
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x=train_features,
        y=labels,
        batch_size=hparams['batch_size'],
        num_epochs=FLAGS.num_epochs,
        shuffle=True)

    # Evaluation arrays and input function
    dev_data = np.load(FLAGS.eval_data)
    dev_label = np.load(FLAGS.eval_label)
    dev_features = {
        "x": dev_data,
        'lexicon': dev_data if dev_lexicon is None else dev_lexicon,
    }
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x=dev_features,
        y=dev_label,
        num_epochs=1,
        shuffle=False)

    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        train_steps=FLAGS.steps,
        min_eval_frequency=100)

    if FLAGS.mode == 'train':
        experiment.train_and_evaluate()
        return

    if FLAGS.mode == 'eval':
        experiment.evaluate(delay_secs=0)
        return

    # Any other mode ('pred'): predict on the preprocessed test data
    test_data = np.load(FLAGS.test_data)
    test_features = {
        "x": test_data,
        'lexicon': test_data if test_lexicon is None else test_lexicon,
    }
    pred_input_fn = tf.estimator.inputs.numpy_input_fn(
        x=test_features,
        shuffle=False)

    write_file(estimator.predict(input_fn=pred_input_fn))
if __name__ == '__main__':
    # Command-line entry point: parse the options into the module-level
    # FLAGS namespace (read by main and write_file), then hand control to
    # tf.app.run.
    parser = argparse.ArgumentParser()
    # main() treats any value other than 'train' or 'eval' as prediction.
    parser.add_argument('--mode', type=str, help='train, eval, or pred (any other value runs prediction)')
    parser.add_argument('--train_data', type=str, default='', help='path to the training data.')
    parser.add_argument('--train_label', type=str, default='', help='path to the training label.')
    parser.add_argument('--eval_data', type=str, default='', help='path to the evaluation data. ')
    parser.add_argument('--eval_label', type=str, default='', help='path to the evaluation label.')
    parser.add_argument('--test_data', type=str, default='', help='path to the test data')
    parser.add_argument('--test_origin', type=str, default='')
    parser.add_argument('--lexicon_train', type=str, help='path to lexicon data')
    parser.add_argument('--lexicon_dev', type=str, help='path to the lexicon data')
    parser.add_argument('--lexicon_test', type=str, help='path to the lexicon data')
    parser.add_argument('--model_dir', type=str, help='path to save the model')
    parser.add_argument('--pred_dir', type=str, help='path to save the predictions')
    # NOTE(review): the default is the string 'None', not the None object;
    # write_file opens FLAGS.prob_dir for writing, so omitting this flag in
    # prediction mode creates a file literally named "None".
    parser.add_argument('--prob_dir', type=str, default='None', help='path to save the predicted probability')
    parser.add_argument('--params', type=str, help='parameter setting')
    parser.add_argument('--steps', type=int, default=200000, help='training step size')
    # type=int is required: without it a value given on the command line
    # reaches numpy_input_fn(num_epochs=...) as a str instead of an int.
    parser.add_argument('--num_epochs', type=int, default=10, help='training epoch size')
    FLAGS = parser.parse_args()
    tf.app.run(main)