diff --git a/assignments/chatbot/chatbot.py b/assignments/chatbot/chatbot.py
index 79efcebf..0521bbf6 100644
--- a/assignments/chatbot/chatbot.py
+++ b/assignments/chatbot/chatbot.py
@@ -19,7 +19,6 @@ class CS 20SI: "TensorFlow for Deep Learning Research"
 import argparse
 import os
-os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
 import random
 import sys
 import time
diff --git a/assignments/chatbot/config.py b/assignments/chatbot/config.py
index 9e40f7c8..d93266bc 100644
--- a/assignments/chatbot/config.py
+++ b/assignments/chatbot/config.py
@@ -16,7 +16,7 @@ class CS 20SI: "TensorFlow for Deep Learning Research"
 """

 # parameters for processing the dataset
-DATA_PATH = '/Users/Chip/data/cornell movie-dialogs corpus'
+DATA_PATH = 'data'
 CONVO_FILE = 'movie_conversations.txt'
 LINE_FILE = 'movie_lines.txt'
 OUTPUT_FILE = 'output_convo.txt'
@@ -47,8 +47,7 @@ class CS 20SI: "TensorFlow for Deep Learning Research"
 # [37049, 33519, 30223, 33513, 37371]
 # BUCKETS = [(8, 10), (12, 14), (16, 19), (23, 26), (39, 43)]
-# BUCKETS = [(8, 10), (12, 14), (16, 19)]
-BUCKETS = [(16, 19)]
+BUCKETS = [(8, 10), (12, 14), (16, 19)]

 NUM_LAYERS = 3
 HIDDEN_SIZE = 256
@@ -58,3 +57,5 @@ class CS 20SI: "TensorFlow for Deep Learning Research"
 MAX_GRAD_NORM = 5.0

 NUM_SAMPLES = 512
+ENC_VOCAB = 24515
+DEC_VOCAB = 24671
diff --git a/assignments/chatbot/data.py b/assignments/chatbot/data.py
index 673087d7..41cd8cc2 100644
--- a/assignments/chatbot/data.py
+++ b/assignments/chatbot/data.py
@@ -17,9 +17,9 @@ class CS 20SI: "TensorFlow for Deep Learning Research"
 """
 from __future__ import print_function

-import os
 import random
 import re
+import os

 import numpy as np

@@ -177,6 +177,7 @@ def prepare_raw_data():
     print('Preparing raw data into train set and test set ...')
     id2line = get_lines()
     convos = get_convos()
+    print(convos)
     questions, answers = question_answers(id2line, convos)
     prepare_dataset(questions, answers)

@@ -253,4 +254,4 @@ def get_batch(data_bucket, bucket_id, batch_size=1):

 if __name__ == '__main__':
     prepare_raw_data()
-    process_data()
\ No newline at end of file
+    # process_data()
\ No newline at end of file
diff --git a/assignments/chatbot/model.py b/assignments/chatbot/model.py
index 369c0955..d6e7e296 100644
--- a/assignments/chatbot/model.py
+++ b/assignments/chatbot/model.py
@@ -22,6 +22,8 @@ class CS 20SI: "TensorFlow for Deep Learning Research"
 import tensorflow as tf

 import config
+import copy
+import pdb

 class ChatBotModel(object):
     def __init__(self, forward_only, batch_size):
@@ -49,33 +51,51 @@ def _inference(self):
         # If we use sampled softmax, we need an output projection.
         # Sampled softmax only makes sense if we sample less than vocabulary size.
         if config.NUM_SAMPLES > 0 and config.NUM_SAMPLES < config.DEC_VOCAB:
-            w = tf.get_variable('proj_w', [config.HIDDEN_SIZE, config.DEC_VOCAB])
-            b = tf.get_variable('proj_b', [config.DEC_VOCAB])
+            w = tf.get_variable('proj_w', [config.HIDDEN_SIZE, config.DEC_VOCAB], dtype=tf.float32)
+            w_t = tf.transpose(w)
+            b = tf.get_variable('proj_b', [config.DEC_VOCAB], dtype=tf.float32)
             self.output_projection = (w, b)

-        def sampled_loss(inputs, labels):
+        def sampled_loss(labels, logits):
+
             labels = tf.reshape(labels, [-1, 1])
-            return tf.nn.sampled_softmax_loss(tf.transpose(w), b, inputs, labels,
-                                              config.NUM_SAMPLES, config.DEC_VOCAB)
+            # We need to compute the sampled_softmax_loss using 32bit floats to
+            # avoid numerical instabilities.
+            local_w_t = tf.cast(w_t, tf.float32)
+            local_b = tf.cast(b, tf.float32)
+            local_inputs = tf.cast(logits, tf.float32)
+            # return tf.nn.sampled_softmax_loss(tf.transpose(w), b, labels, logits,
+            #                                   config.NUM_SAMPLES, config.DEC_VOCAB)
+            return tf.cast(
+                tf.nn.sampled_softmax_loss(
+                    weights=local_w_t,
+                    biases=local_b,
+                    labels=labels,
+                    inputs=local_inputs,
+                    num_sampled=config.NUM_SAMPLES,
+                    num_classes=config.DEC_VOCAB),
+                dtype=tf.float32)
         self.softmax_loss_function = sampled_loss

-        single_cell = tf.nn.rnn_cell.GRUCell(config.HIDDEN_SIZE)
-        self.cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * config.NUM_LAYERS)
+        single_cell = tf.contrib.rnn.GRUCell(config.HIDDEN_SIZE)
+        self.cell = tf.contrib.rnn.MultiRNNCell([single_cell] * config.NUM_LAYERS)

     def _create_loss(self):
         print('Creating loss... \nIt might take a couple of minutes depending on how many buckets you have.')
         start = time.time()
         def _seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
-            return tf.nn.seq2seq.embedding_attention_seq2seq(
-                        encoder_inputs, decoder_inputs, self.cell,
+            tmp_cell = copy.deepcopy(self.cell)
+            return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
+                        encoder_inputs, decoder_inputs, tmp_cell,
                         num_encoder_symbols=config.ENC_VOCAB,
                         num_decoder_symbols=config.DEC_VOCAB,
                         embedding_size=config.HIDDEN_SIZE,
                         output_projection=self.output_projection,
-                        feed_previous=do_decode)
+                        feed_previous=do_decode,
+                        dtype=tf.float32)

         if self.fw_only:
-            self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
+            self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
                                         self.encoder_inputs,
                                         self.decoder_inputs,
                                         self.targets,
@@ -90,7 +110,7 @@ def _seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
                                                 self.output_projection[0]) + self.output_projection[1]
                                             for output in self.outputs[bucket]]
         else:
-            self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
+            self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
                                         self.encoder_inputs,
                                         self.decoder_inputs,
                                         self.targets,
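Note: the model.py changes above track the TF 1.x API, where tf.nn.sampled_softmax_loss
takes (weights, biases, labels, inputs, ...) rather than the older
(weights, biases, inputs, labels, ...) order, and where the seq2seq helpers call
softmax_loss_function with (labels, logits). Below is a minimal, standalone sketch of
that call for reference; it is not part of the patch. It assumes TensorFlow 1.x;
HIDDEN_SIZE, DEC_VOCAB, and NUM_SAMPLES mirror config.py, while BATCH and the
placeholder-based feeding are illustrative choices for this sketch only.

    import numpy as np
    import tensorflow as tf

    HIDDEN_SIZE, DEC_VOCAB, NUM_SAMPLES, BATCH = 256, 24671, 512, 4

    # Output projection variables, shaped as in ChatBotModel._inference.
    w = tf.get_variable('proj_w', [HIDDEN_SIZE, DEC_VOCAB], dtype=tf.float32)
    b = tf.get_variable('proj_b', [DEC_VOCAB], dtype=tf.float32)

    inputs = tf.placeholder(tf.float32, [BATCH, HIDDEN_SIZE])  # decoder hidden states
    labels = tf.placeholder(tf.int64, [BATCH])                 # target token ids

    # TF 1.x keyword order: weights, biases, labels, inputs.
    # weights must be [num_classes, dim], hence the transpose of w.
    loss = tf.nn.sampled_softmax_loss(
        weights=tf.transpose(w),
        biases=b,
        labels=tf.reshape(labels, [-1, 1]),  # expects shape [batch_size, num_true]
        inputs=inputs,
        num_sampled=NUM_SAMPLES,
        num_classes=DEC_VOCAB)               # returns one loss value per example

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run(loss, {inputs: np.random.randn(BATCH, HIDDEN_SIZE),
                              labels: np.random.randint(0, DEC_VOCAB, size=BATCH)}))

The extra tf.cast calls in the patch itself follow the pattern from the TensorFlow
translate tutorial: the sampled loss is computed in float32 for numerical stability,
which only matters if the rest of the model is ever run at lower precision.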