From 01d7d553a26f6479d444ea096e73e8ca1179017e Mon Sep 17 00:00:00 2001
From: Andreas Argyriou <anargyri@users.noreply.github.com>
Date: Mon, 25 Sep 2017 13:27:20 +0100
Subject: [PATCH] Delete lstm_word2vec_small.ipynb

---
 lstm_word2vec_small.ipynb | 680 --------------------------------------
 1 file changed, 680 deletions(-)
 delete mode 100644 lstm_word2vec_small.ipynb

diff --git a/lstm_word2vec_small.ipynb b/lstm_word2vec_small.ipynb
deleted file mode 100644
index b070565..0000000
--- a/lstm_word2vec_small.ipynb
+++ /dev/null
@@ -1,680 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
-   "source": [
-    "The code in this notebook is based on the [Keras documentation](https://keras.io/) and [blog](https://blog.keras.io/using-pre-trained-word-embeddings-in-a-keras-model.html) as well as this [word2vec tutorial](http://adventuresinmachinelearning.com/gensim-word2vec-tutorial/)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Using CNTK backend\n"
-     ]
-    }
-   ],
-   "source": [
-    "import numpy as np\n",
-    "import os\n",
-    "import pandas as pd\n",
-    "import pickle\n",
-    "import time\n",
-    "\n",
-    "os.environ['KERAS_BACKEND']='cntk'\n",
-    "from keras.preprocessing import sequence\n",
-    "from keras.preprocessing.text import Tokenizer, text_to_word_sequence\n",
-    "from keras.models import Sequential, load_model\n",
-    "from keras import regularizers\n",
-    "from keras.optimizers import SGD\n",
-    "from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional\n",
-    "from keras.callbacks import History, CSVLogger\n",
-    "from keras.utils import to_categorical"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
-   "source": [
-    "Download the book reviews data from Azure Machine Learning"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "from azureml import Workspace\n",
-    "ws = Workspace(\n",
-    "    workspace_id=os.environ['AZUREML_WORKSPACE_ID'],                # read from the environment; never commit credentials\n",
-    "    authorization_token=os.environ['AZUREML_AUTHORIZATION_TOKEN'],  # raises KeyError if the variable is not set\n",
-    "    endpoint='https://studioapi.azureml.net'\n",
-    ")\n",
-    "ds = ws.datasets['Book Reviews from Amazon']\n",
-    "all_data = ds.to_dataframe()\n",
-    "all_data.rename(columns={0: 'rating', 1: 'text'}, inplace=True)\n",
-    "all_data.loc[:, 'rating'] = all_data['rating'] - 1           # reindex ratings to start from 0"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "\"\"\"\n",
-    "from azureml import Workspace\n",
-    "ws = Workspace(\n",
-    "    workspace_id=os.environ['AZUREML_WORKSPACE_ID'],\n",
-    "    authorization_token=os.environ['AZUREML_AUTHORIZATION_TOKEN'],\n",
-    "    endpoint='https://studioapi.azureml.net'\n",
-    ")\n",
-    "ds = ws.datasets['dfe_happysad_utf.csv']\n",
-    "all_data = ds.to_dataframe()\n",
-    "all_data.rename(columns={'features': 'text', 'label': 'rating'}, inplace=True)\n",
-    "all_data.replace({'rating': {'sadness': 0, 'happiness': 1}}, inplace=True)\n",
-    "\"\"\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
-   "source": [
-    "Split data into a training and a test set. "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "n_tr = 7500\n",
-    "rng = np.random.RandomState(0)   # fixed seed so the train/test split (and reported accuracies) are reproducible\n",
-    "ind_range = np.arange(all_data.shape[0])\n",
-    "tr_ind = rng.choice(ind_range, n_tr, replace=False)\n",
-    "\n",
-    "train_data = all_data.iloc[tr_ind, :]\n",
-    "test_data = all_data.iloc[np.setdiff1d(ind_range, tr_ind), :]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
-   "source": [
-    "Set the dimensions of the input and the embedding. \n",
-    "\n",
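-    "MAX_DOC_LEN : the size of the input, i.e. the number of words in the document. Longer documents will be truncated and shorter ones will be padded with zeros; with the default `pad_sequences` settings used below, both truncation and padding happen at the start of the sequence.\n",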
-    "\n",
-    "VOCAB_SIZE : the size of the word encoding (number of most frequent words to keep in the vocabulary)\n",
-    "\n",
-    "EMBEDDING_DIM : the dimensionality of the word embedding"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "MAX_DOC_LEN = 300\n",
-    "VOCAB_SIZE = 6000\n",
-    "EMBEDDING_DIM = 200"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "TEXT_COL = 'text'\n",
-    "LABEL_COL = 'rating'"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
-   "source": [
-    "Fit a Keras tokenizer to the most frequent words using the entire training data set as the corpus."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "# tokenize, create seqs, pad\n",
-    "tok = Tokenizer(num_words=VOCAB_SIZE, lower=True, split=\" \")\n",
-    "tok.fit_on_texts(train_data[TEXT_COL])\n",
-    "train_seq = tok.texts_to_sequences(train_data[TEXT_COL])\n",
-    "train_seq = sequence.pad_sequences(train_seq, maxlen=MAX_DOC_LEN)\n",
-    "test_seq = tok.texts_to_sequences(test_data[TEXT_COL])\n",
-    "test_seq = sequence.pad_sequences(test_seq, maxlen=MAX_DOC_LEN)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
-   "source": [
-    "Convert the ratings to one-hot categorical labels."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "labels = to_categorical(np.asarray(train_data[LABEL_COL]))\n",
-    "labels = labels.astype('float32')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "n_classes = labels.shape[1]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
-   "source": [
-    "Train word2vec on the training documents in order to initialize the word embedding. Ignore rare words (min_count=6). Use skip-gram as the training algorithm (sg=1)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 53,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[nltk_data] Downloading package punkt to /home/anargyri/nltk_data...\n",
-      "[nltk_data]   Package punkt is already up-to-date!\n"
-     ]
-    }
-   ],
-   "source": [
-    "import nltk\n",
-    "\n",
-    "nltk.download('punkt')\n",
-    "\n",
-    "sent_lst = []\n",
-    "# Lowercase every token so the word2vec vocabulary lines up with the Tokenizer(lower=True) word_index keys.\n",
-    "for doc in train_data[TEXT_COL]:\n",
-    "    sentences = nltk.tokenize.sent_tokenize(doc)\n",
-    "    for sent in sentences:\n",
-    "        word_lst = [w.lower() for w in nltk.tokenize.word_tokenize(sent) if w.isalnum()]\n",
-    "        sent_lst.append(word_lst)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 54,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true,
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2017-09-15 11:21:16,427 : INFO : collecting all words and their counts\n",
-      "2017-09-15 11:21:16,428 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types\n",
-      "2017-09-15 11:21:16,451 : INFO : PROGRESS: at sentence #10000, processed 75804 words, keeping 13063 word types\n",
-      "2017-09-15 11:21:16,459 : INFO : collected 15977 word types from a corpus of 100883 raw words and 13257 sentences\n",
-      "2017-09-15 11:21:16,460 : INFO : Loading a fresh vocabulary\n",
-      "2017-09-15 11:21:16,470 : INFO : min_count=6 retains 1613 unique words (10% of original 15977, drops 14364)\n",
-      "2017-09-15 11:21:16,471 : INFO : min_count=6 leaves 80706 word corpus (79% of original 100883, drops 20177)\n",
-      "2017-09-15 11:21:16,476 : INFO : deleting the raw counts dictionary of 15977 items\n",
-      "2017-09-15 11:21:16,478 : INFO : sample=0.001 downsamples 65 most-common words\n",
-      "2017-09-15 11:21:16,479 : INFO : downsampling leaves estimated 59121 word corpus (73.3% of prior 80706)\n",
-      "2017-09-15 11:21:16,479 : INFO : estimated required memory for 1613 words and 200 dimensions: 3387300 bytes\n",
-      "2017-09-15 11:21:16,484 : INFO : resetting layer weights\n",
-      "2017-09-15 11:21:16,520 : INFO : training model with 24 workers on 1613 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=5\n",
-      "2017-09-15 11:21:16,831 : INFO : worker thread finished; awaiting finish of 23 more threads\n",
-      "2017-09-15 11:21:16,833 : INFO : worker thread finished; awaiting finish of 22 more threads\n",
-      "2017-09-15 11:21:16,844 : INFO : worker thread finished; awaiting finish of 21 more threads\n",
-      "2017-09-15 11:21:16,846 : INFO : worker thread finished; awaiting finish of 20 more threads\n",
-      "2017-09-15 11:21:16,848 : INFO : worker thread finished; awaiting finish of 19 more threads\n",
-      "2017-09-15 11:21:16,854 : INFO : worker thread finished; awaiting finish of 18 more threads\n",
-      "2017-09-15 11:21:16,858 : INFO : worker thread finished; awaiting finish of 17 more threads\n",
-      "2017-09-15 11:21:16,861 : INFO : worker thread finished; awaiting finish of 16 more threads\n",
-      "2017-09-15 11:21:16,865 : INFO : worker thread finished; awaiting finish of 15 more threads\n",
-      "2017-09-15 11:21:16,880 : INFO : worker thread finished; awaiting finish of 14 more threads\n",
-      "2017-09-15 11:21:16,882 : INFO : worker thread finished; awaiting finish of 13 more threads\n",
-      "2017-09-15 11:21:16,889 : INFO : worker thread finished; awaiting finish of 12 more threads\n",
-      "2017-09-15 11:21:16,891 : INFO : worker thread finished; awaiting finish of 11 more threads\n",
-      "2017-09-15 11:21:16,895 : INFO : worker thread finished; awaiting finish of 10 more threads\n",
-      "2017-09-15 11:21:16,897 : INFO : worker thread finished; awaiting finish of 9 more threads\n",
-      "2017-09-15 11:21:16,898 : INFO : worker thread finished; awaiting finish of 8 more threads\n",
-      "2017-09-15 11:21:16,904 : INFO : worker thread finished; awaiting finish of 7 more threads\n",
-      "2017-09-15 11:21:16,907 : INFO : worker thread finished; awaiting finish of 6 more threads\n",
-      "2017-09-15 11:21:16,908 : INFO : worker thread finished; awaiting finish of 5 more threads\n",
-      "2017-09-15 11:21:16,909 : INFO : worker thread finished; awaiting finish of 4 more threads\n",
-      "2017-09-15 11:21:16,910 : INFO : worker thread finished; awaiting finish of 3 more threads\n",
-      "2017-09-15 11:21:16,914 : INFO : worker thread finished; awaiting finish of 2 more threads\n",
-      "2017-09-15 11:21:16,920 : INFO : worker thread finished; awaiting finish of 1 more threads\n",
-      "2017-09-15 11:21:16,921 : INFO : worker thread finished; awaiting finish of 0 more threads\n",
-      "2017-09-15 11:21:16,921 : INFO : training on 504415 raw words (295266 effective words) took 0.4s, 758339 effective words/s\n",
-      "2017-09-15 11:21:16,922 : WARNING : under 10 jobs per worker: consider setting a smaller `batch_words' for smoother alpha decay\n"
-     ]
-    }
-   ],
-   "source": [
-    "import gensim, logging\n",
-    "\n",
-    "logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)\n",
-    "# use skip-gram\n",
-    "word2vec_model = gensim.models.Word2Vec(sentences=sent_lst, min_count=6, size=EMBEDDING_DIM, sg=1, workers=os.cpu_count())"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
-   "source": [
-    "Create the initial embedding matrix from the output of word2vec."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 55,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Total 1613 word vectors.\n"
-     ]
-    }
-   ],
-   "source": [
-    "embeddings_index = {}\n",
-    "\n",
-    "for word in word2vec_model.wv.vocab:\n",
-    "    coefs = np.asarray(word2vec_model.wv[word], dtype='float32')\n",
-    "    embeddings_index[word] = coefs\n",
-    "\n",
-    "print('Total %s word vectors.' % len(embeddings_index))\n",
-    "\n",
-    "# Initial embedding\n",
-    "embedding_matrix = np.zeros((VOCAB_SIZE, EMBEDDING_DIM))\n",
-    "\n",
-    "for word, i in tok.word_index.items():\n",
-    "    embedding_vector = embeddings_index.get(word)\n",
-    "    if embedding_vector is not None and i < VOCAB_SIZE:\n",
-    "        embedding_matrix[i] = embedding_vector"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
-   "source": [
-    "LSTM_DIM is the dimensionality of each LSTM output (the number of LSTM units).\n",
-    "The mask_zero option determines whether masking is performed, i.e. whether the layers ignore the padded zeros in shorter documents."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 56,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "BATCH_SIZE = 100\n",
-    "NUM_EPOCHS = 10\n",
-    "LSTM_DIM = 100\n",
-    "OPTIMIZER = SGD(lr=0.01, momentum=0.9, nesterov=True)  # nesterov=True is a no-op while momentum is left at its default of 0"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 57,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "def lstm_create_train(reg_param, ref_str):\n",
-    "    \"\"\"Train one bidirectional LSTM classifier and save its artifacts to the working directory.\n",
-    "\n",
-    "    reg_param: L2 coefficient applied to the embedding, the LSTM kernel and the dense kernel.\n",
-    "    ref_str: tag embedded in the names of the log, model, time and history files.\n",
-    "    Reads the notebook globals train_seq, labels, embedding_matrix, n_classes and OPTIMIZER.\n",
-    "    \"\"\"\n",
-    "    penalty = regularizers.l2(reg_param)\n",
-    "\n",
-    "    # Embedding -> BiLSTM -> softmax; the embedding starts from embedding_matrix and stays trainable\n",
-    "    net = Sequential()\n",
-    "    net.add(Embedding(VOCAB_SIZE,\n",
-    "                      EMBEDDING_DIM,\n",
-    "                      input_length=MAX_DOC_LEN,\n",
-    "                      trainable=True,\n",
-    "                      mask_zero=False,\n",
-    "                      embeddings_regularizer=penalty,\n",
-    "                      weights=[embedding_matrix]))\n",
-    "    net.add(Bidirectional(LSTM(units=LSTM_DIM, kernel_regularizer=penalty)))\n",
-    "    net.add(Dense(n_classes, activation='softmax', kernel_regularizer=penalty))\n",
-    "    net.compile(loss='categorical_crossentropy', optimizer=OPTIMIZER, metrics=['acc'])\n",
-    "\n",
-    "    # in-memory metric history plus an appending CSV log on disk\n",
-    "    tracker = History()\n",
-    "    log_path = './lstm_model_wvec_{0}_{1}.log'.format(reg_param, ref_str)\n",
-    "    csv_log = CSVLogger(log_path, separator=',', append=True)\n",
-    "\n",
-    "    print(\"Training model with regularization parameter = {}\".format(reg_param))\n",
-    "    # time only the fit call\n",
-    "    started = time.time()\n",
-    "    net.fit(train_seq,\n",
-    "            labels.astype('float32'),\n",
-    "            batch_size=BATCH_SIZE,\n",
-    "            epochs=NUM_EPOCHS,\n",
-    "            callbacks=[tracker, csv_log],\n",
-    "            verbose=2)\n",
-    "    elapsed_hours = (time.time() - started) / 3600\n",
-    "    print(\"\\n\")\n",
-    "\n",
-    "    # persist the model, the [reg_param, hours] timing pair and the raw history dict\n",
-    "    model_path = './lstm_wvec_{0}_{1}_model.h5'.format(reg_param, ref_str)\n",
-    "    time_path = './lstm_wvec_{0}_{1}_time.txt'.format(reg_param, ref_str)\n",
-    "    history_path = './lstm_wvec_{0}_{1}_history.txt'.format(reg_param, ref_str)\n",
-    "    net.save(model_path)\n",
-    "    np.savetxt(time_path, [reg_param, elapsed_hours])\n",
-    "    with open(history_path, \"w\") as res_file:\n",
-    "        res_file.write(str(tracker.history))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 58,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Training model with regularization parameter = 1e-10\n",
-      "Epoch 1/10\n",
-      "33s - loss: 0.6922 - acc: 0.5447\n",
-      "Epoch 2/10\n",
-      "32s - loss: 0.6916 - acc: 0.5557\n",
-      "Epoch 3/10\n",
-      "32s - loss: 0.6910 - acc: 0.5652\n",
-      "Epoch 4/10\n",
-      "32s - loss: 0.6905 - acc: 0.5893\n",
-      "Epoch 5/10\n",
-      "32s - loss: 0.6899 - acc: 0.5828\n",
-      "Epoch 6/10\n",
-      "32s - loss: 0.6893 - acc: 0.5827\n",
-      "Epoch 7/10\n",
-      "32s - loss: 0.6887 - acc: 0.5943\n",
-      "Epoch 8/10\n",
-      "32s - loss: 0.6882 - acc: 0.5901\n",
-      "Epoch 9/10\n",
-      "32s - loss: 0.6876 - acc: 0.5933\n",
-      "Epoch 10/10\n",
-      "32s - loss: 0.6870 - acc: 0.5905\n",
-      "\n",
-      "\n",
-      "Training model with regularization parameter = 1e-07\n",
-      "Epoch 1/10\n",
-      "32s - loss: 0.6944 - acc: 0.4816\n",
-      "Epoch 2/10\n",
-      "32s - loss: 0.6936 - acc: 0.4899\n",
-      "Epoch 3/10\n",
-      "32s - loss: 0.6929 - acc: 0.5145\n",
-      "Epoch 4/10\n",
-      "32s - loss: 0.6924 - acc: 0.5299\n",
-      "Epoch 5/10\n",
-      "32s - loss: 0.6917 - acc: 0.5463\n",
-      "Epoch 6/10\n",
-      "32s - loss: 0.6911 - acc: 0.5524\n",
-      "Epoch 7/10\n",
-      "32s - loss: 0.6905 - acc: 0.5656\n",
-      "Epoch 8/10\n",
-      "32s - loss: 0.6899 - acc: 0.5616\n",
-      "Epoch 9/10\n",
-      "32s - loss: 0.6893 - acc: 0.5647\n",
-      "Epoch 10/10\n",
-      "32s - loss: 0.6888 - acc: 0.5716\n",
-      "\n",
-      "\n",
-      "Training model with regularization parameter = 0.0001\n",
-      "Epoch 1/10\n",
-      "33s - loss: 0.9469 - acc: 0.4776\n",
-      "Epoch 2/10\n",
-      "32s - loss: 0.9460 - acc: 0.4563\n",
-      "Epoch 3/10\n",
-      "32s - loss: 0.9453 - acc: 0.4888\n",
-      "Epoch 4/10\n",
-      "32s - loss: 0.9447 - acc: 0.5072\n",
-      "Epoch 5/10\n",
-      "32s - loss: 0.9441 - acc: 0.5355\n",
-      "Epoch 6/10\n",
-      "32s - loss: 0.9435 - acc: 0.5539\n",
-      "Epoch 7/10\n",
-      "32s - loss: 0.9429 - acc: 0.5832\n",
-      "Epoch 8/10\n",
-      "32s - loss: 0.9423 - acc: 0.5837\n",
-      "Epoch 9/10\n",
-      "32s - loss: 0.9418 - acc: 0.5871\n",
-      "Epoch 10/10\n",
-      "32s - loss: 0.9412 - acc: 0.5881\n",
-      "\n",
-      "\n",
-      "Training model with regularization parameter = 0.1\n",
-      "Epoch 1/10\n",
-      "33s - loss: 218.8508 - acc: 0.4709\n",
-      "Epoch 2/10\n",
-      "32s - loss: 162.2584 - acc: 0.4561\n",
-      "Epoch 3/10\n",
-      "32s - loss: 120.3472 - acc: 0.4695\n",
-      "Epoch 4/10\n",
-      "32s - loss: 89.3082 - acc: 0.4863\n",
-      "Epoch 5/10\n",
-      "32s - loss: 66.3210 - acc: 0.4891\n",
-      "Epoch 6/10\n",
-      "32s - loss: 49.2968 - acc: 0.5004\n",
-      "Epoch 7/10\n",
-      "32s - loss: 36.6888 - acc: 0.5081\n",
-      "Epoch 8/10\n",
-      "32s - loss: 27.3514 - acc: 0.5031\n",
-      "Epoch 9/10\n",
-      "32s - loss: 20.4361 - acc: 0.5056\n",
-      "Epoch 10/10\n",
-      "32s - loss: 15.3147 - acc: 0.5019\n",
-      "\n",
-      "\n",
-      "Training model with regularization parameter = 100.0\n",
-      "Epoch 1/10\n",
-      "32s - loss: 252060.6371 - acc: 0.4981\n",
-      "Epoch 2/10\n",
-      "32s - loss: 252059.9525 - acc: 0.5048\n",
-      "Epoch 3/10\n",
-      "32s - loss: 252059.1579 - acc: 0.5069\n",
-      "Epoch 4/10\n",
-      "32s - loss: 252058.5729 - acc: 0.5048\n",
-      "Epoch 5/10\n",
-      "32s - loss: 252057.9460 - acc: 0.5047\n",
-      "Epoch 6/10\n",
-      "32s - loss: 252057.1785 - acc: 0.5205\n",
-      "Epoch 7/10\n",
-      "32s - loss: 252056.3502 - acc: 0.5075\n",
-      "Epoch 8/10\n",
-      "32s - loss: 252055.6600 - acc: 0.5020\n",
-      "Epoch 9/10\n",
-      "32s - loss: 252054.9252 - acc: 0.5036\n",
-      "Epoch 10/10\n",
-      "32s - loss: 252054.3204 - acc: 0.5081\n",
-      "\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "for rp in [1e-10, 1e-7, 1e-4, 1e-1, 1e2]:\n",
-    "    lstm_create_train(rp, 'tweets')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 63,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "(1e-10, 0.59364081062194274)\n",
-      "(1e-07, 0.57092941998602376)\n",
-      "(0.0001, 0.57477288609364086)\n",
-      "(0.1, 0.50454227812718377)\n",
-      "(100.0, 0.56533892382948991)\n"
-     ]
-    }
-   ],
-   "source": [
-    "from sklearn.metrics import accuracy_score\n",
-    "\n",
-    "for rp in [1e-10, 1e-7, 1e-4, 1e-1, 1e2]:\n",
-    "    model = load_model('./lstm_wvec_{0}_{1}_model.h5'.format(rp, 'tweets'))\n",
-    "    preds = model.predict_classes(test_seq, verbose=0)\n",
-    "    print((rp, accuracy_score(test_data[LABEL_COL], preds)))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.5.2"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}