From 0d5c14da7e0fa2ec94370805b67149b53c1c9453 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 21 Sep 2017 20:09:12 +0000 Subject: [PATCH] LSTM on data from archive.org --- lstm_word2vec_archive.ipynb | 771 ++++++++++++++++++++++++++++++++++++ lstm_word2vec_small.ipynb | 134 +------ 2 files changed, 787 insertions(+), 118 deletions(-) create mode 100644 lstm_word2vec_archive.ipynb diff --git a/lstm_word2vec_archive.ipynb b/lstm_word2vec_archive.ipynb new file mode 100644 index 0000000..5b7ad05 --- /dev/null +++ b/lstm_word2vec_archive.ipynb @@ -0,0 +1,771 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "The code in this notebook is based on the [Keras documentation](https://keras.io/) and [blog](https://blog.keras.io/using-pre-trained-word-embeddings-in-a-keras-model.html) as well as this [word2vec tutorial](http://adventuresinmachinelearning.com/gensim-word2vec-tutorial/). " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using CNTK backend\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import os\n", + "import pandas as pd\n", + "import pickle\n", + "import time\n", + "\n", + "os.environ['KERAS_BACKEND']='cntk'\n", + "from keras.preprocessing import sequence\n", + "from keras.preprocessing.text import Tokenizer, text_to_word_sequence\n", + "from keras.models import Sequential\n", + "from keras import regularizers\n", + "from keras.optimizers import SGD\n", + "from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional\n", + "from keras.callbacks import History, CSVLogger\n", + "from keras.utils import to_categorical" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Download the Amazon reviews data for food from the Internet archive \n", + "[J. McAuley and J. Leskovec. Hidden factors and hidden topics: understanding rating dimensions with review text. RecSys, 2013]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2017-09-21 15:00:07-- https://archive.org/download/amazon-reviews-1995-2013/Gourmet_Foods.txt.gz\n", + "Resolving archive.org (archive.org)... 207.241.224.2\n", + "Connecting to archive.org (archive.org)|207.241.224.2|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://ia801306.us.archive.org/24/items/amazon-reviews-1995-2013/Gourmet_Foods.txt.gz [following]\n", + "--2017-09-21 15:00:07-- https://ia801306.us.archive.org/24/items/amazon-reviews-1995-2013/Gourmet_Foods.txt.gz\n", + "Resolving ia801306.us.archive.org (ia801306.us.archive.org)... 207.241.228.136\n", + "Connecting to ia801306.us.archive.org (ia801306.us.archive.org)|207.241.228.136|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 31388180 (30M) [application/octet-stream]\n", + "Saving to: ‘Gourmet_Foods.txt.gz’\n", + "\n", + "Gourmet_Foods.txt.g 100%[===================>] 29.93M 186KB/s in 77s \n", + "\n", + "2017-09-21 15:01:25 (397 KB/s) - ‘Gourmet_Foods.txt.gz’ saved [31388180/31388180]\n", + "\n" + ] + } + ], + "source": [ + "!wget \"https://archive.org/download/amazon-reviews-1995-2013/Gourmet_Foods.txt.gz\"" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "!gunzip -f Gourmet_Foods.txt.gz" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with open(\"Gourmet_Foods.txt\", \"r\") as fp:\n", + " lst = fp.readlines()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Extract scores and review texts from file " + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "text_lst = lst[9:len(lst):11]\n", + "score_lst = lst[6:len(lst):11]\n", + "score_lst2 = [sc[14:17] for sc in score_lst]\n", + "text_lst2 = [txt[13:] for txt in text_lst]" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "all_data = pd.DataFrame(data={'text': text_lst2, 'rating': score_lst2})\n", + "all_data.loc[:, 'rating'] = all_data['rating'].astype(float)\n", + "all_data.loc[:, 'rating'] = all_data['rating'].astype(int)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Remove medium rating and convert to binary classification (high vs. low rating). " + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "all_data = all_data[all_data['rating'] != 3]" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "new_data = all_data.replace({'rating': {1: '0', 2: '0', 4: '1', 5: '1'}})\n", + "new_data.loc[:, 'rating'] = new_data['rating'].astype(int)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Extract a balanced subsample and split into training and test sets." + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "sample_data = pd.concat([new_data[new_data.rating == 0].sample(10000), new_data[new_data.rating == 1].sample(10000)])\n", + "shuffled = sample_data.iloc[np.random.permutation(20000), :]\n", + "train_data = shuffled.iloc[:10000, :]\n", + "test_data = shuffled.iloc[10000:, :]" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1 5005\n", + "0 4995\n", + "Name: rating, dtype: int64" + ] + }, + "execution_count": 124, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_data.rating.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 5005\n", + "1 4995\n", + "Name: rating, dtype: int64" + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_data.rating.value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Set the dimensions of the input and the embedding. \n", + "\n", + "MAX_DOC_LENGTH : the size of the input i.e. the number of words in the document. Longer documents will be truncated, shorter ones will be padded with zeros.\n", + "\n", + "VOCAB_SIZE : the size of the word encoding (number of most frequent words to keep in the vocabulary)\n", + "\n", + "EMBEDDING_DIM : the dimensionality of the word embedding" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "MAX_DOC_LEN = 300\n", + "VOCAB_SIZE = 6000\n", + "EMBEDDING_DIM = 100" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "TEXT_COL = 'text'\n", + "LABEL_COL = 'rating'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Fit a Keras tokenizer to the most frequent words using the entire training data set as the corpus." + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "# tokenize, create seqs, pad\n", + "tok = Tokenizer(num_words=VOCAB_SIZE, lower=True, split=\" \")\n", + "tok.fit_on_texts(train_data[TEXT_COL])\n", + "train_seq = tok.texts_to_sequences(train_data[TEXT_COL])\n", + "train_seq = sequence.pad_sequences(train_seq, maxlen=MAX_DOC_LEN)\n", + "test_seq = tok.texts_to_sequences(test_data[TEXT_COL])\n", + "test_seq = sequence.pad_sequences(test_seq, maxlen=MAX_DOC_LEN)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Convert the ratings to one-hot categorical labels." + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "labels = to_categorical(np.asarray(train_data[LABEL_COL]))\n", + "labels = labels.astype('float32')" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of reviews by class in training set\n", + "[ 4995. 5005.]\n" + ] + } + ], + "source": [ + "print('Number of reviews by class in training set')\n", + "print(labels.sum(axis=0))\n", + "n_classes = labels.shape[1]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Train word2vec on all the documents in order to initialize the word embedding. Ignore rare words (min_count=6). Use skip-gram as the training algorithm (sg=1)." + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package punkt to /home/anargyri/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n" + ] + } + ], + "source": [ + "import nltk \n", + "\n", + "nltk.download('punkt')\n", + "\n", + "sent_lst = []\n", + "\n", + "for doc in train_data[TEXT_COL]:\n", + " sentences = nltk.tokenize.sent_tokenize(doc)\n", + " for sent in sentences:\n", + " word_lst = [w for w in nltk.tokenize.word_tokenize(sent) if w.isalnum()]\n", + " sent_lst.append(word_lst)" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true, + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2017-09-21 16:29:01,720 : INFO : collecting all words and their counts\n", + "2017-09-21 16:29:01,721 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types\n", + "2017-09-21 16:29:01,761 : INFO : PROGRESS: at sentence #10000, processed 151224 words, keeping 11866 word types\n", + "2017-09-21 16:29:01,800 : INFO : PROGRESS: at sentence #20000, processed 300037 words, keeping 17003 word types\n", + "2017-09-21 16:29:01,843 : INFO : PROGRESS: at sentence #30000, processed 453236 words, keeping 21102 word types\n", + "2017-09-21 16:29:01,890 : INFO : PROGRESS: at sentence #40000, processed 606597 words, keeping 24141 word types\n", + "2017-09-21 16:29:01,919 : INFO : collected 26181 word types from a corpus of 712241 raw words and 47144 sentences\n", + "2017-09-21 16:29:01,921 : INFO : Loading a fresh vocabulary\n", + "2017-09-21 16:29:01,952 : INFO : min_count=6 retains 6289 unique words (24% of original 26181, drops 19892)\n", + "2017-09-21 16:29:01,953 : INFO : min_count=6 leaves 678196 word corpus (95% of original 712241, drops 34045)\n", + "2017-09-21 16:29:01,975 : INFO : deleting the raw counts dictionary of 26181 items\n", + "2017-09-21 16:29:01,977 : INFO : sample=0.001 downsamples 54 most-common words\n", + "2017-09-21 16:29:01,977 : INFO : downsampling leaves estimated 500290 word corpus (73.8% of prior 678196)\n", + "2017-09-21 16:29:01,978 : INFO : estimated required memory for 6289 words and 100 dimensions: 8175700 bytes\n", + "2017-09-21 16:29:01,993 : INFO : resetting layer weights\n", + "2017-09-21 16:29:02,117 : INFO : training model with 24 workers on 6289 vocabulary and 100 features, using sg=1 hs=0 sample=0.001 negative=5 window=5\n", + "2017-09-21 16:29:03,133 : INFO : PROGRESS: at 48.01% examples, 1196686 words/s, in_qsize 46, out_qsize 1\n", + "2017-09-21 16:29:03,918 : INFO : worker thread finished; awaiting finish of 23 more threads\n", + "2017-09-21 16:29:03,923 : INFO : worker thread finished; awaiting finish of 22 more threads\n", + "2017-09-21 16:29:03,928 : INFO : worker thread finished; awaiting finish of 21 more threads\n", + "2017-09-21 16:29:03,932 : INFO : worker thread finished; awaiting finish of 20 more threads\n", + "2017-09-21 16:29:03,946 : INFO : worker thread finished; awaiting finish of 19 more threads\n", + "2017-09-21 16:29:03,949 : INFO : worker thread finished; awaiting finish of 18 more threads\n", + "2017-09-21 16:29:03,960 : INFO : worker thread finished; awaiting finish of 17 more threads\n", + "2017-09-21 16:29:03,961 : INFO : worker thread finished; awaiting finish of 16 more threads\n", + "2017-09-21 16:29:03,964 : INFO : worker thread finished; awaiting finish of 15 more threads\n", + "2017-09-21 16:29:03,971 : INFO : worker thread finished; awaiting finish of 14 more threads\n", + "2017-09-21 16:29:03,972 : INFO : worker thread finished; awaiting finish of 13 more threads\n", + "2017-09-21 16:29:03,973 : INFO : worker thread finished; awaiting finish of 12 more threads\n", + "2017-09-21 16:29:03,975 : INFO : worker thread finished; awaiting finish of 11 more threads\n", + "2017-09-21 16:29:03,976 : INFO : worker thread finished; awaiting finish of 10 more threads\n", + "2017-09-21 16:29:03,979 : INFO : worker thread finished; awaiting finish of 9 more threads\n", + "2017-09-21 16:29:03,981 : INFO : worker thread finished; awaiting finish of 8 more threads\n", + "2017-09-21 16:29:03,982 : INFO : worker thread finished; awaiting finish of 7 more threads\n", + "2017-09-21 16:29:03,983 : INFO : worker thread finished; awaiting finish of 6 more threads\n", + "2017-09-21 16:29:03,985 : INFO : worker thread finished; awaiting finish of 5 more threads\n", + "2017-09-21 16:29:03,990 : INFO : worker thread finished; awaiting finish of 4 more threads\n", + "2017-09-21 16:29:03,990 : INFO : worker thread finished; awaiting finish of 3 more threads\n", + "2017-09-21 16:29:03,992 : INFO : worker thread finished; awaiting finish of 2 more threads\n", + "2017-09-21 16:29:04,003 : INFO : worker thread finished; awaiting finish of 1 more threads\n", + "2017-09-21 16:29:04,011 : INFO : worker thread finished; awaiting finish of 0 more threads\n", + "2017-09-21 16:29:04,012 : INFO : training on 3561205 raw words (2500170 effective words) took 1.9s, 1328895 effective words/s\n" + ] + } + ], + "source": [ + "import gensim, logging\n", + "\n", + "logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)\n", + "# use skip-gram\n", + "word2vec_model = gensim.models.Word2Vec(sentences=sent_lst, min_count=6, size=EMBEDDING_DIM, sg=1, workers=os.cpu_count())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Create the initial embedding matrix from the output of word2vec." + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total 6289 word vectors.\n" + ] + } + ], + "source": [ + "embeddings_index = {}\n", + "\n", + "for word in word2vec_model.wv.vocab:\n", + " coefs = np.asarray(word2vec_model.wv[word], dtype='float32')\n", + " embeddings_index[word] = coefs\n", + "\n", + "print('Total %s word vectors.' % len(embeddings_index))\n", + "\n", + "# Initial embedding\n", + "embedding_matrix = np.zeros((VOCAB_SIZE, EMBEDDING_DIM))\n", + "\n", + "for word, i in tok.word_index.items():\n", + " embedding_vector = embeddings_index.get(word)\n", + " if embedding_vector is not None and i < VOCAB_SIZE:\n", + " embedding_matrix[i] = embedding_vector" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "LSTM_DIM is the dimensionality of each LSTM output (the number of LSTM units).\n", + "The mask_zero option determines whether masking is performed, i.e. whether the layers ignore the padded zeros in shorter documents. CNTK / Keras does not support masking yet." + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "BATCH_SIZE = 50\n", + "NUM_EPOCHS = 30\n", + "LSTM_DIM = 100\n", + "OPTIMIZER = SGD(lr=0.01, nesterov=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "def lstm_create_train(reg_param):\n", + " l2_reg = regularizers.l2(reg_param)\n", + "\n", + " # model init\n", + " embedding_layer = Embedding(VOCAB_SIZE,\n", + " EMBEDDING_DIM,\n", + " input_length=MAX_DOC_LEN,\n", + " trainable=True,\n", + " mask_zero=False,\n", + " embeddings_regularizer=l2_reg,\n", + " weights=[embedding_matrix])\n", + "\n", + " lstm_layer = LSTM(units=LSTM_DIM, kernel_regularizer=l2_reg)\n", + " dense_layer = Dense(n_classes, activation='softmax', kernel_regularizer=l2_reg)\n", + "\n", + " model = Sequential()\n", + " model.add(embedding_layer)\n", + " model.add(Bidirectional(lstm_layer))\n", + " model.add(dense_layer)\n", + "\n", + " model.compile(loss='categorical_crossentropy',\n", + " optimizer=OPTIMIZER,\n", + " metrics=['acc'])\n", + "\n", + " fname = \"lstm_food\"\n", + " history = History()\n", + " csv_logger = CSVLogger('./{0}_{1}.log'.format(fname, reg_param),\n", + " separator=',',\n", + " append=True)\n", + "\n", + " t1 = time.time()\n", + " # model fit\n", + " model.fit(train_seq,\n", + " labels.astype('float32'),\n", + " batch_size=BATCH_SIZE,\n", + " epochs=NUM_EPOCHS,\n", + " callbacks=[history, csv_logger],\n", + " verbose=2)\n", + " t2 = time.time()\n", + "\n", + " # save model\n", + " model.save('./{0}_{1}_model.h5'.format(fname, reg_param))\n", + " np.savetxt('./{0}_{1}_time.txt'.format(fname, reg_param), \n", + " [reg_param, (t2-t1) / 3600])\n", + " with open('./{0}_{1}_history.txt'.format(fname, reg_param), \"w\") as res_file:\n", + " res_file.write(str(history.history))" + ] + }, + { + "cell_type": "code", + "execution_count": 151, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/15\n", + "76s - loss: 0.6882 - acc: 0.5696\n", + "Epoch 2/15\n", + "75s - loss: 0.6825 - acc: 0.6040\n", + "Epoch 3/15\n", + "75s - loss: 0.6768 - acc: 0.6188\n", + "Epoch 4/15\n", + "75s - loss: 0.6705 - acc: 0.6373\n", + "Epoch 5/15\n", + "74s - loss: 0.6633 - acc: 0.6426\n", + "Epoch 6/15\n", + "75s - loss: 0.6543 - acc: 0.6563\n", + "Epoch 7/15\n", + "74s - loss: 0.6412 - acc: 0.6653\n", + "Epoch 8/15\n", + "74s - loss: 0.6125 - acc: 0.6884\n", + "Epoch 9/15\n", + "74s - loss: 0.5350 - acc: 0.7356\n", + "Epoch 10/15\n", + "75s - loss: 0.5045 - acc: 0.7573\n", + "Epoch 11/15\n", + "75s - loss: 0.4971 - acc: 0.7684\n", + "Epoch 12/15\n", + "75s - loss: 0.4850 - acc: 0.7749\n", + "Epoch 13/15\n", + "75s - loss: 0.4825 - acc: 0.7734\n", + "Epoch 14/15\n", + "74s - loss: 0.4708 - acc: 0.7800\n", + "Epoch 15/15\n", + "74s - loss: 0.4700 - acc: 0.7798\n" + ] + } + ], + "source": [ + "lstm_create_train(1e-7)" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy = 0.8023 \t AUC = 0.8845375045375046\n" + ] + } + ], + "source": [ + "from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve\n", + "\n", + "model = load_model('./lstm_food_{0}_model.h5'.format(1e-7))\n", + "preds = model.predict(test_seq, verbose=0)\n", + "print((\"Accuracy = {0} \\t AUC = {1}\".format(accuracy_score(test_data[LABEL_COL], preds.argmax(axis=1)), \n", + " roc_auc_score(test_data[LABEL_COL], preds[:, 1]))))" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Populating the interactive namespace from numpy and matplotlib\n" + ] + } + ], + "source": [ + "%pylab inline" + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true, + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 155, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiAAAAF5CAYAAACm4JG+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3XmcXFWd9/HPLysESCAkJgSjbBJA1jQgi4g8QCIygLIM\nBlAEB4dtBgPuyws35FEUxCE8IA7bKDGozwgIEiSgiBiWjoEgiyxhkyFsplmykj7PH6f76aZJk3Sn\nqm5V3c/79arXqbp17+1fXZqub84999xIKSFJklRLA4ouQJIklY8BRJIk1ZwBRJIk1ZwBRJIk1ZwB\nRJIk1ZwBRJIk1ZwBRJIk1ZwBRJIk1ZwBRJIk1ZwBRJIk1VxdBJCI2Csiro2Iv0dEe0QcvBrbfDAi\nWiNiSUT8LSKOrUWtkiRpzdVFAAHWAeYCJwOrvDlNRGwC/AaYBewAnA/8JCL2r16JkiSpUqLebkYX\nEe3AR1JK177NOt8FDkgpbd9t2XRgRErpwzUoU5IkrYF66QHpq92Am3ssmwnsXkAtkiSpjxo1gIwF\nFvRYtgAYHhFDC6hHkiT1waCiC6iViNgQmAw8ASwpthpJkhrKWsAmwMyU0kuV2GGjBpDngDE9lo0B\nXkkpLe1lm8nAz6palSRJze1o4KpK7KhRA8ifgQN6LJvUsbw3TwD89Kc/Zeutt65SWepp6tSpnHfe\neUWXUSoe89rzmNde5zFfvhyeew6WLYP2dlixIrevvQbPPAN//3t+77HH4N57YcAAWLRo9X/OqFGw\n1lr5eUTePiK/7nze+Tql/LPe+U4YNCjXsMUWud1kk67tO7cbMCCvv8EGMHQojB2btxswAAYOhMGD\nYd11u7bpvt2QIbD22hU9pG/rwQcf5JhjjoGO79JKqIsAEhHrAFsAHf8Z2SwidgBeTik9HRFnA+NS\nSp1zfVwEnNJxNcylwL7A4cDbXQGzBGDrrbdm4sSJ1fgYWokRI0Z4vGvMY157HvM1s2IFLF6c287H\nokXQ1pYfd94Jd9+dnz/+eP6yvv/+Eeyxx0SW9tbn3WHDDeEd78hf7OPHw3775TAwYgSMHg3rrQdj\nxuSgMXBgfgwaBOuskwOA3qJiQxjqIoAAOwO3kucAScAPOpZfARxPHnQ6vnPllNITEXEgcB7w78Az\nwKdSSj2vjJEkVdDixfD887BwYf7X+9//npcvX557GYYOhQceyF/gDzyQv+DfeAOWLIEHH4SRI7t6\nKh58EIYNg9dfX72fPW4cbLUVvOtd8OqrcNRRsP76OUhMmJCDQ2eIGDgw9z7UspdAfVMXASSl9Afe\n5oqclNJxK1l2G9BSzbokqSxWrMg9DE8+CXPnwvXXw/3350DR3p6/8J98cvX21dl7sOmmMH9+Dg3D\nhsG228Lw4TkwDBoEBxwAS5fm9YYNy70V3QPEoEH5tMT66+fQMaDbt8TBB8N3vlOdY6HaqIsAIknq\nv85xEI8+2tW78MYbsGBBHpfQ2TsxZAjMm5d7Je67L4eBv/4192b0ZtKkHCAGD869H6NHw8SJuWeh\nM0i84x05qAwd2jVeQloVA4iqasqUKUWXUDoe89pb02Pe1pZPZ3QfA/HCCzlMLF+eg8SDD+ZegVmz\n8qDGBQvydi+txgWRETmIQB7/MHBg7nV44w044gh45ZV8CmPLLXM42WwzeO97uwZX1iN/zxtf3U3F\nXi0RMRFobW1tdbCYpJpZsiT3Ttx5Zw4BL78Mt9wCzz6bw8U996z+vgYMyFdMLF6cg8PixbDDDvm9\nTTeFjTaCd7+76/TFwIH59MVQp2fUGpozZw4tLS0ALSmlOZXYpz0gktRHKeWBmHffncc4DByYeyRe\nfz0HizfeyO3dd+eejZ7GjIH998/jHvbcMw+unDDhzeMfVqyAjTfOQWLw4DwWYr31av9ZpWoxgEhS\nL1LKV3LMnZtf//738Ktf5Z6L117rWm/gwNw7sXx5DhJtbbDjjrDrrrDTTvD+9+cBlrvsksdIDB5c\n36c3pFowgEgqvddeg9tvhxkzck/G00/n0yYvvrjy9XfeGU45JZ/y2HXXfFpEUt8YQCQ1vZTyeInH\nH4crrsizSz75JNxxR+6teO65N6+/7bawzTb5FMj73gcf/nCexGrIkDdfCiqp/wwgkhrW88/D3/6W\nLyl94ol8emPevNyDsd56eQDo7Nkr33bjjfMcFLvvDgcemEPH9ts7zkKqFQOIpIbywgvw9a/DhRe+\n9b0NNshzWwwcmAd4TpiQZ8OcMCFfHTJ4cJ7DYssta162pB4MIJLq1sKF+ZLVe+6Ba67JA0K7+/d/\nh0MOgc03zzNlOrBTahwGEEmFe+opmDat666lN96YryjpbvhwOOGEPBZjl11g3329WZjUyAwgkgrR\n1gZXXQU/+hE89FBeNmJEPl2y1155Js53vjPfvXTChHxKxR4OqXkYQCRV1aJFuUdj5sx8qevrr7/1\npmanngqf/3zu3ZBUDgYQSWsspTzz5/z5cN118MgjefzGrbe+dd3Jk+HYY/NsoXvtlV/bsyGVjwFE\n0mpbsgR+9zu49164+ebcmzFvXr6ctafRo/NYjfe9Dw49FHbbLd9BVZLAACLpbbz2Wp6s6ytfyQNE\n77vvrescdlgOGy0t8J735Me4cbWvVVJjMYBIAvJplLvuylehvPIKnHTSm9/ffPMcNiZPho99zAm7\nJK0ZA4hUYrNm5RlEH3oILr003yq+u803z5fH7rGHgUNSZRlApBJJKU/oNWMG/Pznb35vyBD47Gfh\n3/4tn1JxvIakajKASE1s0aI8buPqq/M9Uf785673Nt8cPvCBPKX5WmsVV6OkcjKASA1u4cIcMmbN\ngr/+NQeNZ5/NvR09HXYY7L8/fPzjeWIvSSqKAURqMMuW5Qm9PvnJfNfX7oYMyXd1PfzwfEO2TTfN\ns4nuvjuMGVNIuZK0UgYQqc7Nm5dnEr344jzfxjPPdL3X0gIHHAB77pkHig4fXlydktQXBhCpzjz5\nJNxwA/z613DTTV3LR4+GrbfOp08mTIBJk2CjjYqrU5LWhAFEKsiyZXmq8gsugMcfz8u6325+9GjY\neec8buPf/g3WWaeYOiWpGgwgUo394Q9w/vnw3/+dXw8cmE+lrLtuvjfKuHHw6U/D2LHF1ilJ1WQA\nkapo0SL47W/zlSl33AF33gkrVuT3DjwQjjwy93B4RYqksjGASBWydCn84hf5tvOzZ+eejYcffvM6\nw4fnwPHDHzpgVFK5GUCkNdDeDo8+CmecAb/5TV42aFA+lbJkCRx9NOy3H3ziEzBgQLG1SlI9MYBI\n/bB0KXzhC3ksR3fnnguf+QxEFFOXJDUKA4jUBw89BF/9ar5EtnMsx7Rp+Q6xm29ebG2S1EgMIFIv\n5s2D//t/4ZFH8hUqF1/c9d7o0XDWWXDCCcXVJ0mNzAAiddPWlm9Lf/75eUKwThtsANtvD+9+N3z3\nu3lCMElS/xlAVGrt7fmqlWuvhYsuevN7kybBeefBNtsUU5skNTMDiEpn+XK49958SuUnP+laPnhw\nvmrlsMNg331h7bWLq1GSmp0BRE3v5ZfzFOdf/nKeCGzZsq73Nt0UPvIR+Pa3nQxMkmrJAKKmdfvt\ncNBBsHBh17Jhw/Kt6vfeO89EuvHGxdUnSWVmAFFTmTcP5s6F3/8+DyYF2HXXfOlsS0u+z4okqXgG\nEDW0tja47jq4/PJ8uexTT+XlQ4fm4HHNNd7UTZLqkQFEDWnhwnyL+p/+tGvZsGHwf/4PHHoovOMd\nxdUmSVo1A4gayl//ClddBd/5TteyW2/NYzqc/lySGocBRHVv6VL43e/gssvyzKQA224Lp50Gn/qU\nwUOSGpEBRHWrrQ1OPBF+/vOuZZMn5/EejuuQpMbmDcJVl847D9ZfP4eP978ffvUrePVVuPFGw4ck\nNQN7QFRXUsp3nD399Pz6rLPyBGKSpOZiAFHdeOKJPDNpp2uugYMPLqwcSVIVeQpGdeGBB7rCx1ln\n5deGD0lqXvaAqDBLl8LJJ0Nra745HMA3vuEpF0kqAwOIam7p0jyJ2CWXdC079VQ44QTYfvvi6pIk\n1Y4BRDXzxht5Lo9Pf7pr2a9+le9GO8CTgZJUKv7ZV9WllHs4NtywK3x873vQ3p6nTTd8SFL51M2f\n/og4JSLmR8TiiJgdEbusYv2jI2JuRLweEc9GxH9GxMha1atVa2/Pd6QdMACmTYNXXsn3bnn1Vfjc\n55zBVJLKrC4CSEQcCfwAOBPYCbgXmBkRo3pZf0/gCuASYBvgcGBX4Mc1KVir5aij8lTpkMd3vPoq\nHH00rLtusXVJkopXL2NApgIXp5SuBIiIE4EDgeOB761k/d2A+SmlaR2vn4yIi4HP16JYrdrZZ8OM\nGfn5woUwYkSx9UiS6kvhPSARMRhoAWZ1LkspJeBmYPdeNvszMD4iDujYxxjgCOD66lar1fHLX3Zd\nSvvII4YPSdJbFR5AgFHAQGBBj+ULgJXe9SOldAdwDDAjIpYB/wP8Azi1inXqbaSUx3m8+91wxBF5\nWVsbbLFFsXVJkupTPQSQPouIbYDzga8DE4HJwKbAxQWWVVpz50JLS77S5amn8gDTRx+F4cOLrkyS\nVK/qYQzIi8AKYEyP5WOA53rZ5ovAn1JK53a8vj8iTgb+GBFfSSn17E35/6ZOncqIHucEpkyZwpQp\nU/pVfJldeCGcdlqe3wPg29+Gz3wG1lmn2LokSf03ffp0pk+f/qZlbW1tFf85kYdbFCsiZgN3ppRO\n63gdwFPAj1JK56xk/V8Cy1JKR3VbtjtwO7BxSuktwSUiJgKtra2tTJw4sUqfpDx22in3fAB861v5\n6pbuN5KTJDWPOXPm0NLSAtCSUppTiX3WQw8IwLnA5RHRCtxFvipmGHA5QEScDYxLKR3bsf51wI87\nrpaZCYwDziOHmN56TVQBv/897LNP1+uXXoKRzr4iSeqjuhgDklK6Gvgs8E3gL8D2wOSU0gsdq4wF\nxndb/wrgdOAUYB4wA3gQOKyGZZfKCy/kXo/O8HH66bBiheFDktQ/9dIDQkrpQuDCXt47biXLpgHT\nVrK6KuyBB+C9783P/+mfYPp0JxOTJK2ZuugBUf168cWu8HHppXDddYYPSdKaM4CoVyl1hY/vfAeO\ne0s/lCRJ/VM3p2BUX9rbYeDA/PyMM+BLXyq2HklSc7EHRG9x5ZWw0Ub5+W67wfe/X2w9kqTmYw+I\n/r9XXsnBY9Gi/PoXv4DDvK5IklQFBhABMGdOnk4d8riP1lYYOrTYmiRJzctTMOK667rCx/XXw/33\nGz4kSdVlACm5b30LDj44P7/pJvjwh4utR5JUDp6CKanXX8+9Hg8/nK92+fvfYUzP2wFKklQl9oCU\nzJIl8MUv5snEHn4YNtsMli83fEiSassAUiLf+hasvTZ897uw3XYwbRo89hhEFF2ZJKlsPAVTAgsW\nwFFHwS235NcXXACnnFJsTZKkcjOANLk77oA99+x6/dRTMH587+tLklQLnoJpciedlNubb873djF8\nSJLqgT0gTSoleOc74dln4Wtfg333LboiSZK62APShB59FEaOzOFjwgT4+teLrkiSpDczgDSZ/faD\n97wHFi7Mz++/Hwb4X1mSVGf8amoil18Os2blUy/33gu/+x0M8iSbJKkO+fXUBFKCcePguefy67/8\nBUaNKrYmSZLejgGkwbW3wzrr5BlOAZ55xvAhSap/noJpYC+9BFtskcPHdtvlnpCNNy66KkmSVs0A\n0qBSgr32gvnz4bTT4L77iq5IkqTVZwBpUIceCg8+CF/+Mvzwh0VXI0lS3zgGpME8/jhsvnl+PmEC\nnHVWsfVIktQf9oA0kJkzu8LHwQd72kWS1LjsAWkQS5fChz4EgwfD3LmwzTZFVyRJUv/ZA9IAFi/u\nurrlggsMH5KkxmcAaQDvfW++5HbrreHTny66GkmS1pwBpM7NmJEvtT3qKHjggaKrkSSpMgwgde6S\nS3J7+eWFliFJUkUZQOrY736Xby43eXIefCpJUrMwgNSp55+HSZPy86uuKrYWSZIqzQBSh559FsaM\nyc//4z9g5Mhi65EkqdIMIHXmnnu6Lrm96CI49dRi65EkqRoMIHWkrQ122SU//8xn4F//tdh6JEmq\nFgNInbjzTlh//fz8y1+G884rth5JkqrJqdjrQHs7fPjD+fm8ebDttsXWI0lStdkDUrDbboPddoOX\nX4bPftbwIUkqB3tACrJ4MRxxBFx/PQwZAqefDuecU3RVkiTVhgGkAEuW5Pu7zJ8P++0HN94IAwcW\nXZUkSbXjKZgC7LRTDh9nnJFnOzV8SJLKxgBSY9/+Njz0EBx7LHz/+0VXI0lSMQwgNfa1r+X2ssuK\nrUOSpCIZQGrohhtye/TREFFsLZIkFckAUiMpwUc/mp9Pm1ZsLZIkFc0AUiM/+AEsWwZf/SqMGFF0\nNZIkFcsAUgPt7fC5z+X5Ps48s+hqJEkqngGkBr73vdxedhkMcuYVSZIMILXwpS/BOuvAUUcVXYkk\nSfXBAFJlKeV2n32KrUOSpHpiAKmyiy7Krb0fkiR1qZsAEhGnRMT8iFgcEbMjYpdVrD8kIs6KiCci\nYklEPB4Rn6xRuavttNNy+5GPFFuHJEn1pC6GREbEkcAPgE8DdwFTgZkRsWVK6cVeNvsFMBo4DngM\n2Ig6ClQAv/wlLF8OH/84rL120dVIklQ/6iKAkAPHxSmlKwEi4kTgQOB44Hs9V46IDwF7AZullBZ2\nLH6qRrWutv/6r9xecEGxdUiSVG8K7zGIiMFACzCrc1lKKQE3A7v3stlBwD3AFyLimYh4OCLOiYi1\nql5wHzz5JHzwgzB8eNGVSJJUXwoPIMAoYCCwoMfyBcDYXrbZjNwD8l7gI8BpwOFA3Uxyft99cO+9\nznoqSdLK1MspmL4aALQDR6WUXgOIiNOBX0TEySmlpb1tOHXqVEb0SAVTpkxhypQpFS2wc3df+EJF\ndytJUlVNnz6d6dOnv2lZW1tbxX9OpM6JKgrScQpmEXBYSunabssvB0aklD66km0uB/ZIKW3ZbdlW\nwF+BLVNKj61km4lAa2trKxMnTqz45+iuvR0GDoQ99oA//amqP0qSpKqbM2cOLS0tAC0ppTmV2Gfh\np2BSSsuBVmDfzmURER2v7+hlsz8B4yJiWLdlE8i9Is9UqdTV1jnp2DHHFFuHJEn1qvAA0uFc4ISI\n+ERHT8ZFwDDgcoCIODsirui2/lXAS8BlEbF1RHyAfLXMf77d6ZdqSwl23RVuuw3GjYOTTiqqEkmS\n6ltdjAFJKV0dEaOAbwJjgLnA5JTSCx2rjAXGd1v/9YjYH/gP4G5yGJkBfK2mhfdw4olw992w3XbQ\n2lpkJZIk1be6CCAAKaULgQt7ee+4lSz7GzC52nWtrpTgyivz87lzYUC99C1JklSH/JqskHvugSVL\n4H//b8OHJEmr4ldlhXz847n9l38ptg5JkhqBAaQCHngAHn4Y9tsPNtyw6GokSap/BpAK+Pznc3vp\npcXWIUlSozCAVMD118M668D48ateV5IkGUDW2Guv5fZznyu2DkmSGokBZA3dcENud9652DokSWok\nBpA19P3v53b33YutQ5KkRmIAWQPLl+eZTw85BEaOLLoaSZIahwFkDdx5Z24PPLDYOiRJajQGkDUw\nc2ZuDzqo2DokSWo0BpA10Dnvh5OPSZLUNwaQfrr9dnj2WZgyBQYPLroaSZIaiwGknw4/PLfnnFNs\nHZIkNaKKBZCIODQi7qvU/urdggXwwQ/CxhsXXYkkSY2nTwEkIv41In4ZEVdFxPs6lv2viPgL8F/A\nn6pRZL2ZMSO3nXfAlSRJfbPaASQivgj8B/Bu4GDgloj4MvAzYAbwzpTSSVWpss5ccklujzyy2Dok\nSWpUg/qw7nHACSmlKyJiL+APwB7AFiml16tSXZ2aNQv23DPfgE6SJPVdX07BvAu4BSCl9EdgOXBm\n2cLH7Nm5/ehHi61DkqRG1pcAMhRY0u31MuDlypZT//7wh9wef3yxdUiS1Mj6cgoG4FsRsajj+RDg\nqxHR1n2FlNLpFamsDi1fDl/9Kmy6KWywQdHVSJLUuPoSQG4DJnR7fQewWY910hpXVMduuw3eeCOH\nEEmS1H+rHUBSSh+sYh0N4ec/z+0RRxRbhyRJja5Pp2AiYj1gN/Lpl7tSSi9Upao6NWMGjBgB661X\ndCWSJDW2vswDsiPwMDATuA54NCImV6uwerNoEbz6qne+lSSpEvpyFcx3gcfJc3+0ALOAC6pRVD26\n4YbcHndcsXVIktQM+nIKpgWYlFKaAxARxwMvR8TwlNIrVamujvzxj7ndZ59i65AkqRn0pQdkJPBM\n54uU0kLgdWDDShdVj267LV96G1F0JZIkNb6+zgOyTUSM7fY6gK07BqcCkFJqujviLl0Kc+fCvvsW\nXYkkSc2hrwFkFjl0dPcb8vwf0dEOrEBddeWWW3J78snF1iFJUrPoSwDZtGpV1Lkbb8zthz5UbB2S\nJDWLvgSQY4Hvp5QWrXLNJvPb38Lo0TBsWNGVSJLUHPoyCPVMYN1qFVKvVqyARx6BPfYouhJJkppH\nXwJIKa//mDMnt4ccUmwdkiQ1k74EEGjym82tzJe+lFvn/5AkqXL6ehXM3yLibUNISmnkGtRTd265\nBfbaCzbZpOhKJElqHn0NIGcCbdUopB6llB8HHFB0JZIkNZe+BpCfp5Ser0oldei553K70UbF1iFJ\nUrPpyxiQ0o3/+PWvc7vTTsXWIUlSs/EqmLfx2mu53X77YuuQJKnZrPYpmJRSX6+YaXjXXAPrr+8N\n6CRJqrTShYq+GDYMNtus6CokSWo+BpC3cdddsN12RVchSVLzMYD0IiVoa4O11iq6EkmSmo8BpBd/\n+ENud9652DokSWpGBpBe/OpXuT300GLrkCSpGRlAenH99TB4MIxsqonlJUmqDwaQlVixAubPh2OP\nLboSSZKakwFkJW65Jbd7711sHZIkNSsDyEo8/XRuDzmk2DokSWpWBpCVeOml3K63XrF1SJLUrOom\ngETEKRExPyIWR8TsiNhlNbfbMyKWR8ScStVyyy0woG6OjCRJzacuvmYj4kjgB8CZwE7AvcDMiBi1\niu1GAFcAN1eynldfhXe9q5J7lCRJ3dVFAAGmAhenlK5MKT0EnAgsAo5fxXYXAT8DZleymHnzYNKk\nSu5RkiR1V3gAiYjBQAswq3NZSimRezV2f5vtjgM2Bb5R6ZqWLvUOuJIkVdOgogsARgEDgQU9li8A\nJqxsg4h4D/Ad4P0ppfaoYFpoa8sBZIcdKrZLSZLUQ+E9IH0VEQPIp13OTCk91rm4UvufNy+3m29e\nqT1KkqSe6qEH5EVgBTCmx/IxwHMrWX89YGdgx4iY1rFsABARsQyYlFL6fW8/bOrUqYwYMeJNy6ZM\nmcKUKVMAeOGFvGzLLfv4KSRJagLTp09n+vTpb1rW1tZW8Z8TebhFsSJiNnBnSum0jtcBPAX8KKV0\nTo91A9i6xy5OAfYBDgOeSCktXsnPmAi0tra2MnHixF5rOeMMOPdcWLwY1lprTT6VJEnNYc6cObS0\ntAC0pJQqMu1FPfSAAJwLXB4RrcBd5KtihgGXA0TE2cC4lNKxHQNUH+i+cUQ8DyxJKT24poU89VRu\nhw5d0z1JkqTe1EUASSld3THnxzfJp17mApNTSh0nRBgLjK9FLbffDocf7lUwkiRVU10EEICU0oXA\nhb28d9wqtv0GFbgc9x//gOeeg3XXXdM9SZKkt9NwV8FU0yWX5PZTnyq2DkmSmp0BpJuFC3P7/vcX\nW4ckSc3OANJNaytss03RVUiS1PzqZgxIPZg/H+rgqmRJkpqePSAd2tvhkUfgoIOKrkSSpOZnAOlw\n77253XHHYuuQJKkMDCAdvvnN3O6/f7F1SJJUBgaQDjfdBHvvDRttVHQlkiQ1PwNIh0WLIE9zL0mS\nqs0AArz8cm7HjSu2DkmSysIAQlcA2WmnYuuQJKksDCDAf/93bjfYoNg6JEkqCwMIXZfg7rBDsXVI\nklQWBhCgrQ3+6Z9ggEdDkqSa8CsXuPlmGDmy6CokSSqP0geQ1lZYsgS22qroSiRJKo/SB5Cnnsrt\nkUcWW4ckSWVS+gByxx25HTWq2DokSSqT0geQuXMhAoYPL7oSSZLKo/QB5K67YPfdi65CkqRyKX0A\nWbwYdtyx6CokSSqX0geQCNhmm6KrkCSpXEodQFKCZctg8OCiK5EkqVxKHUBWrMjtkCHF1iFJUtmU\nOoA8+2xuO4OIJEmqjVIHkFdfze3WWxdbhyRJZVPqANI5C+raaxdbhyRJZVPqAPL667kdN67YOiRJ\nKptSB5AlS3LrLKiSJNVWqQPIww/ndujQYuuQJKlsSh1A/vKX3A4o9VGQJKn2Sv3Vm5KzoEqSVIRS\nB5AFC2CHHYquQpKk8il1AHn88a4rYSRJUu2UOoAMHAjbb190FZIklU+pA8iLL3oJriRJRShtAOmc\nhv2NN4qtQ5KkMip9AHEQqiRJtVfaAPLkk7kdPLjYOiRJKqPSBpDFi3P7rncVW4ckSWVU2gCybFlu\nhw0rtg5JksqotAFk+fLcegpGkqTaK20Aefrp3A4ZUmwdkiSVUWkDSOcMqM4DIklS7ZU2gMyeDaNG\nwaBBRVciSVL5lDaA/OMfsOGGRVchSVI5lTaALFgAI0YUXYUkSeVU2gAyYADsvHPRVUiSVE6lDSDt\n7V4BI0lSUUobQN54wwGokiQVxQAiSZJqrm4CSEScEhHzI2JxRMyOiF3eZt2PRsRNEfF8RLRFxB0R\nMWl1f1ZK8PjjzoIqSVJR6iKARMSRwA+AM4GdgHuBmRExqpdNPgDcBBwATARuBa6LiB1W5+c98URu\nvQ+MJEnFqIsAAkwFLk4pXZlSegg4EVgEHL+ylVNKU1NK308ptaaUHkspfQV4BDhodX7YQw/ldtJq\n95lIkqRKKjyARMRgoAWY1bkspZSAm4HdV3MfAawHvLw663cGkM0261utkiSpMgoPIMAoYCCwoMfy\nBcDY1dzH54B1gKtXZ+UlS3LrRGSSJBWj4a8DiYijgK8BB6eUXlydbVpbYfx4iKhubZIkaeXqIYC8\nCKwAxvQsGi34AAAKvklEQVRYPgZ47u02jIiPAT8GDk8p3bo6P2zq1Kk8+ugIXn4ZDj44L5syZQpT\npkzpa92SJDWd6dOnM3369Dcta2trq/jPiTzcolgRMRu4M6V0WsfrAJ4CfpRSOqeXbaYAPwGOTCn9\nZjV+xkSgtbW1lU9+ciKbbALXXluxjyBJUtOaM2cOLS0tAC0ppTmV2Gc9jAEBOBc4ISI+ERFbARcB\nw4DLASLi7Ii4onPljtMuVwBnAHdHxJiOx/DV+WHz5sHSpZX+CJIkaXXVwykYUkpXd8z58U3yqZe5\nwOSU0gsdq4wFxnfb5ATywNVpHY9OV9DLpbud/vGP3H7oQ5WoXJIk9UddBBCAlNKFwIW9vHdcj9f7\n9PfnLF6c2+226+8eJEnSmqqXUzA1s3x5br0TriRJxSldAOkc+2EAkSSpOKULINdck9uRI4utQ5Kk\nMitdAHnssdxuuWWxdUiSVGalCyB33+1N6CRJKlrpAogkSSpeKQPIqacWXYEkSeVWygAyblzRFUiS\nVG6lDCBjet72TpIk1VQpA8jYsUVXIElSuZUugKy9NgyqmwnoJUkqp9IFkM6p2CVJUnFKF0C8CZ0k\nScUrXQBZtKjoCiRJUukCyHveU3QFkiSpdAFk6NCiK5AkSaULIAMHFl2BJEkygEiSpJozgEiSpJor\nXQAZULpPLElS/Snd13FKRVcgSZJKF0A23bToCiRJUukCiPeBkSSpeKULIKNHF12BJEkqXQAZM6bo\nCiRJUukCyNixRVcgSZJKF0AGDy66AkmSVLoA4jwgkiQVz69jSZJUcwYQSZJUcwYQSZJUcwYQSZJU\ncwYQSZJUcwYQSZJUcwYQSZJUcwYQSZJUcwYQSZJUcwYQSZJUcwYQSZJUcwYQSZJUcwYQSZJUcwYQ\nSZJUcwYQSZJUcwYQSZJUcwYQSZJUcwYQSZJUcwYQSZJUcwYQSZJUcwYQSZJUcwYQSZJUcwYQSZJU\nc3UTQCLilIiYHxGLI2J2ROyyivU/GBGtEbEkIv4WEcfWqlatvunTpxddQul4zGvPY157HvPGVxcB\nJCKOBH4AnAnsBNwLzIyIUb2svwnwG2AWsANwPvCTiNi/FvVq9flHovY85rXnMa89j3njq4sAAkwF\nLk4pXZlSegg4EVgEHN/L+icBj6eUPp9SejilNA34Zcd+JElSnSs8gETEYKCF3JsBQEopATcDu/ey\n2W4d73c3823WlyRJdaTwAAKMAgYCC3osXwCM7WWbsb2sPzwihla2PEmSVGmDii6ghtYCePDBB4uu\no1Ta2tqYM2dO0WWUise89jzmtecxr61u351rVWqf9RBAXgRWAGN6LB8DPNfLNs/1sv4rKaWlvWyz\nCcAxxxzTvyrVby0tLUWXUDoe89rzmNeex7wQmwB3VGJHhQeQlNLyiGgF9gWuBYiI6Hj9o142+zNw\nQI9lkzqW92YmcDTwBLBkDUqWJKls1iKHj5mV2mHk8Z7Fioh/Bi4nX/1yF/lqlsOBrVJKL0TE2cC4\nlNKxHetvAswDLgQuJYeVHwIfTin1HJwqSZLqTOE9IAAppas75vz4JvlUylxgckrphY5VxgLju63/\nREQcCJwH/DvwDPApw4ckSY2hLnpAJElSudTDZbiSJKlkDCCSJKnmmiaAeDO72uvLMY+Ij0bETRHx\nfES0RcQdETGplvU2g77+nnfbbs+IWB4RTpzQR/342zIkIs6KiCc6/r48HhGfrFG5TaEfx/zoiJgb\nEa9HxLMR8Z8RMbJW9Ta6iNgrIq6NiL9HRHtEHLwa26zxd2hTBBBvZld7fT3mwAeAm8iXT08EbgWu\ni4gdalBuU+jHMe/cbgRwBW+9fYFWoZ/H/BfAPsBxwJbAFODhKpfaNPrx93xP8u/3JcA25CsodwV+\nXJOCm8M65Is/TgZWOTC0Yt+hKaWGfwCzgfO7vQ7ylTGf72X97wL39Vg2Hbih6M/SKI++HvNe9nE/\n8NWiP0ujPPp7zDt+t79B/oM+p+jP0UiPfvxt+RDwMrB+0bU36qMfx/wM4JEey04Fnir6szTiA2gH\nDl7FOhX5Dm34HhBvZld7/TzmPfcRwHrkP9Zahf4e84g4DtiUHEDUB/085gcB9wBfiIhnIuLhiDgn\nIio2fXUz6+cx/zMwPiIO6NjHGOAI4PrqVltqFfkObfgAgjezK0J/jnlPnyN3+11dwbqaWZ+PeUS8\nB/gOcHRKqb265TWl/vyebwbsBbwX+AhwGvmUwLQq1dhs+nzMU0p3AMcAMyJiGfA/wD/IvSCqjop8\nhzZDAFGDiYijgK8BR6SUXiy6nmYUEQOAnwFnppQe61xcYEllMYDchX1USumelNKNwOnAsf7jpjoi\nYhvyGISvk8eXTSb3+l1cYFlaDXUxE+oaqtXN7NSlP8ccgIj4GHlw2OEppVurU15T6usxXw/YGdgx\nIjr/9T2AfPZrGTAppfT7KtXaLPrze/4/wN9TSq91W/YgOfy9E3hspVupU3+O+ReBP6WUzu14fX9E\nnAz8MSK+klLq+S91rbmKfIc2fA9ISmk50HkzO+BNN7Pr7Y59f+6+fodV3cxOHfp5zImIKcB/Ah/r\n+JehVlM/jvkrwLbAjuRR6jsAFwEPdTy/s8olN7x+/p7/CRgXEcO6LZtA7hV5pkqlNo1+HvNhwBs9\nlrWTr+aw1686KvMdWvSI2wqN2v1nYBHwCWArctfbS8DojvfPBq7otv4mwKvkkbwTyJceLQP2K/qz\nNMqjH8f8qI5jfCI5KXc+hhf9WRrl0ddjvpLtvQqmysecPK7pSWAGsDX58vOHgYuK/iyN8ujHMT8W\nWNrxt2VTYE/yTU3vKPqzNMqj4/d2B/I/WNqBz3S8Ht/LMa/Id2jhH7yCB/Bk4AlgMTmF7dztvcuA\nW3qs/wFy0l4MPAJ8vOjP0GiPvhxz8rwfK1byuLToz9FIj77+nvfY1gBSg2NOnvtjJvBaRxj5HjC0\n6M/RSI9+HPNTyHdIf43c03QFsFHRn6NRHsDeHcFjpX+fq/Ud6s3oJElSzTX8GBBJktR4DCCSJKnm\nDCCSJKnmDCCSJKnmDCCSJKnmDCCSJKnmDCCSJKnmDCCSJKnmDCCSJKnmDCCSqiYiLouI9ohY0dF2\nPt8sIi7v9nppRDwSEV+LiAEd2+7dY9vnI+L6iNi26M8lac0ZQCRV22+Bsd0eG5Hv85G6vbcFcA75\nfjWf7bZtIt9bZSz5bptDgd9ExKAa1S6pSgwgkqptaUrphZTS890e7T3eezql9GPgZuCQHtt3bjsX\nOA8YT75LqqQGZgCRVE+WAEN6LAuAiBgBHN2xbFkti5JUeXZjSqq2gyLi1W6vb0gpHdlzpYjYD5gM\nnN99MfB0RASwTseyX6eU/la1aiXVhAFEUrXdApxIR08G8Hq39zrDyeCO938GfKPb+wl4P7AY2A34\nMnBStQuWVH0GEEnV9npKaX4v73WGk+XAs93GhnT3RErpFeCRiBgDXA3sXZ1SJdWKY0AkFen1lNL8\nlNIzvYSPnqYB20ZEz4GqkhqMAURSPYvuL1JKi4FLgG8WU46kSjGASKpnaSXLLgC2iojDa12MpMqJ\nlFb2/7ckSVL12AMiSZJqzgAiSZJqzgAiSZJqzgAiSZJqzgAiSZJqzgAiSZJqzgAiSZJqzgAiSZJq\nzgAiSZJqzgAiSZJqzgAiSZJqzgAiSZJq7v8BAPF+7u+d/1EAAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fpr, tpr, _ = roc_curve(test_data[LABEL_COL], preds[:, 1])\n", + "plot(fpr, tpr)\n", + "xlabel('FPR')\n", + "ylabel('TPR')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/lstm_word2vec_small.ipynb b/lstm_word2vec_small.ipynb index bdbec87..b070565 100644 --- a/lstm_word2vec_small.ipynb +++ b/lstm_word2vec_small.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": { "collapsed": false, "deletable": true, @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": { "collapsed": false, "deletable": true, @@ -65,7 +65,6 @@ }, "outputs": [], "source": [ - "\"\"\"\n", "from azureml import Workspace\n", "ws = Workspace(\n", " workspace_id='817780d9ee0d4a878e25f8c9deb3b866',\n", @@ -75,13 +74,12 @@ "ds = ws.datasets['Book Reviews from Amazon']\n", "all_data = ds.to_dataframe()\n", "all_data.rename(columns={0: 'rating', 1: 'text'}, inplace=True)\n", - "all_data.loc[:, 'rating'] = all_data['rating'] - 1 # reindex ratings to start from 0\n", - "\"\"\"" + "all_data.loc[:, 'rating'] = all_data['rating'] - 1 # reindex ratings to start from 0" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 2, "metadata": { "collapsed": true, "deletable": true, @@ -89,6 +87,7 @@ }, "outputs": [], "source": [ + "\"\"\"\n", "from azureml import Workspace\n", "ws = Workspace(\n", " workspace_id='817780d9ee0d4a878e25f8c9deb3b866',\n", @@ -98,7 +97,8 @@ "ds = ws.datasets['dfe_happysad_utf.csv']\n", "all_data = ds.to_dataframe()\n", "all_data.rename(columns={'features': 'text', 'label': 'rating'}, inplace=True)\n", - "all_data.replace({'rating': {'sadness': 0, 'happiness': 1}}, inplace=True)" + "all_data.replace({'rating': {'sadness': 0, 'happiness': 1}}, inplace=True)\n", + "\"\"\"" ] }, { @@ -113,7 +113,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 3, "metadata": { "collapsed": false, "deletable": true, @@ -148,7 +148,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 4, "metadata": { "collapsed": true, "deletable": true, @@ -163,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 5, "metadata": { "collapsed": true, "deletable": true, @@ -187,7 +187,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 6, "metadata": { "collapsed": false, "deletable": true, @@ -216,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 7, "metadata": { "collapsed": false, "deletable": true, @@ -230,7 +230,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 8, "metadata": { "collapsed": false, "deletable": true, @@ -478,110 +478,6 @@ " res_file.write(str(history.history))" ] }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true, - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Training model with regularization parameter = 0.0001\n", - "Epoch 1/10\n", - "33s - loss: 3.5155 - acc: 0.5719\n", - "Epoch 2/10\n", - "32s - loss: 3.3546 - acc: 0.5819\n", - "Epoch 3/10\n", - "32s - loss: 3.3257 - acc: 0.5819\n", - "Epoch 4/10\n", - "32s - loss: 3.3131 - acc: 0.5819\n", - "Epoch 5/10\n", - "32s - loss: 3.3049 - acc: 0.5819\n", - "Epoch 6/10\n", - "32s - loss: 3.2991 - acc: 0.5819\n", - "Epoch 7/10\n", - "32s - loss: 3.2944 - acc: 0.5819\n", - "Epoch 8/10\n", - "32s - loss: 3.2905 - acc: 0.5819\n", - "Epoch 9/10\n", - "32s - loss: 3.2875 - acc: 0.5819\n", - "Epoch 10/10\n", - "32s - loss: 3.2847 - acc: 0.5819\n", - "\n", - "\n" - ] - } - ], - "source": [ - "lstm_create_train(1e-4)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "model = load_model('./lstm_wvec_{}_model.h5'.format(1e-4))" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2500/2500 [==============================] - 12s \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\n" - ] - } - ], - "source": [ - "preds = model.predict_classes(test_seq) " - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "0.58640000000000003" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from sklearn.metrics import accuracy_score\n", - "\n", - "accuracy_score(test_data[LABEL_COL], preds)" - ] - }, { "cell_type": "code", "execution_count": 58, @@ -752,7 +648,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true, + "deletable": true, + "editable": true }, "outputs": [], "source": []