diff --git a/lstm_word2vec.ipynb b/lstm_word2vec.ipynb deleted file mode 100644 index abf96e7..0000000 --- a/lstm_word2vec.ipynb +++ /dev/null @@ -1,1496 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "The code in this notebook is based on the [Keras documentation](https://keras.io/) and [blog](https://blog.keras.io/using-pre-trained-word-embeddings-in-a-keras-model.html) as well as this [word2vec tutorial](http://adventuresinmachinelearning.com/gensim-word2vec-tutorial/). " - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import os\n", - "import pandas as pd\n", - "import pickle\n", - "import time\n", - "\n", - "os.environ['KERAS_BACKEND']='cntk'\n", - "from keras.preprocessing import sequence\n", - "from keras.preprocessing.text import Tokenizer, text_to_word_sequence\n", - "from keras.models import Sequential\n", - "from keras import regularizers\n", - "from keras.optimizers import SGD\n", - "from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional\n", - "from keras.callbacks import History, CSVLogger\n", - "from keras.utils import to_categorical" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "Download the Amazon reviews data from a public Azure blob" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "CONTAINER_URL = \"https://anargyridsa.blob.core.windows.net/dlvm/\"\n", - "trainFile = \"amazon_reviews_train.csv\"\n", - "testFile = \"amazon_reviews_test.csv\"" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], 
- "source": [ - "# read data\n", - "train_data = pd.read_csv(CONTAINER_URL + trainFile, header=None, names=['rating', 'title', 'text'])\n", - "test_data = pd.read_csv(CONTAINER_URL + testFile, header=None, names=['rating', 'title', 'text'])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "Set the dimensions of the input and the embedding. \n", - "\n", - "MAX_DOC_LENGTH : the size of the input i.e. the number of words in the document. Longer documents will be truncated, shorter ones will be padded with zeros.\n", - "\n", - "VOCAB_SIZE : the size of the word encoding (number of most frequent words to keep in the vocabulary)\n", - "\n", - "EMBEDDING_DIM : the dimensionality of the word embedding" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "MAX_DOC_LENGTH = 300\n", - "VOCAB_SIZE = 6000\n", - "EMBEDDING_DIM = 200" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "TEXT_COL = 'text'\n", - "LABEL_COL = 'rating'" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "Fit a Keras tokenizer to the most frequent words using the entire training data set as the corpus." 
- ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "# tokenize, create seqs, pad\n", - "tok = Tokenizer(num_words=VOCAB_SIZE, lower=True, split=\" \")\n", - "tok.fit_on_texts(train_data[TEXT_COL])\n", - "train_seq = tok.texts_to_sequences(train_data[TEXT_COL])\n", - "train_seq = sequence.pad_sequences(train_seq, maxlen=MAX_DOC_LEN)\n", - "test_seq = tok.texts_to_sequences(test_data[TEXT_COL])\n", - "test_seq = sequence.pad_sequences(test_seq, maxlen=MAX_DOC_LEN)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "np.save('train_seq', train_seq)\n", - "np.save('test_seq', test_seq)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "Convert the ratings to one-hot categorical labels." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "labels = to_categorical(np.asarray(train_data[LABEL_COL]))\n", - "labels = labels[:,1:]\n", - "labels = labels.astype('float32')" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of reviews by class in training set\n", - "[ 600000. 600000. 600000. 600000. 600000.]\n" - ] - } - ], - "source": [ - "print('Number of reviews by class in training set')\n", - "print(labels.sum(axis=0))\n", - "n_classes = labels.shape[1]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "Train word2vec on all the documents in order to initialize the word embedding. 
Ignore rare words (min_count=6). Use skip-gram as the training algorithm (sg=1)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "import nltk \n", - "\n", - "nltk.download('punkt')\n", - "\n", - "sent_lst = []\n", - "\n", - "for doc in train_data[TEXT_COL]:\n", - " sentences = nltk.tokenize.sent_tokenize(doc)\n", - " for sent in sentences:\n", - " word_lst = [w for w in nltk.tokenize.word_tokenize(sent) if w.isalnum()]\n", - " sent_lst.append(word_lst)" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2017-08-21 22:29:22,266 : INFO : collecting all words and their counts\n", - "2017-08-21 22:29:22,266 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types\n", - "2017-08-21 22:29:22,432 : INFO : PROGRESS: at sentence #10000, processed 768914 words, keeping 16356 word types\n", - "2017-08-21 22:29:22,593 : INFO : PROGRESS: at sentence #20000, processed 1558434 words, keeping 18518 word types\n", - "2017-08-21 22:29:22,761 : INFO : PROGRESS: at sentence #30000, processed 2356584 words, keeping 19297 word types\n", - "2017-08-21 22:29:22,927 : INFO : PROGRESS: at sentence #40000, processed 3151418 words, keeping 19590 word types\n", - "2017-08-21 22:29:23,091 : INFO : PROGRESS: at sentence #50000, processed 3928526 words, keeping 19754 word types\n", - "2017-08-21 22:29:23,253 : INFO : PROGRESS: at sentence #60000, processed 4704430 words, keeping 19837 word types\n", - "2017-08-21 22:29:23,416 : INFO : PROGRESS: at sentence #70000, processed 5484612 words, keeping 19896 word types\n", - "2017-08-21 22:29:23,576 : INFO : PROGRESS: at sentence #80000, processed 6269574 words, keeping 19940 word types\n", - "2017-08-21 22:29:23,742 : INFO : PROGRESS: 
at sentence #90000, processed 7056213 words, keeping 19961 word types\n", - "2017-08-21 22:29:23,908 : INFO : PROGRESS: at sentence #100000, processed 7846571 words, keeping 19970 word types\n", - "2017-08-21 22:29:24,083 : INFO : PROGRESS: at sentence #110000, processed 8622413 words, keeping 19978 word types\n", - "2017-08-21 22:29:24,254 : INFO : PROGRESS: at sentence #120000, processed 9399743 words, keeping 19980 word types\n", - "2017-08-21 22:29:24,428 : INFO : PROGRESS: at sentence #130000, processed 10190492 words, keeping 19986 word types\n", - "2017-08-21 22:29:24,599 : INFO : PROGRESS: at sentence #140000, processed 10987183 words, keeping 19989 word types\n", - "2017-08-21 22:29:24,760 : INFO : PROGRESS: at sentence #150000, processed 11771340 words, keeping 19993 word types\n", - "2017-08-21 22:29:24,925 : INFO : PROGRESS: at sentence #160000, processed 12551491 words, keeping 19996 word types\n", - "2017-08-21 22:29:25,100 : INFO : PROGRESS: at sentence #170000, processed 13352226 words, keeping 19997 word types\n", - "2017-08-21 22:29:25,270 : INFO : PROGRESS: at sentence #180000, processed 14134027 words, keeping 19999 word types\n", - "2017-08-21 22:29:25,437 : INFO : PROGRESS: at sentence #190000, processed 14924778 words, keeping 19999 word types\n", - "2017-08-21 22:29:25,601 : INFO : PROGRESS: at sentence #200000, processed 15713455 words, keeping 19999 word types\n", - "2017-08-21 22:29:25,767 : INFO : PROGRESS: at sentence #210000, processed 16499209 words, keeping 19999 word types\n", - "2017-08-21 22:29:25,928 : INFO : PROGRESS: at sentence #220000, processed 17282434 words, keeping 19999 word types\n", - "2017-08-21 22:29:26,090 : INFO : PROGRESS: at sentence #230000, processed 18067357 words, keeping 19999 word types\n", - "2017-08-21 22:29:26,253 : INFO : PROGRESS: at sentence #240000, processed 18841853 words, keeping 19999 word types\n", - "2017-08-21 22:29:26,410 : INFO : PROGRESS: at sentence #250000, processed 19611094 words, 
keeping 19999 word types\n", - "2017-08-21 22:29:26,574 : INFO : PROGRESS: at sentence #260000, processed 20389391 words, keeping 19999 word types\n", - "2017-08-21 22:29:26,735 : INFO : PROGRESS: at sentence #270000, processed 21168280 words, keeping 19999 word types\n", - "2017-08-21 22:29:26,895 : INFO : PROGRESS: at sentence #280000, processed 21946407 words, keeping 19999 word types\n", - "2017-08-21 22:29:27,058 : INFO : PROGRESS: at sentence #290000, processed 22732068 words, keeping 19999 word types\n", - "2017-08-21 22:29:27,222 : INFO : PROGRESS: at sentence #300000, processed 23526262 words, keeping 19999 word types\n", - "2017-08-21 22:29:27,382 : INFO : PROGRESS: at sentence #310000, processed 24307432 words, keeping 19999 word types\n", - "2017-08-21 22:29:27,561 : INFO : PROGRESS: at sentence #320000, processed 25080357 words, keeping 20000 word types\n", - "2017-08-21 22:29:27,729 : INFO : PROGRESS: at sentence #330000, processed 25852342 words, keeping 20000 word types\n", - "2017-08-21 22:29:27,892 : INFO : PROGRESS: at sentence #340000, processed 26628886 words, keeping 20000 word types\n", - "2017-08-21 22:29:28,057 : INFO : PROGRESS: at sentence #350000, processed 27415861 words, keeping 20000 word types\n", - "2017-08-21 22:29:28,229 : INFO : PROGRESS: at sentence #360000, processed 28174170 words, keeping 20000 word types\n", - "2017-08-21 22:29:28,395 : INFO : PROGRESS: at sentence #370000, processed 28962552 words, keeping 20000 word types\n", - "2017-08-21 22:29:28,569 : INFO : PROGRESS: at sentence #380000, processed 29729730 words, keeping 20000 word types\n", - "2017-08-21 22:29:28,736 : INFO : PROGRESS: at sentence #390000, processed 30508357 words, keeping 20000 word types\n", - "2017-08-21 22:29:28,905 : INFO : PROGRESS: at sentence #400000, processed 31303733 words, keeping 20000 word types\n", - "2017-08-21 22:29:29,067 : INFO : PROGRESS: at sentence #410000, processed 32072391 words, keeping 20000 word types\n", - "2017-08-21 
22:29:29,230 : INFO : PROGRESS: at sentence #420000, processed 32836900 words, keeping 20000 word types\n", - "2017-08-21 22:29:29,392 : INFO : PROGRESS: at sentence #430000, processed 33617530 words, keeping 20000 word types\n", - "2017-08-21 22:29:29,565 : INFO : PROGRESS: at sentence #440000, processed 34390417 words, keeping 20000 word types\n", - "2017-08-21 22:29:29,736 : INFO : PROGRESS: at sentence #450000, processed 35171239 words, keeping 20000 word types\n", - "2017-08-21 22:29:29,918 : INFO : PROGRESS: at sentence #460000, processed 35933816 words, keeping 20000 word types\n", - "2017-08-21 22:29:30,102 : INFO : PROGRESS: at sentence #470000, processed 36731467 words, keeping 20000 word types\n", - "2017-08-21 22:29:30,271 : INFO : PROGRESS: at sentence #480000, processed 37510241 words, keeping 20000 word types\n", - "2017-08-21 22:29:30,437 : INFO : PROGRESS: at sentence #490000, processed 38299639 words, keeping 20000 word types\n", - "2017-08-21 22:29:30,612 : INFO : PROGRESS: at sentence #500000, processed 39061913 words, keeping 20000 word types\n", - "2017-08-21 22:29:30,775 : INFO : PROGRESS: at sentence #510000, processed 39839074 words, keeping 20000 word types\n", - "2017-08-21 22:29:30,939 : INFO : PROGRESS: at sentence #520000, processed 40618518 words, keeping 20000 word types\n", - "2017-08-21 22:29:31,108 : INFO : PROGRESS: at sentence #530000, processed 41393564 words, keeping 20000 word types\n", - "2017-08-21 22:29:31,287 : INFO : PROGRESS: at sentence #540000, processed 42169244 words, keeping 20000 word types\n", - "2017-08-21 22:29:31,453 : INFO : PROGRESS: at sentence #550000, processed 42941848 words, keeping 20000 word types\n", - "2017-08-21 22:29:31,623 : INFO : PROGRESS: at sentence #560000, processed 43735448 words, keeping 20000 word types\n", - "2017-08-21 22:29:31,789 : INFO : PROGRESS: at sentence #570000, processed 44530741 words, keeping 20000 word types\n", - "2017-08-21 22:29:31,951 : INFO : PROGRESS: at sentence 
#580000, processed 45295990 words, keeping 20000 word types\n", - "2017-08-21 22:29:32,118 : INFO : PROGRESS: at sentence #590000, processed 46083665 words, keeping 20000 word types\n", - "2017-08-21 22:29:32,291 : INFO : PROGRESS: at sentence #600000, processed 46853201 words, keeping 20000 word types\n", - "2017-08-21 22:29:32,444 : INFO : PROGRESS: at sentence #610000, processed 47601786 words, keeping 20000 word types\n", - "2017-08-21 22:29:32,610 : INFO : PROGRESS: at sentence #620000, processed 48387543 words, keeping 20000 word types\n", - "2017-08-21 22:29:32,772 : INFO : PROGRESS: at sentence #630000, processed 49177254 words, keeping 20000 word types\n", - "2017-08-21 22:29:32,940 : INFO : PROGRESS: at sentence #640000, processed 49963344 words, keeping 20000 word types\n", - "2017-08-21 22:29:33,102 : INFO : PROGRESS: at sentence #650000, processed 50732202 words, keeping 20000 word types\n", - "2017-08-21 22:29:33,270 : INFO : PROGRESS: at sentence #660000, processed 51511655 words, keeping 20000 word types\n", - "2017-08-21 22:29:33,434 : INFO : PROGRESS: at sentence #670000, processed 52283396 words, keeping 20000 word types\n", - "2017-08-21 22:29:33,604 : INFO : PROGRESS: at sentence #680000, processed 53052046 words, keeping 20000 word types\n", - "2017-08-21 22:29:33,771 : INFO : PROGRESS: at sentence #690000, processed 53843165 words, keeping 20000 word types\n", - "2017-08-21 22:29:33,938 : INFO : PROGRESS: at sentence #700000, processed 54622238 words, keeping 20000 word types\n", - "2017-08-21 22:29:34,097 : INFO : PROGRESS: at sentence #710000, processed 55376336 words, keeping 20000 word types\n", - "2017-08-21 22:29:34,265 : INFO : PROGRESS: at sentence #720000, processed 56139675 words, keeping 20000 word types\n", - "2017-08-21 22:29:34,439 : INFO : PROGRESS: at sentence #730000, processed 56907223 words, keeping 20000 word types\n", - "2017-08-21 22:29:34,611 : INFO : PROGRESS: at sentence #740000, processed 57672298 words, keeping 
20000 word types\n", - "2017-08-21 22:29:34,781 : INFO : PROGRESS: at sentence #750000, processed 58438935 words, keeping 20000 word types\n", - "2017-08-21 22:29:34,943 : INFO : PROGRESS: at sentence #760000, processed 59195725 words, keeping 20000 word types\n", - "2017-08-21 22:29:35,110 : INFO : PROGRESS: at sentence #770000, processed 59979596 words, keeping 20000 word types\n", - "2017-08-21 22:29:35,283 : INFO : PROGRESS: at sentence #780000, processed 60765708 words, keeping 20000 word types\n", - "2017-08-21 22:29:35,456 : INFO : PROGRESS: at sentence #790000, processed 61550036 words, keeping 20000 word types\n", - "2017-08-21 22:29:35,622 : INFO : PROGRESS: at sentence #800000, processed 62328620 words, keeping 20000 word types\n", - "2017-08-21 22:29:35,786 : INFO : PROGRESS: at sentence #810000, processed 63110197 words, keeping 20000 word types\n", - "2017-08-21 22:29:35,947 : INFO : PROGRESS: at sentence #820000, processed 63878291 words, keeping 20000 word types\n", - "2017-08-21 22:29:36,116 : INFO : PROGRESS: at sentence #830000, processed 64663090 words, keeping 20000 word types\n", - "2017-08-21 22:29:36,291 : INFO : PROGRESS: at sentence #840000, processed 65442204 words, keeping 20000 word types\n", - "2017-08-21 22:29:36,455 : INFO : PROGRESS: at sentence #850000, processed 66194830 words, keeping 20000 word types\n", - "2017-08-21 22:29:36,620 : INFO : PROGRESS: at sentence #860000, processed 66971778 words, keeping 20000 word types\n", - "2017-08-21 22:29:36,783 : INFO : PROGRESS: at sentence #870000, processed 67755131 words, keeping 20000 word types\n", - "2017-08-21 22:29:36,947 : INFO : PROGRESS: at sentence #880000, processed 68535197 words, keeping 20000 word types\n", - "2017-08-21 22:29:37,106 : INFO : PROGRESS: at sentence #890000, processed 69301746 words, keeping 20000 word types\n", - "2017-08-21 22:29:37,272 : INFO : PROGRESS: at sentence #900000, processed 70078854 words, keeping 20000 word types\n", - "2017-08-21 22:29:37,432 
: INFO : PROGRESS: at sentence #910000, processed 70846717 words, keeping 20000 word types\n", - "2017-08-21 22:29:37,599 : INFO : PROGRESS: at sentence #920000, processed 71622223 words, keeping 20000 word types\n", - "2017-08-21 22:29:37,760 : INFO : PROGRESS: at sentence #930000, processed 72381192 words, keeping 20000 word types\n", - "2017-08-21 22:29:37,925 : INFO : PROGRESS: at sentence #940000, processed 73165217 words, keeping 20000 word types\n", - "2017-08-21 22:29:38,084 : INFO : PROGRESS: at sentence #950000, processed 73931713 words, keeping 20000 word types\n", - "2017-08-21 22:29:38,250 : INFO : PROGRESS: at sentence #960000, processed 74713846 words, keeping 20000 word types\n", - "2017-08-21 22:29:38,428 : INFO : PROGRESS: at sentence #970000, processed 75493505 words, keeping 20000 word types\n", - "2017-08-21 22:29:38,598 : INFO : PROGRESS: at sentence #980000, processed 76270730 words, keeping 20000 word types\n", - "2017-08-21 22:29:38,764 : INFO : PROGRESS: at sentence #990000, processed 77038020 words, keeping 20000 word types\n", - "2017-08-21 22:29:38,926 : INFO : PROGRESS: at sentence #1000000, processed 77798885 words, keeping 20000 word types\n", - "2017-08-21 22:29:39,096 : INFO : PROGRESS: at sentence #1010000, processed 78577967 words, keeping 20000 word types\n", - "2017-08-21 22:29:39,269 : INFO : PROGRESS: at sentence #1020000, processed 79380484 words, keeping 20000 word types\n", - "2017-08-21 22:29:39,441 : INFO : PROGRESS: at sentence #1030000, processed 80165293 words, keeping 20000 word types\n", - "2017-08-21 22:29:39,612 : INFO : PROGRESS: at sentence #1040000, processed 80948388 words, keeping 20000 word types\n", - "2017-08-21 22:29:39,777 : INFO : PROGRESS: at sentence #1050000, processed 81714851 words, keeping 20000 word types\n", - "2017-08-21 22:29:39,950 : INFO : PROGRESS: at sentence #1060000, processed 82524258 words, keeping 20000 word types\n", - "2017-08-21 22:29:40,128 : INFO : PROGRESS: at sentence #1070000, 
processed 83290162 words, keeping 20000 word types\n", - "2017-08-21 22:29:40,321 : INFO : PROGRESS: at sentence #1080000, processed 84065705 words, keeping 20000 word types\n", - "2017-08-21 22:29:40,502 : INFO : PROGRESS: at sentence #1090000, processed 84862593 words, keeping 20000 word types\n", - "2017-08-21 22:29:40,691 : INFO : PROGRESS: at sentence #1100000, processed 85637077 words, keeping 20000 word types\n", - "2017-08-21 22:29:40,860 : INFO : PROGRESS: at sentence #1110000, processed 86403253 words, keeping 20000 word types\n", - "2017-08-21 22:29:41,033 : INFO : PROGRESS: at sentence #1120000, processed 87184272 words, keeping 20000 word types\n", - "2017-08-21 22:29:41,201 : INFO : PROGRESS: at sentence #1130000, processed 87963793 words, keeping 20000 word types\n", - "2017-08-21 22:29:41,372 : INFO : PROGRESS: at sentence #1140000, processed 88725656 words, keeping 20000 word types\n", - "2017-08-21 22:29:41,522 : INFO : PROGRESS: at sentence #1150000, processed 89417477 words, keeping 20000 word types\n", - "2017-08-21 22:29:41,687 : INFO : PROGRESS: at sentence #1160000, processed 90174057 words, keeping 20000 word types\n", - "2017-08-21 22:29:41,850 : INFO : PROGRESS: at sentence #1170000, processed 90938293 words, keeping 20000 word types\n", - "2017-08-21 22:29:42,033 : INFO : PROGRESS: at sentence #1180000, processed 91710185 words, keeping 20000 word types\n", - "2017-08-21 22:29:42,219 : INFO : PROGRESS: at sentence #1190000, processed 92493198 words, keeping 20000 word types\n", - "2017-08-21 22:29:42,387 : INFO : PROGRESS: at sentence #1200000, processed 93267564 words, keeping 20000 word types\n", - "2017-08-21 22:29:42,563 : INFO : PROGRESS: at sentence #1210000, processed 94035860 words, keeping 20000 word types\n", - "2017-08-21 22:29:42,736 : INFO : PROGRESS: at sentence #1220000, processed 94813670 words, keeping 20000 word types\n", - "2017-08-21 22:29:42,905 : INFO : PROGRESS: at sentence #1230000, processed 95590925 words, 
keeping 20000 word types\n", - "2017-08-21 22:29:43,076 : INFO : PROGRESS: at sentence #1240000, processed 96372436 words, keeping 20000 word types\n", - "2017-08-21 22:29:43,244 : INFO : PROGRESS: at sentence #1250000, processed 97139165 words, keeping 20000 word types\n", - "2017-08-21 22:29:43,410 : INFO : PROGRESS: at sentence #1260000, processed 97914933 words, keeping 20000 word types\n", - "2017-08-21 22:29:43,582 : INFO : PROGRESS: at sentence #1270000, processed 98676745 words, keeping 20000 word types\n", - "2017-08-21 22:29:43,757 : INFO : PROGRESS: at sentence #1280000, processed 99468658 words, keeping 20000 word types\n", - "2017-08-21 22:29:43,931 : INFO : PROGRESS: at sentence #1290000, processed 100246174 words, keeping 20000 word types\n", - "2017-08-21 22:29:44,096 : INFO : PROGRESS: at sentence #1300000, processed 101014932 words, keeping 20000 word types\n", - "2017-08-21 22:29:44,267 : INFO : PROGRESS: at sentence #1310000, processed 101790596 words, keeping 20000 word types\n", - "2017-08-21 22:29:44,433 : INFO : PROGRESS: at sentence #1320000, processed 102557672 words, keeping 20000 word types\n", - "2017-08-21 22:29:44,617 : INFO : PROGRESS: at sentence #1330000, processed 103333608 words, keeping 20000 word types\n", - "2017-08-21 22:29:44,789 : INFO : PROGRESS: at sentence #1340000, processed 104105061 words, keeping 20000 word types\n", - "2017-08-21 22:29:44,976 : INFO : PROGRESS: at sentence #1350000, processed 104883602 words, keeping 20000 word types\n", - "2017-08-21 22:29:45,143 : INFO : PROGRESS: at sentence #1360000, processed 105647124 words, keeping 20000 word types\n", - "2017-08-21 22:29:45,317 : INFO : PROGRESS: at sentence #1370000, processed 106432425 words, keeping 20000 word types\n", - "2017-08-21 22:29:45,504 : INFO : PROGRESS: at sentence #1380000, processed 107198008 words, keeping 20000 word types\n", - "2017-08-21 22:29:45,680 : INFO : PROGRESS: at sentence #1390000, processed 107963387 words, keeping 20000 word 
types\n", - "2017-08-21 22:29:45,854 : INFO : PROGRESS: at sentence #1400000, processed 108724510 words, keeping 20000 word types\n", - "2017-08-21 22:29:46,027 : INFO : PROGRESS: at sentence #1410000, processed 109510391 words, keeping 20000 word types\n", - "2017-08-21 22:29:46,195 : INFO : PROGRESS: at sentence #1420000, processed 110260277 words, keeping 20000 word types\n", - "2017-08-21 22:29:46,368 : INFO : PROGRESS: at sentence #1430000, processed 111043934 words, keeping 20000 word types\n", - "2017-08-21 22:29:46,538 : INFO : PROGRESS: at sentence #1440000, processed 111802132 words, keeping 20000 word types\n", - "2017-08-21 22:29:46,709 : INFO : PROGRESS: at sentence #1450000, processed 112554854 words, keeping 20000 word types\n", - "2017-08-21 22:29:46,872 : INFO : PROGRESS: at sentence #1460000, processed 113322164 words, keeping 20000 word types\n", - "2017-08-21 22:29:47,042 : INFO : PROGRESS: at sentence #1470000, processed 114089598 words, keeping 20000 word types\n", - "2017-08-21 22:29:47,219 : INFO : PROGRESS: at sentence #1480000, processed 114852694 words, keeping 20000 word types\n", - "2017-08-21 22:29:47,393 : INFO : PROGRESS: at sentence #1490000, processed 115624599 words, keeping 20000 word types\n", - "2017-08-21 22:29:47,570 : INFO : PROGRESS: at sentence #1500000, processed 116379068 words, keeping 20000 word types\n", - "2017-08-21 22:29:47,736 : INFO : PROGRESS: at sentence #1510000, processed 117147503 words, keeping 20000 word types\n", - "2017-08-21 22:29:47,897 : INFO : PROGRESS: at sentence #1520000, processed 117906193 words, keeping 20000 word types\n", - "2017-08-21 22:29:48,065 : INFO : PROGRESS: at sentence #1530000, processed 118686091 words, keeping 20000 word types\n", - "2017-08-21 22:29:48,238 : INFO : PROGRESS: at sentence #1540000, processed 119462523 words, keeping 20000 word types\n", - "2017-08-21 22:29:48,401 : INFO : PROGRESS: at sentence #1550000, processed 120233850 words, keeping 20000 word types\n", - 
"2017-08-21 22:29:48,573 : INFO : PROGRESS: at sentence #1560000, processed 120979939 words, keeping 20000 word types\n", - "2017-08-21 22:29:48,744 : INFO : PROGRESS: at sentence #1570000, processed 121742899 words, keeping 20000 word types\n", - "2017-08-21 22:29:48,914 : INFO : PROGRESS: at sentence #1580000, processed 122526781 words, keeping 20000 word types\n", - "2017-08-21 22:29:49,079 : INFO : PROGRESS: at sentence #1590000, processed 123291558 words, keeping 20000 word types\n", - "2017-08-21 22:29:49,242 : INFO : PROGRESS: at sentence #1600000, processed 124035894 words, keeping 20000 word types\n", - "2017-08-21 22:29:49,408 : INFO : PROGRESS: at sentence #1610000, processed 124797232 words, keeping 20000 word types\n", - "2017-08-21 22:29:49,574 : INFO : PROGRESS: at sentence #1620000, processed 125558236 words, keeping 20000 word types\n", - "2017-08-21 22:29:49,755 : INFO : PROGRESS: at sentence #1630000, processed 126335694 words, keeping 20000 word types\n", - "2017-08-21 22:29:49,918 : INFO : PROGRESS: at sentence #1640000, processed 127092139 words, keeping 20000 word types\n", - "2017-08-21 22:29:50,087 : INFO : PROGRESS: at sentence #1650000, processed 127857656 words, keeping 20000 word types\n", - "2017-08-21 22:29:50,259 : INFO : PROGRESS: at sentence #1660000, processed 128609323 words, keeping 20000 word types\n", - "2017-08-21 22:29:50,426 : INFO : PROGRESS: at sentence #1670000, processed 129375242 words, keeping 20000 word types\n", - "2017-08-21 22:29:50,613 : INFO : PROGRESS: at sentence #1680000, processed 130152974 words, keeping 20000 word types\n", - "2017-08-21 22:29:50,780 : INFO : PROGRESS: at sentence #1690000, processed 130909221 words, keeping 20000 word types\n", - "2017-08-21 22:29:50,954 : INFO : PROGRESS: at sentence #1700000, processed 131678950 words, keeping 20000 word types\n", - "2017-08-21 22:29:51,124 : INFO : PROGRESS: at sentence #1710000, processed 132453205 words, keeping 20000 word types\n", - "2017-08-21 
22:29:51,296 : INFO : PROGRESS: at sentence #1720000, processed 133226116 words, keeping 20000 word types\n", - "2017-08-21 22:29:51,459 : INFO : PROGRESS: at sentence #1730000, processed 133992524 words, keeping 20000 word types\n", - "2017-08-21 22:29:51,635 : INFO : PROGRESS: at sentence #1740000, processed 134760798 words, keeping 20000 word types\n", - "2017-08-21 22:29:51,808 : INFO : PROGRESS: at sentence #1750000, processed 135544635 words, keeping 20000 word types\n", - "2017-08-21 22:29:51,968 : INFO : PROGRESS: at sentence #1760000, processed 136305306 words, keeping 20000 word types\n", - "2017-08-21 22:29:52,138 : INFO : PROGRESS: at sentence #1770000, processed 137086746 words, keeping 20000 word types\n", - "2017-08-21 22:29:52,308 : INFO : PROGRESS: at sentence #1780000, processed 137845398 words, keeping 20000 word types\n", - "2017-08-21 22:29:52,473 : INFO : PROGRESS: at sentence #1790000, processed 138594834 words, keeping 20000 word types\n", - "2017-08-21 22:29:52,655 : INFO : PROGRESS: at sentence #1800000, processed 139361482 words, keeping 20000 word types\n", - "2017-08-21 22:29:52,826 : INFO : PROGRESS: at sentence #1810000, processed 140138074 words, keeping 20000 word types\n", - "2017-08-21 22:29:52,984 : INFO : PROGRESS: at sentence #1820000, processed 140889199 words, keeping 20000 word types\n", - "2017-08-21 22:29:53,146 : INFO : PROGRESS: at sentence #1830000, processed 141624753 words, keeping 20000 word types\n", - "2017-08-21 22:29:53,309 : INFO : PROGRESS: at sentence #1840000, processed 142381663 words, keeping 20000 word types\n", - "2017-08-21 22:29:53,475 : INFO : PROGRESS: at sentence #1850000, processed 143148164 words, keeping 20000 word types\n", - "2017-08-21 22:29:53,651 : INFO : PROGRESS: at sentence #1860000, processed 143953533 words, keeping 20000 word types\n", - "2017-08-21 22:29:53,820 : INFO : PROGRESS: at sentence #1870000, processed 144722464 words, keeping 20000 word types\n", - "2017-08-21 22:29:53,976 : 
INFO : PROGRESS: at sentence #1880000, processed 145463974 words, keeping 20000 word types\n", - "2017-08-21 22:29:54,137 : INFO : PROGRESS: at sentence #1890000, processed 146215534 words, keeping 20000 word types\n", - "2017-08-21 22:29:54,303 : INFO : PROGRESS: at sentence #1900000, processed 146977350 words, keeping 20000 word types\n", - "2017-08-21 22:29:54,467 : INFO : PROGRESS: at sentence #1910000, processed 147746246 words, keeping 20000 word types\n", - "2017-08-21 22:29:54,636 : INFO : PROGRESS: at sentence #1920000, processed 148522739 words, keeping 20000 word types\n", - "2017-08-21 22:29:54,795 : INFO : PROGRESS: at sentence #1930000, processed 149276527 words, keeping 20000 word types\n", - "2017-08-21 22:29:54,961 : INFO : PROGRESS: at sentence #1940000, processed 150052237 words, keeping 20000 word types\n", - "2017-08-21 22:29:55,126 : INFO : PROGRESS: at sentence #1950000, processed 150818339 words, keeping 20000 word types\n", - "2017-08-21 22:29:55,294 : INFO : PROGRESS: at sentence #1960000, processed 151576053 words, keeping 20000 word types\n", - "2017-08-21 22:29:55,455 : INFO : PROGRESS: at sentence #1970000, processed 152339019 words, keeping 20000 word types\n", - "2017-08-21 22:29:55,631 : INFO : PROGRESS: at sentence #1980000, processed 153107759 words, keeping 20000 word types\n", - "2017-08-21 22:29:55,826 : INFO : PROGRESS: at sentence #1990000, processed 153870395 words, keeping 20000 word types\n", - "2017-08-21 22:29:56,001 : INFO : PROGRESS: at sentence #2000000, processed 154639645 words, keeping 20000 word types\n", - "2017-08-21 22:29:56,166 : INFO : PROGRESS: at sentence #2010000, processed 155385759 words, keeping 20000 word types\n", - "2017-08-21 22:29:56,333 : INFO : PROGRESS: at sentence #2020000, processed 156159003 words, keeping 20000 word types\n", - "2017-08-21 22:29:56,506 : INFO : PROGRESS: at sentence #2030000, processed 156924101 words, keeping 20000 word types\n", - "2017-08-21 22:29:56,681 : INFO : 
PROGRESS: at sentence #2040000, processed 157704202 words, keeping 20000 word types\n", - "2017-08-21 22:29:56,848 : INFO : PROGRESS: at sentence #2050000, processed 158459850 words, keeping 20000 word types\n", - "2017-08-21 22:29:57,007 : INFO : PROGRESS: at sentence #2060000, processed 159214233 words, keeping 20000 word types\n", - "2017-08-21 22:29:57,186 : INFO : PROGRESS: at sentence #2070000, processed 159978837 words, keeping 20000 word types\n", - "2017-08-21 22:29:57,351 : INFO : PROGRESS: at sentence #2080000, processed 160736952 words, keeping 20000 word types\n", - "2017-08-21 22:29:57,529 : INFO : PROGRESS: at sentence #2090000, processed 161516021 words, keeping 20000 word types\n", - "2017-08-21 22:29:57,705 : INFO : PROGRESS: at sentence #2100000, processed 162269156 words, keeping 20000 word types\n", - "2017-08-21 22:29:57,866 : INFO : PROGRESS: at sentence #2110000, processed 163017443 words, keeping 20000 word types\n", - "2017-08-21 22:29:58,025 : INFO : PROGRESS: at sentence #2120000, processed 163773050 words, keeping 20000 word types\n", - "2017-08-21 22:29:58,201 : INFO : PROGRESS: at sentence #2130000, processed 164537224 words, keeping 20000 word types\n", - "2017-08-21 22:29:58,367 : INFO : PROGRESS: at sentence #2140000, processed 165301390 words, keeping 20000 word types\n", - "2017-08-21 22:29:58,541 : INFO : PROGRESS: at sentence #2150000, processed 166080011 words, keeping 20000 word types\n", - "2017-08-21 22:29:58,722 : INFO : PROGRESS: at sentence #2160000, processed 166847988 words, keeping 20000 word types\n", - "2017-08-21 22:29:58,889 : INFO : PROGRESS: at sentence #2170000, processed 167613370 words, keeping 20000 word types\n", - "2017-08-21 22:29:59,049 : INFO : PROGRESS: at sentence #2180000, processed 168358396 words, keeping 20000 word types\n", - "2017-08-21 22:29:59,248 : INFO : PROGRESS: at sentence #2190000, processed 169124030 words, keeping 20000 word types\n", - "2017-08-21 22:29:59,435 : INFO : PROGRESS: at 
sentence #2200000, processed 169882361 words, keeping 20000 word types\n", - "2017-08-21 22:29:59,597 : INFO : PROGRESS: at sentence #2210000, processed 170624929 words, keeping 20000 word types\n", - "2017-08-21 22:29:59,772 : INFO : PROGRESS: at sentence #2220000, processed 171388506 words, keeping 20000 word types\n", - "2017-08-21 22:29:59,936 : INFO : PROGRESS: at sentence #2230000, processed 172140599 words, keeping 20000 word types\n", - "2017-08-21 22:30:00,100 : INFO : PROGRESS: at sentence #2240000, processed 172902015 words, keeping 20000 word types\n", - "2017-08-21 22:30:00,277 : INFO : PROGRESS: at sentence #2250000, processed 173677651 words, keeping 20000 word types\n", - "2017-08-21 22:30:00,444 : INFO : PROGRESS: at sentence #2260000, processed 174420171 words, keeping 20000 word types\n", - "2017-08-21 22:30:00,609 : INFO : PROGRESS: at sentence #2270000, processed 175181613 words, keeping 20000 word types\n", - "2017-08-21 22:30:00,790 : INFO : PROGRESS: at sentence #2280000, processed 175950104 words, keeping 20000 word types\n", - "2017-08-21 22:30:00,952 : INFO : PROGRESS: at sentence #2290000, processed 176713460 words, keeping 20000 word types\n", - "2017-08-21 22:30:01,120 : INFO : PROGRESS: at sentence #2300000, processed 177467649 words, keeping 20000 word types\n", - "2017-08-21 22:30:01,292 : INFO : PROGRESS: at sentence #2310000, processed 178219630 words, keeping 20000 word types\n", - "2017-08-21 22:30:01,462 : INFO : PROGRESS: at sentence #2320000, processed 178983014 words, keeping 20000 word types\n", - "2017-08-21 22:30:01,629 : INFO : PROGRESS: at sentence #2330000, processed 179753170 words, keeping 20000 word types\n", - "2017-08-21 22:30:01,796 : INFO : PROGRESS: at sentence #2340000, processed 180509791 words, keeping 20000 word types\n", - "2017-08-21 22:30:01,954 : INFO : PROGRESS: at sentence #2350000, processed 181249595 words, keeping 20000 word types\n", - "2017-08-21 22:30:02,119 : INFO : PROGRESS: at sentence 
#2360000, processed 182003695 words, keeping 20000 word types\n", - "2017-08-21 22:30:02,294 : INFO : PROGRESS: at sentence #2370000, processed 182760071 words, keeping 20000 word types\n", - "2017-08-21 22:30:02,459 : INFO : PROGRESS: at sentence #2380000, processed 183522791 words, keeping 20000 word types\n", - "2017-08-21 22:30:02,623 : INFO : PROGRESS: at sentence #2390000, processed 184283672 words, keeping 20000 word types\n", - "2017-08-21 22:30:02,786 : INFO : PROGRESS: at sentence #2400000, processed 185035073 words, keeping 20000 word types\n", - "2017-08-21 22:30:02,949 : INFO : PROGRESS: at sentence #2410000, processed 185800107 words, keeping 20000 word types\n", - "2017-08-21 22:30:03,117 : INFO : PROGRESS: at sentence #2420000, processed 186566079 words, keeping 20000 word types\n", - "2017-08-21 22:30:03,281 : INFO : PROGRESS: at sentence #2430000, processed 187323114 words, keeping 20000 word types\n", - "2017-08-21 22:30:03,446 : INFO : PROGRESS: at sentence #2440000, processed 188088203 words, keeping 20000 word types\n", - "2017-08-21 22:30:03,612 : INFO : PROGRESS: at sentence #2450000, processed 188858523 words, keeping 20000 word types\n", - "2017-08-21 22:30:03,779 : INFO : PROGRESS: at sentence #2460000, processed 189616433 words, keeping 20000 word types\n", - "2017-08-21 22:30:03,935 : INFO : PROGRESS: at sentence #2470000, processed 190350009 words, keeping 20000 word types\n", - "2017-08-21 22:30:04,095 : INFO : PROGRESS: at sentence #2480000, processed 191098437 words, keeping 20000 word types\n", - "2017-08-21 22:30:04,254 : INFO : PROGRESS: at sentence #2490000, processed 191850337 words, keeping 20000 word types\n", - "2017-08-21 22:30:04,415 : INFO : PROGRESS: at sentence #2500000, processed 192618694 words, keeping 20000 word types\n", - "2017-08-21 22:30:04,575 : INFO : PROGRESS: at sentence #2510000, processed 193370377 words, keeping 20000 word types\n", - "2017-08-21 22:30:04,744 : INFO : PROGRESS: at sentence #2520000, 
processed 194133310 words, keeping 20000 word types\n", - "2017-08-21 22:30:04,909 : INFO : PROGRESS: at sentence #2530000, processed 194897293 words, keeping 20000 word types\n", - "2017-08-21 22:30:05,067 : INFO : PROGRESS: at sentence #2540000, processed 195647692 words, keeping 20000 word types\n", - "2017-08-21 22:30:05,229 : INFO : PROGRESS: at sentence #2550000, processed 196405992 words, keeping 20000 word types\n", - "2017-08-21 22:30:05,394 : INFO : PROGRESS: at sentence #2560000, processed 197165438 words, keeping 20000 word types\n", - "2017-08-21 22:30:05,556 : INFO : PROGRESS: at sentence #2570000, processed 197917978 words, keeping 20000 word types\n", - "2017-08-21 22:30:05,722 : INFO : PROGRESS: at sentence #2580000, processed 198677774 words, keeping 20000 word types\n", - "2017-08-21 22:30:05,881 : INFO : PROGRESS: at sentence #2590000, processed 199429215 words, keeping 20000 word types\n", - "2017-08-21 22:30:06,045 : INFO : PROGRESS: at sentence #2600000, processed 200189134 words, keeping 20000 word types\n", - "2017-08-21 22:30:06,209 : INFO : PROGRESS: at sentence #2610000, processed 200953246 words, keeping 20000 word types\n", - "2017-08-21 22:30:06,375 : INFO : PROGRESS: at sentence #2620000, processed 201707171 words, keeping 20000 word types\n", - "2017-08-21 22:30:06,540 : INFO : PROGRESS: at sentence #2630000, processed 202465362 words, keeping 20000 word types\n", - "2017-08-21 22:30:06,703 : INFO : PROGRESS: at sentence #2640000, processed 203218188 words, keeping 20000 word types\n", - "2017-08-21 22:30:06,871 : INFO : PROGRESS: at sentence #2650000, processed 203979866 words, keeping 20000 word types\n", - "2017-08-21 22:30:07,034 : INFO : PROGRESS: at sentence #2660000, processed 204743066 words, keeping 20000 word types\n", - "2017-08-21 22:30:07,196 : INFO : PROGRESS: at sentence #2670000, processed 205484587 words, keeping 20000 word types\n", - "2017-08-21 22:30:07,362 : INFO : PROGRESS: at sentence #2680000, processed 
206249113 words, keeping 20000 word types\n", - "2017-08-21 22:30:07,520 : INFO : PROGRESS: at sentence #2690000, processed 206984665 words, keeping 20000 word types\n", - "2017-08-21 22:30:07,681 : INFO : PROGRESS: at sentence #2700000, processed 207736834 words, keeping 20000 word types\n", - "2017-08-21 22:30:07,852 : INFO : PROGRESS: at sentence #2710000, processed 208488616 words, keeping 20000 word types\n", - "2017-08-21 22:30:08,016 : INFO : PROGRESS: at sentence #2720000, processed 209250225 words, keeping 20000 word types\n", - "2017-08-21 22:30:08,179 : INFO : PROGRESS: at sentence #2730000, processed 210016231 words, keeping 20000 word types\n", - "2017-08-21 22:30:08,357 : INFO : PROGRESS: at sentence #2740000, processed 210775524 words, keeping 20000 word types\n", - "2017-08-21 22:30:08,533 : INFO : PROGRESS: at sentence #2750000, processed 211567992 words, keeping 20000 word types\n", - "2017-08-21 22:30:08,704 : INFO : PROGRESS: at sentence #2760000, processed 212340213 words, keeping 20000 word types\n", - "2017-08-21 22:30:08,873 : INFO : PROGRESS: at sentence #2770000, processed 213103941 words, keeping 20000 word types\n", - "2017-08-21 22:30:09,033 : INFO : PROGRESS: at sentence #2780000, processed 213861979 words, keeping 20000 word types\n", - "2017-08-21 22:30:09,205 : INFO : PROGRESS: at sentence #2790000, processed 214627269 words, keeping 20000 word types\n", - "2017-08-21 22:30:09,371 : INFO : PROGRESS: at sentence #2800000, processed 215367981 words, keeping 20000 word types\n", - "2017-08-21 22:30:09,536 : INFO : PROGRESS: at sentence #2810000, processed 216120095 words, keeping 20000 word types\n", - "2017-08-21 22:30:09,699 : INFO : PROGRESS: at sentence #2820000, processed 216891421 words, keeping 20000 word types\n", - "2017-08-21 22:30:09,860 : INFO : PROGRESS: at sentence #2830000, processed 217645353 words, keeping 20000 word types\n", - "2017-08-21 22:30:10,024 : INFO : PROGRESS: at sentence #2840000, processed 218413470 
words, keeping 20000 word types\n", - "2017-08-21 22:30:10,187 : INFO : PROGRESS: at sentence #2850000, processed 219167462 words, keeping 20000 word types\n", - "2017-08-21 22:30:10,352 : INFO : PROGRESS: at sentence #2860000, processed 219916163 words, keeping 20000 word types\n", - "2017-08-21 22:30:10,516 : INFO : PROGRESS: at sentence #2870000, processed 220680703 words, keeping 20000 word types\n", - "2017-08-21 22:30:10,683 : INFO : PROGRESS: at sentence #2880000, processed 221456129 words, keeping 20000 word types\n", - "2017-08-21 22:30:10,857 : INFO : PROGRESS: at sentence #2890000, processed 222218782 words, keeping 20000 word types\n", - "2017-08-21 22:30:11,023 : INFO : PROGRESS: at sentence #2900000, processed 222976534 words, keeping 20000 word types\n", - "2017-08-21 22:30:11,190 : INFO : PROGRESS: at sentence #2910000, processed 223730594 words, keeping 20000 word types\n", - "2017-08-21 22:30:11,362 : INFO : PROGRESS: at sentence #2920000, processed 224500475 words, keeping 20000 word types\n", - "2017-08-21 22:30:11,522 : INFO : PROGRESS: at sentence #2930000, processed 225252000 words, keeping 20000 word types\n", - "2017-08-21 22:30:11,687 : INFO : PROGRESS: at sentence #2940000, processed 226007150 words, keeping 20000 word types\n", - "2017-08-21 22:30:11,850 : INFO : PROGRESS: at sentence #2950000, processed 226760775 words, keeping 20000 word types\n", - "2017-08-21 22:30:12,014 : INFO : PROGRESS: at sentence #2960000, processed 227516583 words, keeping 20000 word types\n", - "2017-08-21 22:30:12,179 : INFO : PROGRESS: at sentence #2970000, processed 228282087 words, keeping 20000 word types\n", - "2017-08-21 22:30:12,344 : INFO : PROGRESS: at sentence #2980000, processed 229042334 words, keeping 20000 word types\n", - "2017-08-21 22:30:12,504 : INFO : PROGRESS: at sentence #2990000, processed 229794580 words, keeping 20000 word types\n", - "2017-08-21 22:30:12,672 : INFO : collected 20000 word types from a corpus of 230561105 raw words and 
3000000 sentences\n", - "2017-08-21 22:30:12,673 : INFO : Loading a fresh vocabulary\n", - "2017-08-21 22:30:12,726 : INFO : min_count=6 retains 20000 unique words (100% of original 20000, drops 0)\n", - "2017-08-21 22:30:12,727 : INFO : min_count=6 leaves 230561105 word corpus (100% of original 230561105, drops 0)\n", - "2017-08-21 22:30:12,787 : INFO : deleting the raw counts dictionary of 20000 items\n", - "2017-08-21 22:30:12,789 : INFO : sample=0.001 downsamples 46 most-common words\n", - "2017-08-21 22:30:12,790 : INFO : downsampling leaves estimated 167501089 word corpus (72.6% of prior 230561105)\n", - "2017-08-21 22:30:12,792 : INFO : estimated required memory for 20000 words and 100 dimensions: 26000000 bytes\n", - "2017-08-21 22:30:12,845 : INFO : resetting layer weights\n", - "2017-08-21 22:30:13,185 : INFO : training model with 24 workers on 20000 vocabulary and 100 features, using sg=1 hs=0 sample=0.001 negative=5 window=5\n", - "2017-08-21 22:30:14,201 : INFO : PROGRESS: at 0.16% examples, 1323502 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:15,213 : INFO : PROGRESS: at 0.30% examples, 1288410 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:16,213 : INFO : PROGRESS: at 0.47% examples, 1342271 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:17,216 : INFO : PROGRESS: at 0.65% examples, 1383192 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:18,221 : INFO : PROGRESS: at 0.79% examples, 1337863 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:19,222 : INFO : PROGRESS: at 0.95% examples, 1348463 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:20,248 : INFO : PROGRESS: at 1.13% examples, 1368362 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:30:21,252 : INFO : PROGRESS: at 1.28% examples, 1354499 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:30:22,254 : INFO : PROGRESS: at 1.44% examples, 1361771 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:30:23,264 : INFO : PROGRESS: 
at 1.60% examples, 1354958 words/s, in_qsize 43, out_qsize 4\n", - "2017-08-21 22:30:24,266 : INFO : PROGRESS: at 1.77% examples, 1363572 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:25,267 : INFO : PROGRESS: at 1.94% examples, 1373182 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:26,286 : INFO : PROGRESS: at 2.10% examples, 1370521 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:27,287 : INFO : PROGRESS: at 2.27% examples, 1371195 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:28,293 : INFO : PROGRESS: at 2.43% examples, 1374892 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:30:29,294 : INFO : PROGRESS: at 2.61% examples, 1379138 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:30,308 : INFO : PROGRESS: at 2.78% examples, 1381390 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:30:31,322 : INFO : PROGRESS: at 2.93% examples, 1378215 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:32,332 : INFO : PROGRESS: at 3.09% examples, 1374308 words/s, in_qsize 41, out_qsize 6\n", - "2017-08-21 22:30:33,341 : INFO : PROGRESS: at 3.26% examples, 1379357 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:30:34,351 : INFO : PROGRESS: at 3.43% examples, 1378337 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:35,355 : INFO : PROGRESS: at 3.58% examples, 1376264 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:36,356 : INFO : PROGRESS: at 3.76% examples, 1380294 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:30:37,372 : INFO : PROGRESS: at 3.92% examples, 1380215 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:30:38,372 : INFO : PROGRESS: at 4.10% examples, 1383472 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:30:39,383 : INFO : PROGRESS: at 4.26% examples, 1382434 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:40,400 : INFO : PROGRESS: at 4.42% examples, 1381804 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:41,400 : INFO : PROGRESS: 
at 4.60% examples, 1386063 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:42,410 : INFO : PROGRESS: at 4.77% examples, 1387453 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:30:43,425 : INFO : PROGRESS: at 4.94% examples, 1386364 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:30:44,429 : INFO : PROGRESS: at 5.10% examples, 1386037 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:45,441 : INFO : PROGRESS: at 5.26% examples, 1384224 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:46,461 : INFO : PROGRESS: at 5.42% examples, 1383532 words/s, in_qsize 48, out_qsize 1\n", - "2017-08-21 22:30:47,462 : INFO : PROGRESS: at 5.59% examples, 1384655 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:48,468 : INFO : PROGRESS: at 5.76% examples, 1384694 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:30:49,474 : INFO : PROGRESS: at 5.92% examples, 1385121 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:50,489 : INFO : PROGRESS: at 6.10% examples, 1386605 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:51,490 : INFO : PROGRESS: at 6.26% examples, 1386313 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:52,496 : INFO : PROGRESS: at 6.43% examples, 1387426 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:53,501 : INFO : PROGRESS: at 6.59% examples, 1385553 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:30:54,511 : INFO : PROGRESS: at 6.74% examples, 1382184 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:30:55,512 : INFO : PROGRESS: at 6.91% examples, 1384238 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:30:56,515 : INFO : PROGRESS: at 7.07% examples, 1384053 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:57,527 : INFO : PROGRESS: at 7.23% examples, 1383992 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:30:58,534 : INFO : PROGRESS: at 7.40% examples, 1384693 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:30:59,536 : INFO : PROGRESS: 
at 7.55% examples, 1382477 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:31:00,570 : INFO : PROGRESS: at 7.73% examples, 1382706 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:01,577 : INFO : PROGRESS: at 7.89% examples, 1380817 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:02,585 : INFO : PROGRESS: at 8.06% examples, 1381614 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:31:03,586 : INFO : PROGRESS: at 8.23% examples, 1382975 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:04,593 : INFO : PROGRESS: at 8.38% examples, 1380241 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:05,600 : INFO : PROGRESS: at 8.54% examples, 1379398 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:31:06,603 : INFO : PROGRESS: at 8.70% examples, 1379759 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:31:07,609 : INFO : PROGRESS: at 8.87% examples, 1380539 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:08,610 : INFO : PROGRESS: at 9.05% examples, 1382339 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:31:09,611 : INFO : PROGRESS: at 9.19% examples, 1379470 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:10,621 : INFO : PROGRESS: at 9.37% examples, 1380248 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:11,624 : INFO : PROGRESS: at 9.54% examples, 1381225 words/s, in_qsize 47, out_qsize 1\n", - "2017-08-21 22:31:12,632 : INFO : PROGRESS: at 9.72% examples, 1383200 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:13,638 : INFO : PROGRESS: at 9.88% examples, 1382089 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:14,645 : INFO : PROGRESS: at 10.04% examples, 1381439 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:15,655 : INFO : PROGRESS: at 10.20% examples, 1380509 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:16,665 : INFO : PROGRESS: at 10.37% examples, 1380755 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:31:17,680 : INFO : 
PROGRESS: at 10.54% examples, 1381014 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:18,685 : INFO : PROGRESS: at 10.71% examples, 1382016 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:19,686 : INFO : PROGRESS: at 10.89% examples, 1382940 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:20,698 : INFO : PROGRESS: at 11.06% examples, 1383263 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:21,699 : INFO : PROGRESS: at 11.22% examples, 1383310 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:22,700 : INFO : PROGRESS: at 11.40% examples, 1384377 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:31:23,703 : INFO : PROGRESS: at 11.56% examples, 1384258 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:31:24,703 : INFO : PROGRESS: at 11.72% examples, 1383623 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:31:25,706 : INFO : PROGRESS: at 11.90% examples, 1384414 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:31:26,712 : INFO : PROGRESS: at 12.08% examples, 1386037 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:27,712 : INFO : PROGRESS: at 12.26% examples, 1387615 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:28,720 : INFO : PROGRESS: at 12.43% examples, 1388055 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:31:29,722 : INFO : PROGRESS: at 12.59% examples, 1387358 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:30,728 : INFO : PROGRESS: at 12.76% examples, 1387326 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:31,730 : INFO : PROGRESS: at 12.94% examples, 1388755 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:32,748 : INFO : PROGRESS: at 13.11% examples, 1388792 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:33,751 : INFO : PROGRESS: at 13.29% examples, 1390115 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:34,768 : INFO : PROGRESS: at 13.45% examples, 1388988 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 
22:31:35,769 : INFO : PROGRESS: at 13.61% examples, 1388359 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:31:36,781 : INFO : PROGRESS: at 13.78% examples, 1388586 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:37,789 : INFO : PROGRESS: at 13.96% examples, 1389284 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:38,791 : INFO : PROGRESS: at 14.11% examples, 1388039 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:39,791 : INFO : PROGRESS: at 14.27% examples, 1387342 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:31:40,807 : INFO : PROGRESS: at 14.44% examples, 1387155 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:41,807 : INFO : PROGRESS: at 14.61% examples, 1387163 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:42,809 : INFO : PROGRESS: at 14.77% examples, 1386975 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:43,816 : INFO : PROGRESS: at 14.94% examples, 1387108 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:44,819 : INFO : PROGRESS: at 15.12% examples, 1387561 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:31:45,822 : INFO : PROGRESS: at 15.30% examples, 1388939 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:31:46,822 : INFO : PROGRESS: at 15.48% examples, 1390005 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:47,823 : INFO : PROGRESS: at 15.66% examples, 1390651 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:48,834 : INFO : PROGRESS: at 15.81% examples, 1389385 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:31:49,841 : INFO : PROGRESS: at 15.99% examples, 1389927 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:50,850 : INFO : PROGRESS: at 16.16% examples, 1390365 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:51,864 : INFO : PROGRESS: at 16.34% examples, 1391308 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:31:52,877 : INFO : PROGRESS: at 16.51% examples, 1391166 words/s, in_qsize 47, out_qsize 
0\n", - "2017-08-21 22:31:53,887 : INFO : PROGRESS: at 16.69% examples, 1391475 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:54,897 : INFO : PROGRESS: at 16.86% examples, 1391997 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:31:55,901 : INFO : PROGRESS: at 17.03% examples, 1391411 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:56,906 : INFO : PROGRESS: at 17.18% examples, 1390002 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:57,908 : INFO : PROGRESS: at 17.36% examples, 1391125 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:58,928 : INFO : PROGRESS: at 17.53% examples, 1390940 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:31:59,932 : INFO : PROGRESS: at 17.71% examples, 1391960 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:32:00,934 : INFO : PROGRESS: at 17.88% examples, 1391954 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:32:01,951 : INFO : PROGRESS: at 18.06% examples, 1392318 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:32:02,955 : INFO : PROGRESS: at 18.24% examples, 1392879 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:32:03,957 : INFO : PROGRESS: at 18.41% examples, 1393184 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:32:04,957 : INFO : PROGRESS: at 18.57% examples, 1392868 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:05,958 : INFO : PROGRESS: at 18.75% examples, 1393388 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:06,961 : INFO : PROGRESS: at 18.92% examples, 1393574 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:07,963 : INFO : PROGRESS: at 19.10% examples, 1394192 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:32:08,969 : INFO : PROGRESS: at 19.26% examples, 1394068 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:32:09,971 : INFO : PROGRESS: at 19.43% examples, 1394043 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:10,979 : INFO : PROGRESS: at 19.60% examples, 1394347 words/s, 
in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:11,980 : INFO : PROGRESS: at 19.78% examples, 1394510 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:12,984 : INFO : PROGRESS: at 19.95% examples, 1395011 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:32:13,995 : INFO : PROGRESS: at 20.08% examples, 1392362 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:15,011 : INFO : PROGRESS: at 20.21% examples, 1389621 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:16,013 : INFO : PROGRESS: at 20.34% examples, 1387487 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:17,023 : INFO : PROGRESS: at 20.45% examples, 1384002 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:32:18,036 : INFO : PROGRESS: at 20.58% examples, 1380988 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:32:19,036 : INFO : PROGRESS: at 20.70% examples, 1378161 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:20,091 : INFO : PROGRESS: at 20.82% examples, 1375155 words/s, in_qsize 43, out_qsize 6\n", - "2017-08-21 22:32:21,102 : INFO : PROGRESS: at 20.95% examples, 1373124 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:32:22,127 : INFO : PROGRESS: at 21.08% examples, 1370785 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:23,140 : INFO : PROGRESS: at 21.22% examples, 1369200 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:24,168 : INFO : PROGRESS: at 21.33% examples, 1365967 words/s, in_qsize 48, out_qsize 8\n", - "2017-08-21 22:32:25,171 : INFO : PROGRESS: at 21.46% examples, 1363889 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:32:26,172 : INFO : PROGRESS: at 21.59% examples, 1362042 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:32:27,197 : INFO : PROGRESS: at 21.72% examples, 1359516 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:28,246 : INFO : PROGRESS: at 21.85% examples, 1356997 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:32:29,262 : INFO : PROGRESS: at 21.98% examples, 
1354984 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:32:30,293 : INFO : PROGRESS: at 22.10% examples, 1352437 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:31,307 : INFO : PROGRESS: at 22.22% examples, 1350144 words/s, in_qsize 40, out_qsize 9\n", - "2017-08-21 22:32:32,316 : INFO : PROGRESS: at 22.35% examples, 1348215 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:32:33,320 : INFO : PROGRESS: at 22.48% examples, 1346351 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:32:34,346 : INFO : PROGRESS: at 22.61% examples, 1344300 words/s, in_qsize 40, out_qsize 7\n", - "2017-08-21 22:32:35,376 : INFO : PROGRESS: at 22.74% examples, 1342588 words/s, in_qsize 43, out_qsize 4\n", - "2017-08-21 22:32:36,383 : INFO : PROGRESS: at 22.88% examples, 1341012 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:32:37,392 : INFO : PROGRESS: at 23.00% examples, 1338890 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:38,402 : INFO : PROGRESS: at 23.13% examples, 1336943 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:32:39,402 : INFO : PROGRESS: at 23.26% examples, 1335358 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:40,411 : INFO : PROGRESS: at 23.39% examples, 1333569 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:41,416 : INFO : PROGRESS: at 23.52% examples, 1331779 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:32:42,417 : INFO : PROGRESS: at 23.65% examples, 1330414 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:32:43,424 : INFO : PROGRESS: at 23.78% examples, 1329162 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:44,428 : INFO : PROGRESS: at 23.91% examples, 1327657 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:45,457 : INFO : PROGRESS: at 24.04% examples, 1325857 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:46,478 : INFO : PROGRESS: at 24.15% examples, 1323167 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:32:47,522 : INFO : PROGRESS: at 
24.29% examples, 1322032 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:48,524 : INFO : PROGRESS: at 24.42% examples, 1320348 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:32:49,525 : INFO : PROGRESS: at 24.54% examples, 1318446 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:50,541 : INFO : PROGRESS: at 24.66% examples, 1316420 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:51,541 : INFO : PROGRESS: at 24.80% examples, 1315141 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:32:52,546 : INFO : PROGRESS: at 24.93% examples, 1314077 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:32:53,560 : INFO : PROGRESS: at 25.05% examples, 1311984 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:54,562 : INFO : PROGRESS: at 25.18% examples, 1310610 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:55,565 : INFO : PROGRESS: at 25.31% examples, 1309280 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:32:56,572 : INFO : PROGRESS: at 25.45% examples, 1308212 words/s, in_qsize 39, out_qsize 8\n", - "2017-08-21 22:32:57,601 : INFO : PROGRESS: at 25.57% examples, 1306432 words/s, in_qsize 38, out_qsize 9\n", - "2017-08-21 22:32:58,639 : INFO : PROGRESS: at 25.70% examples, 1304781 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:32:59,651 : INFO : PROGRESS: at 25.83% examples, 1303453 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:33:00,700 : INFO : PROGRESS: at 25.95% examples, 1301175 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:01,701 : INFO : PROGRESS: at 26.09% examples, 1300384 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:33:02,705 : INFO : PROGRESS: at 26.22% examples, 1299094 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:33:03,713 : INFO : PROGRESS: at 26.34% examples, 1297564 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:04,720 : INFO : PROGRESS: at 26.47% examples, 1296431 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:05,727 : 
INFO : PROGRESS: at 26.60% examples, 1295136 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:06,746 : INFO : PROGRESS: at 26.72% examples, 1293616 words/s, in_qsize 47, out_qsize 1\n", - "2017-08-21 22:33:07,748 : INFO : PROGRESS: at 26.84% examples, 1291961 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:08,774 : INFO : PROGRESS: at 26.97% examples, 1290749 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:33:09,776 : INFO : PROGRESS: at 27.09% examples, 1289170 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:10,777 : INFO : PROGRESS: at 27.23% examples, 1288677 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:33:11,778 : INFO : PROGRESS: at 27.36% examples, 1287609 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:33:12,833 : INFO : PROGRESS: at 27.48% examples, 1285729 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:13,844 : INFO : PROGRESS: at 27.62% examples, 1284974 words/s, in_qsize 40, out_qsize 7\n", - "2017-08-21 22:33:14,847 : INFO : PROGRESS: at 27.76% examples, 1284016 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:33:15,864 : INFO : PROGRESS: at 27.90% examples, 1283075 words/s, in_qsize 43, out_qsize 4\n", - "2017-08-21 22:33:16,870 : INFO : PROGRESS: at 28.02% examples, 1281831 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:17,873 : INFO : PROGRESS: at 28.15% examples, 1280538 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:33:18,877 : INFO : PROGRESS: at 28.27% examples, 1279377 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:19,884 : INFO : PROGRESS: at 28.40% examples, 1278208 words/s, in_qsize 47, out_qsize 1\n", - "2017-08-21 22:33:20,890 : INFO : PROGRESS: at 28.53% examples, 1277289 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:33:21,896 : INFO : PROGRESS: at 28.66% examples, 1276228 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:33:22,922 : INFO : PROGRESS: at 28.79% examples, 1275154 words/s, in_qsize 47, out_qsize 0\n", - 
"2017-08-21 22:33:23,931 : INFO : PROGRESS: at 28.92% examples, 1274092 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:24,946 : INFO : PROGRESS: at 29.04% examples, 1272772 words/s, in_qsize 48, out_qsize 1\n", - "2017-08-21 22:33:25,968 : INFO : PROGRESS: at 29.17% examples, 1271688 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:26,970 : INFO : PROGRESS: at 29.31% examples, 1270924 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:33:27,996 : INFO : PROGRESS: at 29.45% examples, 1270215 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:33:29,006 : INFO : PROGRESS: at 29.57% examples, 1269135 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:30,008 : INFO : PROGRESS: at 29.71% examples, 1268440 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:33:31,013 : INFO : PROGRESS: at 29.84% examples, 1267224 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:32,036 : INFO : PROGRESS: at 29.96% examples, 1266014 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:33,038 : INFO : PROGRESS: at 30.08% examples, 1264658 words/s, in_qsize 40, out_qsize 7\n", - "2017-08-21 22:33:34,066 : INFO : PROGRESS: at 30.22% examples, 1264050 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:35,067 : INFO : PROGRESS: at 30.35% examples, 1263083 words/s, in_qsize 44, out_qsize 0\n", - "2017-08-21 22:33:36,078 : INFO : PROGRESS: at 30.47% examples, 1261664 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:33:37,092 : INFO : PROGRESS: at 30.61% examples, 1261175 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:33:38,098 : INFO : PROGRESS: at 30.75% examples, 1260557 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:33:39,109 : INFO : PROGRESS: at 30.88% examples, 1259805 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:40,113 : INFO : PROGRESS: at 31.00% examples, 1258608 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:41,122 : INFO : PROGRESS: at 31.13% examples, 1257509 words/s, in_qsize 47, 
out_qsize 0\n", - "2017-08-21 22:33:42,128 : INFO : PROGRESS: at 31.26% examples, 1256678 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:33:43,136 : INFO : PROGRESS: at 31.39% examples, 1255701 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:33:44,143 : INFO : PROGRESS: at 31.52% examples, 1255155 words/s, in_qsize 48, out_qsize 1\n", - "2017-08-21 22:33:45,149 : INFO : PROGRESS: at 31.65% examples, 1254367 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:46,161 : INFO : PROGRESS: at 31.79% examples, 1253814 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:33:47,178 : INFO : PROGRESS: at 31.92% examples, 1252759 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:48,183 : INFO : PROGRESS: at 32.06% examples, 1252284 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:49,185 : INFO : PROGRESS: at 32.17% examples, 1250834 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:50,188 : INFO : PROGRESS: at 32.30% examples, 1249814 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:51,237 : INFO : PROGRESS: at 32.42% examples, 1248742 words/s, in_qsize 45, out_qsize 10\n", - "2017-08-21 22:33:52,256 : INFO : PROGRESS: at 32.56% examples, 1247943 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:53,258 : INFO : PROGRESS: at 32.68% examples, 1246722 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:33:54,273 : INFO : PROGRESS: at 32.79% examples, 1245478 words/s, in_qsize 43, out_qsize 4\n", - "2017-08-21 22:33:55,275 : INFO : PROGRESS: at 32.92% examples, 1244544 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:33:56,289 : INFO : PROGRESS: at 33.05% examples, 1243643 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:33:57,303 : INFO : PROGRESS: at 33.18% examples, 1243149 words/s, in_qsize 47, out_qsize 1\n", - "2017-08-21 22:33:58,305 : INFO : PROGRESS: at 33.31% examples, 1242320 words/s, in_qsize 43, out_qsize 4\n", - "2017-08-21 22:33:59,324 : INFO : PROGRESS: at 33.45% examples, 1241637 
words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:34:00,339 : INFO : PROGRESS: at 33.58% examples, 1241025 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:34:01,346 : INFO : PROGRESS: at 33.71% examples, 1240181 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:34:02,357 : INFO : PROGRESS: at 33.84% examples, 1239723 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:03,394 : INFO : PROGRESS: at 33.97% examples, 1238782 words/s, in_qsize 38, out_qsize 9\n", - "2017-08-21 22:34:04,397 : INFO : PROGRESS: at 34.12% examples, 1238563 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:34:05,405 : INFO : PROGRESS: at 34.25% examples, 1238104 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:34:06,414 : INFO : PROGRESS: at 34.38% examples, 1237336 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:07,416 : INFO : PROGRESS: at 34.52% examples, 1237056 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:08,427 : INFO : PROGRESS: at 34.67% examples, 1236873 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:09,438 : INFO : PROGRESS: at 34.81% examples, 1236381 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:10,439 : INFO : PROGRESS: at 34.95% examples, 1236013 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:34:11,441 : INFO : PROGRESS: at 35.08% examples, 1235370 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:12,470 : INFO : PROGRESS: at 35.20% examples, 1234204 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:13,498 : INFO : PROGRESS: at 35.33% examples, 1233532 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:34:14,510 : INFO : PROGRESS: at 35.46% examples, 1232914 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:15,517 : INFO : PROGRESS: at 35.59% examples, 1232282 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:16,516 : INFO : PROGRESS: at 35.73% examples, 1231891 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:17,551 : INFO : PROGRESS: at 35.87% 
examples, 1231198 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:34:18,551 : INFO : PROGRESS: at 36.00% examples, 1230657 words/s, in_qsize 44, out_qsize 4\n", - "2017-08-21 22:34:19,574 : INFO : PROGRESS: at 36.14% examples, 1230151 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:20,578 : INFO : PROGRESS: at 36.27% examples, 1229597 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:34:21,584 : INFO : PROGRESS: at 36.40% examples, 1228843 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:34:22,591 : INFO : PROGRESS: at 36.52% examples, 1227852 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:23,624 : INFO : PROGRESS: at 36.66% examples, 1227577 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:34:24,645 : INFO : PROGRESS: at 36.80% examples, 1227127 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:25,652 : INFO : PROGRESS: at 36.95% examples, 1226981 words/s, in_qsize 41, out_qsize 6\n", - "2017-08-21 22:34:26,664 : INFO : PROGRESS: at 37.08% examples, 1226445 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:27,684 : INFO : PROGRESS: at 37.20% examples, 1225310 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:28,691 : INFO : PROGRESS: at 37.33% examples, 1224725 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:34:29,705 : INFO : PROGRESS: at 37.46% examples, 1223994 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:30,729 : INFO : PROGRESS: at 37.59% examples, 1223394 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:31,759 : INFO : PROGRESS: at 37.72% examples, 1222656 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:34:32,780 : INFO : PROGRESS: at 37.86% examples, 1222160 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:33,837 : INFO : PROGRESS: at 37.99% examples, 1221396 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:34:34,838 : INFO : PROGRESS: at 38.12% examples, 1220708 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:35,856 : INFO : 
PROGRESS: at 38.24% examples, 1220019 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:34:36,886 : INFO : PROGRESS: at 38.37% examples, 1219362 words/s, in_qsize 38, out_qsize 9\n", - "2017-08-21 22:34:37,890 : INFO : PROGRESS: at 38.50% examples, 1218638 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:38,900 : INFO : PROGRESS: at 38.62% examples, 1217761 words/s, in_qsize 43, out_qsize 4\n", - "2017-08-21 22:34:39,916 : INFO : PROGRESS: at 38.74% examples, 1216835 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:34:40,922 : INFO : PROGRESS: at 38.87% examples, 1216373 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:41,936 : INFO : PROGRESS: at 39.00% examples, 1215686 words/s, in_qsize 48, out_qsize 2\n", - "2017-08-21 22:34:42,940 : INFO : PROGRESS: at 39.13% examples, 1215065 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:43,948 : INFO : PROGRESS: at 39.26% examples, 1214707 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:44,953 : INFO : PROGRESS: at 39.37% examples, 1213669 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:45,959 : INFO : PROGRESS: at 39.49% examples, 1212589 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:34:46,969 : INFO : PROGRESS: at 39.61% examples, 1211974 words/s, in_qsize 47, out_qsize 1\n", - "2017-08-21 22:34:47,971 : INFO : PROGRESS: at 39.75% examples, 1211527 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:48,977 : INFO : PROGRESS: at 39.88% examples, 1211060 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:49,977 : INFO : PROGRESS: at 40.02% examples, 1210810 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:50,979 : INFO : PROGRESS: at 40.15% examples, 1210576 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:51,992 : INFO : PROGRESS: at 40.30% examples, 1210695 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:34:53,002 : INFO : PROGRESS: at 40.45% examples, 1210974 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 
22:34:54,014 : INFO : PROGRESS: at 40.61% examples, 1211394 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:55,018 : INFO : PROGRESS: at 40.74% examples, 1211232 words/s, in_qsize 48, out_qsize 1\n", - "2017-08-21 22:34:56,030 : INFO : PROGRESS: at 40.88% examples, 1211057 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:57,031 : INFO : PROGRESS: at 41.01% examples, 1210786 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:34:58,036 : INFO : PROGRESS: at 41.15% examples, 1210620 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:34:59,042 : INFO : PROGRESS: at 41.28% examples, 1210375 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:35:00,046 : INFO : PROGRESS: at 41.43% examples, 1210369 words/s, in_qsize 48, out_qsize 1\n", - "2017-08-21 22:35:01,049 : INFO : PROGRESS: at 41.58% examples, 1210619 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:35:02,063 : INFO : PROGRESS: at 41.71% examples, 1210227 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:35:03,073 : INFO : PROGRESS: at 41.84% examples, 1209877 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:04,076 : INFO : PROGRESS: at 41.98% examples, 1209806 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:05,079 : INFO : PROGRESS: at 42.11% examples, 1209458 words/s, in_qsize 45, out_qsize 1\n", - "2017-08-21 22:35:06,092 : INFO : PROGRESS: at 42.23% examples, 1208850 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:35:07,097 : INFO : PROGRESS: at 42.38% examples, 1208932 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:08,107 : INFO : PROGRESS: at 42.52% examples, 1208794 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:09,117 : INFO : PROGRESS: at 42.66% examples, 1208600 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:10,128 : INFO : PROGRESS: at 42.80% examples, 1208472 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:11,145 : INFO : PROGRESS: at 42.94% examples, 1208419 words/s, in_qsize 47, out_qsize 
0\n", - "2017-08-21 22:35:12,170 : INFO : PROGRESS: at 43.08% examples, 1208245 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:35:13,178 : INFO : PROGRESS: at 43.22% examples, 1208181 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:14,183 : INFO : PROGRESS: at 43.37% examples, 1208219 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:15,202 : INFO : PROGRESS: at 43.51% examples, 1208134 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:35:16,209 : INFO : PROGRESS: at 43.65% examples, 1207917 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:35:17,219 : INFO : PROGRESS: at 43.78% examples, 1207837 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:18,225 : INFO : PROGRESS: at 43.92% examples, 1207668 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:19,232 : INFO : PROGRESS: at 44.07% examples, 1207689 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:20,250 : INFO : PROGRESS: at 44.20% examples, 1207245 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:21,288 : INFO : PROGRESS: at 44.33% examples, 1206883 words/s, in_qsize 48, out_qsize 3\n", - "2017-08-21 22:35:22,289 : INFO : PROGRESS: at 44.47% examples, 1206691 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:35:23,311 : INFO : PROGRESS: at 44.62% examples, 1206839 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:35:24,321 : INFO : PROGRESS: at 44.76% examples, 1206690 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:35:25,350 : INFO : PROGRESS: at 44.90% examples, 1206561 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:35:26,369 : INFO : PROGRESS: at 45.03% examples, 1206095 words/s, in_qsize 42, out_qsize 5\n", - "2017-08-21 22:35:27,371 : INFO : PROGRESS: at 45.17% examples, 1205867 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:35:28,397 : INFO : PROGRESS: at 45.30% examples, 1205611 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:35:29,402 : INFO : PROGRESS: at 45.44% examples, 1205393 words/s, 
in_qsize 45, out_qsize 2\n", - "2017-08-21 22:35:30,409 : INFO : PROGRESS: at 45.57% examples, 1205049 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:35:31,417 : INFO : PROGRESS: at 45.71% examples, 1204955 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:32,421 : INFO : PROGRESS: at 45.86% examples, 1205154 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:35:33,431 : INFO : PROGRESS: at 46.00% examples, 1205150 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:35:34,435 : INFO : PROGRESS: at 46.14% examples, 1205117 words/s, in_qsize 47, out_qsize 1\n", - "2017-08-21 22:35:35,435 : INFO : PROGRESS: at 46.27% examples, 1204570 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:36,446 : INFO : PROGRESS: at 46.40% examples, 1204381 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:35:37,448 : INFO : PROGRESS: at 46.53% examples, 1204022 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:35:38,449 : INFO : PROGRESS: at 46.68% examples, 1204006 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:39,457 : INFO : PROGRESS: at 46.81% examples, 1203703 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:40,461 : INFO : PROGRESS: at 46.94% examples, 1203394 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:41,465 : INFO : PROGRESS: at 47.08% examples, 1203507 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:42,472 : INFO : PROGRESS: at 47.23% examples, 1203612 words/s, in_qsize 47, out_qsize 1\n", - "2017-08-21 22:35:43,482 : INFO : PROGRESS: at 47.35% examples, 1203127 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:44,488 : INFO : PROGRESS: at 47.49% examples, 1203009 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:45,506 : INFO : PROGRESS: at 47.64% examples, 1202851 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:46,518 : INFO : PROGRESS: at 47.78% examples, 1202697 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:35:47,521 : INFO : PROGRESS: at 47.92% examples, 
1202628 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:48,537 : INFO : PROGRESS: at 48.06% examples, 1202542 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:35:49,567 : INFO : PROGRESS: at 48.20% examples, 1202396 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:50,574 : INFO : PROGRESS: at 48.35% examples, 1202519 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:51,575 : INFO : PROGRESS: at 48.49% examples, 1202446 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:35:52,576 : INFO : PROGRESS: at 48.63% examples, 1202420 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:53,592 : INFO : PROGRESS: at 48.78% examples, 1202523 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:35:54,596 : INFO : PROGRESS: at 48.91% examples, 1202350 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:35:55,599 : INFO : PROGRESS: at 49.06% examples, 1202457 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:35:56,602 : INFO : PROGRESS: at 49.19% examples, 1202103 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:35:57,603 : INFO : PROGRESS: at 49.33% examples, 1202085 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:35:58,613 : INFO : PROGRESS: at 49.47% examples, 1201887 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:35:59,617 : INFO : PROGRESS: at 49.62% examples, 1202057 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:00,631 : INFO : PROGRESS: at 49.77% examples, 1202173 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:01,633 : INFO : PROGRESS: at 49.92% examples, 1202245 words/s, in_qsize 42, out_qsize 5\n", - "2017-08-21 22:36:02,643 : INFO : PROGRESS: at 50.06% examples, 1202141 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:36:03,658 : INFO : PROGRESS: at 50.19% examples, 1201793 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:04,660 : INFO : PROGRESS: at 50.33% examples, 1201741 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:05,667 : INFO : PROGRESS: at 
50.48% examples, 1201815 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:36:06,670 : INFO : PROGRESS: at 50.63% examples, 1201783 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:36:07,681 : INFO : PROGRESS: at 50.77% examples, 1201638 words/s, in_qsize 42, out_qsize 5\n", - "2017-08-21 22:36:08,683 : INFO : PROGRESS: at 50.90% examples, 1201359 words/s, in_qsize 47, out_qsize 3\n", - "2017-08-21 22:36:09,688 : INFO : PROGRESS: at 51.04% examples, 1201167 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:36:10,704 : INFO : PROGRESS: at 51.17% examples, 1200929 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:36:11,711 : INFO : PROGRESS: at 51.33% examples, 1201105 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:12,728 : INFO : PROGRESS: at 51.48% examples, 1201247 words/s, in_qsize 48, out_qsize 2\n", - "2017-08-21 22:36:13,733 : INFO : PROGRESS: at 51.61% examples, 1201079 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:14,749 : INFO : PROGRESS: at 51.75% examples, 1200869 words/s, in_qsize 43, out_qsize 4\n", - "2017-08-21 22:36:15,761 : INFO : PROGRESS: at 51.90% examples, 1200871 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:16,773 : INFO : PROGRESS: at 52.02% examples, 1200451 words/s, in_qsize 42, out_qsize 5\n", - "2017-08-21 22:36:17,781 : INFO : PROGRESS: at 52.16% examples, 1200329 words/s, in_qsize 47, out_qsize 3\n", - "2017-08-21 22:36:18,784 : INFO : PROGRESS: at 52.31% examples, 1200394 words/s, in_qsize 48, out_qsize 1\n", - "2017-08-21 22:36:19,788 : INFO : PROGRESS: at 52.45% examples, 1200321 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:36:20,789 : INFO : PROGRESS: at 52.60% examples, 1200358 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:21,810 : INFO : PROGRESS: at 52.75% examples, 1200460 words/s, in_qsize 38, out_qsize 9\n", - "2017-08-21 22:36:22,813 : INFO : PROGRESS: at 52.90% examples, 1200506 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:36:23,814 : 
INFO : PROGRESS: at 53.04% examples, 1200421 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:24,821 : INFO : PROGRESS: at 53.18% examples, 1200241 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:25,832 : INFO : PROGRESS: at 53.34% examples, 1200565 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:26,837 : INFO : PROGRESS: at 53.47% examples, 1200390 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:27,845 : INFO : PROGRESS: at 53.61% examples, 1200270 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:28,859 : INFO : PROGRESS: at 53.76% examples, 1200341 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:36:29,859 : INFO : PROGRESS: at 53.89% examples, 1200069 words/s, in_qsize 46, out_qsize 2\n", - "2017-08-21 22:36:30,872 : INFO : PROGRESS: at 54.05% examples, 1200252 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:36:31,875 : INFO : PROGRESS: at 54.19% examples, 1200111 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:32,878 : INFO : PROGRESS: at 54.33% examples, 1199940 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:33,936 : INFO : PROGRESS: at 54.47% examples, 1199714 words/s, in_qsize 48, out_qsize 5\n", - "2017-08-21 22:36:34,945 : INFO : PROGRESS: at 54.60% examples, 1199482 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:35,962 : INFO : PROGRESS: at 54.76% examples, 1199562 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:36:36,964 : INFO : PROGRESS: at 54.90% examples, 1199553 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:36:37,973 : INFO : PROGRESS: at 55.05% examples, 1199550 words/s, in_qsize 43, out_qsize 4\n", - "2017-08-21 22:36:38,984 : INFO : PROGRESS: at 55.20% examples, 1199684 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:36:39,985 : INFO : PROGRESS: at 55.34% examples, 1199611 words/s, in_qsize 48, out_qsize 2\n", - "2017-08-21 22:36:40,987 : INFO : PROGRESS: at 55.48% examples, 1199437 words/s, in_qsize 45, out_qsize 2\n", - 
"2017-08-21 22:36:42,001 : INFO : PROGRESS: at 55.63% examples, 1199560 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:36:43,012 : INFO : PROGRESS: at 55.78% examples, 1199615 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:44,015 : INFO : PROGRESS: at 55.91% examples, 1199344 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:36:45,027 : INFO : PROGRESS: at 56.05% examples, 1199236 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:46,045 : INFO : PROGRESS: at 56.21% examples, 1199366 words/s, in_qsize 48, out_qsize 4\n", - "2017-08-21 22:36:47,047 : INFO : PROGRESS: at 56.36% examples, 1199513 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:48,059 : INFO : PROGRESS: at 56.51% examples, 1199497 words/s, in_qsize 48, out_qsize 1\n", - "2017-08-21 22:36:49,063 : INFO : PROGRESS: at 56.65% examples, 1199500 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:50,075 : INFO : PROGRESS: at 56.78% examples, 1199119 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:51,090 : INFO : PROGRESS: at 56.93% examples, 1199037 words/s, in_qsize 43, out_qsize 4\n", - "2017-08-21 22:36:52,098 : INFO : PROGRESS: at 57.07% examples, 1198885 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:36:53,104 : INFO : PROGRESS: at 57.21% examples, 1198781 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:54,108 : INFO : PROGRESS: at 57.35% examples, 1198772 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:36:55,139 : INFO : PROGRESS: at 57.49% examples, 1198462 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:56,143 : INFO : PROGRESS: at 57.64% examples, 1198621 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:36:57,143 : INFO : PROGRESS: at 57.78% examples, 1198551 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:58,145 : INFO : PROGRESS: at 57.93% examples, 1198494 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:36:59,146 : INFO : PROGRESS: at 58.07% examples, 1198429 words/s, in_qsize 47, 
out_qsize 0\n", - "2017-08-21 22:37:00,154 : INFO : PROGRESS: at 58.22% examples, 1198423 words/s, in_qsize 39, out_qsize 8\n", - "2017-08-21 22:37:01,161 : INFO : PROGRESS: at 58.35% examples, 1198335 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:37:02,185 : INFO : PROGRESS: at 58.48% examples, 1197966 words/s, in_qsize 48, out_qsize 7\n", - "2017-08-21 22:37:03,188 : INFO : PROGRESS: at 58.63% examples, 1198049 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:37:04,194 : INFO : PROGRESS: at 58.78% examples, 1198161 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:05,210 : INFO : PROGRESS: at 58.91% examples, 1197738 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:37:06,220 : INFO : PROGRESS: at 59.05% examples, 1197659 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:07,238 : INFO : PROGRESS: at 59.19% examples, 1197399 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:37:08,239 : INFO : PROGRESS: at 59.33% examples, 1197473 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:09,246 : INFO : PROGRESS: at 59.47% examples, 1197374 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:10,249 : INFO : PROGRESS: at 59.62% examples, 1197321 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:11,263 : INFO : PROGRESS: at 59.77% examples, 1197357 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:12,263 : INFO : PROGRESS: at 59.89% examples, 1196924 words/s, in_qsize 41, out_qsize 6\n", - "2017-08-21 22:37:13,272 : INFO : PROGRESS: at 60.03% examples, 1196836 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:37:14,278 : INFO : PROGRESS: at 60.16% examples, 1196666 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:15,294 : INFO : PROGRESS: at 60.31% examples, 1196755 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:37:16,298 : INFO : PROGRESS: at 60.44% examples, 1196544 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:37:17,312 : INFO : PROGRESS: at 60.57% examples, 1196223 
words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:18,322 : INFO : PROGRESS: at 60.71% examples, 1196305 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:19,326 : INFO : PROGRESS: at 60.85% examples, 1196249 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:37:20,348 : INFO : PROGRESS: at 60.99% examples, 1196109 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:37:21,368 : INFO : PROGRESS: at 61.12% examples, 1195924 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:37:22,405 : INFO : PROGRESS: at 61.25% examples, 1195760 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:37:23,413 : INFO : PROGRESS: at 61.39% examples, 1195692 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:37:24,421 : INFO : PROGRESS: at 61.52% examples, 1195380 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:37:25,426 : INFO : PROGRESS: at 61.65% examples, 1195191 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:37:26,427 : INFO : PROGRESS: at 61.79% examples, 1195166 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:27,443 : INFO : PROGRESS: at 61.93% examples, 1195117 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:37:28,456 : INFO : PROGRESS: at 62.06% examples, 1194860 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:37:29,460 : INFO : PROGRESS: at 62.19% examples, 1194645 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:30,473 : INFO : PROGRESS: at 62.32% examples, 1194364 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:31,500 : INFO : PROGRESS: at 62.44% examples, 1193977 words/s, in_qsize 43, out_qsize 4\n", - "2017-08-21 22:37:32,506 : INFO : PROGRESS: at 62.59% examples, 1194070 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:33,516 : INFO : PROGRESS: at 62.71% examples, 1193726 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:37:34,534 : INFO : PROGRESS: at 62.85% examples, 1193509 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:37:35,549 : INFO : PROGRESS: at 62.99% 
examples, 1193497 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:37:36,577 : INFO : PROGRESS: at 63.13% examples, 1193519 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:37:37,580 : INFO : PROGRESS: at 63.27% examples, 1193379 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:38,603 : INFO : PROGRESS: at 63.40% examples, 1193165 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:37:39,605 : INFO : PROGRESS: at 63.52% examples, 1192769 words/s, in_qsize 47, out_qsize 7\n", - "2017-08-21 22:37:40,621 : INFO : PROGRESS: at 63.67% examples, 1192826 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:41,632 : INFO : PROGRESS: at 63.81% examples, 1192848 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:42,650 : INFO : PROGRESS: at 63.96% examples, 1192928 words/s, in_qsize 48, out_qsize 1\n", - "2017-08-21 22:37:43,653 : INFO : PROGRESS: at 64.09% examples, 1192826 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:44,655 : INFO : PROGRESS: at 64.22% examples, 1192502 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:37:45,664 : INFO : PROGRESS: at 64.35% examples, 1192398 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:37:46,691 : INFO : PROGRESS: at 64.49% examples, 1192198 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:47,702 : INFO : PROGRESS: at 64.62% examples, 1192090 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:48,719 : INFO : PROGRESS: at 64.76% examples, 1191941 words/s, in_qsize 47, out_qsize 1\n", - "2017-08-21 22:37:49,732 : INFO : PROGRESS: at 64.89% examples, 1191660 words/s, in_qsize 47, out_qsize 1\n", - "2017-08-21 22:37:50,764 : INFO : PROGRESS: at 65.03% examples, 1191529 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:51,787 : INFO : PROGRESS: at 65.16% examples, 1191287 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:37:52,794 : INFO : PROGRESS: at 65.30% examples, 1191179 words/s, in_qsize 47, out_qsize 1\n", - "2017-08-21 22:37:53,808 : INFO : 
PROGRESS: at 65.43% examples, 1191099 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:54,814 : INFO : PROGRESS: at 65.57% examples, 1191070 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:55,821 : INFO : PROGRESS: at 65.71% examples, 1191040 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:56,821 : INFO : PROGRESS: at 65.86% examples, 1191061 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:57,836 : INFO : PROGRESS: at 65.99% examples, 1190937 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:58,842 : INFO : PROGRESS: at 66.13% examples, 1190816 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:37:59,885 : INFO : PROGRESS: at 66.27% examples, 1190638 words/s, in_qsize 47, out_qsize 2\n", - "2017-08-21 22:38:00,890 : INFO : PROGRESS: at 66.41% examples, 1190720 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:01,920 : INFO : PROGRESS: at 66.57% examples, 1190879 words/s, in_qsize 41, out_qsize 7\n", - "2017-08-21 22:38:02,921 : INFO : PROGRESS: at 66.71% examples, 1190914 words/s, in_qsize 44, out_qsize 4\n", - "2017-08-21 22:38:03,922 : INFO : PROGRESS: at 66.84% examples, 1190765 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:04,930 : INFO : PROGRESS: at 66.98% examples, 1190670 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:05,936 : INFO : PROGRESS: at 67.11% examples, 1190497 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:38:06,960 : INFO : PROGRESS: at 67.25% examples, 1190464 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:07,970 : INFO : PROGRESS: at 67.38% examples, 1190218 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:08,984 : INFO : PROGRESS: at 67.52% examples, 1190247 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:09,992 : INFO : PROGRESS: at 67.67% examples, 1190191 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:38:10,993 : INFO : PROGRESS: at 67.80% examples, 1190060 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 
22:38:11,995 : INFO : PROGRESS: at 67.94% examples, 1189905 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:38:13,026 : INFO : PROGRESS: at 68.06% examples, 1189503 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:38:14,028 : INFO : PROGRESS: at 68.20% examples, 1189573 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:38:15,034 : INFO : PROGRESS: at 68.34% examples, 1189550 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:16,056 : INFO : PROGRESS: at 68.47% examples, 1189295 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:38:17,067 : INFO : PROGRESS: at 68.61% examples, 1189218 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:18,076 : INFO : PROGRESS: at 68.75% examples, 1189189 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:19,090 : INFO : PROGRESS: at 68.88% examples, 1188937 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:20,103 : INFO : PROGRESS: at 69.01% examples, 1188659 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:21,110 : INFO : PROGRESS: at 69.14% examples, 1188550 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:38:22,110 : INFO : PROGRESS: at 69.30% examples, 1188746 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:23,111 : INFO : PROGRESS: at 69.43% examples, 1188578 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:24,203 : INFO : PROGRESS: at 69.57% examples, 1188397 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:25,215 : INFO : PROGRESS: at 69.70% examples, 1188144 words/s, in_qsize 46, out_qsize 3\n", - "2017-08-21 22:38:26,250 : INFO : PROGRESS: at 69.85% examples, 1188115 words/s, in_qsize 43, out_qsize 9\n", - "2017-08-21 22:38:27,287 : INFO : PROGRESS: at 69.99% examples, 1188049 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:28,304 : INFO : PROGRESS: at 70.13% examples, 1187902 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:38:29,306 : INFO : PROGRESS: at 70.27% examples, 1187834 words/s, in_qsize 48, out_qsize 
0\n", - "2017-08-21 22:38:30,329 : INFO : PROGRESS: at 70.40% examples, 1187594 words/s, in_qsize 42, out_qsize 5\n", - "2017-08-21 22:38:31,335 : INFO : PROGRESS: at 70.54% examples, 1187577 words/s, in_qsize 48, out_qsize 2\n", - "2017-08-21 22:38:32,356 : INFO : PROGRESS: at 70.67% examples, 1187378 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:38:33,356 : INFO : PROGRESS: at 70.81% examples, 1187334 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:34,361 : INFO : PROGRESS: at 70.94% examples, 1187041 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:35,375 : INFO : PROGRESS: at 71.07% examples, 1186783 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:38:36,379 : INFO : PROGRESS: at 71.21% examples, 1186729 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:37,387 : INFO : PROGRESS: at 71.35% examples, 1186660 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:38,396 : INFO : PROGRESS: at 71.48% examples, 1186440 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:39,399 : INFO : PROGRESS: at 71.61% examples, 1186354 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:38:40,404 : INFO : PROGRESS: at 71.75% examples, 1186330 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:41,405 : INFO : PROGRESS: at 71.88% examples, 1186116 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:42,418 : INFO : PROGRESS: at 72.02% examples, 1185959 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:38:43,432 : INFO : PROGRESS: at 72.15% examples, 1185728 words/s, in_qsize 46, out_qsize 2\n", - "2017-08-21 22:38:44,454 : INFO : PROGRESS: at 72.28% examples, 1185480 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:38:45,474 : INFO : PROGRESS: at 72.42% examples, 1185419 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:38:46,479 : INFO : PROGRESS: at 72.57% examples, 1185437 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:38:47,495 : INFO : PROGRESS: at 72.69% examples, 1185114 words/s, 
in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:48,540 : INFO : PROGRESS: at 72.84% examples, 1185041 words/s, in_qsize 48, out_qsize 7\n", - "2017-08-21 22:38:49,545 : INFO : PROGRESS: at 72.98% examples, 1185016 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:50,545 : INFO : PROGRESS: at 73.12% examples, 1185014 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:51,562 : INFO : PROGRESS: at 73.26% examples, 1184917 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:38:52,573 : INFO : PROGRESS: at 73.39% examples, 1184764 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:53,588 : INFO : PROGRESS: at 73.53% examples, 1184635 words/s, in_qsize 42, out_qsize 5\n", - "2017-08-21 22:38:54,598 : INFO : PROGRESS: at 73.66% examples, 1184519 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:55,614 : INFO : PROGRESS: at 73.82% examples, 1184760 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:38:56,619 : INFO : PROGRESS: at 73.96% examples, 1184625 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:57,623 : INFO : PROGRESS: at 74.10% examples, 1184531 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:38:58,626 : INFO : PROGRESS: at 74.25% examples, 1184717 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:38:59,630 : INFO : PROGRESS: at 74.39% examples, 1184726 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:00,669 : INFO : PROGRESS: at 74.54% examples, 1184647 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:01,671 : INFO : PROGRESS: at 74.69% examples, 1184711 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:39:02,681 : INFO : PROGRESS: at 74.83% examples, 1184648 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:39:03,687 : INFO : PROGRESS: at 74.97% examples, 1184650 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:04,689 : INFO : PROGRESS: at 75.13% examples, 1184840 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:05,690 : INFO : PROGRESS: at 75.27% examples, 
1184786 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:06,697 : INFO : PROGRESS: at 75.41% examples, 1184707 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:07,701 : INFO : PROGRESS: at 75.56% examples, 1184848 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:39:08,725 : INFO : PROGRESS: at 75.68% examples, 1184509 words/s, in_qsize 40, out_qsize 7\n", - "2017-08-21 22:39:09,727 : INFO : PROGRESS: at 75.83% examples, 1184479 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:39:10,740 : INFO : PROGRESS: at 75.97% examples, 1184492 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:39:11,767 : INFO : PROGRESS: at 76.11% examples, 1184422 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:12,805 : INFO : PROGRESS: at 76.24% examples, 1184056 words/s, in_qsize 42, out_qsize 5\n", - "2017-08-21 22:39:13,815 : INFO : PROGRESS: at 76.37% examples, 1183947 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:14,822 : INFO : PROGRESS: at 76.52% examples, 1183976 words/s, in_qsize 41, out_qsize 6\n", - "2017-08-21 22:39:15,824 : INFO : PROGRESS: at 76.65% examples, 1183692 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:16,841 : INFO : PROGRESS: at 76.78% examples, 1183484 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:39:17,851 : INFO : PROGRESS: at 76.93% examples, 1183597 words/s, in_qsize 42, out_qsize 5\n", - "2017-08-21 22:39:18,857 : INFO : PROGRESS: at 77.08% examples, 1183562 words/s, in_qsize 48, out_qsize 2\n", - "2017-08-21 22:39:19,862 : INFO : PROGRESS: at 77.21% examples, 1183435 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:39:20,863 : INFO : PROGRESS: at 77.35% examples, 1183345 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:39:21,873 : INFO : PROGRESS: at 77.49% examples, 1183243 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:39:22,875 : INFO : PROGRESS: at 77.61% examples, 1182892 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:39:23,883 : INFO : PROGRESS: at 
77.73% examples, 1182642 words/s, in_qsize 42, out_qsize 5\n", - "2017-08-21 22:39:24,893 : INFO : PROGRESS: at 77.88% examples, 1182598 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:25,906 : INFO : PROGRESS: at 78.01% examples, 1182420 words/s, in_qsize 43, out_qsize 4\n", - "2017-08-21 22:39:26,911 : INFO : PROGRESS: at 78.16% examples, 1182454 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:39:27,915 : INFO : PROGRESS: at 78.29% examples, 1182254 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:39:28,947 : INFO : PROGRESS: at 78.44% examples, 1182315 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:29,952 : INFO : PROGRESS: at 78.57% examples, 1182196 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:30,971 : INFO : PROGRESS: at 78.71% examples, 1182068 words/s, in_qsize 43, out_qsize 4\n", - "2017-08-21 22:39:31,976 : INFO : PROGRESS: at 78.86% examples, 1182198 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:39:32,978 : INFO : PROGRESS: at 79.00% examples, 1182109 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:39:33,980 : INFO : PROGRESS: at 79.13% examples, 1181929 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:39:35,001 : INFO : PROGRESS: at 79.26% examples, 1181647 words/s, in_qsize 48, out_qsize 6\n", - "2017-08-21 22:39:36,017 : INFO : PROGRESS: at 79.39% examples, 1181530 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:37,019 : INFO : PROGRESS: at 79.52% examples, 1181329 words/s, in_qsize 48, out_qsize 3\n", - "2017-08-21 22:39:38,022 : INFO : PROGRESS: at 79.66% examples, 1181255 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:39,029 : INFO : PROGRESS: at 79.81% examples, 1181361 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:40,032 : INFO : PROGRESS: at 79.95% examples, 1181349 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:41,035 : INFO : PROGRESS: at 80.09% examples, 1181233 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:39:42,063 : 
INFO : PROGRESS: at 80.23% examples, 1181199 words/s, in_qsize 43, out_qsize 4\n", - "2017-08-21 22:39:43,068 : INFO : PROGRESS: at 80.38% examples, 1181344 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:39:44,082 : INFO : PROGRESS: at 80.51% examples, 1181297 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:39:45,121 : INFO : PROGRESS: at 80.65% examples, 1181273 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:46,121 : INFO : PROGRESS: at 80.80% examples, 1181429 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:47,136 : INFO : PROGRESS: at 80.94% examples, 1181367 words/s, in_qsize 42, out_qsize 5\n", - "2017-08-21 22:39:48,163 : INFO : PROGRESS: at 81.07% examples, 1181228 words/s, in_qsize 42, out_qsize 5\n", - "2017-08-21 22:39:49,168 : INFO : PROGRESS: at 81.19% examples, 1180936 words/s, in_qsize 34, out_qsize 13\n", - "2017-08-21 22:39:50,177 : INFO : PROGRESS: at 81.33% examples, 1180923 words/s, in_qsize 43, out_qsize 4\n", - "2017-08-21 22:39:51,193 : INFO : PROGRESS: at 81.48% examples, 1181057 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:39:52,195 : INFO : PROGRESS: at 81.61% examples, 1180973 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:53,195 : INFO : PROGRESS: at 81.75% examples, 1180943 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:54,209 : INFO : PROGRESS: at 81.88% examples, 1180799 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:55,209 : INFO : PROGRESS: at 82.03% examples, 1180907 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:56,211 : INFO : PROGRESS: at 82.16% examples, 1180849 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:39:57,216 : INFO : PROGRESS: at 82.29% examples, 1180663 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:39:58,234 : INFO : PROGRESS: at 82.45% examples, 1180947 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:39:59,250 : INFO : PROGRESS: at 82.60% examples, 1181056 words/s, in_qsize 47, out_qsize 0\n", - 
"2017-08-21 22:40:00,252 : INFO : PROGRESS: at 82.73% examples, 1180924 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:01,289 : INFO : PROGRESS: at 82.87% examples, 1180842 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:40:02,306 : INFO : PROGRESS: at 83.02% examples, 1180902 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:40:03,326 : INFO : PROGRESS: at 83.16% examples, 1180869 words/s, in_qsize 45, out_qsize 3\n", - "2017-08-21 22:40:04,343 : INFO : PROGRESS: at 83.29% examples, 1180782 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:05,346 : INFO : PROGRESS: at 83.44% examples, 1180950 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:40:06,355 : INFO : PROGRESS: at 83.58% examples, 1180941 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:40:07,367 : INFO : PROGRESS: at 83.74% examples, 1181147 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:40:08,369 : INFO : PROGRESS: at 83.88% examples, 1181127 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:40:09,373 : INFO : PROGRESS: at 84.01% examples, 1181067 words/s, in_qsize 47, out_qsize 4\n", - "2017-08-21 22:40:10,402 : INFO : PROGRESS: at 84.15% examples, 1180972 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:40:11,433 : INFO : PROGRESS: at 84.28% examples, 1180857 words/s, in_qsize 40, out_qsize 7\n", - "2017-08-21 22:40:12,452 : INFO : PROGRESS: at 84.42% examples, 1180791 words/s, in_qsize 43, out_qsize 4\n", - "2017-08-21 22:40:13,496 : INFO : PROGRESS: at 84.55% examples, 1180519 words/s, in_qsize 44, out_qsize 10\n", - "2017-08-21 22:40:14,503 : INFO : PROGRESS: at 84.69% examples, 1180527 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:40:15,514 : INFO : PROGRESS: at 84.83% examples, 1180461 words/s, in_qsize 47, out_qsize 1\n", - "2017-08-21 22:40:16,518 : INFO : PROGRESS: at 84.96% examples, 1180305 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:40:17,522 : INFO : PROGRESS: at 85.10% examples, 1180320 words/s, in_qsize 47, 
out_qsize 0\n", - "2017-08-21 22:40:18,526 : INFO : PROGRESS: at 85.25% examples, 1180438 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:40:19,530 : INFO : PROGRESS: at 85.39% examples, 1180561 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:20,532 : INFO : PROGRESS: at 85.52% examples, 1180337 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:40:21,537 : INFO : PROGRESS: at 85.66% examples, 1180332 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:40:22,540 : INFO : PROGRESS: at 85.79% examples, 1180243 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:23,565 : INFO : PROGRESS: at 85.93% examples, 1180147 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:40:24,569 : INFO : PROGRESS: at 86.07% examples, 1180137 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:25,577 : INFO : PROGRESS: at 86.22% examples, 1180180 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:40:26,580 : INFO : PROGRESS: at 86.35% examples, 1180068 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:27,584 : INFO : PROGRESS: at 86.48% examples, 1179960 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:28,600 : INFO : PROGRESS: at 86.62% examples, 1179901 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:29,606 : INFO : PROGRESS: at 86.75% examples, 1179866 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:40:30,613 : INFO : PROGRESS: at 86.90% examples, 1179923 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:31,616 : INFO : PROGRESS: at 87.04% examples, 1180021 words/s, in_qsize 45, out_qsize 0\n", - "2017-08-21 22:40:32,633 : INFO : PROGRESS: at 87.16% examples, 1179695 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:40:33,646 : INFO : PROGRESS: at 87.30% examples, 1179776 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:34,665 : INFO : PROGRESS: at 87.45% examples, 1179776 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:40:35,668 : INFO : PROGRESS: at 87.59% examples, 1179824 
words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:36,688 : INFO : PROGRESS: at 87.74% examples, 1179809 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:37,712 : INFO : PROGRESS: at 87.88% examples, 1179771 words/s, in_qsize 41, out_qsize 6\n", - "2017-08-21 22:40:38,723 : INFO : PROGRESS: at 88.02% examples, 1179772 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:40:39,723 : INFO : PROGRESS: at 88.17% examples, 1179789 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:40,733 : INFO : PROGRESS: at 88.30% examples, 1179746 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:40:41,734 : INFO : PROGRESS: at 88.44% examples, 1179642 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:40:42,760 : INFO : PROGRESS: at 88.58% examples, 1179718 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:43,766 : INFO : PROGRESS: at 88.72% examples, 1179646 words/s, in_qsize 48, out_qsize 3\n", - "2017-08-21 22:40:44,787 : INFO : PROGRESS: at 88.87% examples, 1179718 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:40:45,788 : INFO : PROGRESS: at 89.00% examples, 1179667 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:40:46,798 : INFO : PROGRESS: at 89.15% examples, 1179683 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:47,831 : INFO : PROGRESS: at 89.29% examples, 1179696 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:48,833 : INFO : PROGRESS: at 89.44% examples, 1179728 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:49,838 : INFO : PROGRESS: at 89.56% examples, 1179535 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:40:50,853 : INFO : PROGRESS: at 89.71% examples, 1179565 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:51,860 : INFO : PROGRESS: at 89.86% examples, 1179616 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:52,877 : INFO : PROGRESS: at 90.00% examples, 1179595 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:53,895 : INFO : PROGRESS: at 90.16% 
examples, 1179751 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:40:54,899 : INFO : PROGRESS: at 90.30% examples, 1179843 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:55,906 : INFO : PROGRESS: at 90.43% examples, 1179669 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:56,912 : INFO : PROGRESS: at 90.58% examples, 1179693 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:40:57,915 : INFO : PROGRESS: at 90.72% examples, 1179684 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:40:58,934 : INFO : PROGRESS: at 90.85% examples, 1179525 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:40:59,948 : INFO : PROGRESS: at 90.99% examples, 1179403 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:41:00,949 : INFO : PROGRESS: at 91.13% examples, 1179490 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:41:01,953 : INFO : PROGRESS: at 91.28% examples, 1179525 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:02,964 : INFO : PROGRESS: at 91.43% examples, 1179590 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:03,969 : INFO : PROGRESS: at 91.56% examples, 1179522 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:04,976 : INFO : PROGRESS: at 91.70% examples, 1179496 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:05,977 : INFO : PROGRESS: at 91.84% examples, 1179515 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:06,980 : INFO : PROGRESS: at 91.99% examples, 1179566 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:08,004 : INFO : PROGRESS: at 92.14% examples, 1179609 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:09,015 : INFO : PROGRESS: at 92.27% examples, 1179378 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:41:10,019 : INFO : PROGRESS: at 92.40% examples, 1179314 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:41:11,022 : INFO : PROGRESS: at 92.55% examples, 1179429 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:12,034 : INFO : 
PROGRESS: at 92.69% examples, 1179357 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:41:13,050 : INFO : PROGRESS: at 92.85% examples, 1179579 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:41:14,060 : INFO : PROGRESS: at 92.98% examples, 1179373 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:41:15,067 : INFO : PROGRESS: at 93.12% examples, 1179381 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:41:16,080 : INFO : PROGRESS: at 93.26% examples, 1179362 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:41:17,085 : INFO : PROGRESS: at 93.41% examples, 1179436 words/s, in_qsize 48, out_qsize 4\n", - "2017-08-21 22:41:18,089 : INFO : PROGRESS: at 93.56% examples, 1179448 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:19,114 : INFO : PROGRESS: at 93.68% examples, 1179230 words/s, in_qsize 48, out_qsize 2\n", - "2017-08-21 22:41:20,116 : INFO : PROGRESS: at 93.80% examples, 1178867 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:41:21,122 : INFO : PROGRESS: at 93.93% examples, 1178758 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:22,128 : INFO : PROGRESS: at 94.08% examples, 1178830 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:41:23,151 : INFO : PROGRESS: at 94.23% examples, 1178832 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:41:24,193 : INFO : PROGRESS: at 94.35% examples, 1178504 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:41:25,216 : INFO : PROGRESS: at 94.48% examples, 1178408 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:41:26,231 : INFO : PROGRESS: at 94.63% examples, 1178469 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:41:27,242 : INFO : PROGRESS: at 94.79% examples, 1178640 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:41:28,245 : INFO : PROGRESS: at 94.92% examples, 1178526 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:41:29,253 : INFO : PROGRESS: at 95.06% examples, 1178384 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 
22:41:30,303 : INFO : PROGRESS: at 95.22% examples, 1178586 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:31,323 : INFO : PROGRESS: at 95.36% examples, 1178532 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:32,350 : INFO : PROGRESS: at 95.51% examples, 1178558 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:33,362 : INFO : PROGRESS: at 95.66% examples, 1178568 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:41:34,385 : INFO : PROGRESS: at 95.80% examples, 1178516 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:41:35,392 : INFO : PROGRESS: at 95.95% examples, 1178617 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:41:36,401 : INFO : PROGRESS: at 96.10% examples, 1178643 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:37,425 : INFO : PROGRESS: at 96.24% examples, 1178601 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:41:38,451 : INFO : PROGRESS: at 96.37% examples, 1178389 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:41:39,453 : INFO : PROGRESS: at 96.51% examples, 1178407 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:40,457 : INFO : PROGRESS: at 96.66% examples, 1178429 words/s, in_qsize 38, out_qsize 9\n", - "2017-08-21 22:41:41,465 : INFO : PROGRESS: at 96.80% examples, 1178443 words/s, in_qsize 45, out_qsize 2\n", - "2017-08-21 22:41:42,482 : INFO : PROGRESS: at 96.95% examples, 1178477 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:43,487 : INFO : PROGRESS: at 97.10% examples, 1178499 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:44,489 : INFO : PROGRESS: at 97.24% examples, 1178537 words/s, in_qsize 47, out_qsize 2\n", - "2017-08-21 22:41:45,492 : INFO : PROGRESS: at 97.39% examples, 1178510 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:46,516 : INFO : PROGRESS: at 97.52% examples, 1178405 words/s, in_qsize 43, out_qsize 5\n", - "2017-08-21 22:41:47,518 : INFO : PROGRESS: at 97.67% examples, 1178412 words/s, in_qsize 46, out_qsize 
1\n", - "2017-08-21 22:41:48,528 : INFO : PROGRESS: at 97.82% examples, 1178519 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:49,589 : INFO : PROGRESS: at 97.97% examples, 1178511 words/s, in_qsize 48, out_qsize 6\n", - "2017-08-21 22:41:50,591 : INFO : PROGRESS: at 98.12% examples, 1178578 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:51,606 : INFO : PROGRESS: at 98.25% examples, 1178377 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:52,613 : INFO : PROGRESS: at 98.38% examples, 1178330 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:41:53,614 : INFO : PROGRESS: at 98.53% examples, 1178359 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:54,620 : INFO : PROGRESS: at 98.66% examples, 1178235 words/s, in_qsize 39, out_qsize 8\n", - "2017-08-21 22:41:55,636 : INFO : PROGRESS: at 98.82% examples, 1178345 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:41:56,659 : INFO : PROGRESS: at 98.94% examples, 1178082 words/s, in_qsize 48, out_qsize 0\n", - "2017-08-21 22:41:57,661 : INFO : PROGRESS: at 99.09% examples, 1178128 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:41:58,664 : INFO : PROGRESS: at 99.23% examples, 1178184 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:41:59,667 : INFO : PROGRESS: at 99.38% examples, 1178198 words/s, in_qsize 46, out_qsize 1\n", - "2017-08-21 22:42:00,679 : INFO : PROGRESS: at 99.52% examples, 1178200 words/s, in_qsize 47, out_qsize 0\n", - "2017-08-21 22:42:01,695 : INFO : PROGRESS: at 99.67% examples, 1178269 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:42:02,704 : INFO : PROGRESS: at 99.82% examples, 1178283 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:42:03,724 : INFO : PROGRESS: at 99.96% examples, 1178230 words/s, in_qsize 44, out_qsize 3\n", - "2017-08-21 22:42:03,889 : INFO : worker thread finished; awaiting finish of 23 more threads\n", - "2017-08-21 22:42:03,892 : INFO : worker thread finished; awaiting finish of 22 more threads\n", - 
"2017-08-21 22:42:03,895 : INFO : worker thread finished; awaiting finish of 21 more threads\n", - "2017-08-21 22:42:03,897 : INFO : worker thread finished; awaiting finish of 20 more threads\n", - "2017-08-21 22:42:03,904 : INFO : worker thread finished; awaiting finish of 19 more threads\n", - "2017-08-21 22:42:03,915 : INFO : worker thread finished; awaiting finish of 18 more threads\n", - "2017-08-21 22:42:03,917 : INFO : worker thread finished; awaiting finish of 17 more threads\n", - "2017-08-21 22:42:03,919 : INFO : worker thread finished; awaiting finish of 16 more threads\n", - "2017-08-21 22:42:03,923 : INFO : worker thread finished; awaiting finish of 15 more threads\n", - "2017-08-21 22:42:03,925 : INFO : worker thread finished; awaiting finish of 14 more threads\n", - "2017-08-21 22:42:03,928 : INFO : worker thread finished; awaiting finish of 13 more threads\n", - "2017-08-21 22:42:03,929 : INFO : worker thread finished; awaiting finish of 12 more threads\n", - "2017-08-21 22:42:03,932 : INFO : worker thread finished; awaiting finish of 11 more threads\n", - "2017-08-21 22:42:03,936 : INFO : worker thread finished; awaiting finish of 10 more threads\n", - "2017-08-21 22:42:03,937 : INFO : worker thread finished; awaiting finish of 9 more threads\n", - "2017-08-21 22:42:03,944 : INFO : worker thread finished; awaiting finish of 8 more threads\n", - "2017-08-21 22:42:03,945 : INFO : worker thread finished; awaiting finish of 7 more threads\n", - "2017-08-21 22:42:03,953 : INFO : worker thread finished; awaiting finish of 6 more threads\n", - "2017-08-21 22:42:03,954 : INFO : worker thread finished; awaiting finish of 5 more threads\n", - "2017-08-21 22:42:03,958 : INFO : worker thread finished; awaiting finish of 4 more threads\n", - "2017-08-21 22:42:03,959 : INFO : worker thread finished; awaiting finish of 3 more threads\n", - "2017-08-21 22:42:03,962 : INFO : worker thread finished; awaiting finish of 2 more threads\n", - "2017-08-21 22:42:03,968 : 
# %% Train word2vec embeddings
import logging

import gensim

# gensim reports training progress through the `logging` module. At INFO level
# it emits a PROGRESS line about once per second (the recorded run took ~711 s),
# so consider clearing this cell's output before sharing the notebook.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# use skip-gram (sg=1); words seen fewer than `min_count` times are dropped.
# `sent_lst` and `EMBEDDING_DIM` come from earlier cells of the notebook.
word2vec_model = gensim.models.Word2Vec(
    sentences=sent_lst,
    min_count=6,
    size=EMBEDDING_DIM,
    sg=1,
    workers=os.cpu_count(),
)

# %% [markdown]
# Create the initial embedding matrix from the output of word2vec.

# %%
# word -> float32 vector for every word kept in the word2vec vocabulary.
embeddings_index = {
    word: np.asarray(word2vec_model.wv[word], dtype='float32')
    for word in word2vec_model.wv.vocab
}

print('Total %s word vectors.' % len(embeddings_index))

# Initial embedding: row i holds the word2vec vector of the word whose
# tokenizer index is i. Rows stay all-zero for indices without a vector.
embedding_matrix = np.zeros((VOCAB_SIZE, EMBEDDING_DIM))

for word, i in tok.word_index.items():
    if i >= VOCAB_SIZE:
        # Index falls outside the embedding matrix; nothing to fill in.
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

# %% [markdown]
# LSTM_DIM is the dimensionality of each LSTM output (the number of LSTM units).
# The mask_zero option determines whether masking is performed, i.e. whether the
# layers ignore the padded zeros in shorter documents. CNTK / Keras does not
# support masking yet.

# %%
BATCH_SIZE = 100
NUM_EPOCHS = 10
LSTM_DIM = 100
# NOTE(review): `nesterov=True` only matters when momentum > 0, and no momentum
# is passed here (the Keras SGD default is 0.0) -- confirm whether plain SGD is
# intended.
OPTIMIZER = SGD(lr=0.01, nesterov=True)


# %%
def lstm_create_train(reg_param):
    """Build, train and persist a bidirectional LSTM classifier.

    The network is Embedding (initialised from `embedding_matrix`, trainable)
    -> Bidirectional(LSTM) -> Dense softmax. The same L2 penalty `reg_param`
    is applied to the embedding weights and to the LSTM and Dense kernels.

    Reads the notebook globals VOCAB_SIZE, EMBEDDING_DIM, MAX_DOC_LENGTH,
    LSTM_DIM, n_classes, embedding_matrix, OPTIMIZER, BATCH_SIZE, NUM_EPOCHS,
    train_seq and labels (several are defined in earlier cells; `labels` is
    presumably one-hot encoded given the categorical cross-entropy loss --
    confirm against the cell that builds it).

    Side effects (all paths relative to the working directory):
      * ./lstm_model_wvec_<reg_param>_log   per-epoch CSV log (appended to)
      * ./lstm_wvec_<reg_param>_model.h5    the trained model
      * ./lstm_wvec_<reg_param>_time.txt    reg_param and training time in hours
      * ./lstm_wvec_<reg_param>_history.txt str() of the Keras history dict

    Args:
        reg_param: L2 regularisation strength; also embedded in the file names.

    Returns:
        The trained Keras `Sequential` model.
    """
    l2_reg = regularizers.l2(reg_param)

    # model init
    model = Sequential()
    model.add(Embedding(VOCAB_SIZE,
                        EMBEDDING_DIM,
                        input_length=MAX_DOC_LENGTH,
                        trainable=True,
                        mask_zero=False,
                        embeddings_regularizer=l2_reg,
                        weights=[embedding_matrix]))
    model.add(Bidirectional(LSTM(units=LSTM_DIM, kernel_regularizer=l2_reg)))
    model.add(Dense(n_classes, activation='softmax', kernel_regularizer=l2_reg))

    # Clone the configured optimizer so that every call trains with its own
    # optimizer state instead of sharing the module-level OPTIMIZER instance
    # across models.
    optimizer = type(OPTIMIZER).from_config(OPTIMIZER.get_config())

    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['acc'])

    csv_logger = CSVLogger('./lstm_model_wvec_{}_log'.format(reg_param),
                           separator=',',
                           append=True)

    t1 = time.time()
    # model fit -- `fit` returns the History object Keras records by itself,
    # so no explicit History callback is needed.
    history = model.fit(train_seq,
                        labels.astype('float32'),
                        batch_size=BATCH_SIZE,
                        epochs=NUM_EPOCHS,
                        callbacks=[csv_logger],
                        verbose=2)
    t2 = time.time()

    # save model, wall-clock training time (in hours) and the metric history
    model.save('./lstm_wvec_{}_model.h5'.format(reg_param))
    np.savetxt('./lstm_wvec_{}_time.txt'.format(reg_param),
               [reg_param, (t2 - t1) / 3600])
    with open('./lstm_wvec_{}_history.txt'.format(reg_param), "w") as res_file:
        res_file.write(str(history.history))

    return model


# %%
lstm_model = lstm_create_train(1e-10)