Skip to content

Commit

Permalink
Edit preprocessing
Browse files Browse the repository at this point in the history
  • Loading branch information
Ubuntu committed Sep 12, 2017
1 parent 43f7be7 commit 12c186e
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions lstm_word2vec.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,8 @@
],
"source": [
"print('Number of reviews by class in training set')\n",
"print(y_train.sum(axis=0))\n",
"n_classes = y_train.shape[1]"
"print(labels.sum(axis=0))\n",
"n_classes = labels.shape[1]"
]
},
{
Expand Down Expand Up @@ -232,7 +232,9 @@
"\n",
"for doc in train_data[TEXT_COL]:\n",
" sentences = nltk.tokenize.sent_tokenize(doc)\n",
" sent_lst.extend(sentences)"
" for sent in sentences:\n",
" word_lst = [w for w in nltk.tokenize.word_tokenize(sent) if w.isalnum()]\n",
" sent_lst.append(word_lst)"
]
},
{
Expand Down

0 comments on commit 12c186e

Please sign in to comment.