From 33f5474e7664e44be0d48b2c950bce7284ccd3e8 Mon Sep 17 00:00:00 2001
From: dv-fenix <45421556+dv-fenix@users.noreply.github.com>
Date: Sun, 5 Apr 2020 01:11:16 +0530
Subject: [PATCH] Update 6.1-using-word-embeddings.ipynb

On running the cell under "Putting it all together:  from raw text to word embeddings", the kernel throws the following error:
UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 1757: character maps to <undefined>

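This happens because the dataset files at the provided link are UTF-8 encoded, but `open()` falls back to the platform's default codec (the 'charmap' codec in the error above, e.g. cp1252 on Windows) when no encoding is given. Passing the encoding explicitly as a parameter when opening the files prevents this error.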
---
 6.1-using-word-embeddings.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/6.1-using-word-embeddings.ipynb b/6.1-using-word-embeddings.ipynb
index 8596b2a218..62510c1e43 100644
--- a/6.1-using-word-embeddings.ipynb
+++ b/6.1-using-word-embeddings.ipynb
@@ -354,7 +354,7 @@
     "    dir_name = os.path.join(train_dir, label_type)\n",
     "    for fname in os.listdir(dir_name):\n",
     "        if fname[-4:] == '.txt':\n",
-    "            f = open(os.path.join(dir_name, fname))\n",
+    "            f = open(os.path.join(dir_name, fname), encoding='utf8')\n",
     "            texts.append(f.read())\n",
     "            f.close()\n",
     "            if label_type == 'neg':\n",