From 33f5474e7664e44be0d48b2c950bce7284ccd3e8 Mon Sep 17 00:00:00 2001 From: dv-fenix <45421556+dv-fenix@users.noreply.github.com> Date: Sun, 5 Apr 2020 01:11:16 +0530 Subject: [PATCH] Update 6.1-using-word-embeddings.ipynb On running the cell under "Putting it all together: from raw text to word embeddings", the kernel throws the following error: UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 1757: character maps to <undefined>. This happens because the dataset files at the provided link are UTF-8 encoded, but open() falls back to the platform's default 'charmap' codec when no encoding is given. Specifying the utf8 encoding as a parameter when opening the files prevents this error. --- 6.1-using-word-embeddings.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/6.1-using-word-embeddings.ipynb b/6.1-using-word-embeddings.ipynb index 8596b2a218..62510c1e43 100644 --- a/6.1-using-word-embeddings.ipynb +++ b/6.1-using-word-embeddings.ipynb @@ -354,7 +354,7 @@ " dir_name = os.path.join(train_dir, label_type)\n", " for fname in os.listdir(dir_name):\n", " if fname[-4:] == '.txt':\n", - " f = open(os.path.join(dir_name, fname))\n", + " f = open(os.path.join(dir_name, fname), encoding = "utf8")\n", " texts.append(f.read())\n", " f.close()\n", " if label_type == 'neg':\n",