From 5d18c39c9125ea5760b30a4852df380b96e61f45 Mon Sep 17 00:00:00 2001
From: Ching Lam Choi <ccl5a09@gmail.com>
Date: Sat, 4 Jan 2020 13:31:36 +0800
Subject: [PATCH 1/6] Added annotation

---
 text/char-rnn/char-rnn_annotated.ipynb | 180 +++++++++++++++++++++++++
 text/char-rnn/char-rnn_annotated.jl    |  80 +++++++++++
 2 files changed, 260 insertions(+)
 create mode 100644 text/char-rnn/char-rnn_annotated.ipynb
 create mode 100644 text/char-rnn/char-rnn_annotated.jl

diff --git a/text/char-rnn/char-rnn_annotated.ipynb b/text/char-rnn/char-rnn_annotated.ipynb
new file mode 100644
index 000000000..a099329c6
--- /dev/null
+++ b/text/char-rnn/char-rnn_annotated.ipynb
@@ -0,0 +1,180 @@
+{
+ "cells": [
+  {
+   "outputs": [],
+   "cell_type": "markdown",
+   "source": [
+    "# Character-level Recurrent Neural Network"
+   ],
+   "metadata": {}
+  },
+  {
+   "outputs": [],
+   "cell_type": "markdown",
+   "source": [
+    "# 1. Import Dependencies"
+   ],
+   "metadata": {}
+  },
+  {
+   "outputs": [],
+   "cell_type": "code",
+   "source": [
+    "using Flux\n",
+    "using Flux: onehot, chunk, batchseq, throttle, crossentropy\n",
+    "using StatsBase: wsample\n",
+    "using Base.Iterators: partition"
+   ],
+   "metadata": {},
+   "execution_count": null
+  },
+  {
+   "outputs": [],
+   "cell_type": "markdown",
+   "source": [
+    "# 2. Data Download & Pre-processing\n",
+    "- Source of data: Shakespeare text from https://cs.stanford.edu/people/karpathy/char-rnn/\n",
+    "- Generate character tokens\n",
+    "- Partition in batches for input"
+   ],
+   "metadata": {}
+  },
+  {
+   "outputs": [],
+   "cell_type": "code",
+   "source": [
+    "cd(@__DIR__)\n",
+    "\n",
+    "isfile(\"input.txt\") ||\n",
+    "  download(\"https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt\",\n",
+    "           \"input.txt\")\n",
+    "\n",
+    "#Read input.txt into an array of characters (N, defined below, is the alphabet size, not the text length):\n",
+    "text = collect(String(read(\"input.txt\")))\n",
+    "alphabet = [unique(text)..., '_'] #get unique char array\n",
+    "#Generate array of one-hot vectors for each character in the text.\n",
+    "#Each vector has N-elements, where 1 element in N is set to true (others: false):\n",
+    "text = map(ch -> onehot(ch, alphabet), text)\n",
+    "stop = onehot('_', alphabet) #generate end token\n",
+    "\n",
+    "N = length(alphabet)\n",
+    "seqlen = 50 #sequence length: number of time steps per training sequence\n",
+    "nbatch = 50 #batch size: number of text chunks processed in parallel\n",
+    "\n",
+    "Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen)) #get array of minibatches of input x\n",
+    "Ys = collect(partition(batchseq(chunk(text[2:end], nbatch), stop), seqlen)) #get array of minibatches of \"label\" y"
+   ],
+   "metadata": {},
+   "execution_count": null
+  },
+  {
+   "outputs": [],
+   "cell_type": "markdown",
+   "source": [
+    "# 3. Define RNN Model, Hyperparameters"
+   ],
+   "metadata": {}
+  },
+  {
+   "outputs": [],
+   "cell_type": "code",
+   "source": [
+    "#Flux's chain function joins multiple layers together, such that layer operations are performed on input sequentially.\n",
+    "m = Chain(\n",
+    "  LSTM(N, 128), #Long Short-term Memory of feature space size 128\n",
+    "  LSTM(128, 128), # output is 128-dimensional\n",
+    "  Dense(128, N), #N = number of possible tokens\n",
+    "  softmax) #calculate the probability of output char corr. to each possible char\n",
+    "\n",
+    "m = gpu(m) #use GPU acceleration\n",
+    "\n",
+    "function loss(xs, ys) #CE loss, or log loss, quantifies the performance of models with probability output\n",
+    "  l = sum(crossentropy.(m.(gpu.(xs)), gpu.(ys))) #pass to GPU and get cost\n",
+    "  Flux.truncate!(m)\n",
+    "  return l\n",
+    "end\n",
+    "\n",
+    "opt = ADAM(0.01) #use the ADAM optimiser with learning rate of 0.01\n",
+    "tx, ty = (Xs[5], Ys[5])\n",
+    "evalcb = () -> @show loss(tx, ty)"
+   ],
+   "metadata": {},
+   "execution_count": null
+  },
+  {
+   "outputs": [],
+   "cell_type": "markdown",
+   "source": [
+    "# 4. Train model"
+   ],
+   "metadata": {}
+  },
+  {
+   "outputs": [],
+   "cell_type": "code",
+   "source": [
+    "Flux.train!(loss, params(m), zip(Xs, Ys), opt,\n",
+    "            cb = throttle(evalcb, 30)) #run the evaluation callback at most once every 30 secs"
+   ],
+   "metadata": {},
+   "execution_count": null
+  },
+  {
+   "outputs": [],
+   "cell_type": "markdown",
+   "source": [
+    "# 5. Sample from the trained model\n",
+    "Compose a 1000-char long verse in the style of Shakespeare!"
+   ],
+   "metadata": {}
+  },
+  {
+   "outputs": [],
+   "cell_type": "code",
+   "source": [
+    "function sample(m, alphabet, len)\n",
+    "  m = cpu(m) #use cpu as gpu offers minimal acc for seq models\n",
+    "  Flux.reset!(m)\n",
+    "  buf = IOBuffer()\n",
+    "  c = rand(alphabet) #take random input char token\n",
+    "  for i = 1:len\n",
+    "    write(buf, c)\n",
+    "    #Compose like Shakespeare char-by-char! :\n",
+    "    c = wsample(alphabet, m(onehot(c, alphabet)).data)\n",
+    "  end\n",
+    "  return String(take!(buf)) #return the generated characters as a single String\n",
+    "end\n",
+    "\n",
+    "#Print results\n",
+    "sample(m, alphabet, 1000) |> println"
+   ],
+   "metadata": {},
+   "execution_count": null
+  },
+  {
+   "outputs": [],
+   "cell_type": "markdown",
+   "source": [
+    "---\n",
+    "\n",
+    "*This notebook was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*"
+   ],
+   "metadata": {}
+  }
+ ],
+ "nbformat_minor": 3,
+ "metadata": {
+  "language_info": {
+   "file_extension": ".jl",
+   "mimetype": "application/julia",
+   "name": "julia",
+   "version": "1.3.0"
+  },
+  "kernelspec": {
+   "name": "julia-1.3",
+   "display_name": "Julia 1.3.0",
+   "language": "julia"
+  }
+ },
+ "nbformat": 4
+}
diff --git a/text/char-rnn/char-rnn_annotated.jl b/text/char-rnn/char-rnn_annotated.jl
new file mode 100644
index 000000000..5a5870966
--- /dev/null
+++ b/text/char-rnn/char-rnn_annotated.jl
@@ -0,0 +1,80 @@
+# # Character-level Recurrent Neural Network
+#- Train model on Shakespeare's works
+#- Have model write like Shakespeare at the end
+
+# # 1. Import Dependencies
+
+using Flux
+using Flux: onehot, chunk, batchseq, throttle, crossentropy
+using StatsBase: wsample
+using Base.Iterators: partition
+
+# # 2. Data Download & Pre-processing
+# - Source of data: Shakespeare text from https://cs.stanford.edu/people/karpathy/char-rnn/
+# - Generate character tokens
+# - Partition in batches for input
+cd(@__DIR__)
+
+isfile("input.txt") ||
+  download("https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt",
+           "input.txt")
+
+#Generate array of all chars appearing in input.txt, let total num be N:
+text = collect(String(read("input.txt")))
+alphabet = [unique(text)..., '_'] #get unique char array
+#Generate array of one-hot vectors for each character in the text. 
+#Each vector has N-elements, where 1 element in N is set to true (others: false):
+text = map(ch -> onehot(ch, alphabet), text)
+stop = onehot('_', alphabet) #generate end token
+
+N = length(alphabet)
+seqlen = 50 #batch size
+nbatch = 50 #number of batches
+
+Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen)) #get array of minibatches of input x
+Ys = collect(partition(batchseq(chunk(text[2:end], nbatch), stop), seqlen)) #get array of minibatches of "label" y
+
+# # 3. Define RNN Model, Hyperparameters
+#- Define 4-layer deep RNN
+#- Define loss function as Cross Entropy loss
+#- Define optimiser as Adam with learning rate of 0.01
+#Flux's chain function joins multiple layers together, such that layer operations are performed on input sequentially. 
+m = Chain(
+  LSTM(N, 128), #Long Short-term Memory of feature space size 128
+  LSTM(128, 128), # output is 128-dimensional
+  Dense(128, N), #N = number of possible tokens
+  softmax) #calculate the probability of output char corr. to each possible char
+
+m = gpu(m) #use GPU acceleration
+
+function loss(xs, ys) #CE loss, or log loss quanitfies the performance of models with probability output
+  l = sum(crossentropy.(m.(gpu.(xs)), gpu.(ys))) #pass to GPU and get cost
+  Flux.truncate!(m)
+  return l
+end
+
+opt = ADAM(0.01) #use the ADAM optimiser with learning rate of 0.01
+tx, ty = (Xs[5], Ys[5])
+evalcb = () -> @show loss(tx, ty)
+
+# # 4. Train model
+Flux.train!(loss, params(m), zip(Xs, Ys), opt,
+            cb = throttle(evalcb, 30)) #timeout for 30 secs
+
+# # 5. Sample from input.txt and test model
+# Compose a 1000-char long verse in the style of Shakespeare!
+function sample(m, alphabet, len)
+  m = cpu(m) #use cpu as gpu offers minimal acc for seq models
+  Flux.reset!(m)
+  buf = IOBuffer()
+  c = rand(alphabet) #take random input char token
+  for i = 1:len
+    write(buf, c)
+    #Compose like Shakespeare char-by-char! :
+    c = wsample(alphabet, m(onehot(c, alphabet)).data)
+  end
+  return String(take!(buf)) #get results from last LSTM hidden state
+end
+
+#Print results
+sample(m, alphabet, 1000) |> println

From c0b569b543ddf74e3fbdacbe8e47c9ba848ee068 Mon Sep 17 00:00:00 2001
From: Ching Lam Choi <ccl5a09@gmail.com>
Date: Sun, 5 Jan 2020 11:24:21 +0800
Subject: [PATCH 2/6] Replaced with annotated version

---
 text/char-rnn/char-rnn.jl | 61 ++++++++++++++++++++++++++-------------
 1 file changed, 41 insertions(+), 20 deletions(-)

diff --git a/text/char-rnn/char-rnn.jl b/text/char-rnn/char-rnn.jl
index 0db14463d..5a5870966 100644
--- a/text/char-rnn/char-rnn.jl
+++ b/text/char-rnn/char-rnn.jl
@@ -1,59 +1,80 @@
+# # Character-level Recurrent Neural Network
+#- Train model on Shakespeare's works
+#- Have model write like Shakespeare at the end
+
+# # 1. Import Dependencies
+
 using Flux
 using Flux: onehot, chunk, batchseq, throttle, crossentropy
 using StatsBase: wsample
 using Base.Iterators: partition
 
+# # 2. Data Download & Pre-processing
+# - Source of data: Shakespeare text from https://cs.stanford.edu/people/karpathy/char-rnn/
+# - Generate character tokens
+# - Partition in batches for input
 cd(@__DIR__)
 
 isfile("input.txt") ||
   download("https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt",
            "input.txt")
 
+#Read input.txt into an array of characters (N, defined below, is the alphabet size, not the text length):
 text = collect(String(read("input.txt")))
-alphabet = [unique(text)..., '_']
+alphabet = [unique(text)..., '_'] #get unique char array
+#Generate array of one-hot vectors for each character in the text. 
+#Each vector has N-elements, where 1 element in N is set to true (others: false):
 text = map(ch -> onehot(ch, alphabet), text)
-stop = onehot('_', alphabet)
+stop = onehot('_', alphabet) #generate end token
 
 N = length(alphabet)
-seqlen = 50
-nbatch = 50
+seqlen = 50 #batch size
+nbatch = 50 #number of batches
 
-Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen))
-Ys = collect(partition(batchseq(chunk(text[2:end], nbatch), stop), seqlen))
+Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen)) #get array of minibatches of input x
+Ys = collect(partition(batchseq(chunk(text[2:end], nbatch), stop), seqlen)) #get array of minibatches of "label" y
 
+# # 3. Define RNN Model, Hyperparameters
+#- Define 4-layer deep RNN
+#- Define loss function as Cross Entropy loss
+#- Define optimiser as Adam with learning rate of 0.01
+#Flux's chain function joins multiple layers together, such that layer operations are performed on input sequentially. 
 m = Chain(
-  LSTM(N, 128),
-  LSTM(128, 128),
-  Dense(128, N),
-  softmax)
+  LSTM(N, 128), #Long Short-term Memory of feature space size 128
+  LSTM(128, 128), # output is 128-dimensional
+  Dense(128, N), #N = number of possible tokens
+  softmax) #calculate the probability of output char corr. to each possible char
 
-m = gpu(m)
+m = gpu(m) #use GPU acceleration
 
-function loss(xs, ys)
-  l = sum(crossentropy.(m.(gpu.(xs)), gpu.(ys)))
+function loss(xs, ys) #CE loss, or log loss, quantifies the performance of models with probability output
+  l = sum(crossentropy.(m.(gpu.(xs)), gpu.(ys))) #pass to GPU and get cost
   Flux.truncate!(m)
   return l
 end
 
-opt = ADAM(0.01)
+opt = ADAM(0.01) #use the ADAM optimiser with learning rate of 0.01
 tx, ty = (Xs[5], Ys[5])
 evalcb = () -> @show loss(tx, ty)
 
+# # 4. Train model
 Flux.train!(loss, params(m), zip(Xs, Ys), opt,
-            cb = throttle(evalcb, 30))
-
-# Sampling
+            cb = throttle(evalcb, 30)) #run the evaluation callback at most once every 30 secs
 
+# # 5. Sample from the trained model
+# Compose a 1000-char long verse in the style of Shakespeare!
 function sample(m, alphabet, len)
-  m = cpu(m)
+  m = cpu(m) #use cpu as gpu offers minimal acc for seq models
   Flux.reset!(m)
   buf = IOBuffer()
-  c = rand(alphabet)
+  c = rand(alphabet) #take random input char token
   for i = 1:len
     write(buf, c)
+    #Compose like Shakespeare char-by-char! :
     c = wsample(alphabet, m(onehot(c, alphabet)).data)
   end
-  return String(take!(buf))
+  return String(take!(buf)) #return the generated characters as a single String
 end
 
+#Print results
 sample(m, alphabet, 1000) |> println

From f76136920da2b54def325314e98bd0373e62a9ac Mon Sep 17 00:00:00 2001
From: Ching Lam Choi <ccl5a09@gmail.com>
Date: Sun, 5 Jan 2020 11:24:40 +0800
Subject: [PATCH 3/6] Rename char-rnn_annotated.ipynb to char-rnn.ipynb

---
 text/char-rnn/{char-rnn_annotated.ipynb => char-rnn.ipynb} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename text/char-rnn/{char-rnn_annotated.ipynb => char-rnn.ipynb} (100%)

diff --git a/text/char-rnn/char-rnn_annotated.ipynb b/text/char-rnn/char-rnn.ipynb
similarity index 100%
rename from text/char-rnn/char-rnn_annotated.ipynb
rename to text/char-rnn/char-rnn.ipynb

From bd844541cd9114eddb17be56c5847b1c90c67793 Mon Sep 17 00:00:00 2001
From: Ching Lam Choi <ccl5a09@gmail.com>
Date: Sun, 5 Jan 2020 11:24:53 +0800
Subject: [PATCH 4/6] Delete char-rnn_annotated.jl

---
 text/char-rnn/char-rnn_annotated.jl | 80 -----------------------------
 1 file changed, 80 deletions(-)
 delete mode 100644 text/char-rnn/char-rnn_annotated.jl

diff --git a/text/char-rnn/char-rnn_annotated.jl b/text/char-rnn/char-rnn_annotated.jl
deleted file mode 100644
index 5a5870966..000000000
--- a/text/char-rnn/char-rnn_annotated.jl
+++ /dev/null
@@ -1,80 +0,0 @@
-# # Character-level Recurrent Neural Network
-#- Train model on Shakespeare's works
-#- Have model write like Shakespeare at the end
-
-# # 1. Import Dependencies
-
-using Flux
-using Flux: onehot, chunk, batchseq, throttle, crossentropy
-using StatsBase: wsample
-using Base.Iterators: partition
-
-# # 2. Data Download & Pre-processing
-# - Source of data: Shakespeare text from https://cs.stanford.edu/people/karpathy/char-rnn/
-# - Generate character tokens
-# - Partition in batches for input
-cd(@__DIR__)
-
-isfile("input.txt") ||
-  download("https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt",
-           "input.txt")
-
-#Generate array of all chars appearing in input.txt, let total num be N:
-text = collect(String(read("input.txt")))
-alphabet = [unique(text)..., '_'] #get unique char array
-#Generate array of one-hot vectors for each character in the text. 
-#Each vector has N-elements, where 1 element in N is set to true (others: false):
-text = map(ch -> onehot(ch, alphabet), text)
-stop = onehot('_', alphabet) #generate end token
-
-N = length(alphabet)
-seqlen = 50 #batch size
-nbatch = 50 #number of batches
-
-Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen)) #get array of minibatches of input x
-Ys = collect(partition(batchseq(chunk(text[2:end], nbatch), stop), seqlen)) #get array of minibatches of "label" y
-
-# # 3. Define RNN Model, Hyperparameters
-#- Define 4-layer deep RNN
-#- Define loss function as Cross Entropy loss
-#- Define optimiser as Adam with learning rate of 0.01
-#Flux's chain function joins multiple layers together, such that layer operations are performed on input sequentially. 
-m = Chain(
-  LSTM(N, 128), #Long Short-term Memory of feature space size 128
-  LSTM(128, 128), # output is 128-dimensional
-  Dense(128, N), #N = number of possible tokens
-  softmax) #calculate the probability of output char corr. to each possible char
-
-m = gpu(m) #use GPU acceleration
-
-function loss(xs, ys) #CE loss, or log loss quanitfies the performance of models with probability output
-  l = sum(crossentropy.(m.(gpu.(xs)), gpu.(ys))) #pass to GPU and get cost
-  Flux.truncate!(m)
-  return l
-end
-
-opt = ADAM(0.01) #use the ADAM optimiser with learning rate of 0.01
-tx, ty = (Xs[5], Ys[5])
-evalcb = () -> @show loss(tx, ty)
-
-# # 4. Train model
-Flux.train!(loss, params(m), zip(Xs, Ys), opt,
-            cb = throttle(evalcb, 30)) #timeout for 30 secs
-
-# # 5. Sample from input.txt and test model
-# Compose a 1000-char long verse in the style of Shakespeare!
-function sample(m, alphabet, len)
-  m = cpu(m) #use cpu as gpu offers minimal acc for seq models
-  Flux.reset!(m)
-  buf = IOBuffer()
-  c = rand(alphabet) #take random input char token
-  for i = 1:len
-    write(buf, c)
-    #Compose like Shakespeare char-by-char! :
-    c = wsample(alphabet, m(onehot(c, alphabet)).data)
-  end
-  return String(take!(buf)) #get results from last LSTM hidden state
-end
-
-#Print results
-sample(m, alphabet, 1000) |> println

From b2ea095c3aaf81918638dfadeef17cfecd86f037 Mon Sep 17 00:00:00 2001
From: Ching Lam Choi <ccl5a09@gmail.com>
Date: Fri, 24 Jan 2020 21:42:19 +0800
Subject: [PATCH 5/6] Update char-rnn.jl

---
 text/char-rnn/char-rnn.jl | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/text/char-rnn/char-rnn.jl b/text/char-rnn/char-rnn.jl
index 5a5870966..80af66f9a 100644
--- a/text/char-rnn/char-rnn.jl
+++ b/text/char-rnn/char-rnn.jl
@@ -31,8 +31,9 @@ N = length(alphabet)
 seqlen = 50 #batch size
 nbatch = 50 #number of batches
 
-Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen)) #get array of minibatches of input x
-Ys = collect(partition(batchseq(chunk(text[2:end], nbatch), stop), seqlen)) #get array of minibatches of "label" y
+# perform chunking to get meaningful phrases, partition into minibatches and return as array
+Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen))
+Ys = collect(partition(batchseq(chunk(text[2:end], nbatch), stop), seqlen))
 
 # # 3. Define RNN Model, Hyperparameters
 #- Define 4-layer deep RNN

From cdf78cad81916656e116bf31353c56b33fddbf08 Mon Sep 17 00:00:00 2001
From: Ching Lam Choi <ccl5a09@gmail.com>
Date: Fri, 24 Jan 2020 21:47:33 +0800
Subject: [PATCH 6/6] Update char-rnn.jl

---
 text/char-rnn/char-rnn.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/text/char-rnn/char-rnn.jl b/text/char-rnn/char-rnn.jl
index 80af66f9a..4ba7db6fd 100644
--- a/text/char-rnn/char-rnn.jl
+++ b/text/char-rnn/char-rnn.jl
@@ -31,7 +31,7 @@ N = length(alphabet)
 seqlen = 50 #batch size
 nbatch = 50 #number of batches
 
-# perform chunking to get meaningful phrases, partition into minibatches and return as array
+#Split the text into nbatch equal chunks, batch them per time step (padding with stop), and partition into sequences of seqlen steps
 Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen))
 Ys = collect(partition(batchseq(chunk(text[2:end], nbatch), stop), seqlen))