From 5d18c39c9125ea5760b30a4852df380b96e61f45 Mon Sep 17 00:00:00 2001 From: Ching Lam Choi Date: Sat, 4 Jan 2020 13:31:36 +0800 Subject: [PATCH 1/6] Added annotation --- text/char-rnn/char-rnn_annotated.ipynb | 180 +++++++++++++++++++++++++ text/char-rnn/char-rnn_annotated.jl | 80 +++++++++++ 2 files changed, 260 insertions(+) create mode 100644 text/char-rnn/char-rnn_annotated.ipynb create mode 100644 text/char-rnn/char-rnn_annotated.jl diff --git a/text/char-rnn/char-rnn_annotated.ipynb b/text/char-rnn/char-rnn_annotated.ipynb new file mode 100644 index 000000000..a099329c6 --- /dev/null +++ b/text/char-rnn/char-rnn_annotated.ipynb @@ -0,0 +1,180 @@ +{ + "cells": [ + { + "outputs": [], + "cell_type": "markdown", + "source": [ + "# Character-level Recurrent Neural Network" + ], + "metadata": {} + }, + { + "outputs": [], + "cell_type": "markdown", + "source": [ + "# 1. Import Dependencies" + ], + "metadata": {} + }, + { + "outputs": [], + "cell_type": "code", + "source": [ + "using Flux\n", + "using Flux: onehot, chunk, batchseq, throttle, crossentropy\n", + "using StatsBase: wsample\n", + "using Base.Iterators: partition" + ], + "metadata": {}, + "execution_count": null + }, + { + "outputs": [], + "cell_type": "markdown", + "source": [ + "# 2. 
Data Download & Pre-processing\n", + "- Source of data: Shakespeare text from https://cs.stanford.edu/people/karpathy/char-rnn/\n", + "- Generate character tokens\n", + "- Partition in batches for input" + ], + "metadata": {} + }, + { + "outputs": [], + "cell_type": "code", + "source": [ + "cd(@__DIR__)\n", + "\n", + "isfile(\"input.txt\") ||\n", + " download(\"https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt\",\n", + " \"input.txt\")\n", + "\n", + "#Generate array of all chars appearing in input.txt, let total num be N:\n", + "text = collect(String(read(\"input.txt\")))\n", + "alphabet = [unique(text)..., '_'] #get unique char array\n", + "#Generate array of one-hot vectors for each character in the text.\n", + "#Each vector has N-elements, where 1 element in N is set to true (others: false):\n", + "text = map(ch -> onehot(ch, alphabet), text)\n", + "stop = onehot('_', alphabet) #generate end token\n", + "\n", + "N = length(alphabet)\n", + "seqlen = 50 #batch size\n", + "nbatch = 50 #number of batches\n", + "\n", + "Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen)) #get array of minibatches of input x\n", + "Ys = collect(partition(batchseq(chunk(text[2:end], nbatch), stop), seqlen)) #get array of minibatches of \"label\" y" + ], + "metadata": {}, + "execution_count": null + }, + { + "outputs": [], + "cell_type": "markdown", + "source": [ + "# 3. Define RNN Model, Hyperparameters" + ], + "metadata": {} + }, + { + "outputs": [], + "cell_type": "code", + "source": [ + "#Flux's chain function joins multiple layers together, such that layer operations are performed on input sequentially.\n", + "m = Chain(\n", + " LSTM(N, 128), #Long Short-term Memory of feature space size 128\n", + " LSTM(128, 128), # output is 128-dimensional\n", + " Dense(128, N), #N = number of possible tokens\n", + " softmax) #calculate the probability of output char corr. 
to each possible char\n", + "\n", + "m = gpu(m) #use GPU acceleration\n", + "\n", + "function loss(xs, ys) #CE loss, or log loss, quantifies the performance of models with probability output\n", + " l = sum(crossentropy.(m.(gpu.(xs)), gpu.(ys))) #pass to GPU and get cost\n", + " Flux.truncate!(m)\n", + " return l\n", + "end\n", + "\n", + "opt = ADAM(0.01) #use the ADAM optimiser with learning rate of 0.01\n", + "tx, ty = (Xs[5], Ys[5])\n", + "evalcb = () -> @show loss(tx, ty)" + ], + "metadata": {}, + "execution_count": null + }, + { + "outputs": [], + "cell_type": "markdown", + "source": [ + "# 4. Train model" + ], + "metadata": {} + }, + { + "outputs": [], + "cell_type": "code", + "source": [ + "Flux.train!(loss, params(m), zip(Xs, Ys), opt,\n", + " cb = throttle(evalcb, 30)) #run the evaluation callback at most once every 30 secs" + ], + "metadata": {}, + "execution_count": null + }, + { + "outputs": [], + "cell_type": "markdown", + "source": [ + "# 5. Sample from the trained model\n", + "Compose a 1000-char long verse in the style of Shakespeare!" + ], + "metadata": {} + }, + { + "outputs": [], + "cell_type": "code", + "source": [ + "function sample(m, alphabet, len)\n", + " m = cpu(m) #use cpu as gpu offers minimal acc for seq models\n", + " Flux.reset!(m)\n", + " buf = IOBuffer()\n", + " c = rand(alphabet) #take random input char token\n", + " for i = 1:len\n", + " write(buf, c)\n", + " #Compose like Shakespeare char-by-char! 
:\n", + " c = wsample(alphabet, m(onehot(c, alphabet)).data)\n", + " end\n", + " return String(take!(buf)) #get results from last LSTM hidden state\n", + "end\n", + "\n", + "#Print results\n", + "sample(m, alphabet, 1000) |> println" + ], + "metadata": {}, + "execution_count": null + }, + { + "outputs": [], + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "*This notebook was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*" + ], + "metadata": {} + } + ], + "nbformat_minor": 3, + "metadata": { + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "1.3.0" + }, + "kernelspec": { + "name": "julia-1.3", + "display_name": "Julia 1.3.0", + "language": "julia" + } + }, + "nbformat": 4 +} diff --git a/text/char-rnn/char-rnn_annotated.jl b/text/char-rnn/char-rnn_annotated.jl new file mode 100644 index 000000000..5a5870966 --- /dev/null +++ b/text/char-rnn/char-rnn_annotated.jl @@ -0,0 +1,80 @@ +# # Character-level Recurrent Neural Network +#- Train model on Shakespeare's works +#- Have model write like Shakespeare at the end + +# # 1. Import Dependencies + +using Flux +using Flux: onehot, chunk, batchseq, throttle, crossentropy +using StatsBase: wsample +using Base.Iterators: partition + +# # 2. Data Download & Pre-processing +# - Source of data: Shakespeare text from https://cs.stanford.edu/people/karpathy/char-rnn/ +# - Generate character tokens +# - Partition in batches for input +cd(@__DIR__) + +isfile("input.txt") || + download("https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt", + "input.txt") + +#Generate array of all chars appearing in input.txt, let total num be N: +text = collect(String(read("input.txt"))) +alphabet = [unique(text)..., '_'] #get unique char array +#Generate array of one-hot vectors for each character in the text. 
+#Each vector has N-elements, where 1 element in N is set to true (others: false): +text = map(ch -> onehot(ch, alphabet), text) +stop = onehot('_', alphabet) #generate end token + +N = length(alphabet) +seqlen = 50 #batch size +nbatch = 50 #number of batches + +Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen)) #get array of minibatches of input x +Ys = collect(partition(batchseq(chunk(text[2:end], nbatch), stop), seqlen)) #get array of minibatches of "label" y + +# # 3. Define RNN Model, Hyperparameters +#- Define 4-layer deep RNN +#- Define loss function as Cross Entropy loss +#- Define optimiser as Adam with learning rate of 0.01 +#Flux's chain function joins multiple layers together, such that layer operations are performed on input sequentially. +m = Chain( + LSTM(N, 128), #Long Short-term Memory of feature space size 128 + LSTM(128, 128), # output is 128-dimensional + Dense(128, N), #N = number of possible tokens + softmax) #calculate the probability of output char corr. to each possible char + +m = gpu(m) #use GPU acceleration + +function loss(xs, ys) #CE loss, or log loss quanitfies the performance of models with probability output + l = sum(crossentropy.(m.(gpu.(xs)), gpu.(ys))) #pass to GPU and get cost + Flux.truncate!(m) + return l +end + +opt = ADAM(0.01) #use the ADAM optimiser with learning rate of 0.01 +tx, ty = (Xs[5], Ys[5]) +evalcb = () -> @show loss(tx, ty) + +# # 4. Train model +Flux.train!(loss, params(m), zip(Xs, Ys), opt, + cb = throttle(evalcb, 30)) #timeout for 30 secs + +# # 5. Sample from input.txt and test model +# Compose a 1000-char long verse in the style of Shakespeare! +function sample(m, alphabet, len) + m = cpu(m) #use cpu as gpu offers minimal acc for seq models + Flux.reset!(m) + buf = IOBuffer() + c = rand(alphabet) #take random input char token + for i = 1:len + write(buf, c) + #Compose like Shakespeare char-by-char! 
: + c = wsample(alphabet, m(onehot(c, alphabet)).data) + end + return String(take!(buf)) #get results from last LSTM hidden state +end + +#Print results +sample(m, alphabet, 1000) |> println From c0b569b543ddf74e3fbdacbe8e47c9ba848ee068 Mon Sep 17 00:00:00 2001 From: Ching Lam Choi Date: Sun, 5 Jan 2020 11:24:21 +0800 Subject: [PATCH 2/6] Replaced with annotated version --- text/char-rnn/char-rnn.jl | 61 ++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/text/char-rnn/char-rnn.jl b/text/char-rnn/char-rnn.jl index 0db14463d..5a5870966 100644 --- a/text/char-rnn/char-rnn.jl +++ b/text/char-rnn/char-rnn.jl @@ -1,59 +1,80 @@ +# # Character-level Recurrent Neural Network +#- Train model on Shakespeare's works +#- Have model write like Shakespeare at the end + +# # 1. Import Dependencies + using Flux using Flux: onehot, chunk, batchseq, throttle, crossentropy using StatsBase: wsample using Base.Iterators: partition +# # 2. Data Download & Pre-processing +# - Source of data: Shakespeare text from https://cs.stanford.edu/people/karpathy/char-rnn/ +# - Generate character tokens +# - Partition in batches for input cd(@__DIR__) isfile("input.txt") || download("https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt", "input.txt") +#Generate array of all chars appearing in input.txt, let total num be N: text = collect(String(read("input.txt"))) -alphabet = [unique(text)..., '_'] +alphabet = [unique(text)..., '_'] #get unique char array +#Generate array of one-hot vectors for each character in the text. 
+#Each vector has N-elements, where 1 element in N is set to true (others: false): text = map(ch -> onehot(ch, alphabet), text) -stop = onehot('_', alphabet) +stop = onehot('_', alphabet) #generate end token N = length(alphabet) -seqlen = 50 -nbatch = 50 +seqlen = 50 #batch size +nbatch = 50 #number of batches -Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen)) -Ys = collect(partition(batchseq(chunk(text[2:end], nbatch), stop), seqlen)) +Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen)) #get array of minibatches of input x +Ys = collect(partition(batchseq(chunk(text[2:end], nbatch), stop), seqlen)) #get array of minibatches of "label" y +# # 3. Define RNN Model, Hyperparameters +#- Define 4-layer deep RNN +#- Define loss function as Cross Entropy loss +#- Define optimiser as Adam with learning rate of 0.01 +#Flux's chain function joins multiple layers together, such that layer operations are performed on input sequentially. m = Chain( - LSTM(N, 128), - LSTM(128, 128), - Dense(128, N), - softmax) + LSTM(N, 128), #Long Short-term Memory of feature space size 128 + LSTM(128, 128), # output is 128-dimensional + Dense(128, N), #N = number of possible tokens + softmax) #calculate the probability of output char corr. to each possible char -m = gpu(m) +m = gpu(m) #use GPU acceleration -function loss(xs, ys) - l = sum(crossentropy.(m.(gpu.(xs)), gpu.(ys))) +function loss(xs, ys) #CE loss, or log loss, quantifies the performance of models with probability output + l = sum(crossentropy.(m.(gpu.(xs)), gpu.(ys))) #pass to GPU and get cost Flux.truncate!(m) return l end -opt = ADAM(0.01) +opt = ADAM(0.01) #use the ADAM optimiser with learning rate of 0.01 tx, ty = (Xs[5], Ys[5]) evalcb = () -> @show loss(tx, ty) +# # 4. Train model Flux.train!(loss, params(m), zip(Xs, Ys), opt, - cb = throttle(evalcb, 30)) - -# Sampling + cb = throttle(evalcb, 30)) #run the evaluation callback at most once every 30 secs +# # 5. 
Sample from the trained model +# Compose a 1000-char long verse in the style of Shakespeare! function sample(m, alphabet, len) - m = cpu(m) + m = cpu(m) #use cpu as gpu offers minimal acc for seq models Flux.reset!(m) buf = IOBuffer() - c = rand(alphabet) + c = rand(alphabet) #take random input char token for i = 1:len write(buf, c) + #Compose like Shakespeare char-by-char! : c = wsample(alphabet, m(onehot(c, alphabet)).data) end - return String(take!(buf)) + return String(take!(buf)) #return the generated characters as a String end +#Print results sample(m, alphabet, 1000) |> println From f76136920da2b54def325314e98bd0373e62a9ac Mon Sep 17 00:00:00 2001 From: Ching Lam Choi Date: Sun, 5 Jan 2020 11:24:40 +0800 Subject: [PATCH 3/6] Rename char-rnn_annotated.ipynb to char-rnn.ipynb --- text/char-rnn/{char-rnn_annotated.ipynb => char-rnn.ipynb} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename text/char-rnn/{char-rnn_annotated.ipynb => char-rnn.ipynb} (100%) diff --git a/text/char-rnn/char-rnn_annotated.ipynb b/text/char-rnn/char-rnn.ipynb similarity index 100% rename from text/char-rnn/char-rnn_annotated.ipynb rename to text/char-rnn/char-rnn.ipynb From bd844541cd9114eddb17be56c5847b1c90c67793 Mon Sep 17 00:00:00 2001 From: Ching Lam Choi Date: Sun, 5 Jan 2020 11:24:53 +0800 Subject: [PATCH 4/6] Delete char-rnn_annotated.jl --- text/char-rnn/char-rnn_annotated.jl | 80 ----------------------------- 1 file changed, 80 deletions(-) delete mode 100644 text/char-rnn/char-rnn_annotated.jl diff --git a/text/char-rnn/char-rnn_annotated.jl b/text/char-rnn/char-rnn_annotated.jl deleted file mode 100644 index 5a5870966..000000000 --- a/text/char-rnn/char-rnn_annotated.jl +++ /dev/null @@ -1,80 +0,0 @@ -# # Character-level Recurrent Neural Network -#- Train model on Shakespeare's works -#- Have model write like Shakespeare at the end - -# # 1. 
Import Dependencies - -using Flux -using Flux: onehot, chunk, batchseq, throttle, crossentropy -using StatsBase: wsample -using Base.Iterators: partition - -# # 2. Data Download & Pre-processing -# - Source of data: Shakespeare text from https://cs.stanford.edu/people/karpathy/char-rnn/ -# - Generate character tokens -# - Partition in batches for input -cd(@__DIR__) - -isfile("input.txt") || - download("https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt", - "input.txt") - -#Generate array of all chars appearing in input.txt, let total num be N: -text = collect(String(read("input.txt"))) -alphabet = [unique(text)..., '_'] #get unique char array -#Generate array of one-hot vectors for each character in the text. -#Each vector has N-elements, where 1 element in N is set to true (others: false): -text = map(ch -> onehot(ch, alphabet), text) -stop = onehot('_', alphabet) #generate end token - -N = length(alphabet) -seqlen = 50 #batch size -nbatch = 50 #number of batches - -Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen)) #get array of minibatches of input x -Ys = collect(partition(batchseq(chunk(text[2:end], nbatch), stop), seqlen)) #get array of minibatches of "label" y - -# # 3. Define RNN Model, Hyperparameters -#- Define 4-layer deep RNN -#- Define loss function as Cross Entropy loss -#- Define optimiser as Adam with learning rate of 0.01 -#Flux's chain function joins multiple layers together, such that layer operations are performed on input sequentially. -m = Chain( - LSTM(N, 128), #Long Short-term Memory of feature space size 128 - LSTM(128, 128), # output is 128-dimensional - Dense(128, N), #N = number of possible tokens - softmax) #calculate the probability of output char corr. 
to each possible char - -m = gpu(m) #use GPU acceleration - -function loss(xs, ys) #CE loss, or log loss quanitfies the performance of models with probability output - l = sum(crossentropy.(m.(gpu.(xs)), gpu.(ys))) #pass to GPU and get cost - Flux.truncate!(m) - return l -end - -opt = ADAM(0.01) #use the ADAM optimiser with learning rate of 0.01 -tx, ty = (Xs[5], Ys[5]) -evalcb = () -> @show loss(tx, ty) - -# # 4. Train model -Flux.train!(loss, params(m), zip(Xs, Ys), opt, - cb = throttle(evalcb, 30)) #timeout for 30 secs - -# # 5. Sample from input.txt and test model -# Compose a 1000-char long verse in the style of Shakespeare! -function sample(m, alphabet, len) - m = cpu(m) #use cpu as gpu offers minimal acc for seq models - Flux.reset!(m) - buf = IOBuffer() - c = rand(alphabet) #take random input char token - for i = 1:len - write(buf, c) - #Compose like Shakespeare char-by-char! : - c = wsample(alphabet, m(onehot(c, alphabet)).data) - end - return String(take!(buf)) #get results from last LSTM hidden state -end - -#Print results -sample(m, alphabet, 1000) |> println From b2ea095c3aaf81918638dfadeef17cfecd86f037 Mon Sep 17 00:00:00 2001 From: Ching Lam Choi Date: Fri, 24 Jan 2020 21:42:19 +0800 Subject: [PATCH 5/6] Update char-rnn.jl --- text/char-rnn/char-rnn.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/text/char-rnn/char-rnn.jl b/text/char-rnn/char-rnn.jl index 5a5870966..80af66f9a 100644 --- a/text/char-rnn/char-rnn.jl +++ b/text/char-rnn/char-rnn.jl @@ -31,8 +31,9 @@ N = length(alphabet) seqlen = 50 #batch size nbatch = 50 #number of batches -Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen)) #get array of minibatches of input x -Ys = collect(partition(batchseq(chunk(text[2:end], nbatch), stop), seqlen)) #get array of minibatches of "label" y +# perform chunking to get meaningful phrases, partition into minibatches and return as array +Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen)) +Ys = 
collect(partition(batchseq(chunk(text[2:end], nbatch), stop), seqlen)) # # 3. Define RNN Model, Hyperparameters #- Define 4-layer deep RNN From cdf78cad81916656e116bf31353c56b33fddbf08 Mon Sep 17 00:00:00 2001 From: Ching Lam Choi Date: Fri, 24 Jan 2020 21:47:33 +0800 Subject: [PATCH 6/6] Update char-rnn.jl --- text/char-rnn/char-rnn.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text/char-rnn/char-rnn.jl b/text/char-rnn/char-rnn.jl index 80af66f9a..4ba7db6fd 100644 --- a/text/char-rnn/char-rnn.jl +++ b/text/char-rnn/char-rnn.jl @@ -31,7 +31,7 @@ N = length(alphabet) seqlen = 50 #batch size nbatch = 50 #number of batches -# perform chunking to get meaningful phrases, partition into minibatches and return as array +#Split the text into nbatch equal chunks, batch them together (padding with stop), then partition into sequences of length seqlen Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen)) Ys = collect(partition(batchseq(chunk(text[2:end], nbatch), stop), seqlen))