diff --git a/text/char-rnn/char-rnn.ipynb b/text/char-rnn/char-rnn.ipynb
new file mode 100644
index 000000000..a099329c6
--- /dev/null
+++ b/text/char-rnn/char-rnn.ipynb
@@ -0,0 +1,180 @@
+{
+ "cells": [
+  {
+   "outputs": [],
+   "cell_type": "markdown",
+   "source": [
+    "# Character-level Recurrent Neural Network"
+   ],
+   "metadata": {}
+  },
+  {
+   "outputs": [],
+   "cell_type": "markdown",
+   "source": [
+    "# 1. Import Dependencies"
+   ],
+   "metadata": {}
+  },
+  {
+   "outputs": [],
+   "cell_type": "code",
+   "source": [
+    "using Flux\n",
+    "using Flux: onehot, chunk, batchseq, throttle, crossentropy\n",
+    "using StatsBase: wsample\n",
+    "using Base.Iterators: partition"
+   ],
+   "metadata": {},
+   "execution_count": null
+  },
+  {
+   "outputs": [],
+   "cell_type": "markdown",
+   "source": [
+    "# 2. Data Download & Pre-processing\n",
+    "- Source of data: Shakespeare text from https://cs.stanford.edu/people/karpathy/char-rnn/\n",
+    "- Generate character tokens\n",
+    "- Partition in batches for input"
+   ],
+   "metadata": {}
+  },
+  {
+   "outputs": [],
+   "cell_type": "code",
+   "source": [
+    "cd(@__DIR__)\n",
+    "\n",
+    "isfile(\"input.txt\") ||\n",
+    "  download(\"https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt\",\n",
+    "           \"input.txt\")\n",
+    "\n",
+    "#Collect all characters appearing in input.txt; the unique ones plus '_' form an alphabet of size N:\n",
+    "text = collect(String(read(\"input.txt\")))\n",
+    "alphabet = [unique(text)..., '_'] #get unique char array\n",
+    "#Generate array of one-hot vectors for each character in the text.\n",
+    "#Each vector has N elements, exactly one of which is true (the rest are false):\n",
+    "text = map(ch -> onehot(ch, alphabet), text)\n",
+    "stop = onehot('_', alphabet) #generate end token\n",
+    "\n",
+    "N = length(alphabet)\n",
+    "seqlen = 50 #sequence length (time steps per minibatch)\n",
+    "nbatch = 50 #number of parallel sequences (batch size)\n",
+    "\n",
+    "Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen)) #get array of minibatches of input x\n",
+    "Ys = collect(partition(batchseq(chunk(text[2:end], nbatch), stop), seqlen)) #get array of minibatches of \"label\" y"
+   ],
+   "metadata": {},
+   "execution_count": null
+  },
+  {
+   "outputs": [],
+   "cell_type": "markdown",
+   "source": [
+    "# 3. Define RNN Model, Hyperparameters"
+   ],
+   "metadata": {}
+  },
+  {
+   "outputs": [],
+   "cell_type": "code",
+   "source": [
+    "#Flux's Chain joins multiple layers together, so that each layer is applied to the input in sequence.\n",
+    "m = Chain(\n",
+    "  LSTM(N, 128), #LSTM layer with a 128-dimensional hidden state\n",
+    "  LSTM(128, 128), # output is 128-dimensional\n",
+    "  Dense(128, N), #N = number of possible tokens\n",
+    "  softmax) #calculate the probability of the output char corresponding to each possible char\n",
+    "\n",
+    "m = gpu(m) #use GPU acceleration if available\n",
+    "\n",
+    "function loss(xs, ys) #cross-entropy (log) loss quantifies the performance of models with probability outputs\n",
+    "  l = sum(crossentropy.(m.(gpu.(xs)), gpu.(ys))) #pass data to the GPU and sum the per-timestep losses\n",
+    "  Flux.truncate!(m)\n",
+    "  return l\n",
+    "end\n",
+    "\n",
+    "opt = ADAM(0.01) #use the ADAM optimiser with learning rate of 0.01\n",
+    "tx, ty = (Xs[5], Ys[5])\n",
+    "evalcb = () -> @show loss(tx, ty)"
+   ],
+   "metadata": {},
+   "execution_count": null
+  },
+  {
+   "outputs": [],
+   "cell_type": "markdown",
+   "source": [
+    "# 4. Train model"
+   ],
+   "metadata": {}
+  },
+  {
+   "outputs": [],
+   "cell_type": "code",
+   "source": [
+    "Flux.train!(loss, params(m), zip(Xs, Ys), opt,\n",
+    "            cb = throttle(evalcb, 30)) #print the loss at most once every 30 seconds"
+   ],
+   "metadata": {},
+   "execution_count": null
+  },
+  {
+   "outputs": [],
+   "cell_type": "markdown",
+   "source": [
+    "# 5. Sample from input.txt and test model\n",
+    "Compose a 1000-char long verse in the style of Shakespeare!"
+   ],
+   "metadata": {}
+  },
+  {
+   "outputs": [],
+   "cell_type": "code",
+   "source": [
+    "function sample(m, alphabet, len)\n",
+    "  m = cpu(m) #sample on the CPU; generating one character at a time gains little from the GPU\n",
+    "  Flux.reset!(m)\n",
+    "  buf = IOBuffer()\n",
+    "  c = rand(alphabet) #take random input char token\n",
+    "  for i = 1:len\n",
+    "    write(buf, c)\n",
+    "    #Compose like Shakespeare char-by-char!\n",
+    "    c = wsample(alphabet, m(onehot(c, alphabet)).data)\n",
+    "  end\n",
+    "  return String(take!(buf)) #collect the generated characters into a String\n",
+    "end\n",
+    "\n",
+    "#Print results\n",
+    "sample(m, alphabet, 1000) |> println"
+   ],
+   "metadata": {},
+   "execution_count": null
+  },
+  {
+   "outputs": [],
+   "cell_type": "markdown",
+   "source": [
+    "---\n",
+    "\n",
+    "*This notebook was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*"
+   ],
+   "metadata": {}
+  }
+ ],
+ "nbformat_minor": 3,
+ "metadata": {
+  "language_info": {
+   "file_extension": ".jl",
+   "mimetype": "application/julia",
+   "name": "julia",
+   "version": "1.3.0"
+  },
+  "kernelspec": {
+   "name": "julia-1.3",
+   "display_name": "Julia 1.3.0",
+   "language": "julia"
+  }
+ },
+ "nbformat": 4
+}
diff --git a/text/char-rnn/char-rnn.jl b/text/char-rnn/char-rnn.jl
index 0db14463d..4ba7db6fd 100644
--- a/text/char-rnn/char-rnn.jl
+++ b/text/char-rnn/char-rnn.jl
@@ -1,59 +1,81 @@
+# # Character-level Recurrent Neural Network
+#- Train model on Shakespeare's works
+#- Have model write like Shakespeare at the end
+
+# # 1. Import Dependencies
+
 using Flux
 using Flux: onehot, chunk, batchseq, throttle, crossentropy
 using StatsBase: wsample
 using Base.Iterators: partition
 
+# # 2. Data Download & Pre-processing
+# - Source of data: Shakespeare text from https://cs.stanford.edu/people/karpathy/char-rnn/
+# - Generate character tokens
+# - Partition in batches for input
 cd(@__DIR__)
 
 isfile("input.txt") ||
   download("https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt",
            "input.txt")
 
+#Collect all characters appearing in input.txt; the unique ones plus '_' form an alphabet of size N:
 text = collect(String(read("input.txt")))
-alphabet = [unique(text)..., '_']
+alphabet = [unique(text)..., '_'] #get unique char array
+#Generate array of one-hot vectors for each character in the text.
+#Each vector has N elements, exactly one of which is true (the rest are false):
 text = map(ch -> onehot(ch, alphabet), text)
-stop = onehot('_', alphabet)
+stop = onehot('_', alphabet) #generate end token
 
 N = length(alphabet)
-seqlen = 50
-nbatch = 50
+seqlen = 50 #sequence length (time steps per minibatch)
+nbatch = 50 #number of parallel sequences (batch size)
 
+#Split the text into nbatch parallel streams, then partition into minibatches of seqlen steps each
 Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen))
 Ys = collect(partition(batchseq(chunk(text[2:end], nbatch), stop), seqlen))
 
+# # 3. Define RNN Model, Hyperparameters
+#- Define a 4-layer RNN (two LSTM layers, a Dense layer and softmax)
+#- Define loss function as Cross Entropy loss
+#- Define optimiser as Adam with learning rate of 0.01
+#Flux's Chain joins multiple layers together, so that each layer is applied to the input in sequence.
 m = Chain(
-  LSTM(N, 128),
-  LSTM(128, 128),
-  Dense(128, N),
-  softmax)
+  LSTM(N, 128), #LSTM layer with a 128-dimensional hidden state
+  LSTM(128, 128), # output is 128-dimensional
+  Dense(128, N), #N = number of possible tokens
+  softmax) #calculate the probability of the output char corresponding to each possible char
 
-m = gpu(m)
+m = gpu(m) #use GPU acceleration if available
 
-function loss(xs, ys)
-  l = sum(crossentropy.(m.(gpu.(xs)), gpu.(ys)))
+function loss(xs, ys) #cross-entropy (log) loss quantifies the performance of models with probability outputs
+  l = sum(crossentropy.(m.(gpu.(xs)), gpu.(ys))) #pass data to the GPU and sum the per-timestep losses
   Flux.truncate!(m)
   return l
 end
 
-opt = ADAM(0.01)
+opt = ADAM(0.01) #use the ADAM optimiser with learning rate of 0.01
 tx, ty = (Xs[5], Ys[5])
 evalcb = () -> @show loss(tx, ty)
 
+# # 4. Train model
 Flux.train!(loss, params(m), zip(Xs, Ys), opt,
-            cb = throttle(evalcb, 30))
-
-# Sampling
+            cb = throttle(evalcb, 30)) #print the loss at most once every 30 seconds
 
+# # 5. Sample from input.txt and test model
+# Compose a 1000-char long verse in the style of Shakespeare!
 function sample(m, alphabet, len)
-  m = cpu(m)
+  m = cpu(m) #sample on the CPU; generating one character at a time gains little from the GPU
   Flux.reset!(m)
   buf = IOBuffer()
-  c = rand(alphabet)
+  c = rand(alphabet) #take random input char token
   for i = 1:len
     write(buf, c)
+    #Compose like Shakespeare char-by-char!
     c = wsample(alphabet, m(onehot(c, alphabet)).data)
   end
-  return String(take!(buf))
+  return String(take!(buf)) #collect the generated characters into a String
 end
 
+#Print results
 sample(m, alphabet, 1000) |> println
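For reference, here is a minimal, standalone sketch of how the chunk/batchseq/partition pipeline in char-rnn.jl shapes the training data before it reaches the model. It is not part of the patch above; the toy string, the stream count of 2, the step count of 3, and the variable names are illustrative assumptions only:

using Flux: onehot, chunk, batchseq
using Base.Iterators: partition

toy = collect("the quick brown fox")                 #a short character array instead of Shakespeare
toy_alphabet = [unique(toy)..., '_']                 #unique characters plus the '_' padding token
toy_text = map(ch -> onehot(ch, toy_alphabet), toy)  #one-hot encode every character
pad = onehot('_', toy_alphabet)

streams = chunk(toy_text, 2)            #split into 2 parallel character streams (the script's nbatch)
batches = batchseq(streams, pad)        #one (alphabet size) x 2 one-hot matrix per time step, padded with '_'
minis = collect(partition(batches, 3))  #groups of 3 consecutive time steps (the script's seqlen)

@show length(batches) length(minis) size(first(first(minis)))

With the values used in the script (nbatch = 50, seqlen = 50), each element of Xs is therefore a 50-step sequence of N-by-50 one-hot matrices, and Ys holds the same sequences shifted by one character; the broadcast calls m.(xs) and crossentropy.(...) inside loss consume exactly this structure.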