## Classification of the MNIST dataset
## with the convolutional neural network known as LeNet5.
## This script also combines various packages
## from the Julia ecosystem with Flux.
using Flux
using Flux.Data: DataLoader
using Flux.Optimise: Optimiser, WeightDecay
using Flux: onehotbatch, onecold
using Flux.Losses: logitcrossentropy
using Statistics, Random
using Logging: with_logger
using TensorBoardLogger: TBLogger, tb_overwrite, set_step!, set_step_increment!
using ProgressMeter: @showprogress
import MLDatasets
import BSON
using CUDA
# LeNet5 "constructor".
# The model can be adapted to any image size
# and any number of output classes.
function LeNet5(; imgsize=(28,28,1), nclasses=10)
    # Two (5,5) convolutions without padding and two (2,2) max-pools shrink
    # each spatial dimension to d÷4 - 3 (e.g. 28 -> 4), leaving 16 channels.
    out_conv_size = (imgsize[1]÷4 - 3, imgsize[2]÷4 - 3, 16)

    return Chain(
            Conv((5, 5), imgsize[end]=>6, relu),
            MaxPool((2, 2)),
            Conv((5, 5), 6=>16, relu),
            MaxPool((2, 2)),
            flatten,
            Dense(prod(out_conv_size), 120, relu),
            Dense(120, 84, relu),
            Dense(84, nclasses)
          )
end
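# For reference, the default `LeNet5()` flattens a 4x4x16 = 256-element feature
# map into the first Dense layer. A call for, say, 3-channel 32x32 images would
# look like `LeNet5(imgsize=(32,32,3), nclasses=10)`; image sides of at least
# 16 pixels keep `out_conv_size` positive.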
function get_data(args)
    xtrain, ytrain = MLDatasets.MNIST.traindata(Float32)
    xtest, ytest = MLDatasets.MNIST.testdata(Float32)

    xtrain = reshape(xtrain, 28, 28, 1, :)
    xtest = reshape(xtest, 28, 28, 1, :)

    ytrain, ytest = onehotbatch(ytrain, 0:9), onehotbatch(ytest, 0:9)

    train_loader = DataLoader((xtrain, ytrain), batchsize=args.batchsize, shuffle=true)
    test_loader = DataLoader((xtest, ytest), batchsize=args.batchsize)

    return train_loader, test_loader
end
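# Each batch yielded by these loaders is a tuple `(x, y)`. With the default
# batchsize of 128, a full batch has `size(x) == (28, 28, 1, 128)` (the WHCN
# layout that `Conv` expects) and `size(y) == (10, 128)` (one-hot labels);
# the last batch may be smaller.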
loss(ŷ, y) = logitcrossentropy(ŷ, y)
function eval_loss_accuracy(loader, model, device)
    l = 0f0
    acc = 0
    ntot = 0
    for (x, y) in loader
        x, y = x |> device, y |> device
        ŷ = model(x)
        # weight each batch loss by the batch size so `l/ntot` is the dataset mean
        l += loss(ŷ, y) * size(x)[end]
        acc += sum(onecold(ŷ |> cpu) .== onecold(y |> cpu))
        ntot += size(x)[end]
    end
    return (loss = l/ntot |> round4, acc = acc/ntot*100 |> round4)
end
## utility functions
num_params(model) = sum(length, Flux.params(model))
round4(x) = round(x, digits=4)
# arguments for the `train` function
Base.@kwdef mutable struct Args
    η = 3e-4             # learning rate
    λ = 0                # L2 regularizer param, implemented as weight decay
    batchsize = 128      # batch size
    epochs = 10          # number of epochs
    seed = 0             # set seed > 0 for reproducibility
    use_cuda = true      # if true use cuda (if available)
    infotime = 1         # report every `infotime` epochs
    checktime = 5        # save the model every `checktime` epochs; set to 0 for no checkpoints
    tblogger = true      # log training with tensorboard
    savepath = "runs/"   # results path
end
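# `train` accepts any of these fields as keyword arguments, so for example a
# quick CPU run could look like `train(epochs=2, use_cuda=false, tblogger=false)`,
# and `train(λ=1e-4)` would enable weight decay.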
function train(; kws...)
    args = Args(; kws...)
    args.seed > 0 && Random.seed!(args.seed)
    use_cuda = args.use_cuda && CUDA.functional()

    if use_cuda
        device = gpu
        @info "Training on GPU"
    else
        device = cpu
        @info "Training on CPU"
    end

    ## DATA
    train_loader, test_loader = get_data(args)
    @info "Dataset MNIST: $(train_loader.nobs) train and $(test_loader.nobs) test examples"

    ## MODEL AND OPTIMIZER
    model = LeNet5() |> device
    @info "LeNet5 model: $(num_params(model)) trainable params"

    ps = Flux.params(model)

    opt = ADAM(args.η)
    if args.λ > 0 # add weight decay, equivalent to L2 regularization
        opt = Optimiser(WeightDecay(args.λ), opt)
    end
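    # With `Optimiser(WeightDecay(λ), ADAM(η))`, the WeightDecay step adds λ .* p
    # to each parameter's gradient before ADAM applies its update, which is what
    # makes it behave like an L2 penalty on the weights.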
    ## LOGGING UTILITIES
    if args.tblogger
        tblogger = TBLogger(args.savepath, tb_overwrite)
        set_step_increment!(tblogger, 0) # 0 auto increment since we manually set_step!
        @info "TensorBoard logging at \"$(args.savepath)\""
    end
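    # The event files written under `args.savepath` can then be inspected with,
    # e.g., `tensorboard --logdir runs/` in a shell (assuming TensorBoard itself
    # is installed separately).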
    function report(epoch)
        train = eval_loss_accuracy(train_loader, model, device)
        test = eval_loss_accuracy(test_loader, model, device)
        println("Epoch: $epoch Train: $(train) Test: $(test)")
        if args.tblogger
            set_step!(tblogger, epoch)
            with_logger(tblogger) do
                @info "train" loss=train.loss acc=train.acc
                @info "test" loss=test.loss acc=test.acc
            end
        end
    end
    ## TRAINING
    @info "Start Training"
    report(0)
    for epoch in 1:args.epochs
        @showprogress for (x, y) in train_loader
            x, y = x |> device, y |> device
            gs = Flux.gradient(ps) do
                ŷ = model(x)
                loss(ŷ, y)
            end

            Flux.Optimise.update!(opt, ps, gs)
        end

        ## Printing and logging
        epoch % args.infotime == 0 && report(epoch)
        if args.checktime > 0 && epoch % args.checktime == 0
            !ispath(args.savepath) && mkpath(args.savepath)
            modelpath = joinpath(args.savepath, "model.bson")
            let model = cpu(model) # return model to cpu before serialization
                BSON.@save modelpath model epoch
            end
            @info "Model saved in \"$(modelpath)\""
        end
    end
end
if abspath(PROGRAM_FILE) == @__FILE__
    train()
end
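# A saved checkpoint can later be restored with BSON, for example
# (path assuming the default `savepath`):
#
#   using BSON, Flux
#   BSON.@load "runs/model.bson" model epoch
#   model(rand(Float32, 28, 28, 1, 1))   # CPU forward pass on a dummy image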