From 86d68d4f4d3bcea16c2e45fd7913e04d226f6df5 Mon Sep 17 00:00:00 2001 From: andrew Date: Tue, 19 Jan 2021 17:31:54 -0600 Subject: [PATCH 1/6] add time_series tutorial --- tutorials/time_series/.gitignore | 4 + tutorials/time_series/Project.toml | 12 + tutorials/time_series/time_series.jl | 435 +++++++++++++++++++++++++++ 3 files changed, 451 insertions(+) create mode 100644 tutorials/time_series/.gitignore create mode 100644 tutorials/time_series/Project.toml create mode 100644 tutorials/time_series/time_series.jl diff --git a/tutorials/time_series/.gitignore b/tutorials/time_series/.gitignore new file mode 100644 index 000000000..9d7ddda60 --- /dev/null +++ b/tutorials/time_series/.gitignore @@ -0,0 +1,4 @@ +Manifest.toml +jena_climate* +.vscode +.ipynb_* \ No newline at end of file diff --git a/tutorials/time_series/Project.toml b/tutorials/time_series/Project.toml new file mode 100644 index 000000000..73187fbe3 --- /dev/null +++ b/tutorials/time_series/Project.toml @@ -0,0 +1,12 @@ +[deps] +CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +Debugger = "31a5f54b-26ea-5ae9-a837-f05ce5417438" +FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" +Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" +Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" +MLDataPattern = "9920b226-0b2a-5f5f-9153-9aa70a013f8b" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" +ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" diff --git a/tutorials/time_series/time_series.jl b/tutorials/time_series/time_series.jl new file mode 100644 index 000000000..e30f3ecd0 --- /dev/null +++ b/tutorials/time_series/time_series.jl @@ -0,0 +1,435 @@ +#= TODO +* Add any Flux specific explanation +* Handle target value better for dataset y's; Is there a way to ensure that time series always have a batched axis using MLDataPattern, esp lazily? 
+* Decide what level we want this tutorial at - Flux or some higher level? +* Clean up and make more idiomatic - particularly on how timeseries are batched +* Make any tests or integrations +* Put things in functions as needed +* Eachbatch - size or maxsize? what's more popular +* Flux on master for Dense behavior. Needs to be latest when this is published +* In train_model! is it critical to return and assign the model (`linear = train_model!(linear, single_step_1h, opt; bs=16, epochs=20)`)? I found that without doing it, Flux.update! would work during training, but then calling the model outside wouldn't be mutated? could this deal with calling params at the beginning? +* Early Stopping could use some work. Not sure it does exactly what I want it to +* Need to convert data to Float32? Without it, I get this when running conv_model. Related to Params being Float32? + ┌ Warning: Slow fallback implementation invoked for conv! You probably don't want this; check your datatypes. + │ yT = Float64 + │ T1 = Float64 + │ T2 = Float32 + └ @ NNlib ~/.julia/packages/NNlib/fxLrD/src/conv.jl:206 + +------------------------------------------------------------------------ +=# + +# # Time series forecasting +# This tutorial serves as a `Julia` implementation of the Tensorflow time series forecasting tutorial here: +# +# [Tensorflow Time Series Forecasting Tutorial](https://www.tensorflow.org/tutorials/structured_data/time_series) + +# ## Setup + +using ZipFile +using CSV +using DataFrames +using StatsPlots #re-exports Plots.jl functions +using Dates +using FFTW +using CUDA +using Flux +using MLDataPattern + +using Random: seed! 
+using Statistics: mean, std +using Flux: unsqueeze +import StatsPlots: plot + +seed!(4231) +ENV["LINES"] = 20 + +CUDA.allowscalar(false) + +# ## The weather dataset + +function download_data(; fname="jena_climate_2009_2016.zip") + DATA_PATH = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip" + isfile(fname) || download(DATA_PATH, fname) + + zip = ZipFile.Reader(fname) #https://juliadata.github.io/CSV.jl/stable/index.html#Example:-reading-from-a-zip-file-1 + csv = zip.files[1] + df = CSV.File(csv) |> DataFrame + close(zip) + return df +end + +df = download_data(); + +df = df[6:6:end, :] + +df[!,"Date Time"] = Dates.DateTime.(df[:,"Date Time"], "dd.mm.yyyy HH:MM:SS"); #https://en.wikibooks.org/wiki/Introducing_Julia/Working_with_dates_and_times + +col = ["Date Time", "T (degC)", "p (mbar)", "rho (g/m**3)"] + +df[:,col] + +@df df plot(cols(1), cols(2:4); layout=(3, 1)) # from https://github.com/JuliaPlots/StatsPlots.jl + +@df df[1:480,:] plot(cols(1), cols(2:4); layout=(3, 1)) + +# ## Inspect and cleanup +@show describe(df) + +df."p (mbar)"; #cool that this works + +replace!(df[!,"wv (m/s)"], -9999.0 => 0); # https://juliadata.github.io/DataFrames.jl/stable/man/getting_started/#Replacing-Data +replace!(df[!,"max. wv (m/s)"], -9999.0 => 0); + +@show describe(df) + +# ## Feature engineering + +histogram2d(df[!,"wd (deg)"], df[!,"wv (m/s)"], bins=(75,75), xlabel="wd (deg)", ylabel="wv (m/s)") + +wd_rad = df[!,"wd (deg)"] * π / 180 +df.Wx = df[!,"wv (m/s)"] .* cos.(wd_rad) +df.Wy = df[!,"wv (m/s)"] .* sin.(wd_rad) +df."max Wx" = df[!,"max. wv (m/s)"] .* cos.(wd_rad) +df."max Wy" = df[!,"max. 
wv (m/s)"] .* sin.(wd_rad); + +histogram2d(df.Wx, df.Wy, bins=(75,75), xlabel="Wind X [m/s]", ylabel="Wind Y [m/s]") + +timestamp_s = Dates.datetime2unix.(df."Date Time"); + +day = 24*60*60 #seconds in a day +year = 365.2425 * day #seconds in a year + +df[!,"Day sin"] = sin.(timestamp_s * (2 * π / day)) +df[!,"Day cos"] = cos.(timestamp_s * (2 * π / day)) +df[!,"Year sin"] = sin.(timestamp_s * (2 * π / year)) +df[!,"Year cos"] = cos.(timestamp_s * (2 * π / year)); + +plot(df[1:25,"Day sin"], legend=false) +plot!(df[1:25,"Day cos"]) +xlabel!("Time [h]") +title!("Time of Day Signal") + +fftrans = FFTW.rfft(df[!,"T (degC)"]) +f_per_dataset = 1:size(fftrans)[1] + +n_samples_h = size(df[!,"T (degC)"])[1] +hours_per_year = 24 * 365.2524 +years_per_dataset = n_samples_h / hours_per_year +f_per_year = f_per_dataset / years_per_dataset; + +plot(f_per_year, abs.(fftrans), xscale=:log10, ylim=(0, 400000), xlim=(0.3,Inf), leg=false) +xticks!([1, 365.2524], ["1/Year", "1/Day"]) +xlabel!("Frequency (log scale)") + +# ## Split the data +# drop columns you don't want to use further +select!(df, Not([:("wv (m/s)"),:("max. 
wv (m/s)"), :("wd (deg)"), :("Date Time")])); + +column_indices = pairs(names(df)) +indices_columns = Dict(value => key for (key, value) in column_indices) +df = convert.(Float32, df) # Don't need high precision; reduces errors later on when using Params - gradients are Float32 + + +n = size(df)[1] +train_df = df[1:round(Int,n*0.7, RoundDown),:] +valid_df = df[round(Int,n*0.7, RoundUp):round(Int,n*0.9, RoundDown),:] +test_df = df[round(Int,n*0.9, RoundUp):end,:]; # matching TF tutorial exactly, can also use partition + +num_features = size(df)[2] + +# ## Normalize the data +train_mean = mean.(eachcol(train_df)) +train_std = std.(eachcol(train_df)) + +train_df = (train_df .- train_mean') ./ train_std' +valid_df = (valid_df .- train_mean') ./ train_std' +test_df = (test_df .- train_mean') ./ train_std' + +df_std = (df .- train_mean') ./ train_std' +df_std = stack(df_std) + +violin(df_std.variable, df_std.value, xrotation=30.0, legend=false, xticks=:all) # use plotattr() to learn about keywords + +boxplot!(df_std.variable, df_std.value, fillalpha=0.75, outliers=false) + + +# ## Data Windowing +# We are going to make use of MLDataPattern's `slidingwindow` to generate the windows +# https://mldatapatternjl.readthedocs.io/en/latest/documentation/dataview.html?highlight=slidingwindow#labeled-windows +h = 6 # historical window length +f = 1 # future window length + +train_loader = slidingwindow(i -> i+h:i+h+f-1, Array(train_df)', h, stride=1) + +# We will define our own WindowGenerator, some constructors, and plotting functions. The data from the WindowGenerator will be used in training. +""" +Calculates the windows used for modeling, the target index, and features useful for plotting. 
+""" +mutable struct WindowGenerator + train # training windows + valid # validation windows + h # historical steps in window + f # target steps in window + label_indices # indices for plotting predictions in a window + target_idx::Array{Int, 1} # indices to be predicted +end + +""" +Specify `h` historical points, `f` target points. By default, the target points are assumed to follow after all the historical points (`offset = h`). +By setting `offset = 1`, the targets for each historical point will be the next point in time. +""" +function WindowGenerator(h, f, train_df, valid_df, label_columns::Vector{String}; offset=h) + train = slidingwindow(i -> i+offset:i+offset+f-1, Array(train_df)', h, stride=1) + valid = slidingwindow(i -> i+offset:i+offset+f-1, Array(valid_df)', h, stride=1) + + label_indices = (offset + 1):(offset + f) + + target_idx = findall(x->x in label_columns, names(train_df)) + + return WindowGenerator(train, valid, h, f, label_indices, target_idx) +end + +WindowGenerator(h, f, train_df, valid_df, label_columns::String; offset=h) = + WindowGenerator(h, f, train_df, valid_df, label_columns=[label_columns]; offset=offset) + +WindowGenerator(h, f, train_df, valid_df; label_columns, offset=h) = + WindowGenerator(h, f, train_df, valid_df, label_columns; offset=offset) + +""" +Plots the historical data and the target(s) for prediction. +""" +function plot(wg::WindowGenerator) + idx = wg.target_idx[1] # Currently, will only plot the first target index but could be redone to plot all label columns. 
+ sw = wg.valid + plots = [] + for it in 1:3 + i = rand(1:size(sw,1)) + p = plot(sw[i][1][idx, :], leg=false) + scatter!(wg.label_indices,sw[i][2][idx,:]') + it == 3 && xlabel!("Given $(wg.h)h as input, predict $(wg.f)h into the future.") + push!(plots, p) + end + plot(plots..., layout=(3,1)) +end + +# Some usage examples of WindowGenerator +WindowGenerator(6, 1, train_df, valid_df, label_columns=["T (degC)"]) +WindowGenerator(6, 1, train_df, valid_df, label_columns=["T (degC)", "Wx"]) +wg = WindowGenerator(6, 1, train_df, valid_df, label_columns="T (degC)") + +plot(wg) + +# Make a utility function for batching lazily-evaluated timeseries from slidingwindow +""" +Takes in t, which is an array of tuples of (`sequence`, `target`) where `sequence` is an array of timestamp x features, +and target is either an array or a single value. Outputs a tuple of batched + +julia> z = [([1 2 3; 2 3 4], 5), ([6 7 8; 7 8 9], 0)] +2-element Array{Tuple{Array{Int64,2},Int64},1}: + ([1 2 3; 2 3 4], 5) + ([6 7 8; 7 8 9], 0) + +julia> batch_ts(z) +([1 2 3; 2 3 4] + +[6 7 8; 7 8 9], [5] + +[0]) + +julia> size(batch_ts(z)[1]) +(2, 3, 2) + +julia> size(batch_ts(z)[2]) +(1, 1, 2) +""" +batch_ts(t) = reduce((x, y) -> (cat(x[1], y[1], dims=3), cat(x[2], y[2], dims=3)), t) +batch_ts(t::Tuple) = (unsqueeze(t[1],3), unsqueeze(t[2],3)) # handle batch size of 1 + +# To understand better what the above code does, let's look at an imitation training loop and look at everything's dimensions +practice_df = train_df[1:10,:] +a = slidingwindow(i -> i+h:i+h+f-1, Array(practice_df)', h, stride=1) + +for i in eachbatch(shuffleobs(a), size=2) + (x,y) = batch_ts(i) + @show x + @show y + println() +end + +# # Single Step Models +loss(x,y) = Flux.Losses.mse(x, y) + +# ### Baseline - 1h +struct Baseline + label_index::Int +end + +(m::Baseline)(x) = x[m.label_index,:,:] + +target = "T (degC)" + +# Since this model repeats the last point, we make slidingwindows with 1 historical and 1 target point. 
+single_step_1h = WindowGenerator(1, 1, train_df, valid_df, label_columns=target); + +baseline_model = Baseline(wg.target_idx[1]) + +function run_single_step_baseline(wg, model) + preds = Float32[] + reals = Float32[] + for (x,y) in wg.train + val = model(x)[1] + push!(preds, val) + push!(reals, y[model.label_index]) #figure out how to do validation + end + + l = loss(preds, reals) + return l +end + +run_single_step_baseline(single_step_1h, baseline_model) + +# ### Baseline - 24h +# Let's try to predict the next hour's value for 24 hours +single_step_24h = WindowGenerator(24, 24, train_df, valid_df, label_columns=target; offset=1); + +""" +Plots the historical window, the target(s) for prediction, and the model's predictions. +""" +function plot(wg::WindowGenerator, model; set=:valid) + data = getfield(wg,set) + i = rand(1:size(data,1)) + plot(1:wg.h, data[i][1][2,:], lab="Inputs", shape=:circ, m=2, leg=:outerright) + scatter!(wg.label_indices, data[i][2][2,:], lab="Labels", c=:green) + z = model(unsqueeze(data[i][1],3))[:] + scatter!(wg.label_indices, z, lab="Predictions", shape=:star5, m=6, c=:orange) +end + +plot(single_step_24h, baseline_model) + +# ### Linear Models +# ##### 1 hour +linear = Dense(size(single_step_1h.train[1][1],1), 1; initW=Flux.glorot_uniform, initb=Flux.zeros) +opt = Flux.Optimise.ADAM(0.01) + +function train_model!(model, wg::WindowGenerator, opt; epochs=20, bs=16, dev=Flux.gpu, conv=false) + model = model |> dev + ps = params(model) + t = shuffleobs(wg.train) + v = batch_ts(getobs(wg.valid)) + v = conv ? 
((unsqueeze(v[1],3), v[2]) |> dev) : (v |> dev) # handle validation dimensions if conv network + + local l + vl_prev = Inf + for e in 1:epochs + for d in eachbatch(t, size=bs) + x, y = batch_ts(d) + y = y[wg.target_idx,:,:] + conv && (x = unsqueeze(x,3)) + x, y = x |> dev, y |> dev + gs = gradient(ps) do + l = loss(model(x),y) + end + Flux.update!(opt, ps, gs) + end + l = round(l;digits=4) + vl = round(loss(model(v[1]),v[2][wg.target_idx,:,:]); digits=4) + println("Epoch $e/$epochs - train loss: $l, valid loss: $vl") + # crude early-stopping + # vl_prev < (vl - 0.001) && break + # vl_prev = vl + end + model = model |> cpu +end + +@time linear = train_model!(linear, single_step_1h, opt; bs=32, epochs=20) + +# ##### 24 hr +plot(single_step_24h, linear) + +bar(names(train_df), linear.W[:], xrotation=30.0, legend=false, xticks=:all, tickfontsize=6) + +# ### Dense +dense = Chain( + Dense(19, 64, relu), + Dense(64, 64, relu), + Dense(64, 1) +) + +@time dense = train_model!(dense, single_step_1h, opt; bs=32, epochs=20) + +plot(single_step_24h, dense) + + +# ### Multi-step Dense +# Now we are going to use 3 historical hours to predict 1 hour in the future. 
+single_step_3h = WindowGenerator(3, 1, train_df, valid_df, label_columns=target); + +plot(single_step_3h) + +multi_step_dense = Chain( + i -> reshape(i, :, 1, size(i)[end]), # flatten first two dimensions, but preserve batch dimension + Dense(19*3, 32, relu), + Dense(32, 32, relu), + Dense(32, 1) +) + +@time multi_step_dense = train_model!(multi_step_dense, single_step_3h, opt; bs=32, epochs=20) + +plot(single_step_3h, multi_step_dense) + +# ### Convolutional Neural Network +conv_model = Chain( + Conv((19,3), 1=>32, relu), # need to explain why this conv pattern + x -> Flux.flatten(x), + Dense(32, 32, relu), + Dense(32, 1) +) + +single_step_3h = WindowGenerator(3, 1, train_df[1:60,:], valid_df, label_columns=target); + +@time conv_model = train_model!(conv_model, single_step_3h, opt; bs=20, epochs=2, conv=true) + + +#not learning :( but dimensions are right? +# maybe convolutions arent? + +# https://github.com/FluxML/Flux.jl/issues/1465 + + +# ### Recurrent Neural Network + +# ### Performance + +# ### Multi-output Models + +# #### Baseline + +# #### Dense + +# #### RNN + +# #### Advanced: Residual Connections + +# #### Performance + +# # Multi-Step Models + +# ### Baselines + +# ## Single-shot Models + +# ### Linear + +# ### Dense + +# ### CNN + +# ### RNN + +# ## Advanced Autoregressive model +# ### RNN + +# ## Performance + +# Next Steps From 4b256654336b9bbb800488995df4d5eee47df862 Mon Sep 17 00:00:00 2001 From: adinhobl Date: Sun, 21 Mar 2021 20:17:08 -0500 Subject: [PATCH 2/6] split up files, finish 1D conv --- tutorials/time_series/TODO | 16 +++ tutorials/time_series/batch_ts.jl | 24 ++++ tutorials/time_series/runtests.jl | 41 ++++++ tutorials/time_series/time_series.jl | 156 ++++------------------ tutorials/time_series/window_generator.jl | 64 +++++++++ 5 files changed, 173 insertions(+), 128 deletions(-) create mode 100644 tutorials/time_series/TODO create mode 100644 tutorials/time_series/batch_ts.jl create mode 100644 tutorials/time_series/runtests.jl 
create mode 100644 tutorials/time_series/window_generator.jl diff --git a/tutorials/time_series/TODO b/tutorials/time_series/TODO new file mode 100644 index 000000000..f88d96601 --- /dev/null +++ b/tutorials/time_series/TODO @@ -0,0 +1,16 @@ +* Handle target value better for dataset y's; Is there a way to ensure that time series always have a batched axis using MLDataPattern, esp lazily? +* Decide what level to put this tutorial at - base Flux or incorporate higher level packages? +* Make timeseries batching more idiomatic and faster. +* Add benchmarking integrations so we tell how other PRs affect this functionality. +* Put things in functions as needed +* Eachbatch - size or maxsize? what's more popular. Probably incorporate with DataLoaders.jl +* Flux on master for Dense behavior. Needs to be latest when this is published +* In train_model! is it critical to return and assign the model (`linear = train_model!(linear, single_step_1h, opt; bs=16, epochs=20)`)? I found that without doing it, Flux.update! would work during training, but then calling the model outside wouldn't be mutated? could this deal with calling params at the beginning? +* Early Stopping could use some work. Not sure it does exactly what I want it to. Could use earlystopping.jl +* Need to convert data to Float32? Without it, I get this when running conv_model. Related to Params being Float32? + ┌ Warning: Slow fallback implementation invoked for conv! You probably don't want this; check your datatypes. + │ yT = Float64 + │ T1 = Float64 + │ T2 = Float32 + └ @ NNlib ~/.julia/packages/NNlib/fxLrD/src/conv.jl:206 +* Make sure conv examples are implemented correctly. 
\ No newline at end of file diff --git a/tutorials/time_series/batch_ts.jl b/tutorials/time_series/batch_ts.jl new file mode 100644 index 000000000..8104b654c --- /dev/null +++ b/tutorials/time_series/batch_ts.jl @@ -0,0 +1,24 @@ +""" +Takes in t, which is an array of tuples of (`sequence`, `target`) where `sequence` is an array of timestamp x features, +and target is either an array or a single value. Outputs a tuple of batched + +julia> z = [([1 2 3; 2 3 4], 5), ([6 7 8; 7 8 9], 0)] +2-element Array{Tuple{Array{Int64,2},Int64},1}: + ([1 2 3; 2 3 4], 5) + ([6 7 8; 7 8 9], 0) + +julia> batch_ts(z) +([1 2 3; 2 3 4] + +[6 7 8; 7 8 9], [5] + +[0]) + +julia> size(batch_ts(z)[1]) +(2, 3, 2) + +julia> size(batch_ts(z)[2]) +(1, 1, 2) +""" +batch_ts(t) = reduce((x, y) -> (cat(x[1], y[1], dims=3), cat(x[2], y[2], dims=3)), t) +batch_ts(t::Tuple) = (unsqueeze(t[1],3), unsqueeze(t[2],3)) # handle batch size of 1 \ No newline at end of file diff --git a/tutorials/time_series/runtests.jl b/tutorials/time_series/runtests.jl new file mode 100644 index 000000000..b134a1ae3 --- /dev/null +++ b/tutorials/time_series/runtests.jl @@ -0,0 +1,41 @@ +using Test +using DataFrames +using Random +using Dates + +include("window_generator.jl") +include("batch_ts.jl") + +rng = MersenneTwister(123) + +df = DataFrame("timestamp"=>Date(2014,11,15):Day(1):Date(2014, 12, 31), + "A"=>randn(rng, Float16, 47), + "B"=>sin.(rand(rng, 47))) +num_train = 35 +train_df = df[1:num_train,:] +test_df = df[num_train+1:end,:] + +# WindowGenerator +@testset "historical window of length 1" begin + +end + + +@testset "multiple label columns" begin + +end + + +@testset "" begin + +end + + +# batch_ts +@testset "" begin + +end + + +WindowGenerator(6, 1, train_df, valid_df, label_columns=["T (degC)"]) +WindowGenerator(6, 1, train_df, valid_df, label_columns=["T (degC)", "Wx"]) \ No newline at end of file diff --git a/tutorials/time_series/time_series.jl b/tutorials/time_series/time_series.jl index e30f3ecd0..c4e82f58e 
100644 --- a/tutorials/time_series/time_series.jl +++ b/tutorials/time_series/time_series.jl @@ -1,31 +1,9 @@ -#= TODO -* Add any Flux specific explanation -* Handle target value better for dataset y's; Is there a way to ensure that time series always have a batched axis using MLDataPattern, esp lazily? -* Decide what level we want this tutorial at - Flux or some higher level? -* Clean up and make more idiomatic - particularly on how timeseries are batched -* Make any tests or integrations -* Put things in functions as needed -* Eachbatch - size or maxsize? what's more popular -* Flux on master for Dense behavior. Needs to be latest when this is published -* In train_model! is it critical to return and assign the model (`linear = train_model!(linear, single_step_1h, opt; bs=16, epochs=20)`)? I found that without doing it, Flux.update! would work during training, but then calling the model outside wouldn't be mutated? could this deal with calling params at the beginning? -* Early Stopping could use some work. Not sure it does exactly what I want it to -* Need to convert data to Float32? Without it, I get this when running conv_model. Related to Params being Float32? - ┌ Warning: Slow fallback implementation invoked for conv! You probably don't want this; check your datatypes. - │ yT = Float64 - │ T1 = Float64 - │ T2 = Float32 - └ @ NNlib ~/.julia/packages/NNlib/fxLrD/src/conv.jl:206 - ------------------------------------------------------------------------- -=# - # # Time series forecasting # This tutorial serves as a `Julia` implementation of the Tensorflow time series forecasting tutorial here: # # [Tensorflow Time Series Forecasting Tutorial](https://www.tensorflow.org/tutorials/structured_data/time_series) # ## Setup - using ZipFile using CSV using DataFrames @@ -39,7 +17,6 @@ using MLDataPattern using Random: seed! 
using Statistics: mean, std using Flux: unsqueeze -import StatsPlots: plot seed!(4231) ENV["LINES"] = 20 @@ -150,101 +127,28 @@ df_std = (df .- train_mean') ./ train_std' df_std = stack(df_std) violin(df_std.variable, df_std.value, xrotation=30.0, legend=false, xticks=:all) # use plotattr() to learn about keywords - boxplot!(df_std.variable, df_std.value, fillalpha=0.75, outliers=false) # ## Data Windowing -# We are going to make use of MLDataPattern's `slidingwindow` to generate the windows -# https://mldatapatternjl.readthedocs.io/en/latest/documentation/dataview.html?highlight=slidingwindow#labeled-windows -h = 6 # historical window length -f = 1 # future window length - -train_loader = slidingwindow(i -> i+h:i+h+f-1, Array(train_df)', h, stride=1) - # We will define our own WindowGenerator, some constructors, and plotting functions. The data from the WindowGenerator will be used in training. -""" -Calculates the windows used for modeling, the target index, and features useful for plotting. -""" -mutable struct WindowGenerator - train # training windows - valid # validation windows - h # historical steps in window - f # target steps in window - label_indices # indices for plotting predictions in a window - target_idx::Array{Int, 1} # indices to be predicted -end +include("window_generator.jl") -""" -Specify `h` historical points, `f` target points. By default, the target points are assumed to follow after all the historical points (`offset = h`). -By setting `offset = 1`, the targets for each historical point will be the next point in time. 
-""" -function WindowGenerator(h, f, train_df, valid_df, label_columns::Vector{String}; offset=h) - train = slidingwindow(i -> i+offset:i+offset+f-1, Array(train_df)', h, stride=1) - valid = slidingwindow(i -> i+offset:i+offset+f-1, Array(valid_df)', h, stride=1) - - label_indices = (offset + 1):(offset + f) - - target_idx = findall(x->x in label_columns, names(train_df)) - - return WindowGenerator(train, valid, h, f, label_indices, target_idx) -end +h = 6 # historical window length +f = 1 # future window length -WindowGenerator(h, f, train_df, valid_df, label_columns::String; offset=h) = - WindowGenerator(h, f, train_df, valid_df, label_columns=[label_columns]; offset=offset) - -WindowGenerator(h, f, train_df, valid_df; label_columns, offset=h) = - WindowGenerator(h, f, train_df, valid_df, label_columns; offset=offset) - -""" -Plots the historical data and the target(s) for prediction. -""" -function plot(wg::WindowGenerator) - idx = wg.target_idx[1] # Currently, will only plot the first target index but could be redone to plot all label columns. - sw = wg.valid - plots = [] - for it in 1:3 - i = rand(1:size(sw,1)) - p = plot(sw[i][1][idx, :], leg=false) - scatter!(wg.label_indices,sw[i][2][idx,:]') - it == 3 && xlabel!("Given $(wg.h)h as input, predict $(wg.f)h into the future.") - push!(plots, p) - end - plot(plots..., layout=(3,1)) -end +# WindowGenerator makes use of MLDataPattern's `slidingwindow` to generate the windows. 
It is good at flexibly generating +# https://mldatapatternjl.readthedocs.io/en/latest/documentation/dataview.html?highlight=slidingwindow#labeled-windows +slidingwindow(i -> i+h:i+h+f-1, Array(train_df)', h, stride=1) -# Some usage examples of WindowGenerator -WindowGenerator(6, 1, train_df, valid_df, label_columns=["T (degC)"]) -WindowGenerator(6, 1, train_df, valid_df, label_columns=["T (degC)", "Wx"]) +# How to use WindowGenerator wg = WindowGenerator(6, 1, train_df, valid_df, label_columns="T (degC)") - plot(wg) -# Make a utility function for batching lazily-evaluated timeseries from slidingwindow -""" -Takes in t, which is an array of tuples of (`sequence`, `target`) where `sequence` is an array of timestamp x features, -and target is either an array or a single value. Outputs a tuple of batched - -julia> z = [([1 2 3; 2 3 4], 5), ([6 7 8; 7 8 9], 0)] -2-element Array{Tuple{Array{Int64,2},Int64},1}: - ([1 2 3; 2 3 4], 5) - ([6 7 8; 7 8 9], 0) - -julia> batch_ts(z) -([1 2 3; 2 3 4] - -[6 7 8; 7 8 9], [5] -[0]) +# We will also make use of the utility function for batching lazily-evaluated timeseries from slidingwindow +include("batch_ts.jl") -julia> size(batch_ts(z)[1]) -(2, 3, 2) - -julia> size(batch_ts(z)[2]) -(1, 1, 2) -""" -batch_ts(t) = reduce((x, y) -> (cat(x[1], y[1], dims=3), cat(x[2], y[2], dims=3)), t) -batch_ts(t::Tuple) = (unsqueeze(t[1],3), unsqueeze(t[2],3)) # handle batch size of 1 # To understand better what the above code does, let's look at an imitation training loop and look at everything's dimensions practice_df = train_df[1:10,:] @@ -293,20 +197,9 @@ run_single_step_baseline(single_step_1h, baseline_model) # Let's try to predict the next hour's value for 24 hours single_step_24h = WindowGenerator(24, 24, train_df, valid_df, label_columns=target; offset=1); -""" -Plots the historical window, the target(s) for prediction, and the model's predictions. 
-""" -function plot(wg::WindowGenerator, model; set=:valid) - data = getfield(wg,set) - i = rand(1:size(data,1)) - plot(1:wg.h, data[i][1][2,:], lab="Inputs", shape=:circ, m=2, leg=:outerright) - scatter!(wg.label_indices, data[i][2][2,:], lab="Labels", c=:green) - z = model(unsqueeze(data[i][1],3))[:] - scatter!(wg.label_indices, z, lab="Predictions", shape=:star5, m=6, c=:orange) -end - plot(single_step_24h, baseline_model) + # ### Linear Models # ##### 1 hour linear = Dense(size(single_step_1h.train[1][1],1), 1; initW=Flux.glorot_uniform, initb=Flux.zeros) @@ -317,7 +210,7 @@ function train_model!(model, wg::WindowGenerator, opt; epochs=20, bs=16, dev=Flu ps = params(model) t = shuffleobs(wg.train) v = batch_ts(getobs(wg.valid)) - v = conv ? ((unsqueeze(v[1],3), v[2]) |> dev) : (v |> dev) # handle validation dimensions if conv network + # v = conv ? ((unsqueeze(v[1],3), v[2]) |> dev) : (v |> dev) # handle validation dimensions if conv network local l vl_prev = Inf @@ -325,7 +218,7 @@ function train_model!(model, wg::WindowGenerator, opt; epochs=20, bs=16, dev=Flu for d in eachbatch(t, size=bs) x, y = batch_ts(d) y = y[wg.target_idx,:,:] - conv && (x = unsqueeze(x,3)) + # conv && (x = unsqueeze(x,3)) x, y = x |> dev, y |> dev gs = gradient(ps) do l = loss(model(x),y) @@ -379,22 +272,29 @@ multi_step_dense = Chain( plot(single_step_3h, multi_step_dense) # ### Convolutional Neural Network + +# conv_model = Chain( +# Conv((19,), 3=>32, relu), # need to explain why this conv pattern +# x -> Flux.flatten(x), +# Dense(32, 32, relu), +# Dense(32, 1), +# x -> unsqueeze(x, 1) +# ) + conv_model = Chain( - Conv((19,3), 1=>32, relu), # need to explain why this conv pattern + x -> permutedims(x, [2,1,3]), # put data in NTime x NCovariates X NBatch https://github.com/FluxML/Flux.jl/issues/1465 + Conv((3,), 19=>32, relu), # convolve over 3 inputs - 19 variables -> 32 filters x -> Flux.flatten(x), Dense(32, 32, relu), - Dense(32, 1) + Dense(32, 1), + x -> unsqueeze(x, 1) ) 
-single_step_3h = WindowGenerator(3, 1, train_df[1:60,:], valid_df, label_columns=target); - -@time conv_model = train_model!(conv_model, single_step_3h, opt; bs=20, epochs=2, conv=true) - +single_step_3h = WindowGenerator(3, 1, train_df, valid_df, label_columns=target); -#not learning :( but dimensions are right? -# maybe convolutions arent? +@time conv_model = train_model!(conv_model, single_step_3h, opt; bs=32, epochs=20, conv=true) -# https://github.com/FluxML/Flux.jl/issues/1465 +plot(single_step_3h, conv_model) # ### Recurrent Neural Network diff --git a/tutorials/time_series/window_generator.jl b/tutorials/time_series/window_generator.jl new file mode 100644 index 000000000..0ac2531da --- /dev/null +++ b/tutorials/time_series/window_generator.jl @@ -0,0 +1,64 @@ +import StatsPlots: plot + +""" +Calculates the windows used for modeling, the target index, and features useful for plotting. +""" +mutable struct WindowGenerator + train # training windows + valid # validation windows + h # historical steps in window + f # target steps in window + label_indices # indices for plotting predictions in a window + target_idx::Array{Int, 1} # indices to be predicted +end + +""" +Specify `h` historical points, `f` target points. By default, the target points are assumed to follow after all the historical points (`offset = h`). +By setting `offset = 1`, the targets for each historical point will be the next point in time. 
+""" +function WindowGenerator(h, f, train_df, valid_df, label_columns::Vector{String}; offset=h) + train = slidingwindow(i -> i+offset:i+offset+f-1, Array(train_df)', h, stride=1) + valid = slidingwindow(i -> i+offset:i+offset+f-1, Array(valid_df)', h, stride=1) + + label_indices = (offset + 1):(offset + f) + + target_idx = findall(x->x in label_columns, names(train_df)) + + return WindowGenerator(train, valid, h, f, label_indices, target_idx) +end + +WindowGenerator(h, f, train_df, valid_df, label_columns::String; offset=h) = + WindowGenerator(h, f, train_df, valid_df, label_columns=[label_columns]; offset=offset) + +WindowGenerator(h, f, train_df, valid_df; label_columns, offset=h) = + WindowGenerator(h, f, train_df, valid_df, label_columns; offset=offset) + +""" +Plots the historical data and the target(s) for prediction. +""" +function plot(wg::WindowGenerator) + idx = wg.target_idx[1] # Currently, will only plot the first target index but could be redone to plot all label columns. + sw = wg.valid + plots = [] + for it in 1:3 + i = rand(1:size(sw,1)) + p = plot(sw[i][1][idx, :], leg=false) + scatter!(wg.label_indices,sw[i][2][idx,:]') + it == 3 && xlabel!("Given $(wg.h)h as input, predict $(wg.f)h into the future.") + push!(plots, p) + end + plot(plots..., layout=(3,1)) +end + + +""" +Plots the historical window, the target(s) for prediction, and the model's predictions. 
+""" +function plot(wg::WindowGenerator, model; set=:valid) + data = getfield(wg,set) + i = rand(1:size(data,1)) + plot(1:wg.h, data[i][1][2,:], lab="Inputs", shape=:circ, m=2, leg=:outerright) + scatter!(wg.label_indices, data[i][2][2,:], lab="Labels", c=:green) + z = model(unsqueeze(data[i][1],3))[:] + scatter!(wg.label_indices, z, lab="Predictions", shape=:star5, m=6, c=:orange) +end \ No newline at end of file From 7f225aa827e7e26a65c7c26e5b2e75ba0925272b Mon Sep 17 00:00:00 2001 From: adinhobl Date: Sun, 21 Mar 2021 23:00:55 -0500 Subject: [PATCH 3/6] update tests --- tutorials/time_series/runtests.jl | 42 ++++++++++++++--------- tutorials/time_series/window_generator.jl | 7 ++++ 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/tutorials/time_series/runtests.jl b/tutorials/time_series/runtests.jl index b134a1ae3..782ac131f 100644 --- a/tutorials/time_series/runtests.jl +++ b/tutorials/time_series/runtests.jl @@ -3,39 +3,47 @@ using DataFrames using Random using Dates -include("window_generator.jl") -include("batch_ts.jl") - rng = MersenneTwister(123) -df = DataFrame("timestamp"=>Date(2014,11,15):Day(1):Date(2014, 12, 31), - "A"=>randn(rng, Float16, 47), - "B"=>sin.(rand(rng, 47))) -num_train = 35 -train_df = df[1:num_train,:] -test_df = df[num_train+1:end,:] +times = 150 +df = DataFrame("timestamp"=>1:times, + "A"=>randn(rng, Float16, times), + "B"=>sin.(rand(rng, times)), + "C"=>1:times) +train_prop = 0.8 +train_end = Int(times*train_prop) +train_df = df[1:train_end,:] +valid_df = df[train_end+1:end,:] + # WindowGenerator +include("window_generator.jl") @testset "historical window of length 1" begin - + h = 1 + f = 1 + wg = WindowGenerator(h, f, train_df, valid_df, "C") + @test wg.target_idx == [4] + @test size(wg.train[1][1]) == (4,1) + @test size(wg.valid[1][1]) == (4,1) + @test length(wg.train) == 119 + @test length(wg.valid) == 29 + @test (wg.train[1][1][1]) == (wg.train[1][2][1] - 1) + @test (wg.train[end][1][1]) == (wg.train[end][2][1] - 
1) end -@testset "multiple label columns" begin +@testset "historical window of length 10" begin end -@testset "" begin +@testset "multiple label columns" begin end # batch_ts +include("batch_ts.jl") @testset "" begin -end - - -WindowGenerator(6, 1, train_df, valid_df, label_columns=["T (degC)"]) -WindowGenerator(6, 1, train_df, valid_df, label_columns=["T (degC)", "Wx"]) \ No newline at end of file +end \ No newline at end of file diff --git a/tutorials/time_series/window_generator.jl b/tutorials/time_series/window_generator.jl index 0ac2531da..e3b6eb8b5 100644 --- a/tutorials/time_series/window_generator.jl +++ b/tutorials/time_series/window_generator.jl @@ -1,3 +1,4 @@ +using MLDataPattern: slidingwindow import StatsPlots: plot """ @@ -27,9 +28,15 @@ function WindowGenerator(h, f, train_df, valid_df, label_columns::Vector{String} return WindowGenerator(train, valid, h, f, label_indices, target_idx) end +""" +WindowGenerator with a single label_column. +""" WindowGenerator(h, f, train_df, valid_df, label_columns::String; offset=h) = WindowGenerator(h, f, train_df, valid_df, label_columns=[label_columns]; offset=offset) +""" +WindowGenerator with multiple label_columns. 
+""" WindowGenerator(h, f, train_df, valid_df; label_columns, offset=h) = WindowGenerator(h, f, train_df, valid_df, label_columns; offset=offset) From 900c3845358b73169963737361e894879a8cfd3e Mon Sep 17 00:00:00 2001 From: adinhobl Date: Sun, 21 Mar 2021 23:01:51 -0500 Subject: [PATCH 4/6] remove conv from training loop --- tutorials/time_series/time_series.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tutorials/time_series/time_series.jl b/tutorials/time_series/time_series.jl index c4e82f58e..9457e524a 100644 --- a/tutorials/time_series/time_series.jl +++ b/tutorials/time_series/time_series.jl @@ -205,12 +205,11 @@ plot(single_step_24h, baseline_model) linear = Dense(size(single_step_1h.train[1][1],1), 1; initW=Flux.glorot_uniform, initb=Flux.zeros) opt = Flux.Optimise.ADAM(0.01) -function train_model!(model, wg::WindowGenerator, opt; epochs=20, bs=16, dev=Flux.gpu, conv=false) +function train_model!(model, wg::WindowGenerator, opt; epochs=20, bs=16, dev=Flux.gpu) model = model |> dev ps = params(model) t = shuffleobs(wg.train) v = batch_ts(getobs(wg.valid)) - # v = conv ? 
((unsqueeze(v[1],3), v[2]) |> dev) : (v |> dev) # handle validation dimensions if conv network local l vl_prev = Inf @@ -218,7 +217,6 @@ function train_model!(model, wg::WindowGenerator, opt; epochs=20, bs=16, dev=Flu for d in eachbatch(t, size=bs) x, y = batch_ts(d) y = y[wg.target_idx,:,:] - # conv && (x = unsqueeze(x,3)) x, y = x |> dev, y |> dev gs = gradient(ps) do l = loss(model(x),y) From 250f5936be3bbac8893ac772662bbf337b22e1b4 Mon Sep 17 00:00:00 2001 From: adinhobl Date: Mon, 22 Mar 2021 21:47:11 -0500 Subject: [PATCH 5/6] separate & add tests --- tutorials/time_series/batch_ts.jl | 9 ++- tutorials/time_series/runtests.jl | 49 ------------- tutorials/time_series/runtests_batch.jl | 45 ++++++++++++ tutorials/time_series/runtests_wg.jl | 91 +++++++++++++++++++++++++ tutorials/time_series/time_series.jl | 2 +- 5 files changed, 145 insertions(+), 51 deletions(-) delete mode 100644 tutorials/time_series/runtests.jl create mode 100644 tutorials/time_series/runtests_batch.jl create mode 100644 tutorials/time_series/runtests_wg.jl diff --git a/tutorials/time_series/batch_ts.jl b/tutorials/time_series/batch_ts.jl index 8104b654c..6c0afd9f9 100644 --- a/tutorials/time_series/batch_ts.jl +++ b/tutorials/time_series/batch_ts.jl @@ -1,3 +1,5 @@ +using Flux: unsqueeze + """ Takes in t, which is an array of tuples of (`sequence`, `target`) where `sequence` is an array of timestamp x features, and target is either an array or a single value. 
Outputs a tuple of batched @@ -21,4 +23,9 @@ julia> size(batch_ts(z)[2]) (1, 1, 2) """ batch_ts(t) = reduce((x, y) -> (cat(x[1], y[1], dims=3), cat(x[2], y[2], dims=3)), t) -batch_ts(t::Tuple) = (unsqueeze(t[1],3), unsqueeze(t[2],3)) # handle batch size of 1 \ No newline at end of file + + +""" +Handles batch of size 1, with a (`sequence`, `target`) tuple +""" +batch_ts(t::Tuple) = (unsqueeze(t[1],3), unsqueeze(t[2],3)) \ No newline at end of file diff --git a/tutorials/time_series/runtests.jl b/tutorials/time_series/runtests.jl deleted file mode 100644 index 782ac131f..000000000 --- a/tutorials/time_series/runtests.jl +++ /dev/null @@ -1,49 +0,0 @@ -using Test -using DataFrames -using Random -using Dates - -rng = MersenneTwister(123) - -times = 150 -df = DataFrame("timestamp"=>1:times, - "A"=>randn(rng, Float16, times), - "B"=>sin.(rand(rng, times)), - "C"=>1:times) -train_prop = 0.8 -train_end = Int(times*train_prop) -train_df = df[1:train_end,:] -valid_df = df[train_end+1:end,:] - - -# WindowGenerator -include("window_generator.jl") -@testset "historical window of length 1" begin - h = 1 - f = 1 - wg = WindowGenerator(h, f, train_df, valid_df, "C") - @test wg.target_idx == [4] - @test size(wg.train[1][1]) == (4,1) - @test size(wg.valid[1][1]) == (4,1) - @test length(wg.train) == 119 - @test length(wg.valid) == 29 - @test (wg.train[1][1][1]) == (wg.train[1][2][1] - 1) - @test (wg.train[end][1][1]) == (wg.train[end][2][1] - 1) -end - - -@testset "historical window of length 10" begin - -end - - -@testset "multiple label columns" begin - -end - - -# batch_ts -include("batch_ts.jl") -@testset "" begin - -end \ No newline at end of file diff --git a/tutorials/time_series/runtests_batch.jl b/tutorials/time_series/runtests_batch.jl new file mode 100644 index 000000000..470cf157f --- /dev/null +++ b/tutorials/time_series/runtests_batch.jl @@ -0,0 +1,45 @@ +using Test + +include("batch_ts.jl") + +@testset "multi batch - single element y" begin + for _ in 1:100 + dim1 = 
rand(1:20) + dim2 = rand(1:20) + num = rand(2:100) # single item batch not used with single element + + tups = [(rand(dim1, dim2), rand()) for i in 1:num] + b_tups = batch_ts(tups) + @test b_tups |> length == 2 + @test size(b_tups[1]) == (dim1, dim2, num) + @test size(b_tups[2]) == (1, 1, num) + end +end + +@testset "multi batch - multi element y" begin + for _ in 1:100 + dim1 = rand(1:20) + dim2 = rand(1:20) + dim3 = rand(1:20) + dim4 = rand(1:20) + num = rand(2:100) # single item batch + + tups = [(rand(dim1, dim2), rand(dim3, dim4)) for i in 1:num] + b_tups = batch_ts(tups) + @test b_tups |> length == 2 + @test size(b_tups[1]) == (dim1, dim2, num) + @test size(b_tups[2]) == (dim3, dim4, num) + end +end + +@testset "single item batch" begin + for _ in 1:100 + dim1 = rand(1:20) + dim2 = rand(1:20) + + tups = (rand(dim1, dim2), rand(dim1, 1)) + b_tups = batch_ts(tups) + @test ndims(b_tups[1]) == ndims(b_tups[2]) == 3 + end +end + diff --git a/tutorials/time_series/runtests_wg.jl b/tutorials/time_series/runtests_wg.jl new file mode 100644 index 000000000..0cd010649 --- /dev/null +++ b/tutorials/time_series/runtests_wg.jl @@ -0,0 +1,91 @@ +using Test +using DataFrames +using Random +using Dates + +rng = MersenneTwister(123) + +times = 150 +df = DataFrame("timestamp"=>1:times, + "A"=>randn(rng, Float16, times), + "B"=>sin.(rand(rng, times)), + "C"=>1:times) +train_prop = 0.8 +train_end = Int(times*train_prop) +train_df = df[1:train_end,:] +valid_df = df[train_end+1:end,:] + + +include("window_generator.jl") +@testset "historical window of length 1, future window 1" begin + h = 1 + f = 1 + wg = WindowGenerator(h, f, train_df, valid_df, "C") + @test wg.target_idx == [4] + @test size(wg.train[1][1]) == (length(names(df)),h) + @test size(wg.valid[1][1]) == (length(names(df)),h) + @test length(wg.train) == train_end - (h+f-1) # truncates necessary first and last sequence points + @test length(wg.valid) == times - train_end - (h+f-1) + @test (wg.train[1][1][1]) == 
(wg.train[1][2][1] - h) + @test (wg.train[end][1][1]) == (wg.train[end][2][1] - h) + @test wg.train[end][1][1,end] == (wg.train[end][2][1,begin] - 1) # 1st pred is immediately after last hist + +end + +@testset "historical window of length 7, future window 5" begin + h = 7 + f = 5 + wg = WindowGenerator(h, f, train_df, valid_df, "C") + @test wg.target_idx == [4] + @test size(wg.train[1][1]) == (length(names(df)),h) + @test size(wg.valid[1][1]) == (length(names(df)),h) + @test length(wg.train) == train_end - (h+f-1) + @test length(wg.valid) == times - train_end - (h+f-1) + @test (wg.train[1][1][1]) == (wg.train[1][2][1] - h) + @test (wg.train[end][1][1]) == (wg.train[end][2][1] - h) + @test wg.train[end][1][1,end] == (wg.train[end][2][1,begin] - 1) # 1st pred is immediately after last hist +end + +@testset "multiple label columns" begin + h = 16 + f = 3 + wg = WindowGenerator(h, f, train_df, valid_df, "C") + @test length(wg.target_idx) == 1 + wg = WindowGenerator(h, f, train_df, valid_df, ["A","C"]) + @test length(wg.target_idx) == 2 + wg = WindowGenerator(h, f, train_df, valid_df, ["A","C","B"]) + @test length(wg.target_idx) == 3 + wg = WindowGenerator(h, f, train_df, valid_df, ["timestamp","A","C","B"]) + @test length(wg.target_idx) == 4 +end + +@testset "ignores nonexistent/duplicate columns" begin + h = 5 + f = 3 + wg = WindowGenerator(h, f, train_df, valid_df, ["timestamp","A","C","B","D"]) + @test length(wg.target_idx) == 4 + + wg = WindowGenerator(h, f, train_df, valid_df, ["timestamp","timestamp"]) + @test length(wg.target_idx) == 1 +end + +@testset "labels_indices are correct" begin + valid_end = times - train_end # limited by size of validation set + for h in 1:(valid_end - 1) + for f in 1:(valid_end - h) + wg = WindowGenerator(h, f, train_df, valid_df, ["timestamp","A","C","B"]) + @test f == length(wg.label_indices) + @test first(wg.label_indices) == h + 1 + @test last(wg.label_indices) == h + f + end + end +end + + + + +# batch_ts +include("batch_ts.jl") 
+@testset "" begin + +end \ No newline at end of file diff --git a/tutorials/time_series/time_series.jl b/tutorials/time_series/time_series.jl index 9457e524a..448385663 100644 --- a/tutorials/time_series/time_series.jl +++ b/tutorials/time_series/time_series.jl @@ -290,7 +290,7 @@ conv_model = Chain( single_step_3h = WindowGenerator(3, 1, train_df, valid_df, label_columns=target); -@time conv_model = train_model!(conv_model, single_step_3h, opt; bs=32, epochs=20, conv=true) +@time conv_model = train_model!(conv_model, single_step_3h, opt; bs=32, epochs=20) plot(single_step_3h, conv_model) From dfeb917cda264d11f03f454f0e0e6921e2c6cdf9 Mon Sep 17 00:00:00 2001 From: adinhobl Date: Tue, 23 Mar 2021 23:13:54 -0500 Subject: [PATCH 6/6] add notebook, update script --- tutorials/time_series/time_series.ipynb | 19314 ++++++++++++++++++++ tutorials/time_series/time_series.jl | 107 +- tutorials/time_series/window_generator.jl | 10 +- 3 files changed, 19399 insertions(+), 32 deletions(-) create mode 100644 tutorials/time_series/time_series.ipynb diff --git a/tutorials/time_series/time_series.ipynb b/tutorials/time_series/time_series.ipynb new file mode 100644 index 000000000..f060b60a0 --- /dev/null +++ b/tutorials/time_series/time_series.ipynb @@ -0,0 +1,19314 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Time series forecasting\n", + "This tutorial is inspired by a [Tensorflow Time Series Forecasting Tutorial](https://www.tensorflow.org/tutorials/structured_data/time_series).\n", + "It implements the data analysis, plotting, and model training from scratch using Julia tools such as Plots.jl, Flux, and DataFrames.jl." + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Setup\n", + "Importing the usual suspects. Two interesting notes:\n", + "* StatsPlots re-exports Plots.jl functions, so we don't need that separately\n", + "* MLDataPattern is useful for preprocessing data for ML in a lazy (read: non-memory intensive) way. 
We will be seeing more of it." + ], + "metadata": {} + }, + { + "outputs": [], + "cell_type": "code", + "source": [ + "using ZipFile\n", + "using CSV\n", + "using DataFrames\n", + "using StatsPlots\n", + "using Dates\n", + "using FFTW\n", + "using CUDA\n", + "using Flux\n", + "using MLDataPattern\n", + "\n", + "using Random: seed!\n", + "using Statistics: mean, std\n", + "using Flux: unsqueeze" + ], + "metadata": {}, + "execution_count": 1 + }, + { + "cell_type": "markdown", + "source": [ + "Set a seed to make this reproducible." + ], + "metadata": {} + }, + { + "outputs": [], + "cell_type": "code", + "source": [ + "seed!(4231)\n", + "\n", + "ENV[\"LINES\"] = 20;" + ], + "metadata": {}, + "execution_count": 2 + }, + { + "cell_type": "markdown", + "source": [ + "Disallow scalar indexing for CUDA - prevents slow accesses to GPU memory" + ], + "metadata": {} + }, + { + "outputs": [], + "cell_type": "code", + "source": [ + "CUDA.allowscalar(false)" + ], + "metadata": {}, + "execution_count": 3 + }, + { + "cell_type": "markdown", + "source": [ + "## The weather dataset\n", + "We are going to be using 8 years (2009-2016) of weather data from the Max Planck Institute for Biogeochemistry in Jena: https://www.bgc-jena.mpg.de/wetter/" + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "download_data (generic function with 1 method)" + }, + "metadata": {}, + "execution_count": 4 + } + ], + "cell_type": "code", + "source": [ + "function download_data(; fname=\"jena_climate_2009_2016.zip\")\n", + " DATA_PATH = \"https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip\"\n", + " isfile(fname) || download(DATA_PATH, fname)\n", + "\n", + " zip = ZipFile.Reader(fname)\n", + " csv = zip.files[1]\n", + " df = CSV.File(csv) |> DataFrame\n", + " close(zip)\n", + " return df\n", + "end" + ], + "metadata": {}, + "execution_count": 4 + }, + { + "cell_type": "markdown", + "source": [ + "More discussion on reading zipped CSVs here: 
https://juliadata.github.io/CSV.jl/stable/index.html#Example:-reading-from-a-zip-file-1" + ], + "metadata": {} + }, + { + "outputs": [], + "cell_type": "code", + "source": [ + "df = download_data();\n", + "\n", + "df = df[6:6:end, :]\n", + "\n", + "df[!,\"Date Time\"] = Dates.DateTime.(df[:,\"Date Time\"], \"dd.mm.yyyy HH:MM:SS\");" + ], + "metadata": {}, + "execution_count": 5 + }, + { + "cell_type": "markdown", + "source": [ + "More info on working with dates and times: https://en.wikibooks.org/wiki/Introducing_Julia/Working_with_dates_and_times" + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "\u001b[1m70091×4 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m Date Time \u001b[0m\u001b[1m T (degC) \u001b[0m\u001b[1m p (mbar) \u001b[0m\u001b[1m rho (g/m**3) \u001b[0m\n\u001b[1m \u001b[0m│\u001b[90m DateTime \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\n───────┼───────────────────────────────────────────────────────\n 1 │ 2009-01-01T01:00:00 -8.05 996.5 1307.86\n 2 │ 2009-01-01T02:00:00 -8.88 996.62 1312.25\n 3 │ 2009-01-01T03:00:00 -8.81 996.84 1312.18\n 4 │ 2009-01-01T04:00:00 -9.05 996.99 1313.61\n 5 │ 2009-01-01T05:00:00 -9.63 997.46 1317.19\n 6 │ 2009-01-01T06:00:00 -9.67 997.71 1317.71\n ⋮ │ ⋮ ⋮ ⋮ ⋮\n 70087 │ 2016-12-31T19:10:00 -0.98 1002.18 1280.7\n 70088 │ 2016-12-31T20:10:00 -1.4 1001.4 1281.87\n 70089 │ 2016-12-31T21:10:00 -2.75 1001.19 1288.02\n 70090 │ 2016-12-31T22:10:00 -2.89 1000.65 1288.03\n 70091 │ 2016-12-31T23:10:00 -3.93 1000.11 1292.41\n\u001b[36m 70080 rows omitted\u001b[0m", + "text/html": [ + "

70,091 rows × 4 columns

Date TimeT (degC)p (mbar)rho (g/m**3)
DateTimeFloat64Float64Float64
12009-01-01T01:00:00-8.05996.51307.86
22009-01-01T02:00:00-8.88996.621312.25
32009-01-01T03:00:00-8.81996.841312.18
42009-01-01T04:00:00-9.05996.991313.61
52009-01-01T05:00:00-9.63997.461317.19
62009-01-01T06:00:00-9.67997.711317.71
72009-01-01T07:00:00-9.17998.331315.98
82009-01-01T08:00:00-8.1999.171311.65
92009-01-01T09:00:00-7.66999.691310.14
102009-01-01T10:00:00-7.041000.271307.76
112009-01-01T11:00:00-7.411000.871310.43
122009-01-01T12:00:00-6.871000.31306.98
132009-01-01T13:00:00-5.891000.031301.73
142009-01-01T14:00:00-5.94999.811301.67
152009-01-01T15:00:00-5.69999.881300.51
162009-01-01T16:00:00-5.4999.941299.17
172009-01-01T17:00:00-5.371000.171299.27
182009-01-01T18:00:00-5.251000.161298.68
192009-01-01T19:00:00-5.111000.221298.07
202009-01-01T20:00:00-4.91000.221297.05
" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], + "cell_type": "code", + "source": [ + "col = [\"Date Time\", \"T (degC)\", \"p (mbar)\", \"rho (g/m**3)\"]\n", + "\n", + "df[:,col]" + ], + "metadata": {}, + "execution_count": 6 + }, + { + "cell_type": "markdown", + "source": [ + "One easy way to plot data from a datafrom is with the StatsPlots `@df` macro" + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "Plot{Plots.GRBackend() n=3}", + "image/png": "", + "text/html": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + 
"\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ], + "cell_type": "code", + "source": [ + "@df df plot(cols(1), cols(2:4); layout=(3, 1))\n", + "\n", + "@df df[1:480,:] plot(cols(1), cols(2:4); layout=(3, 1))" + ], + "metadata": {}, + "execution_count": 7 + }, + { + "cell_type": "markdown", + "source": [ + "## Inspect and cleanup" + ], + "metadata": {} + }, + { + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "describe(df) = 15×7 DataFrame\n", + " Row │ variable mean min median max nmissing eltype\n", + " │ Symbol Union… Any Union… Any Int64 DataType\n", + "─────┼────────────────────────────────────────────────────────────────────────────────────────────────\n", + " 1 │ Date Time 2009-01-01T01:00:00 2016-12-31T23:10:00 0 DateTime\n", + " 2 │ p (mbar) 989.213 913.6 989.57 1015.29 0 Float64\n", + " 3 │ T (degC) 9.45048 -22.76 9.41 37.28 0 Float64\n", + " 4 │ Tpot (K) 283.493 250.85 283.46 311.21 0 Float64\n", + " 5 │ Tdew (degC) 4.95647 -24.8 5.21 23.06 0 Float64\n", + " 6 │ rh (%) 76.0098 13.88 79.3 100.0 0 Float64\n", + " 7 │ VPmax (mbar) 13.5766 0.97 11.82 63.77 0 Float64\n", + " 8 │ VPact (mbar) 9.53397 0.81 8.86 28.25 0 Float64\n", + " 9 │ VPdef (mbar) 4.04254 0.0 2.19 46.01 0 Float64\n", + " 10 │ sh (g/kg) 6.02256 0.51 5.59 18.07 0 Float64\n", + " 11 │ H2OC (mmol/mol) 9.64044 0.81 8.96 28.74 0 Float64\n", + " 12 │ rho (g/m**3) 1216.06 1059.45 1213.8 1393.54 0 Float64\n", + " 13 │ wv (m/s) 1.70257 -9999.0 1.76 14.01 0 Float64\n", + " 14 │ max. 
wv (m/s) 2.96304 -9999.0 2.98 23.5 0 Float64\n", + " 15 │ wd (deg) 174.789 0.0 198.1 360.0 0 Float64\n" + ] + } + ], + "cell_type": "code", + "source": [ + "@show describe(df)\n", + "\n", + "df.\"p (mbar)\"; #cool that this works" + ], + "metadata": {}, + "execution_count": 8 + }, + { + "cell_type": "markdown", + "source": [ + "The wind-speed columns (`wv (m/s)` and `max. wv (m/s)`) have a minimum of -9999.0, which is a placeholder for bad readings rather than a real velocity. Replace those values with 0." + ], + "metadata": {} + }, + { + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "describe(df) = 15×7 DataFrame\n", + " Row │ variable mean min median max nmissing eltype\n", + " │ Symbol Union… Any Union… Any Int64 DataType\n", + "─────┼────────────────────────────────────────────────────────────────────────────────────────────────\n", + " 1 │ Date Time 2009-01-01T01:00:00 2016-12-31T23:10:00 0 DateTime\n", + " 2 │ p (mbar) 989.213 913.6 989.57 1015.29 0 Float64\n", + " 3 │ T (degC) 9.45048 -22.76 9.41 37.28 0 Float64\n", + " 4 │ Tpot (K) 283.493 250.85 283.46 311.21 0 Float64\n", + " 5 │ Tdew (degC) 4.95647 -24.8 5.21 23.06 0 Float64\n", + " 6 │ rh (%) 76.0098 13.88 79.3 100.0 0 Float64\n", + " 7 │ VPmax (mbar) 13.5766 0.97 11.82 63.77 0 Float64\n", + " 8 │ VPact (mbar) 9.53397 0.81 8.86 28.25 0 Float64\n", + " 9 │ VPdef (mbar) 4.04254 0.0 2.19 46.01 0 Float64\n", + " 10 │ sh (g/kg) 6.02256 0.51 5.59 18.07 0 Float64\n", + " 11 │ H2OC (mmol/mol) 9.64044 0.81 8.96 28.74 0 Float64\n", + " 12 │ rho (g/m**3) 1216.06 1059.45 1213.8 1393.54 0 Float64\n", + " 13 │ wv (m/s) 2.13054 0.0 1.76 14.01 0 Float64\n", + " 14 │ max. 
wv (m/s) 3.53367 0.0 2.98 23.5 0 Float64\n", + " 15 │ wd (deg) 174.789 0.0 198.1 360.0 0 Float64\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": "\u001b[1m15×7 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m variable \u001b[0m\u001b[1m mean \u001b[0m\u001b[1m min \u001b[0m\u001b[1m median \u001b[0m\u001b[1m max \u001b[0m ⋯\n\u001b[1m \u001b[0m│\u001b[90m Symbol \u001b[0m\u001b[90m Union… \u001b[0m\u001b[90m Any \u001b[0m\u001b[90m Union… \u001b[0m\u001b[90m Any \u001b[0m ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n 1 │ Date Time \u001b[90m \u001b[0m 2009-01-01T01:00:00 \u001b[90m \u001b[0m 2016-12-31T23:10 ⋯\n 2 │ p (mbar) 989.213 913.6 989.57 1015.29\n 3 │ T (degC) 9.45048 -22.76 9.41 37.28\n 4 │ Tpot (K) 283.493 250.85 283.46 311.21\n 5 │ Tdew (degC) 4.95647 -24.8 5.21 23.06 ⋯\n 6 │ rh (%) 76.0098 13.88 79.3 100.0\n ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱\n 11 │ H2OC (mmol/mol) 9.64044 0.81 8.96 28.74\n 12 │ rho (g/m**3) 1216.06 1059.45 1213.8 1393.54\n 13 │ wv (m/s) 2.13054 0.0 1.76 14.01 ⋯\n 14 │ max. wv (m/s) 3.53367 0.0 2.98 23.5\n 15 │ wd (deg) 174.789 0.0 198.1 360.0\n\u001b[36m 3 columns and 4 rows omitted\u001b[0m", + "text/html": [ + "

15 rows × 7 columns (omitted printing of 1 columns)

variablemeanminmedianmaxnmissing
SymbolUnion…AnyUnion…AnyInt64
1Date Time2009-01-01T01:00:002016-12-31T23:10:000
2p (mbar)989.213913.6989.571015.290
3T (degC)9.45048-22.769.4137.280
4Tpot (K)283.493250.85283.46311.210
5Tdew (degC)4.95647-24.85.2123.060
6rh (%)76.009813.8879.3100.00
7VPmax (mbar)13.57660.9711.8263.770
8VPact (mbar)9.533970.818.8628.250
9VPdef (mbar)4.042540.02.1946.010
10sh (g/kg)6.022560.515.5918.070
11H2OC (mmol/mol)9.640440.818.9628.740
12rho (g/m**3)1216.061059.451213.81393.540
13wv (m/s)2.130540.01.7614.010
14max. wv (m/s)3.533670.02.9823.50
15wd (deg)174.7890.0198.1360.00
" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ], + "cell_type": "code", + "source": [ + "replace!(df[!,\"wv (m/s)\"], -9999.0 => 0);\n", + "replace!(df[!,\"max. wv (m/s)\"], -9999.0 => 0);\n", + "\n", + "@show describe(df)" + ], + "metadata": {}, + "execution_count": 9 + }, + { + "cell_type": "markdown", + "source": [ + "## Feature engineering\n", + "Bucket wind direction and speed" + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "Plot{Plots.GRBackend() n=1}", + "image/png": "", + "text/html": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ], + "cell_type": "code", + "source": [ + "histogram2d(df[!,\"wd (deg)\"], df[!,\"wv (m/s)\"], bins=(75,75), xlabel=\"wd (deg)\", ylabel=\"wv (m/s)\")" + ], + "metadata": {}, + "execution_count": 10 + }, + { + "cell_type": "markdown", + "source": [ + "It would be better to decompose this into X and Y velocities" + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "Plot{Plots.GRBackend() n=1}", + "image/png": "", + 
"text/html": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ], + "cell_type": "code", + "source": [ + "wd_rad = df[!,\"wd (deg)\"] * π / 180\n", + "df.Wx = df[!,\"wv (m/s)\"] .* cos.(wd_rad)\n", + "df.Wy = df[!,\"wv (m/s)\"] .* sin.(wd_rad)\n", + "df.\"max Wx\" = df[!,\"max. wv (m/s)\"] .* cos.(wd_rad)\n", + "df.\"max Wy\" = df[!,\"max. wv (m/s)\"] .* sin.(wd_rad);\n", + "\n", + "histogram2d(df.Wx, df.Wy, bins=(75,75), xlabel=\"Wind X [m/s]\", ylabel=\"Wind Y [m/s]\")" + ], + "metadata": {}, + "execution_count": 11 + }, + { + "cell_type": "markdown", + "source": [ + "Modify timestamp into a continuous \"time of day\" signal to deal with periodicity." 
+ ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "Plot{Plots.GRBackend() n=2}", + "image/png": "", + "text/html": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "metadata": {}, + "execution_count": 12 + } + ], + "cell_type": "code", + "source": [ + "timestamp_s = Dates.datetime2unix.(df.\"Date Time\");\n", + "\n", + "day = 24*60*60 #seconds in a day\n", + "year = 365.2425 * day #seconds in a year\n", + "\n", + "df[!,\"Day sin\"] = sin.(timestamp_s * (2 * π / day))\n", + "df[!,\"Day cos\"] = cos.(timestamp_s * (2 * π / day))\n", + "df[!,\"Year sin\"] = sin.(timestamp_s * (2 * π / year))\n", + "df[!,\"Year cos\"] = cos.(timestamp_s * (2 * π / year));\n", + "\n", + "plot(df[1:25,\"Day sin\"], legend=false)\n", + "plot!(df[1:25,\"Day cos\"])\n", + "xlabel!(\"Time [h]\")\n", + "title!(\"Time of Day Signal\")" + ], + "metadata": {}, + "execution_count": 12 + }, + { + "cell_type": "markdown", + "source": [ + "If you don't know the frequency to choose for your periodicity, you can take the FFT.\n", + "Here you can see that the yearly and daily periodicities are the most prominent." 
+ ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "Plot{Plots.GRBackend() n=1}", + "image/png": "", + "text/html": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "cell_type": "code", + "source": [ + "fftrans = FFTW.rfft(df[!,\"T (degC)\"])\n", + "f_per_dataset = 1:size(fftrans)[1]\n", + "\n", + "n_samples_h = size(df[!,\"T (degC)\"])[1]\n", + "hours_per_year = 24 * 365.2524\n", + "years_per_dataset = n_samples_h / hours_per_year\n", + "f_per_year = f_per_dataset / years_per_dataset;\n", + "\n", + "plot(f_per_year, abs.(fftrans), xscale=:log10, ylim=(0, 400000), xlim=(0.3,Inf), leg=false)\n", + "xticks!([1, 365.2524], [\"1/Year\", \"1/Day\"])\n", + "xlabel!(\"Frequency (log scale)\")" + ], + "metadata": {}, + "execution_count": 13 + }, + { + "cell_type": "markdown", + "source": [ + "## Split the data\n", + "Drop columns that won't be used further." 
+ ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "\u001b[1m70091×19 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m p (mbar) \u001b[0m\u001b[1m T (degC) \u001b[0m\u001b[1m Tpot (K) \u001b[0m\u001b[1m Tdew (degC) \u001b[0m\u001b[1m rh (%) \u001b[0m\u001b[1m VPmax (mbar) \u001b[0m\u001b[1m VPa\u001b[0m ⋯\n\u001b[1m \u001b[0m│\u001b[90m Float32 \u001b[0m\u001b[90m Float32 \u001b[0m\u001b[90m Float32 \u001b[0m\u001b[90m Float32 \u001b[0m\u001b[90m Float32 \u001b[0m\u001b[90m Float32 \u001b[0m\u001b[90m Flo\u001b[0m ⋯\n───────┼────────────────────────────────────────────────────────────────────────\n 1 │ 996.5 -8.05 265.38 -8.78 94.4 3.33 ⋯\n 2 │ 996.62 -8.88 264.54 -9.77 93.2 3.12\n 3 │ 996.84 -8.81 264.59 -9.66 93.5 3.13\n 4 │ 996.99 -9.05 264.34 -10.02 92.6 3.07\n 5 │ 997.46 -9.63 263.72 -10.65 92.2 2.94 ⋯\n 6 │ 997.71 -9.67 263.66 -10.62 92.7 2.93\n ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱\n 70087 │ 1002.18 -0.98 272.01 -5.36 72.0 5.69\n 70088 │ 1001.4 -1.4 271.66 -6.84 66.29 5.51\n 70089 │ 1001.19 -2.75 270.32 -6.9 72.9 4.99 ⋯\n 70090 │ 1000.65 -2.89 270.22 -7.15 72.3 4.93\n 70091 │ 1000.11 -3.93 269.23 -8.09 72.6 4.56\n\u001b[36m 13 columns and 70080 rows omitted\u001b[0m", + "text/html": [ + "

70,091 rows × 19 columns (omitted printing of 12 columns)

p (mbar)T (degC)Tpot (K)Tdew (degC)rh (%)VPmax (mbar)VPact (mbar)
Float32Float32Float32Float32Float32Float32Float32
1996.5-8.05265.38-8.7894.43.333.14
2996.62-8.88264.54-9.7793.23.122.9
3996.84-8.81264.59-9.6693.53.132.93
4996.99-9.05264.34-10.0292.63.072.85
5997.46-9.63263.72-10.6592.22.942.71
6997.71-9.67263.66-10.6292.72.932.71
7998.33-9.17264.12-10.192.93.042.83
8999.17-8.1265.12-9.0592.83.313.07
9999.69-7.66265.52-8.8491.23.433.13
101000.27-7.04266.1-8.1791.63.63.3
111000.87-7.41265.68-8.6690.73.53.17
121000.3-6.87266.27-8.2889.63.643.27
131000.03-5.89267.27-7.4688.63.933.48
14999.81-5.94267.24-7.4389.13.923.49
15999.88-5.69267.48-7.090.43.993.61
16999.94-5.4267.76-6.8689.44.083.65
171000.17-5.37267.78-6.8289.44.093.66
181000.16-5.25267.9-6.7589.14.133.68
191000.22-5.11268.03-6.5789.44.173.73
201000.22-4.9268.24-6.3889.34.243.79
" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ], + "cell_type": "code", + "source": [ + "select!(df, Not([:(\"wv (m/s)\"),:(\"max. wv (m/s)\"), :(\"wd (deg)\"), :(\"Date Time\")]));\n", + "\n", + "column_indices = pairs(names(df))\n", + "indices_columns = Dict(value => key for (key, value) in column_indices)\n", + "df = convert.(Float32, df) # Don't need high precision; reduces errors later on when using Params - gradients are Float32" + ], + "metadata": {}, + "execution_count": 14 + }, + { + "cell_type": "markdown", + "source": [ + "Split the data into a training and validation set" + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "19" + }, + "metadata": {}, + "execution_count": 15 + } + ], + "cell_type": "code", + "source": [ + "n = size(df)[1]\n", + "train_df = df[1:round(Int,n*0.7, RoundDown),:]\n", + "valid_df = df[round(Int,n*0.7, RoundUp):round(Int,n*0.9, RoundDown),:]\n", + "test_df = df[round(Int,n*0.9, RoundUp):end,:]; # matching TF tutorial exactly, can also use partition\n", + "\n", + "num_features = size(df,2)" + ], + "metadata": {}, + "execution_count": 15 + }, + { + "cell_type": "markdown", + "source": [ + "## Normalize the data" + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "Plot{Plots.GRBackend() n=2}", + "image/png": "", + "text/html": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + 
"\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + 
"\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", 
+ "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ], + "cell_type": "code", + "source": [ + "train_mean = mean.(eachcol(train_df))\n", + "train_std = std.(eachcol(train_df))\n", + "\n", + "train_df = (train_df .- train_mean') ./ train_std'\n", + "valid_df = (valid_df .- train_mean') ./ train_std'\n", + "test_df = (test_df .- train_mean') ./ train_std'\n", + "\n", + "df_std = (df .- train_mean') ./ train_std'\n", + "df_std = stack(df_std)\n", + "\n", + "violin(df_std.variable, df_std.value, xrotation=30.0, legend=false, xticks=:all) # use plotattr() to learn about keywords\n", + "boxplot!(df_std.variable, df_std.value, fillalpha=0.75, outliers=false)" + ], + "metadata": {}, + "execution_count": 16 + }, + { + "cell_type": "markdown", + "source": [ + "## Data Windowing\n", + "We will define our own WindowGenerator, some constructors, and plotting functions. The data from the WindowGenerator will be used in training." 
+ ], + "metadata": {} + }, + { + "outputs": [], + "cell_type": "code", + "source": [ + "include(\"window_generator.jl\")\n", + "\n", + "h = 6; # historical window length\n", + "f = 1; # future window length" + ], + "metadata": {}, + "execution_count": 17 + }, + { + "cell_type": "markdown", + "source": [ + "WindowGenerator makes use of MLDataPattern's `slidingwindow` to generate the windows. It is good at flexibly generating sequences data, though requires a bit of mind-bending to fully understand how it generates sequences.\n", + "[slidingwindow docs](https://mldatapatternjl.readthedocs.io/en/latest/documentation/dataview.html?highlight=slidingwindow#labeled-windows)" + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "49057-element slidingwindow(::Main.##722.var\"#18#19\", adjoint(::Matrix{Float32}), 6, stride = 1) with eltype Tuple:\n ([0.9453041 0.95976573 … 1.0610048 1.0911344; -1.9824733 -2.078372 … -2.1650276 -2.1696491; … ; -0.061052326 -0.060029317 … -0.056960337 -0.055937365; 1.428434 1.4284236 … 1.4283878 1.4283743], [1.1658554; -2.1118789; … ; -0.0549144; 1.4283602])\n ([0.95976573 0.9862837 … 1.0911344 1.1658554; -2.078372 -2.0702844 … -2.1696491 -2.1118789; … ; -0.060029317 -0.059006315 … -0.055937365 -0.0549144; 1.4284236 1.4284124 … 1.4283743 1.4283602], [1.2670871; -1.9882501; … ; -0.053891443; 1.4283456])\n ([0.9862837 1.0043571 … 1.1658554 1.2670871; -2.0702844 -2.098014 … -2.1118789 -1.9882501; … ; -0.059006315 -0.05798332 … -0.0549144 -0.053891443; 1.4284124 1.4284005 … 1.4283602 1.4283456], [1.3297591; -1.9374124; … ; -0.0528685; 1.42833])\n ([1.0043571 1.0610048 … 1.2670871 1.3297591; -2.098014 -2.1650276 … -1.9882501 -1.9374124; … ; -0.05798332 -0.056960337 … -0.053891443 -0.0528685; 1.4284005 1.4283878 … 1.4283456 1.42833], [1.399662; -1.865777; … ; -0.05184557; 1.4283137])\n ([1.0610048 1.0911344 … 1.3297591 1.399662; -2.1650276 -2.1696491 … -1.9374124 -1.865777; … ; -0.056960337 
-0.055937365 … -0.0528685 -0.05184557; 1.4283878 1.4283743 … 1.42833 1.4283137], [1.4719703; -1.9085271; … ; -0.050822653; 1.4282966])\n ([1.0911344 1.1658554 … 1.399662 1.4719703; -2.1696491 -2.1118789 … -1.865777 -1.9085271; … ; -0.055937365 -0.0549144 … -0.05184557 -0.050822653; 1.4283743 1.4283602 … 1.4283137 1.4282966], [1.4032738; -1.846135; … ; -0.049799744; 1.428279])\n ([1.1658554 1.2670871 … 1.4719703 1.4032738; -2.1118789 -1.9882501 … -1.9085271 -1.846135; … ; -0.0549144 -0.053891443 … -0.050822653 -0.049799744; 1.4283602 1.4283456 … 1.4282966 1.428279], [1.3707387; -1.732905; … ; -0.04877685; 1.4282606])\n ([1.2670871 1.3297591 … 1.4032738 1.3707387; -1.9882501 -1.9374124 … -1.846135 -1.732905; … ; -0.053891443 -0.0528685 … -0.049799744 -0.04877685; 1.4283456 1.42833 … 1.428279 1.4282606], [1.3442208; -1.738682; … ; -0.04775397; 1.4282415])\n ⋮\n ([0.032976408 0.0076648267 … -0.033307422 0.06551882; 1.0135001 1.3786092 … 1.6616842 1.0550948; … ; -0.84815776 -0.84901774 … -0.8515952 -0.8524536; -1.158996 -1.1584501 … -1.156809 -1.1562608], [0.05105716; 0.97999334; … ; -0.8533115; -1.1557119])\n ([0.0076648267 -0.04295098 … 0.06551882 0.05105716; 1.3786092 1.5819609 … 1.0550948 0.97999334; … ; -0.84901774 -0.8498773 … -0.8524536 -0.8533115; -1.1584501 -1.1579037 … -1.1562608 -1.1557119], [0.08359221; 0.9372431; … ; -0.85416913; -1.1551625])\n ([-0.04295098 -0.033307422 … 0.05105716 0.08359221; 1.5819609 1.6940356 … 0.97999334 0.9372431; … ; -0.8498773 -0.85073644 … -0.8533115 -0.85416913; -1.1579037 -1.1573567 … -1.1557119 -1.1551625], [0.16916317; 0.90489185; … ; -0.85502625; -1.1546124])\n ([-0.033307422 -0.033307422 … 0.08359221 0.16916317; 1.6940356 1.6616842 … 0.9372431 0.90489185; … ; -0.85073644 -0.8515952 … -0.85416913 -0.85502625; -1.1573567 -1.156809 … -1.1551625 -1.1546124], [0.26678303; 0.87947273; … ; -0.855883; -1.1540618])\n ([-0.033307422 0.06551882 … 0.16916317 0.26678303; 1.6616842 1.0550948 … 0.90489185 0.87947273; … ; -0.8515952 
-0.8524536 … -0.85502625 -0.855883; -1.156809 -1.1562608 … -1.1546124 -1.1540618], [0.2173736; 0.74082386; … ; -0.85673934; -1.1535105])\n ([0.06551882 0.05105716 … 0.26678303 0.2173736; 1.0550948 0.97999334 … 0.87947273 0.74082386; … ; -0.8524536 -0.8533115 … -0.855883 -0.85673934; -1.1562608 -1.1557119 … -1.1540618 -1.1535105], [0.2137545; 0.71078324; … ; -0.85759526; -1.1529586])\n ([0.05105716 0.08359221 … 0.2173736 0.2137545; 0.97999334 0.9372431 … 0.74082386 0.71078324; … ; -0.8533115 -0.85416913 … -0.85673934 -0.85759526; -1.1557119 -1.1551625 … -1.1535105 -1.1529586], [0.16675043; 0.6530128; … ; -0.8584509; -1.1524062])" + }, + "metadata": {}, + "execution_count": 18 + } + ], + "cell_type": "code", + "source": [ + "slidingwindow(i -> i+h:i+h+f-1, Array(train_df)', h, stride=1)" + ], + "metadata": {}, + "execution_count": 18 + }, + { + "cell_type": "markdown", + "source": [ + "WindowGenerator is implemented to make sequence-generating simpler. It uses `slidingwindow` behind the scenes to create sequences." + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": " Calculates the windows used for modeling, the target index, and features\n useful for plotting.\n\n Specify \u001b[36mh\u001b[39m historical points, \u001b[36mf\u001b[39m target points. By default, the target points\n are assumed to follow after all the historical points (\u001b[36moffset = h\u001b[39m). By\n setting \u001b[36moffset = 1\u001b[39m, the targets for each historical point will be the next\n point in time.\n\n WindowGenerator with a single label_column.\n\n WindowGenerator with multiple label_columns.", + "text/markdown": "Calculates the windows used for modeling, the target index, and features useful for plotting.\n\nSpecify `h` historical points, `f` target points. By default, the target points are assumed to follow after all the historical points (`offset = h`). 
By setting `offset = 1`, the targets for each historical point will be the next point in time.\n\nWindowGenerator with a single label_column. \n\nWindowGenerator with multiple label_columns.\n" + }, + "metadata": {}, + "execution_count": 19 + } + ], + "cell_type": "code", + "source": [ + "@doc WindowGenerator" + ], + "metadata": {}, + "execution_count": 19 + }, + { + "cell_type": "markdown", + "source": [ + "Create a WindowGenerator" + ], + "metadata": {} + }, + { + "outputs": [], + "cell_type": "code", + "source": [ + "wg = WindowGenerator(6, 1, train_df, valid_df, label_columns=\"T (degC)\");" + ], + "metadata": {}, + "execution_count": 20 + }, + { + "cell_type": "markdown", + "source": [ + "Plotting methods are implemented for raw data and with predictions." + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "Plot{Plots.GRBackend() n=6}", + "image/png": "", + "text/html": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "image/svg+xml": [ + "\n", + "\n", + "\n", + " 
\n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ], + "cell_type": "code", + "source": [ + "plot(wg)" + ], + "metadata": {}, + "execution_count": 21 + }, + { + "cell_type": "markdown", + "source": [ + "We will also make use of the utility function for batching lazily-evaluated timeseries from slidingwindow" + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "Main.##722.batch_ts" + }, + "metadata": {}, + "execution_count": 22 + } + ], + "cell_type": "code", + "source": [ + "include(\"batch_ts.jl\")" + ], + "metadata": {}, + "execution_count": 22 + }, + { + "cell_type": "markdown", + "source": [ + "To understand better what all the above code does, let's look at an imitation training loop and look at everything's dimensions" + ], + "metadata": {} + }, + { + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "x = Float32[0.9862837 1.0043571 1.0610048 1.0911344 1.1658554 1.2670871; -2.0702844 -2.098014 -2.1650276 -2.1696491 
-2.1118789 -1.9882501; -2.1324346 -2.1610887 -2.2321506 -2.2390273 -2.1863046 -2.071688; -2.045187 -2.09682 -2.187178 -2.1828752 -2.108294 -1.9576976; 1.0627385 1.0083753 0.9842138 1.0144156 1.0264965 1.0204563; -1.328843 -1.3366406 -1.3535352 -1.3548348 -1.3405393 -1.3054504; -1.5272249 -1.5462353 -1.5795032 -1.5795032 -1.5509878 -1.4939572; -0.78834784 -0.7821209 -0.7821209 -0.78627217 -0.7841965 -0.7800452; -1.5287029 -1.5474207 -1.5811126 -1.5811126 -1.5549077 -1.4950107; -1.5319922 -1.5531187 -1.5859822 -1.5836347 -1.5578135 -1.4991288; 2.323999 2.3589115 2.4463177 2.4590142 2.416776 2.311058; 0.20798267 0.27034295 0.11226401 0.27458957 1.3088222 -0.0017026255; 0.276266 0.19526656 0.35081816 0.26863295 -0.008001719 -0.006444905; 0.11121804 0.24690734 0.048640482 0.17511468 1.2110392 -0.024096228; 0.32407838 0.14517556 0.40205324 0.37752837 0.057261467 0.0046644583; 1.0000997 1.2248497 1.3661335 1.4143227 1.3661335 1.2248497; 1.0000591 0.70717865 0.366112 0.00010241506 -0.36590716 -0.7069738; -0.059006315 -0.05798332 -0.056960337 -0.055937365 -0.0549144 -0.053891443; 1.4284124 1.4284005 1.4283878 1.4283743 1.4283602 1.4283456]\n", + "\n", + "Float32[0.9453041 0.95976573 0.9862837 1.0043571 1.0610048 1.0911344; -1.9824733 -2.078372 -2.0702844 -2.098014 -2.1650276 -2.1696491; -2.0418866 -2.138164 -2.1324346 -2.1610887 -2.2321506 -2.2390273; -1.9189727 -2.0609639 -2.045187 -2.09682 -2.187178 -2.1828752; 1.1171018 1.0446173 1.0627385 1.0083753 0.9842138 1.0144156; -1.3028512 -1.3301426 -1.328843 -1.3366406 -1.3535352 -1.3548348; -1.477323 -1.5343539 -1.5272249 -1.5462353 -1.5795032 -1.5795032; -0.7904235 -0.78627217 -0.78834784 -0.7821209 -0.7821209 -0.78627217; -1.4800365 -1.5361899 -1.5287029 -1.5474207 -1.5811126 -1.5811126; -1.4826971 -1.5390345 -1.5319922 -1.5531187 -1.5859822 -1.5836347; 2.2185228 2.3257067 2.323999 2.3589115 2.4463177 2.4590142; 0.19340923 0.17298676 0.20798267 0.27034295 0.11226401 0.27458957; 0.2211613 0.22210087 0.276266 0.19526656 
0.35081816 0.26863295; 0.11114045 0.10945824 0.11121804 0.24690734 0.048640482 0.17511468; 0.21792784 0.22779849 0.32407838 0.14517556 0.40205324 0.37752837; 0.36611056 0.70719975 1.0000997 1.2248497 1.3661335 1.4143227; 1.3660687 1.2247943 1.0000591 0.70717865 0.366112 0.00010241506; -0.061052326 -0.060029317 -0.059006315 -0.05798332 -0.056960337 -0.055937365; 1.428434 1.4284236 1.4284124 1.4284005 1.4283878 1.4283743]\n", + "y = Float32[1.3297591; -1.9374124; -2.0258422; -1.9275783; 0.92381036; -1.2898552; -1.4796994; -0.76759124; -1.48378; -1.4873918; 2.2741907; 0.13879544; 0.168457; 0.12114528; 0.17993191; 1.0000997; -0.9998543; -0.0528685; 1.42833]\n", + "\n", + "Float32[1.1658554; -2.1118789; -2.1863046; -2.108294; 1.0264965; -1.3405393; -1.5509878; -0.7841965; -1.5549077; -1.5578135; 2.416776; 1.3088222; -0.008001719; 1.2110392; 0.057261467; 1.3661335; -0.36590716; -0.0549144; 1.4283602]\n", + "\n", + "x = Float32[0.95976573 0.9862837 1.0043571 1.0610048 1.0911344 1.1658554; -2.078372 -2.0702844 -2.098014 -2.1650276 -2.1696491 -2.1118789; -2.138164 -2.1324346 -2.1610887 -2.2321506 -2.2390273 -2.1863046; -2.0609639 -2.045187 -2.09682 -2.187178 -2.1828752 -2.108294; 1.0446173 1.0627385 1.0083753 0.9842138 1.0144156 1.0264965; -1.3301426 -1.328843 -1.3366406 -1.3535352 -1.3548348 -1.3405393; -1.5343539 -1.5272249 -1.5462353 -1.5795032 -1.5795032 -1.5509878; -0.78627217 -0.78834784 -0.7821209 -0.7821209 -0.78627217 -0.7841965; -1.5361899 -1.5287029 -1.5474207 -1.5811126 -1.5811126 -1.5549077; -1.5390345 -1.5319922 -1.5531187 -1.5859822 -1.5836347 -1.5578135; 2.3257067 2.323999 2.3589115 2.4463177 2.4590142 2.416776; 0.17298676 0.20798267 0.27034295 0.11226401 0.27458957 1.3088222; 0.22210087 0.276266 0.19526656 0.35081816 0.26863295 -0.008001719; 0.10945824 0.11121804 0.24690734 0.048640482 0.17511468 1.2110392; 0.22779849 0.32407838 0.14517556 0.40205324 0.37752837 0.057261467; 0.70719975 1.0000997 1.2248497 1.3661335 1.4143227 1.3661335; 1.2247943 1.0000591 
0.70717865 0.366112 0.00010241506 -0.36590716; -0.060029317 -0.059006315 -0.05798332 -0.056960337 -0.055937365 -0.0549144; 1.4284236 1.4284124 1.4284005 1.4283878 1.4283743 1.4283602]\n", + "\n", + "Float32[1.0043571 1.0610048 1.0911344 1.1658554 1.2670871 1.3297591; -2.098014 -2.1650276 -2.1696491 -2.1118789 -1.9882501 -1.9374124; -2.1610887 -2.2321506 -2.2390273 -2.1863046 -2.071688 -2.0258422; -2.09682 -2.187178 -2.1828752 -2.108294 -1.9576976 -1.9275783; 1.0083753 0.9842138 1.0144156 1.0264965 1.0204563 0.92381036; -1.3366406 -1.3535352 -1.3548348 -1.3405393 -1.3054504 -1.2898552; -1.5462353 -1.5795032 -1.5795032 -1.5509878 -1.4939572 -1.4796994; -0.7821209 -0.7821209 -0.78627217 -0.7841965 -0.7800452 -0.76759124; -1.5474207 -1.5811126 -1.5811126 -1.5549077 -1.4950107 -1.48378; -1.5531187 -1.5859822 -1.5836347 -1.5578135 -1.4991288 -1.4873918; 2.3589115 2.4463177 2.4590142 2.416776 2.311058 2.2741907; 0.27034295 0.11226401 0.27458957 1.3088222 -0.0017026255 0.13879544; 0.19526656 0.35081816 0.26863295 -0.008001719 -0.006444905 0.168457; 0.24690734 0.048640482 0.17511468 1.2110392 -0.024096228 0.12114528; 0.14517556 0.40205324 0.37752837 0.057261467 0.0046644583 0.17993191; 1.2248497 1.3661335 1.4143227 1.3661335 1.2248497 1.0000997; 0.70717865 0.366112 0.00010241506 -0.36590716 -0.7069738 -0.9998543; -0.05798332 -0.056960337 -0.055937365 -0.0549144 -0.053891443 -0.0528685; 1.4284005 1.4283878 1.4283743 1.4283602 1.4283456 1.42833]\n", + "y = Float32[1.2670871; -1.9882501; -2.071688; -1.9576976; 1.0204563; -1.3054504; -1.4939572; -0.7800452; -1.4950107; -1.4991288; 2.311058; -0.0017026255; -0.006444905; -0.024096228; 0.0046644583; 1.2248497; -0.7069738; -0.053891443; 1.4283456]\n", + "\n", + "Float32[1.399662; -1.865777; -1.9593625; -1.8314834; 0.9479719; -1.2677623; -1.4393027; -0.76759124; -1.4463444; -1.4498336; 2.2160816; 0.5720067; -0.6065562; 0.67631173; -0.7864484; 0.70719975; -1.2245895; -0.05184557; 1.4283137]\n", + "\n" + ] + } + ], + "cell_type": 
"code", + "source": [ + "practice_df = train_df[1:10,:]\n", + "a = slidingwindow(i -> i+h:i+h+f-1, Array(practice_df)', h, stride=1)\n", + "\n", + "for i in eachbatch(shuffleobs(a), size=2)\n", + " (x,y) = batch_ts(i)\n", + " @show x\n", + " @show y\n", + " println()\n", + "end" + ], + "metadata": {}, + "execution_count": 23 + }, + { + "cell_type": "markdown", + "source": [ + "That took the data at 10 timestamps (`train_df[1:10,:]`) and threw it into 4 'observations' using all the data it could while requesting 6 historical and 1 target point. This works out to the following:\n", + "* (1,2,3,4,5,6), 7\n", + "* (2,3,4,5,6,7), 8\n", + "* (3,4,5,6,7,8), 9\n", + "* (4,5,6,7,8,9), 10\n", + "\n", + "It then shuffles these observations, and lazily batches them into 2 batches each of size 2." + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "# Single Step Models" + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "loss (generic function with 1 method)" + }, + "metadata": {}, + "execution_count": 24 + } + ], + "cell_type": "code", + "source": [ + "loss(x,y) = Flux.Losses.mse(x, y)" + ], + "metadata": {}, + "execution_count": 24 + }, + { + "cell_type": "markdown", + "source": [ + "### Baseline - 1h" + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "\"T (degC)\"" + }, + "metadata": {}, + "execution_count": 25 + } + ], + "cell_type": "code", + "source": [ + "struct Baseline\n", + " label_index::Int\n", + "end\n", + "\n", + "(m::Baseline)(x) = x[m.label_index,:,:]\n", + "\n", + "target = \"T (degC)\"" + ], + "metadata": {}, + "execution_count": 25 + }, + { + "cell_type": "markdown", + "source": [ + "Since this model repeats the last point, we make slidingwindows with 1 historical and 1 target point."
+ ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "Main.##722.Baseline(2)" + }, + "metadata": {}, + "execution_count": 26 + } + ], + "cell_type": "code", + "source": [ + "single_step_1h = WindowGenerator(1, 1, train_df, valid_df, label_columns=target);\n", + "\n", + "baseline_model = Baseline(wg.target_idx[1])" + ], + "metadata": {}, + "execution_count": 26 + }, + { + "cell_type": "markdown", + "source": [ + "Demonstrate the 'training error' for this model, although there is no actual training." + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "0.013971615f0" + }, + "metadata": {}, + "execution_count": 27 + } + ], + "cell_type": "code", + "source": [ + "function run_single_step_baseline(wg, model)\n", + " preds = Float32[]\n", + " reals = Float32[]\n", + " for (x,y) in wg.train\n", + " val = model(x)[1]\n", + " push!(preds, val)\n", + " push!(reals, y[model.label_index])\n", + " end\n", + "\n", + " l = loss(preds, reals)\n", + " return l\n", + "end\n", + "\n", + "run_single_step_baseline(single_step_1h, baseline_model)" + ], + "metadata": {}, + "execution_count": 27 + }, + { + "cell_type": "markdown", + "source": [ + "### Baseline - 24h\n", + "Let's try to predict the next hour's value for 24 hours" + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "Plot{Plots.GRBackend() n=3}", + "image/png": "", + "text/html": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", 
+ "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ], + "cell_type": "code", + "source": [ + "single_step_24h = WindowGenerator(24, 24, train_df, valid_df, label_columns=target; offset=1);\n", + "\n", + "plot(single_step_24h, baseline_model)" + ], + "metadata": {}, + "execution_count": 28 + }, + { + "cell_type": "markdown", + "source": [ + "### Linear Models\n", + "##### 1 hour\n", + "Set up a fairly generic training 
loop for the rest of the models. It takes in a model, and a WindowGenerator object, and uses the ADAM optimizer to train the model." + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "train_model! (generic function with 1 method)" + }, + "metadata": {}, + "execution_count": 29 + } + ], + "cell_type": "code", + "source": [ + "opt = Flux.Optimise.ADAM(0.01)\n", + "\n", + "function train_model!(model, wg::WindowGenerator, opt; epochs=20, bs=16, dev=Flux.gpu)\n", + " model = model |> dev\n", + " ps = params(model)\n", + " t = shuffleobs(wg.train)\n", + " v = batch_ts(getobs(wg.valid))\n", + "\n", + " local l\n", + " vl_prev = Inf\n", + " for e in 1:epochs\n", + " for d in eachbatch(t, size=bs)\n", + " x, y = batch_ts(d)\n", + " y = y[wg.target_idx,:,:]\n", + " x, y = x |> dev, y |> dev\n", + " gs = gradient(ps) do\n", + " l = loss(model(x),y)\n", + " end\n", + " Flux.update!(opt, ps, gs)\n", + " end\n", + " l = round(l;digits=4)\n", + " vl = round(loss(model(v[1]),v[2][wg.target_idx,:,:]); digits=4)\n", + " println(\"Epoch $e/$epochs - train loss: $l, valid loss: $vl\")\n", + " #=\n", + " crude early-stopping\n", + " vl_prev < (vl - 0.001) && break\n", + " vl_prev = vl\n", + " =#\n", + " end\n", + " model = model |> cpu\n", + "end" + ], + "metadata": {}, + "execution_count": 29 + }, + { + "cell_type": "markdown", + "source": [ + "Now, train the linear model" + ], + "metadata": {} + }, + { + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "┌ Warning: The specified values for size and/or count will result in 6 unused data points\n", + "└ @ MLDataPattern /home/andrew/.julia/packages/MLDataPattern/KlSmO/src/dataview.jl:205\n", + "Epoch 1/20 - train loss: 0.0126, valid loss: 0.0094\n", + "Epoch 2/20 - train loss: 0.0112, valid loss: 0.009\n", + "Epoch 3/20 - train loss: 0.0112, valid loss: 0.009\n", + "Epoch 4/20 - train loss: 0.0112, valid loss: 0.009\n", + "Epoch 5/20 - train 
loss: 0.0112, valid loss: 0.009\n", + "Epoch 6/20 - train loss: 0.0112, valid loss: 0.009\n", + "Epoch 7/20 - train loss: 0.0111, valid loss: 0.009\n", + "Epoch 8/20 - train loss: 0.0111, valid loss: 0.009\n", + "Epoch 9/20 - train loss: 0.0111, valid loss: 0.009\n", + "Epoch 10/20 - train loss: 0.0111, valid loss: 0.009\n", + "Epoch 11/20 - train loss: 0.0111, valid loss: 0.009\n", + "Epoch 12/20 - train loss: 0.011, valid loss: 0.009\n", + "Epoch 13/20 - train loss: 0.011, valid loss: 0.009\n", + "Epoch 14/20 - train loss: 0.011, valid loss: 0.009\n", + "Epoch 15/20 - train loss: 0.011, valid loss: 0.009\n", + "Epoch 16/20 - train loss: 0.011, valid loss: 0.009\n", + "Epoch 17/20 - train loss: 0.011, valid loss: 0.009\n", + "Epoch 18/20 - train loss: 0.0109, valid loss: 0.009\n", + "Epoch 19/20 - train loss: 0.0109, valid loss: 0.009\n", + "Epoch 20/20 - train loss: 0.0109, valid loss: 0.009\n", + " 8.227544 seconds (55.81 M allocations: 5.889 GiB, 8.83% gc time, 8.01% compilation time)\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": "Dense(19, 1)" + }, + "metadata": {}, + "execution_count": 30 + } + ], + "cell_type": "code", + "source": [ + "linear = Dense(size(single_step_1h.train[1][1],1), 1; initW=Flux.glorot_uniform, initb=Flux.zeros)\n", + "\n", + "@time linear = train_model!(linear, single_step_1h, opt; bs=32, epochs=20)" + ], + "metadata": {}, + "execution_count": 30 + }, + { + "cell_type": "markdown", + "source": [ + "##### 24 hours\n", + "Show the model's predictions for each point in the window." 
+ ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "Plot{Plots.GRBackend() n=3}", + "image/png": "", + "text/html": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", 
+ "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "metadata": {}, + "execution_count": 31 + } + ], + "cell_type": "code", + "source": [ + "plot(single_step_24h, linear)" + ], + "metadata": {}, + "execution_count": 31 + }, + { + "cell_type": "markdown", + "source": [ + "Visualize the weights of the linear regression. Naturally, since we are trying to predict future temperatures, we would expect that the current temperature is on of the most useful features.\n", + "This is shown by it having a high positive weight." + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "Plot{Plots.GRBackend() n=1}", + "image/png": "", + "text/html": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", 
+ "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ], + "cell_type": "code", + "source": [ + "bar(names(train_df), linear.W[:], xrotation=30.0, legend=false, xticks=:all, tickfontsize=6)" + ], + "metadata": {}, + "execution_count": 32 + }, + { + "cell_type": "markdown", + "source": [ + "### Dense\n", + "A model with 2 Fully-connected layers and relu non-linearities." 
+ ], + "metadata": {} + }, + { + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "┌ Warning: The specified values for size and/or count will result in 6 unused data points\n", + "└ @ MLDataPattern /home/andrew/.julia/packages/MLDataPattern/KlSmO/src/dataview.jl:205\n", + "Epoch 1/20 - train loss: 0.0095, valid loss: 0.009\n", + "Epoch 2/20 - train loss: 0.0081, valid loss: 0.0089\n", + "Epoch 3/20 - train loss: 0.0079, valid loss: 0.0083\n", + "Epoch 4/20 - train loss: 0.008, valid loss: 0.0076\n", + "Epoch 5/20 - train loss: 0.0069, valid loss: 0.0072\n", + "Epoch 6/20 - train loss: 0.0066, valid loss: 0.0076\n", + "Epoch 7/20 - train loss: 0.0078, valid loss: 0.0074\n", + "Epoch 8/20 - train loss: 0.0046, valid loss: 0.0072\n", + "Epoch 9/20 - train loss: 0.0044, valid loss: 0.0069\n", + "Epoch 10/20 - train loss: 0.0053, valid loss: 0.0071\n", + "Epoch 11/20 - train loss: 0.005, valid loss: 0.0073\n", + "Epoch 12/20 - train loss: 0.0071, valid loss: 0.0072\n", + "Epoch 13/20 - train loss: 0.0057, valid loss: 0.0067\n", + "Epoch 14/20 - train loss: 0.0057, valid loss: 0.0071\n", + "Epoch 15/20 - train loss: 0.0059, valid loss: 0.0069\n", + "Epoch 16/20 - train loss: 0.0077, valid loss: 0.007\n", + "Epoch 17/20 - train loss: 0.0073, valid loss: 0.0069\n", + "Epoch 18/20 - train loss: 0.0076, valid loss: 0.0072\n", + "Epoch 19/20 - train loss: 0.0058, valid loss: 0.007\n", + "Epoch 20/20 - train loss: 0.0059, valid loss: 0.0068\n", + " 11.781388 seconds (60.57 M allocations: 9.586 GiB, 6.87% gc time, 0.42% compilation time)\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": "Chain(Dense(19, 64, relu), Dense(64, 64, relu), Dense(64, 1))" + }, + "metadata": {}, + "execution_count": 33 + } + ], + "cell_type": "code", + "source": [ + "dense = Chain(\n", + " Dense(19, 64, relu),\n", + " Dense(64, 64, relu),\n", + " Dense(64, 1)\n", + ")\n", + "\n", + "@time dense = train_model!(dense, single_step_1h, opt; bs=32, 
epochs=20)" + ], + "metadata": {}, + "execution_count": 33 + }, + { + "cell_type": "markdown", + "source": [ + "Dense model's predicions" + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "Plot{Plots.GRBackend() n=3}", + "image/png": "", + "text/html": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + 
"\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ], + "cell_type": "code", + "source": [ + "plot(single_step_24h, dense)" + ], + "metadata": {}, + "execution_count": 34 + }, + { + "cell_type": "markdown", + "source": [ + "### Multi-step Dense\n", + "Now we are going to use 3 historical hours to predict 1 hour in the future." + ], + "metadata": {} + }, + { + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "┌ Warning: The specified values for size and/or count will result in 4 unused data points\n", + "└ @ MLDataPattern /home/andrew/.julia/packages/MLDataPattern/KlSmO/src/dataview.jl:205\n", + "Epoch 1/20 - train loss: 0.0074, valid loss: 0.0088\n", + "Epoch 2/20 - train loss: 0.008, valid loss: 0.0092\n", + "Epoch 3/20 - train loss: 0.0065, valid loss: 0.0089\n", + "Epoch 4/20 - train loss: 0.0069, valid loss: 0.009\n", + "Epoch 5/20 - train loss: 0.0052, valid loss: 0.0088\n", + "Epoch 6/20 - train loss: 0.0057, valid loss: 0.0085\n", + "Epoch 7/20 - train loss: 0.006, valid loss: 0.009\n", + "Epoch 8/20 - train loss: 0.0072, valid loss: 0.0086\n", + "Epoch 9/20 - train loss: 0.0072, valid loss: 0.0087\n", + "Epoch 10/20 - train loss: 0.0073, valid loss: 0.008\n", + "Epoch 11/20 - train loss: 0.008, valid loss: 0.009\n", + "Epoch 12/20 - train loss: 0.0073, valid loss: 0.0088\n", + "Epoch 13/20 - train loss: 0.0076, valid loss: 0.0094\n", + "Epoch 14/20 - train loss: 0.0076, valid loss: 0.009\n", + "Epoch 15/20 - train loss: 0.0071, valid loss: 0.0089\n", + "Epoch 16/20 - train loss: 0.0074, valid loss: 0.0089\n", + "Epoch 17/20 - train loss: 0.0074, valid loss: 0.009\n", + "Epoch 18/20 - train loss: 0.0072, valid loss: 0.0085\n", + 
"Epoch 19/20 - train loss: 0.0074, valid loss: 0.0084\n", + "Epoch 20/20 - train loss: 0.007, valid loss: 0.0084\n", + " 12.272140 seconds (61.59 M allocations: 11.443 GiB, 6.89% gc time, 3.66% compilation time)\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": "Chain(#27, Dense(57, 32, relu), Dense(32, 32, relu), Dense(32, 1))" + }, + "metadata": {}, + "execution_count": 35 + } + ], + "cell_type": "code", + "source": [ + "single_step_3h = WindowGenerator(3, 1, train_df, valid_df, label_columns=target);\n", + "\n", + "plot(single_step_3h)\n", + "\n", + "multi_step_dense = Chain(\n", + " i -> reshape(i, :, 1, size(i)[end]), # flatten first two dimensions, but preserve batch dimension\n", + " Dense(19*3, 32, relu),\n", + " Dense(32, 32, relu),\n", + " Dense(32, 1)\n", + ")\n", + "\n", + "@time multi_step_dense = train_model!(multi_step_dense, single_step_3h, opt; bs=32, epochs=20)" + ], + "metadata": {}, + "execution_count": 35 + }, + { + "cell_type": "markdown", + "source": [ + "Show the model's 1 hour predicion using 3 hours of historical data." 
+ ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "Plot{Plots.GRBackend() n=3}", + "image/png": "", + "text/html": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "metadata": {}, + "execution_count": 36 + } + ], + "cell_type": "code", + "source": [ + "plot(single_step_3h, multi_step_dense)" + ], + "metadata": {}, + "execution_count": 36 + }, + { + "cell_type": "markdown", + "source": [ + "### Convolutional Neural Network" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "conv_model = Chain(\n", + " Conv((19,), 3=>32, relu), # need to explain why this conv pattern\n", + " x -> Flux.flatten(x),\n", + " Dense(32, 32, relu),\n", + " Dense(32, 1),\n", + " x -> unsqueeze(x, 1)\n", + ")" + ], + "metadata": {} + }, + { + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "┌ Warning: The specified values for size and/or count will result in 4 unused data points\n", + "└ @ MLDataPattern /home/andrew/.julia/packages/MLDataPattern/KlSmO/src/dataview.jl:205\n", + "Epoch 1/20 - train loss: 0.0081, valid loss: 0.0084\n", + "Epoch 2/20 - train loss: 0.0064, valid loss: 
0.0071\n", + "Epoch 3/20 - train loss: 0.0089, valid loss: 0.0082\n", + "Epoch 4/20 - train loss: 0.0081, valid loss: 0.008\n", + "Epoch 5/20 - train loss: 0.0084, valid loss: 0.0089\n", + "Epoch 6/20 - train loss: 0.0083, valid loss: 0.0083\n", + "Epoch 7/20 - train loss: 0.0084, valid loss: 0.0089\n", + "Epoch 8/20 - train loss: 0.0081, valid loss: 0.0084\n", + "Epoch 9/20 - train loss: 0.0093, valid loss: 0.0091\n", + "Epoch 10/20 - train loss: 0.0095, valid loss: 0.0094\n", + "Epoch 11/20 - train loss: 0.0099, valid loss: 0.0092\n", + "Epoch 12/20 - train loss: 0.0095, valid loss: 0.0096\n", + "Epoch 13/20 - train loss: 0.0101, valid loss: 0.0099\n", + "Epoch 14/20 - train loss: 0.0087, valid loss: 0.0091\n", + "Epoch 15/20 - train loss: 0.009, valid loss: 0.0096\n", + "Epoch 16/20 - train loss: 0.0082, valid loss: 0.009\n", + "Epoch 17/20 - train loss: 0.0081, valid loss: 0.0087\n", + "Epoch 18/20 - train loss: 0.0075, valid loss: 0.0092\n", + "Epoch 19/20 - train loss: 0.0081, valid loss: 0.0093\n", + "Epoch 20/20 - train loss: 0.0078, valid loss: 0.0089\n", + " 30.839493 seconds (75.26 M allocations: 13.133 GiB, 3.48% gc time, 3.65% compilation time)\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": "Chain(#29, Conv((3,), 19=>32, relu), #30, Dense(32, 32, relu), Dense(32, 1), #31)" + }, + "metadata": {}, + "execution_count": 37 + } + ], + "cell_type": "code", + "source": [ + "conv_model = Chain(\n", + " x -> permutedims(x, [2,1,3]), # put data in NTime x NCovariates X NBatch https://github.com/FluxML/Flux.jl/issues/1465\n", + " Conv((3,), 19=>32, relu), # convolve over 3 inputs - 19 variables -> 32 filters\n", + " x -> Flux.flatten(x),\n", + " Dense(32, 32, relu),\n", + " Dense(32, 1),\n", + " x -> unsqueeze(x, 1)\n", + ")\n", + "\n", + "single_step_3h = WindowGenerator(3, 1, train_df, valid_df, label_columns=target);\n", + "\n", + "@time conv_model = train_model!(conv_model, single_step_3h, opt; bs=32, epochs=20)" + ], + 
"metadata": {}, + "execution_count": 37 + }, + { + "cell_type": "markdown", + "source": [ + "Show the model's 1 hour predicion using 3 hours of historical data." + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "Plot{Plots.GRBackend() n=3}", + "image/png": "", + "text/html": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "metadata": {}, + "execution_count": 38 + } + ], + "cell_type": "code", + "source": [ + "plot(single_step_3h, conv_model)" + ], + "metadata": {}, + "execution_count": 38 + }, + { + "cell_type": "markdown", + "source": [ + "### Recurrent Neural Network" + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "Chain(Recur(LSTMCell(19, 32)), Dense(32, 1), #35)" + }, + "metadata": {}, + "execution_count": 39 + } + ], + "cell_type": "code", + "source": [ + "rnn_single_24h = Chain(\n", + " LSTM(19, 32), # input features * output dimensionality\n", + " Dense(32, 1),\n", + " x->reshape(x,:) # lstm output * single number\n", + ")" + ], + "metadata": {}, + "execution_count": 39 + }, + { + "cell_type": "markdown", + "source": [ + "Flux uses vectors 
of observations for RNNs, rather than the bulk tensor" + ], + "metadata": {} + }, + { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "1-element Vector{Float32}:\n -0.7347592" + }, + "metadata": {}, + "execution_count": 40 + } + ], + "cell_type": "code", + "source": [ + "a = [rand(Float32, 19) for i in 1:24]\n", + "b = rnn_single_24h.(a)\n", + "a[1]\n", + "rnn_single_24h(a[1])" + ], + "metadata": {}, + "execution_count": 40 + }, + { + "cell_type": "markdown", + "source": [ + "### Performance" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "### Multi-output Models" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "#### Baseline" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "#### Dense" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "#### RNN" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "#### Advanced: Residual Connections" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "#### Performance" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "# Multi-Step Models" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "### Baselines" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Single-shot Models" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "### Linear" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "### Dense" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "### CNN" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "### RNN" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Advanced Autoregressive model\n", + "### RNN" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Performance" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + 
"source": [ + "Next Steps" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "*This notebook was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*" + ], + "metadata": {} + } + ], + "nbformat_minor": 3, + "metadata": { + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "1.6.0-rc1" + }, + "kernelspec": { + "name": "julia-1.6", + "display_name": "Julia 1.6.0-rc1", + "language": "julia" + } + }, + "nbformat": 4 +} \ No newline at end of file diff --git a/tutorials/time_series/time_series.jl b/tutorials/time_series/time_series.jl index 448385663..ce8295583 100644 --- a/tutorials/time_series/time_series.jl +++ b/tutorials/time_series/time_series.jl @@ -1,13 +1,16 @@ # # Time series forecasting -# This tutorial serves as a `Julia` implementation of the Tensorflow time series forecasting tutorial here: -# -# [Tensorflow Time Series Forecasting Tutorial](https://www.tensorflow.org/tutorials/structured_data/time_series) +# This tutorial is inspired by a [Tensorflow](https://www.tensorflow.org/tutorials/structured_data/time_series) Time Series Forecasting Tutorial. +# It implements the data analysis, plotting, and model training from scratch using Julia tools such as Plots.jl, Flux, and DataFrames.jl. + # ## Setup +# Importing the usual suspects. Two interesting notes: +# * StatsPlots re-exports Plots.jl functions, so we don't need that separately +# * MLDataPattern is useful for preprocessing data for ML in a lazy (read: non-memory intensive) way. We will be seeing more of it using ZipFile using CSV using DataFrames -using StatsPlots #re-exports Plots.jl functions +using StatsPlots using Dates using FFTW using CUDA @@ -18,35 +21,43 @@ using Random: seed! using Statistics: mean, std using Flux: unsqueeze +# Set a seed to make this reproducible. 
seed!(4231) -ENV["LINES"] = 20 +ENV["LINES"] = 20; + +# Disallow scalar indexing for CUDA - prevents slow accesses to GPU memory CUDA.allowscalar(false) # ## The weather dataset +# We are going to be using weather data for 7 years from the Max Planck Institute: https://www.bgc-jena.mpg.de/wetter/ function download_data(; fname="jena_climate_2009_2016.zip") DATA_PATH = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip" isfile(fname) || download(DATA_PATH, fname) - zip = ZipFile.Reader(fname) #https://juliadata.github.io/CSV.jl/stable/index.html#Example:-reading-from-a-zip-file-1 + zip = ZipFile.Reader(fname) csv = zip.files[1] df = CSV.File(csv) |> DataFrame close(zip) return df end +# More discussion on reading zipped CSVs here: https://juliadata.github.io/CSV.jl/stable/index.html#Example:-reading-from-a-zip-file-1 + df = download_data(); df = df[6:6:end, :] -df[!,"Date Time"] = Dates.DateTime.(df[:,"Date Time"], "dd.mm.yyyy HH:MM:SS"); #https://en.wikibooks.org/wiki/Introducing_Julia/Working_with_dates_and_times +df[!,"Date Time"] = Dates.DateTime.(df[:,"Date Time"], "dd.mm.yyyy HH:MM:SS"); +# More info on working with dates and times: https://en.wikibooks.org/wiki/Introducing_Julia/Working_with_dates_and_times col = ["Date Time", "T (degC)", "p (mbar)", "rho (g/m**3)"] df[:,col] -@df df plot(cols(1), cols(2:4); layout=(3, 1)) # from https://github.com/JuliaPlots/StatsPlots.jl +# One easy way to plot data from a DataFrame is with the StatsPlots `@df` macro +@df df plot(cols(1), cols(2:4); layout=(3, 1)) @df df[1:480,:] plot(cols(1), cols(2:4); layout=(3, 1)) @@ -55,15 +66,17 @@ df[:,col] df."p (mbar)"; #cool that this works -replace!(df[!,"wv (m/s)"], -9999.0 => 0); # https://juliadata.github.io/DataFrames.jl/stable/man/getting_started/#Replacing-Data +# Need to replace bad database values +replace!(df[!,"wv (m/s)"], -9999.0 => 0); replace!(df[!,"max. 
wv (m/s)"], -9999.0 => 0); @show describe(df) # ## Feature engineering - +# Bucket wind direction and speed histogram2d(df[!,"wd (deg)"], df[!,"wv (m/s)"], bins=(75,75), xlabel="wd (deg)", ylabel="wv (m/s)") +# It would be better to decompose this into X and Y velocities wd_rad = df[!,"wd (deg)"] * π / 180 df.Wx = df[!,"wv (m/s)"] .* cos.(wd_rad) df.Wy = df[!,"wv (m/s)"] .* sin.(wd_rad) @@ -72,6 +85,7 @@ df."max Wy" = df[!,"max. wv (m/s)"] .* sin.(wd_rad); histogram2d(df.Wx, df.Wy, bins=(75,75), xlabel="Wind X [m/s]", ylabel="Wind Y [m/s]") +# Modify timestamp into a continuous "time of day" signal to deal with periodicity. timestamp_s = Dates.datetime2unix.(df."Date Time"); day = 24*60*60 #seconds in a day @@ -87,6 +101,8 @@ plot!(df[1:25,"Day cos"]) xlabel!("Time [h]") title!("Time of Day Signal") +# If you don't know the frequency to choose for your periodicity, you can take the FFT. +# Here you can see that the yearly and daily periodicities are the most prominent. fftrans = FFTW.rfft(df[!,"T (degC)"]) f_per_dataset = 1:size(fftrans)[1] @@ -100,20 +116,20 @@ xticks!([1, 365.2524], ["1/Year", "1/Day"]) xlabel!("Frequency (log scale)") # ## Split the data -# drop columns you don't want to use further +# Drop columns that won't be used further. select!(df, Not([:("wv (m/s)"),:("max. 
wv (m/s)"), :("wd (deg)"), :("Date Time")])); column_indices = pairs(names(df)) indices_columns = Dict(value => key for (key, value) in column_indices) df = convert.(Float32, df) # Don't need high precision; reduces errors later on when using Params - gradients are Float32 - +# Split the data into training, validation, and test sets n = size(df)[1] train_df = df[1:round(Int,n*0.7, RoundDown),:] valid_df = df[round(Int,n*0.7, RoundUp):round(Int,n*0.9, RoundDown),:] test_df = df[round(Int,n*0.9, RoundUp):end,:]; # matching TF tutorial exactly, can also use partition -num_features = size(df)[2] +num_features = size(df,2) # ## Normalize the data train_mean = mean.(eachcol(train_df)) @@ -134,15 +150,20 @@ boxplot!(df_std.variable, df_std.value, fillalpha=0.75, outliers=false) # We will define our own WindowGenerator, some constructors, and plotting functions. The data from the WindowGenerator will be used in training. include("window_generator.jl") -h = 6 # historical window length -f = 1 # future window length +h = 6; # historical window length +f = 1; # future window length -# WindowGenerator makes use of MLDataPattern's `slidingwindow` to generate the windows. It is good at flexibly generating -# https://mldatapatternjl.readthedocs.io/en/latest/documentation/dataview.html?highlight=slidingwindow#labeled-windows +# WindowGenerator makes use of MLDataPattern's `slidingwindow` to generate the windows. It is good at flexibly generating sequence data, though it requires a bit of mind-bending to fully understand how it generates sequences. +# [slidingwindow docs](https://mldatapatternjl.readthedocs.io/en/latest/documentation/dataview.html?highlight=slidingwindow#labeled-windows) slidingwindow(i -> i+h:i+h+f-1, Array(train_df)', h, stride=1) -# How to use WindowGenerator -wg = WindowGenerator(6, 1, train_df, valid_df, label_columns="T (degC)") +# WindowGenerator is implemented to make sequence-generating simpler. It uses `slidingwindow` behind the scenes to create sequences. 
+@doc WindowGenerator + +# Create a WindowGenerator +wg = WindowGenerator(6, 1, train_df, valid_df, label_columns="T (degC)"); + +# Plotting methods are implemented for raw data and with predictions. plot(wg) @@ -150,7 +171,7 @@ plot(wg) include("batch_ts.jl") -# To understand better what the above code does, let's look at an imitation training loop and look at everything's dimensions +# To understand better what all the above code does, let's look at an imitation training loop and look at everything's dimensions practice_df = train_df[1:10,:] a = slidingwindow(i -> i+h:i+h+f-1, Array(practice_df)', h, stride=1) @@ -160,6 +181,15 @@ for i in eachbatch(shuffleobs(a), size=2) @show y println() end +#= +That took the data at 10 timestamps (`train_df[1:10,:]`) and threw it into 4 'observations' using all the data it could while requesting 6 historical and 1 target point. This works out to the following: +* (1,2,3,4,5,6), 7 +* (2,3,4,5,6,7), 8 +* (3,4,5,6,7,8), 9 +* (4,5,6,7,8,9), 10 + +It then shuffles these observations, and lazily batches them into 2 batches each of size 2. +=# # # Single Step Models loss(x,y) = Flux.Losses.mse(x, y) @@ -178,13 +208,14 @@ single_step_1h = WindowGenerator(1, 1, train_df, valid_df, label_columns=target) baseline_model = Baseline(wg.target_idx[1]) +# Demonstrate the 'training error' for this model, although there is no actual training. function run_single_step_baseline(wg, model) preds = Float32[] reals = Float32[] for (x,y) in wg.train val = model(x)[1] push!(preds, val) - push!(reals, y[model.label_index]) #figure out how to do validation + push!(reals, y[model.label_index]) end l = loss(preds, reals) @@ -202,7 +233,7 @@ plot(single_step_24h, baseline_model) # ### Linear Models # ##### 1 hour -linear = Dense(size(single_step_1h.train[1][1],1), 1; initW=Flux.glorot_uniform, initb=Flux.zeros) +# Set up a fairly generic training loop for the rest of the models. 
It takes in a model, and a WindowGenerator object, and uses the ADAM optimizer to train the model. opt = Flux.Optimise.ADAM(0.01) function train_model!(model, wg::WindowGenerator, opt; epochs=20, bs=16, dev=Flux.gpu) @@ -226,21 +257,30 @@ function train_model!(model, wg::WindowGenerator, opt; epochs=20, bs=16, dev=Flu l = round(l;digits=4) vl = round(loss(model(v[1]),v[2][wg.target_idx,:,:]); digits=4) println("Epoch $e/$epochs - train loss: $l, valid loss: $vl") - # crude early-stopping - # vl_prev < (vl - 0.001) && break - # vl_prev = vl + #= + crude early-stopping + vl_prev < (vl - 0.001) && break + vl_prev = vl + =# end model = model |> cpu end +# Now, train the linear model +linear = Dense(size(single_step_1h.train[1][1],1), 1; initW=Flux.glorot_uniform, initb=Flux.zeros) + @time linear = train_model!(linear, single_step_1h, opt; bs=32, epochs=20) -# ##### 24 hr +# ##### 24 hours +# Show the model's predictions for each point in the window. plot(single_step_24h, linear) +# Visualize the weights of the linear regression. Naturally, since we are trying to predict future temperatures, we would expect that the current temperature is one of the most useful features. +# This is shown by it having a high positive weight. bar(names(train_df), linear.W[:], xrotation=30.0, legend=false, xticks=:all, tickfontsize=6) # ### Dense +# A model with 2 fully-connected hidden layers and relu non-linearities. dense = Chain( Dense(19, 64, relu), Dense(64, 64, relu), @@ -249,6 +289,7 @@ dense = Chain( @time dense = train_model!(dense, single_step_1h, opt; bs=32, epochs=20) +# Dense model's predictions plot(single_step_24h, dense) @@ -267,6 +308,7 @@ multi_step_dense = Chain( @time multi_step_dense = train_model!(multi_step_dense, single_step_3h, opt; bs=32, epochs=20) +# Show the model's 1 hour prediction using 3 hours of historical data. 
plot(single_step_3h, multi_step_dense) # ### Convolutional Neural Network @@ -292,11 +334,24 @@ single_step_3h = WindowGenerator(3, 1, train_df, valid_df, label_columns=target) @time conv_model = train_model!(conv_model, single_step_3h, opt; bs=32, epochs=20) +# Show the model's 1 hour prediction using 3 hours of historical data. plot(single_step_3h, conv_model) # ### Recurrent Neural Network +rnn_single_24h = Chain( + LSTM(19, 32), # input features * output dimensionality + Dense(32, 1), + x->reshape(x,:) # lstm output * single number +) + +# Flux uses vectors of observations for RNNs, rather than the bulk tensor +a = [rand(Float32, 19) for i in 1:24] +b = rnn_single_24h.(a) +a[1] +rnn_single_24h(a[1]) + # ### Performance # ### Multi-output Models diff --git a/tutorials/time_series/window_generator.jl b/tutorials/time_series/window_generator.jl index e3b6eb8b5..4f04537c0 100644 --- a/tutorials/time_series/window_generator.jl +++ b/tutorials/time_series/window_generator.jl @@ -28,15 +28,13 @@ function WindowGenerator(h, f, train_df, valid_df, label_columns::Vector{String} return WindowGenerator(train, valid, h, f, label_indices, target_idx) end -""" -WindowGenerator with a single label_column. -""" + +# WindowGenerator with a single label_column. WindowGenerator(h, f, train_df, valid_df, label_columns::String; offset=h) = WindowGenerator(h, f, train_df, valid_df, label_columns=[label_columns]; offset=offset) -""" -WindowGenerator with multiple label_columns. -""" + +# WindowGenerator with multiple label_columns. WindowGenerator(h, f, train_df, valid_df; label_columns, offset=h) = WindowGenerator(h, f, train_df, valid_df, label_columns; offset=offset)