# Time-series regression with RNNs

```elixir
Mix.install([
  {:axon, "~> 0.6.1"},
  {:kino_vega_lite, "~> 0.1.13"},
  {:nx, "~> 0.7.3"},
  {:tucan, "~> 0.3.1"},
  {:polaris, "~> 0.1.0"},
  {:exla, "~> 0.7.3"},
  {:req, "~> 0.5.2"},
  {:vega_lite, "~> 0.1.9"},
  {:nimble_csv, "~> 1.2"},
  {:scholar, "~> 0.3.1"}
])

Nx.global_default_backend(EXLA.Backend)
Nx.Defn.global_default_options(compiler: EXLA)
```
## Introduction

In this project, we will build a predictor for a time series using a Recurrent Neural Network (RNN) regressor. The prediction will be made for Apple's stock price based on a sliding window of the previous 7 days of historical prices.

We will use an RNN architecture known as Long Short-Term Memory (LSTM).

<!-- livebook:{"break_markdown":true} -->

First, we load and preprocess the historical data. We will apply *normalization* to the series of historical prices. This mitigates numerical issues with activation functions like `tanh`, which saturate for inputs of large magnitude (whether positive or negative) and consequently produce vanishingly small derivatives.
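As an illustrative aside (not part of the original pipeline), we can check how `tanh` saturates once its input moves away from zero, which is why feeding raw, unscaled prices into the network would be problematic:

```elixir
# Illustration only: tanh maps large-magnitude inputs to values that are
# numerically indistinguishable from -1 or 1, so gradients through it vanish.
Nx.tanh(Nx.tensor([0.5, 5.0, 50.0, 500.0]))
```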

```elixir
NimbleCSV.define(CSVParser, separator: "\n", escape: "\"")

data =
  "https://raw.githubusercontent.com/santiago-imelio/datasets/main/apple_stock_prices.csv"
  |> Req.get!()
  |> Map.get(:body)
  |> CSVParser.parse_string()
  |> Enum.map(fn [price] ->
    price
    |> Float.parse()
    |> elem(0)
  end)
  |> Nx.tensor()
```

```elixir
min = Nx.reduce_min(data) |> Nx.to_number()
max = Nx.reduce_max(data) |> Nx.to_number()

{series_size} = Nx.shape(data)
days = Nx.iota({series_size})

Tucan.lineplot([Prices: data, Days: days], "Days", "Prices")
|> Tucan.set_size(600, 400)
|> Tucan.Scale.set_y_domain(min, max)
```

## Train-test splitting

Here we split the dataset into train and test sets. We will use two thirds of the data for training and the remaining third for validation. Since the dataset is an ordered time series, we shouldn't shuffle the data before splitting.

```elixir
train_test_split = 2 / 3

{train, test} = Nx.split(data, train_test_split)
```

## Normalization

We normalize the series to the range [-1, 1] using min-max scaling. It's also common to standardize instead, centering on the mean and dividing by the standard deviation.
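For reference, min-max scaling to a target range $[a, b]$ (here $a = -1$ and $b = 1$) maps each value as

$$
x' = a + \frac{(x - x_{min})(b - a)}{x_{max} - x_{min}}
$$

where $x_{min}$ and $x_{max}$ are computed on the training data only.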

```elixir
alias Scholar.Preprocessing.MinMaxScaler
```

We fit a min-max scaler on the training data, and then transform both the train and test sets using the fitted scaler. This prevents information from the test set from leaking into training.

```elixir
scaler = MinMaxScaler.fit(train, min_bound: -1, max_bound: 1)
train = MinMaxScaler.transform(scaler, train)
test = MinMaxScaler.transform(scaler, test)
```

Let's visualize the normalized data.

```elixir
norm_data = Nx.concatenate([train, test])
new_min = Nx.reduce_min(norm_data) |> Nx.to_number()
new_max = Nx.reduce_max(norm_data) |> Nx.to_number()

Tucan.lineplot([Prices: norm_data, Days: days], "Days", "Prices")
|> Tucan.set_size(600, 400)
|> Tucan.Scale.set_y_domain(new_min, new_max)
```

## Sliding window

Generally, a time series is mathematically represented as:

$$
\langle s_0, s_1, s_2, \ldots, s_P \rangle
$$

where $s_p$ is the numerical value of the series at time interval $p$, and $P$ is the total length of the series. To apply an RNN, the prediction should be treated as a regression problem. This involves using a sliding window to construct a set of input-output pairs on which regression will be applied.

For example, with a window size $T = 3$, the following pairs need to be produced:

$$
\begin{array}{c|c}
  \text{Input} & \text{Output}\\
  \hline \langle s_{0},s_{1},s_{2}\rangle & s_{3} \\
  \langle s_{1},s_{2},s_{3} \rangle & s_{4} \\
  \vdots & \vdots \\
  \langle s_{P-3},s_{P-2},s_{P-1} \rangle & s_{P}
\end{array}
$$

In each pair, the input consists of a sequence of $T$ consecutive values from the series, and the output is the value immediately following that sequence. This approach prepares the data for training the RNN model for time-series prediction.

```elixir
sliding_window = fn series, win_size ->
  {p} = Nx.shape(series)

  # Start index of every window, vectorized so the slice below runs once per index.
  x_idx =
    series
    |> Nx.shape()
    |> Nx.iota()
    |> Nx.reshape({:auto, 1})
    |> Nx.vectorize(:elements)

  # Index of the target value that follows each window.
  y_idx =
    series
    |> Nx.shape()
    |> Nx.iota()
    |> Nx.add(win_size)
    |> Nx.reshape({:auto, 1})
    |> Nx.slice([0, 0], [p - win_size, 1])

  # Slice a window of length win_size at each start index, then drop the
  # trailing windows that would run past the end of the series.
  x =
    Nx.slice(series, [x_idx[0]], [win_size])
    |> Nx.devectorize(keep_names: false)
    |> Nx.slice([0, 0], [p - win_size, win_size])

  y = Nx.gather(series, y_idx, axes: [0])

  {x, y}
end
```

Next, we can test our sliding window function with a given series. For example, we can test it using the first 10 numbers of the Fibonacci sequence, defined by the recurrence $F_{n+1} = F_{n - 1} + F_n$. Passing a window size of 2, we should expect the following 8 input-output pairs:

$$
\begin{array}{c|c}
  \text{Input} & \text{Output}\\
  \hline \langle F_1, F_2\rangle & F_3 \\
  \langle F_{2},F_{3}\rangle & F_{4} \\
  \vdots & \vdots \\
  \langle F_{8},F_{9} \rangle & F_{10} \\
\end{array}
$$

```elixir
test_series = Nx.tensor([0, 1, 1, 2, 3, 5, 8, 13, 21, 34])
sliding_window.(test_series, 2)
```

Now we apply the sliding window to our dataset with a window size of 7. For simplicity, we truncate the train and test sets to a size divisible by the window length.

```elixir
win_size = 7

{train_size} = Nx.shape(train)
{test_size} = Nx.shape(test)

p_train = train_size - rem(train_size, win_size)
p_test = test_size - rem(test_size, win_size)

train = train[0..(p_train - 1)]
test = test[0..(p_test - 1)]

{x_train, y_train} = sliding_window.(train, win_size)
{x_test, y_test} = sliding_window.(test, win_size)
```

```elixir
dataset_size = p_train + p_test
```

## Building and training our RNN regressor

We will use Axon to build a neural network with two layers, with the following specifications:

1. The first layer is an LSTM layer with 5 hidden units. Its input shape is `{batch_size, window_size, 1}`.
2. The second layer is a fully connected (dense) layer with a single unit.

For the loss function we will use mean squared error.
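For $N$ predictions $\hat{y}_i$ with targets $y_i$, the mean squared error is

$$
\text{MSE} = \frac{1}{N} \sum_{i=1}^{N} \left(y_i - \hat{y}_i\right)^2
$$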

```elixir
batch_size = 6
input_shape = {batch_size, win_size, 1}

model =
  Axon.input("input", shape: input_shape)
  |> Axon.lstm(5)
  # lstm returns {output_sequence, state}; we only need the output sequence
  |> then(fn {out, _} -> out end)
  # keep only the last time step of each sequence: {batch, win_size, 5} -> {batch, 5}
  |> Axon.nx(fn t -> t[[0..-1//1, -1]] end)
  |> Axon.dense(1)

Axon.Display.as_graph(model, Nx.template(input_shape, :f32))
```

Before training we split our train and test data into batches using the specified batch size.

```elixir
y_train_batches = Nx.to_batched(y_train, batch_size)

x_train_batches =
  x_train
  |> Nx.reshape({:auto, win_size, 1})
  |> Nx.to_batched(batch_size)

y_test_batches = Nx.to_batched(y_test, batch_size)

x_test_batches =
  x_test
  |> Nx.reshape({:auto, win_size, 1})
  |> Nx.to_batched(batch_size)

train_data = Stream.zip([x_train_batches, y_train_batches])
test_data = Stream.zip([x_test_batches, y_test_batches])
```

We are ready to create our training loop and run it. Since our network is simple, we choose SGD as the network optimizer.

```elixir
optimizer = Polaris.Optimizers.sgd(learning_rate: 0.04)
loop = Axon.Loop.trainer(model, :mean_squared_error, optimizer)

trained_model_state = Axon.Loop.run(loop, train_data, %{}, epochs: 100)
```
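If SGD turns out to converge slowly, Polaris also ships adaptive optimizers that could be swapped into the trainer. The snippet below is only a sketch of an alternative and is not used in the rest of the notebook:

```elixir
# Hypothetical alternative optimizer; passing it to Axon.Loop.trainer/3
# in place of the SGD optimizer above would be the only change needed.
Polaris.Optimizers.adam(learning_rate: 0.001)
```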

Finally, we make predictions over the train and test sets so we can evaluate our model.

```elixir
y_pred_train =
  x_train_batches
  |> Enum.map(&Axon.predict(model, trained_model_state, &1))
  |> Nx.concatenate()

y_pred_test =
  x_test_batches
  |> Enum.map(&Axon.predict(model, trained_model_state, &1))
  |> Nx.concatenate()
```

```elixir
y_train
|> Axon.Losses.mean_squared_error(y_pred_train, reduction: :mean)
|> Nx.to_number()
|> IO.inspect(label: "MSE train")

y_test
|> Axon.Losses.mean_squared_error(y_pred_test, reduction: :mean)
|> Nx.to_number()
|> IO.inspect(label: "MSE test")

:ok
```

## Visualizing predictions

Let's visualize the predictions compared to the real trend of the prices. First we build the plot data required for each layer.

```elixir
interval = Nx.iota({dataset_size})

plt_data = [
  Prices: norm_data[0..(dataset_size - 1)],
  Day: interval[0..(dataset_size - 1)],
  Eval: List.duplicate("Real prices", dataset_size)
]

{train_size} = Nx.shape(y_train)
{test_size} = Nx.shape(y_test)
plt_data_train = [
  Prices: Nx.to_flat_list(y_pred_train),
  # the first prediction targets day `win_size`, the first day with a full window behind it
  Day: interval[win_size..(train_size + win_size - 1)],
  Eval: List.duplicate("Predictions train", train_size)
]

plt_data_test = [
  Prices: Nx.to_flat_list(y_pred_test),
  Day: interval[(dataset_size - test_size)..(dataset_size - 1)],
  Eval: List.duplicate("Predictions test", test_size)
]

opts = [stroke_width: 2]
```

```elixir
Tucan.layers([
  Tucan.lineplot(plt_data, "Day", "Prices", Keyword.put(opts, :stroke_dash, [2])),
  Tucan.lineplot(plt_data_train, "Day", "Prices", opts),
  Tucan.lineplot(plt_data_test, "Day", "Prices", opts)
])
|> Tucan.color_by("Eval")
|> Tucan.set_size(600, 400)
|> Tucan.Scale.set_x_domain(0, dataset_size - 1)
|> VegaLite.encode(:color, field: "Eval", scale: [scheme: "set1"])
```