Created using Colaboratory

sanikamal · Oct 24, 2019 · 2a13497 · 2a13497
1 parent 8f2d98d
commit 2a13497
Showing 1 changed file with 340 additions and 0 deletions.
diff --git a/forecasting_with_lstm.ipynb b/forecasting_with_lstm.ipynb
@@ -0,0 +1,340 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "name": "forecasting_with_lstm.ipynb",
+      "provenance": [],
+      "collapsed_sections": [],
+      "toc_visible": true,
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/sanikamal/time-series-analysis-and-forecasting-atoz/blob/master/forecasting_with_lstm.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "E5VI4y76i14x"
+      },
+      "source": [
+        "# Forecasting with an LSTM"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "vidayERjaO5q"
+      },
+      "source": [
+        "## Setup"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "WQ5AjH_KY74O",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# !pip install --upgrade tensorflow"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "gqWabzlJ63nL",
+        "colab": {}
+      },
+      "source": [
+        "import numpy as np\n",
+        "import matplotlib.pyplot as plt\n",
+        "import tensorflow as tf\n",
+        "\n",
+        "keras = tf.keras"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "cg1hfKCPldZG",
+        "colab": {}
+      },
+      "source": [
+        "def plot_series(time, series, format=\"-\", start=0, end=None, label=None):\n",
+        "    \"\"\"Draw series[start:end] against time[start:end] with pyplot.\n",
+        "\n",
+        "    `format` is forwarded to plt.plot as the format string. A legend is\n",
+        "    only drawn when `label` is truthy.\n",
+        "    \"\"\"\n",
+        "    span = slice(start, end)\n",
+        "    plt.plot(time[span], series[span], format, label=label)\n",
+        "    if label:\n",
+        "        plt.legend(fontsize=14)\n",
+        "    plt.xlabel(\"Time\")\n",
+        "    plt.ylabel(\"Value\")\n",
+        "    plt.grid(True)\n",
+        "    \n",
+        "def trend(time, slope=0):\n",
+        "    \"\"\"Linear trend through the origin: `slope` units per time step.\"\"\"\n",
+        "    return time * slope\n",
+        "  \n",
+        "  \n",
+        "def seasonal_pattern(season_time):\n",
+        "    \"\"\"Arbitrary seasonal shape; swap in a different one if you like.\n",
+        "\n",
+        "    Where season_time < 0.4 the value is cos(2 * pi * season_time);\n",
+        "    elsewhere it is 1 / exp(3 * season_time).\n",
+        "    \"\"\"\n",
+        "    cosine_part = np.cos(season_time * 2 * np.pi)\n",
+        "    decay_part = 1 / np.exp(3 * season_time)\n",
+        "    return np.where(season_time < 0.4, cosine_part, decay_part)\n",
+        "\n",
+        "  \n",
+        "def seasonality(time, period, amplitude=1, phase=0):\n",
+        "    \"\"\"Tile seasonal_pattern over `time`, one repetition per `period`.\n",
+        "\n",
+        "    `phase` shifts the time axis before wrapping; `amplitude` scales the result.\n",
+        "    \"\"\"\n",
+        "    offset_in_period = (time + phase) % period\n",
+        "    season_time = offset_in_period / period  # fraction of the period elapsed\n",
+        "    return amplitude * seasonal_pattern(season_time)\n",
+        "  \n",
+        "  \n",
+        "def white_noise(time, noise_level=1, seed=None):\n",
+        "    \"\"\"Return len(time) standard-normal samples scaled by `noise_level`.\n",
+        "\n",
+        "    A dedicated RandomState seeded with `seed` is used for the draw.\n",
+        "    \"\"\"\n",
+        "    generator = np.random.RandomState(seed)\n",
+        "    samples = generator.randn(len(time))\n",
+        "    return samples * noise_level\n",
+        "  \n",
+        "\n",
+        "def sequential_window_dataset(series, window_size):\n",
+        "    \"\"\"Build a tf.data pipeline of consecutive (input, target) windows.\n",
+        "\n",
+        "    Each window holds window_size + 1 values and the next one starts\n",
+        "    window_size steps later, so neighbouring windows share one value.\n",
+        "    The input is the window without its last step, the target is the\n",
+        "    window without its first step. Incomplete trailing windows are\n",
+        "    dropped and every batch contains exactly one window.\n",
+        "    \"\"\"\n",
+        "    span = window_size + 1\n",
+        "    values = tf.expand_dims(series, axis=-1)  # append a feature axis\n",
+        "    windows = tf.data.Dataset.from_tensor_slices(values).window(\n",
+        "        span, shift=window_size, drop_remainder=True)\n",
+        "    chunks = windows.flat_map(lambda window: window.batch(span))\n",
+        "    pairs = chunks.map(lambda window: (window[:-1], window[1:]))\n",
+        "    return pairs.batch(1).prefetch(1)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "iL2DDjV3lel6",
+        "colab": {}
+      },
+      "source": [
+        "# Time axis: 4 * 365 + 1 consecutive integer steps.\n",
+        "time = np.arange(4 * 365 + 1)\n",
+        "\n",
+        "# Parameters of the synthetic series.\n",
+        "baseline = 10\n",
+        "slope = 0.05\n",
+        "amplitude = 40\n",
+        "noise_level = 5\n",
+        "\n",
+        "noise = white_noise(time, noise_level, seed=42)\n",
+        "\n",
+        "# baseline + linear trend + yearly pattern + seeded noise\n",
+        "series = (baseline\n",
+        "          + trend(time, slope)\n",
+        "          + seasonality(time, period=365, amplitude=amplitude)\n",
+        "          + noise)\n",
+        "\n",
+        "plt.figure(figsize=(10, 6))\n",
+        "plot_series(time, series)\n",
+        "plt.show()"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "Zmp1JXKxk9Vb",
+        "colab": {}
+      },
+      "source": [
+        "# The first `split_time` steps are used for training, the rest for validation.\n",
+        "split_time = 1000\n",
+        "time_train, time_valid = time[:split_time], time[split_time:]\n",
+        "x_train, x_valid = series[:split_time], series[split_time:]"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "9fPenJpTtuDE",
+        "colab": {}
+      },
+      "source": [
+        "class ResetStatesCallback(keras.callbacks.Callback):\n",
+        "    \"\"\"Keras callback that resets the model's states when an epoch begins.\"\"\"\n",
+        "\n",
+        "    def on_epoch_begin(self, epoch, logs=None):\n",
+        "        # `logs` defaults to None so the override matches the base Callback hook.\n",
+        "        self.model.reset_states()"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "EPjK0l9P8OJM"
+      },
+      "source": [
+        "## LSTM RNN Forecasting"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "cSoUmW-x8OJN",
+        "colab": {}
+      },
+      "source": [
+        "# Learning-rate sweep: train while raising the learning rate every epoch,\n",
+        "# then inspect loss against learning rate to choose a value.\n",
+        "keras.backend.clear_session()\n",
+        "tf.random.set_seed(42)\n",
+        "np.random.seed(42)\n",
+        "\n",
+        "window_size = 30\n",
+        "train_set = sequential_window_dataset(x_train, window_size)\n",
+        "\n",
+        "model = keras.models.Sequential([\n",
+        "  keras.layers.LSTM(100, return_sequences=True, stateful=True,\n",
+        "                    batch_input_shape=[1, None, 1]),\n",
+        "  keras.layers.LSTM(100, return_sequences=True, stateful=True),\n",
+        "  keras.layers.Dense(1),\n",
+        "  keras.layers.Lambda(lambda x: x * 200.0)\n",
+        "])\n",
+        "# The rate is multiplied by 10 every 20 epochs, starting at 1e-8.\n",
+        "lr_schedule = keras.callbacks.LearningRateScheduler(\n",
+        "    lambda epoch: 1e-8 * 10**(epoch / 20))\n",
+        "reset_states = ResetStatesCallback()\n",
+        "# `learning_rate` is the supported argument name; `lr` is a deprecated alias.\n",
+        "optimizer = keras.optimizers.SGD(learning_rate=1e-8, momentum=0.9)\n",
+        "model.compile(loss=keras.losses.Huber(),\n",
+        "              optimizer=optimizer,\n",
+        "              metrics=[\"mae\"])\n",
+        "history = model.fit(train_set, epochs=100,\n",
+        "                    callbacks=[lr_schedule, reset_states])"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "KA0GM9sQ8OJO",
+        "colab": {}
+      },
+      "source": [
+        "# Loss recorded at each epoch against the learning rate used for it (log x-axis).\n",
+        "plt.semilogx(history.history[\"lr\"], history.history[\"loss\"])\n",
+        "plt.xlabel(\"Learning rate\")\n",
+        "plt.ylabel(\"Loss\")\n",
+        "plt.axis([1e-8, 1e-4, 0, 30])"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "hiHR5pPL8OJP",
+        "colab": {}
+      },
+      "source": [
+        "# Train from scratch with a fixed learning rate, saving the best model seen.\n",
+        "keras.backend.clear_session()\n",
+        "tf.random.set_seed(42)\n",
+        "np.random.seed(42)\n",
+        "\n",
+        "window_size = 30\n",
+        "train_set = sequential_window_dataset(x_train, window_size)\n",
+        "valid_set = sequential_window_dataset(x_valid, window_size)\n",
+        "\n",
+        "model = keras.models.Sequential([\n",
+        "  keras.layers.LSTM(100, return_sequences=True, stateful=True,\n",
+        "                         batch_input_shape=[1, None, 1]),\n",
+        "  keras.layers.LSTM(100, return_sequences=True, stateful=True),\n",
+        "  keras.layers.Dense(1),\n",
+        "  keras.layers.Lambda(lambda x: x * 200.0)\n",
+        "])\n",
+        "# `learning_rate` is the supported argument name; `lr` is a deprecated alias.\n",
+        "optimizer = keras.optimizers.SGD(learning_rate=5e-7, momentum=0.9)\n",
+        "model.compile(loss=keras.losses.Huber(),\n",
+        "              optimizer=optimizer,\n",
+        "              metrics=[\"mae\"])\n",
+        "reset_states = ResetStatesCallback()\n",
+        "# Only the best model so far is written to my_checkpoint.h5.\n",
+        "model_checkpoint = keras.callbacks.ModelCheckpoint(\n",
+        "    \"my_checkpoint.h5\", save_best_only=True)\n",
+        "early_stopping = keras.callbacks.EarlyStopping(patience=50)\n",
+        "model.fit(train_set, epochs=500,\n",
+        "          validation_data=valid_set,\n",
+        "          callbacks=[early_stopping, model_checkpoint, reset_states])"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "nPeZUfQy8OJQ",
+        "colab": {}
+      },
+      "source": [
+        "# Replace the in-memory model with the one stored in my_checkpoint.h5, which the\n",
+        "# ModelCheckpoint callback wrote with save_best_only=True during training.\n",
+        "model = keras.models.load_model(\"my_checkpoint.h5\")"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "4tFrq5uW8OJR",
+        "colab": {}
+      },
+      "source": [
+        "# Feed the whole series as one batch of one sequence with one feature,\n",
+        "# matching the model's batch_input_shape=[1, None, 1].\n",
+        "rnn_forecast = model.predict(series.reshape(1, -1, 1))\n",
+        "# Training targets are the inputs shifted by one step, so the output at\n",
+        "# step t forecasts step t + 1. Keep outputs split_time - 1 up to the\n",
+        "# second-to-last step so they line up with x_valid.\n",
+        "rnn_forecast = rnn_forecast[0, split_time - 1:-1, 0]"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "ZfaR6nqj8OJT",
+        "colab": {}
+      },
+      "source": [
+        "# Overlay the forecast on the validation data; labels make plot_series draw\n",
+        "# a legend so the two curves can be told apart.\n",
+        "plt.figure(figsize=(10, 6))\n",
+        "plot_series(time_valid, x_valid, label=\"Series\")\n",
+        "plot_series(time_valid, rnn_forecast, label=\"Forecast\")"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "Wgf2u2Tp8OJV",
+        "colab": {}
+      },
+      "source": [
+        "# Mean absolute error of the forecast over the validation period, shown as the cell output.\n",
+        "keras.metrics.mean_absolute_error(x_valid, rnn_forecast).numpy()"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    }
+  ]
+}