From 5ed3d9342ebf1f6a953ba78f502b61cfbf985725 Mon Sep 17 00:00:00 2001 From: jinw00jun <79273189+jinw00jun@users.noreply.github.com> Date: Thu, 18 Feb 2021 22:35:18 -0500 Subject: [PATCH 1/6] Create aNewReadme.md A new directory and a new markdown file were created. --- Test/aNewReadme.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 Test/aNewReadme.md diff --git a/Test/aNewReadme.md b/Test/aNewReadme.md new file mode 100644 index 00000000..95993f45 --- /dev/null +++ b/Test/aNewReadme.md @@ -0,0 +1 @@ +New markdown file 'aNewReadme.md' was made in the forked course github repo. From bec72b738609ef34c0fea0dd1c965eb2788f731a Mon Sep 17 00:00:00 2001 From: jinw00jun <79273189+jinw00jun@users.noreply.github.com> Date: Thu, 18 Feb 2021 22:35:43 -0500 Subject: [PATCH 2/6] Update aNewReadme.md --- Test/aNewReadme.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Test/aNewReadme.md b/Test/aNewReadme.md index 95993f45..8720f415 100644 --- a/Test/aNewReadme.md +++ b/Test/aNewReadme.md @@ -1 +1,2 @@ -New markdown file 'aNewReadme.md' was made in the forked course github repo. +# New markdown file 'aNewReadme.md' was made in the forked course github repo. +assignment 1 From a5bfac96723b4ffe7a4090546b7a97ac738c0055 Mon Sep 17 00:00:00 2001 From: jinw00jun <79273189+jinw00jun@users.noreply.github.com> Date: Thu, 18 Feb 2021 22:36:35 -0500 Subject: [PATCH 3/6] Update aNewReadme.md --- Test/aNewReadme.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Test/aNewReadme.md b/Test/aNewReadme.md index 8720f415..05011984 100644 --- a/Test/aNewReadme.md +++ b/Test/aNewReadme.md @@ -1,2 +1,3 @@ -# New markdown file 'aNewReadme.md' was made in the forked course github repo. -assignment 1 +# Assignment 1 +New markdown file 'aNewReadme.md' was made in newly added `test` directory of the forked course github repo. 
+ From be3e07724a75690395e56668b050bbcb4e2083db Mon Sep 17 00:00:00 2001 From: jinw00jun <79273189+jinw00jun@users.noreply.github.com> Date: Thu, 18 Feb 2021 22:36:48 -0500 Subject: [PATCH 4/6] Update aNewReadme.md --- Test/aNewReadme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Test/aNewReadme.md b/Test/aNewReadme.md index 05011984..b47d734d 100644 --- a/Test/aNewReadme.md +++ b/Test/aNewReadme.md @@ -1,3 +1,3 @@ # Assignment 1 -New markdown file 'aNewReadme.md' was made in newly added `test` directory of the forked course github repo. +A new markdown file, `aNewReadme.md`, was created in the newly added `Test` directory of the forked course GitHub repo. From f11817b1d1d7d6df61e7c882d8bcd3b463a29ec8 Mon Sep 17 00:00:00 2001 From: jinw00jun <79273189+jinw00jun@users.noreply.github.com> Date: Mon, 12 Apr 2021 15:27:27 -0400 Subject: [PATCH 5/6] HW7. Created using Colaboratory --- HW7_colab.ipynb | 698 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 698 insertions(+) create mode 100644 HW7_colab.ipynb diff --git a/HW7_colab.ipynb b/HW7_colab.ipynb new file mode 100644 index 00000000..4dd366fb --- /dev/null +++ b/HW7_colab.ipynb @@ -0,0 +1,698 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Copy of Untitled0.ipynb", + "provenance": [], + "authorship_tag": "ABX9TyNZ4mYUsREhwFWjwd8bPgWn", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mzjhcuWShQ5o" + }, + "source": [ + "Libraries used" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "LYe1huaSQI0q", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "7dc6d907-2dcd-44d4-b225-9307dfbba91b" + }, + "source": [ 
"import pandas as pd\n", + "import numpy as np\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "import statsmodels.formula.api as smf" + ], + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n", + " import pandas.util.testing as tm\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Wlydw3AlhVY3" + }, + "source": [ + "Loading shhs.txt data file as a dataframe using pandas " + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "resources": { + "http://localhost:8080/nbextensions/google.colab/files.js": { + "data": "Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc3R5bGVba2V5XSA9IHN0eWxlQXR0
cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7CgpmdW5jdGlvbiBfdXBsb2FkRmlsZXMoaW5wdXRJZCwgb3V0cHV0SWQpIHsKICBjb25zdCBzdGVwcyA9IHVwbG9hZEZpbGVzU3RlcChpbnB1dElkLCBvdXRwdXRJZCk7CiAgY29uc3Qgb3V0cHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKG91dHB1dElkKTsKICAvLyBDYWNoZSBzdGVwcyBvbiB0aGUgb3V0cHV0RWxlbWVudCB0byBtYWtlIGl0IGF2YWlsYWJsZSBmb3IgdGhlIG5leHQgY2FsbAogIC8vIHRvIHVwbG9hZEZpbGVzQ29udGludWUgZnJvbSBQeXRob24uCiAgb3V0cHV0RWxlbWVudC5zdGVwcyA9IHN0ZXBzOwoKICByZXR1cm4gX3VwbG9hZEZpbGVzQ29udGludWUob3V0cHV0SWQpOwp9CgovLyBUaGlzIGlzIHJvdWdobHkgYW4gYXN5bmMgZ2VuZXJhdG9yIChub3Qgc3VwcG9ydGVkIGluIHRoZSBicm93c2VyIHlldCksCi8vIHdoZXJlIHRoZXJlIGFyZSBtdWx0aXBsZSBhc3luY2hyb25vdXMgc3RlcHMgYW5kIHRoZSBQeXRob24gc2lkZSBpcyBnb2luZwovLyB0byBwb2xsIGZvciBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcC4KLy8gVGhpcyB1c2VzIGEgUHJvbWlzZSB0byBibG9jayB0aGUgcHl0aG9uIHNpZGUgb24gY29tcGxldGlvbiBvZiBlYWNoIHN0ZXAsCi8vIHRoZW4gcGFzc2VzIHRoZSByZXN1bHQgb2YgdGhlIHByZXZpb3VzIHN0ZXAgYXMgdGhlIGlucHV0IHRvIHRoZSBuZXh0IHN0ZXAuCmZ1bmN0aW9uIF91cGxvYWRGaWxlc0NvbnRpbnVlKG91dHB1dElkKSB7CiAgY29uc3Qgb3V0cHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKG91dHB1dElkKTsKICBjb25zdCBzdGVwcyA9IG91dHB1dEVsZW1lbnQuc3RlcHM7CgogIGNvbnN0IG5leHQgPSBzdGVwcy5uZXh0KG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSk7CiAgcmV0dXJuIFByb21pc2UucmVzb2x2ZShuZXh0LnZhbHVlLnByb21pc2UpLnRoZW4oKHZhbHVlKSA9PiB7CiAgICAvLyBDYWNoZSB0aGUgbGFzdCBwcm9taXNlIHZhbHVlIHRvIG1ha2UgaXQgYXZhaWxhYmxlIHRvIHRoZSBuZXh0CiAgICAvLyBzdGVwIG9mIHRoZSBnZW5lcmF0b3IuCiAgICBvdXRwdXRFbGVtZW50Lmxhc3RQcm9taXNlVmFsdWUgPSB2YWx1ZTsKICAgIHJldHVybiBuZXh0LnZhbHVlLnJlc3BvbnNlOwogIH0pOwp9CgovKioKICogR2VuZXJhdG9yIGZ1bmN0aW9uIHdoaWNoIGlzIGNhbGxlZCBiZXR3ZWVuIGVhY2ggYXN5bmMgc3RlcCBvZiB0aGUgdXBsb2FkCiAqIHByb2Nlc3MuCiAqIEBwYXJhbSB7c3RyaW5nfSBpbnB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIGlucHV0IGZpbGUgcGlja2VyIGVsZW1lbnQuCiAqIEBwYXJhbSB7c3RyaW5nfSBvdXRwdXRJZCBFbGVtZW50IElEIG9mIHRoZSBvdXRwdXQgZGlzcGxheS4KICog
QHJldHVybiB7IUl0ZXJhYmxlPCFPYmplY3Q+fSBJdGVyYWJsZSBvZiBuZXh0IHN0ZXBzLgogKi8KZnVuY3Rpb24qIHVwbG9hZEZpbGVzU3RlcChpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IGlucHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKGlucHV0SWQpOwogIGlucHV0RWxlbWVudC5kaXNhYmxlZCA9IGZhbHNlOwoKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIG91dHB1dEVsZW1lbnQuaW5uZXJIVE1MID0gJyc7CgogIGNvbnN0IHBpY2tlZFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgaW5wdXRFbGVtZW50LmFkZEV2ZW50TGlzdGVuZXIoJ2NoYW5nZScsIChlKSA9PiB7CiAgICAgIHJlc29sdmUoZS50YXJnZXQuZmlsZXMpOwogICAgfSk7CiAgfSk7CgogIGNvbnN0IGNhbmNlbCA9IGRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoJ2J1dHRvbicpOwogIGlucHV0RWxlbWVudC5wYXJlbnRFbGVtZW50LmFwcGVuZENoaWxkKGNhbmNlbCk7CiAgY2FuY2VsLnRleHRDb250ZW50ID0gJ0NhbmNlbCB1cGxvYWQnOwogIGNvbnN0IGNhbmNlbFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgY2FuY2VsLm9uY2xpY2sgPSAoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9OwogIH0pOwoKICAvLyBXYWl0IGZvciB0aGUgdXNlciB0byBwaWNrIHRoZSBmaWxlcy4KICBjb25zdCBmaWxlcyA9IHlpZWxkIHsKICAgIHByb21pc2U6IFByb21pc2UucmFjZShbcGlja2VkUHJvbWlzZSwgY2FuY2VsUHJvbWlzZV0pLAogICAgcmVzcG9uc2U6IHsKICAgICAgYWN0aW9uOiAnc3RhcnRpbmcnLAogICAgfQogIH07CgogIGNhbmNlbC5yZW1vdmUoKTsKCiAgLy8gRGlzYWJsZSB0aGUgaW5wdXQgZWxlbWVudCBzaW5jZSBmdXJ0aGVyIHBpY2tzIGFyZSBub3QgYWxsb3dlZC4KICBpbnB1dEVsZW1lbnQuZGlzYWJsZWQgPSB0cnVlOwoKICBpZiAoIWZpbGVzKSB7CiAgICByZXR1cm4gewogICAgICByZXNwb25zZTogewogICAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgICAgfQogICAgfTsKICB9CgogIGZvciAoY29uc3QgZmlsZSBvZiBmaWxlcykgewogICAgY29uc3QgbGkgPSBkb2N1bWVudC5jcmVhdGVFbGVtZW50KCdsaScpOwogICAgbGkuYXBwZW5kKHNwYW4oZmlsZS5uYW1lLCB7Zm9udFdlaWdodDogJ2JvbGQnfSkpOwogICAgbGkuYXBwZW5kKHNwYW4oCiAgICAgICAgYCgke2ZpbGUudHlwZSB8fCAnbi9hJ30pIC0gJHtmaWxlLnNpemV9IGJ5dGVzLCBgICsKICAgICAgICBgbGFzdCBtb2RpZmllZDogJHsKICAgICAgICAgICAgZmlsZS5sYXN0TW9kaWZpZWREYXRlID8gZmlsZS5sYXN0TW9kaWZpZWREYXRlLnRvTG9jYWxlRGF0ZVN0cmluZygpIDoKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJ24vYSd9IC0gYCkpOwogICAgY29uc3QgcGVyY2VudCA9IHNwYW4oJzAlIGRvbmUnKTsKICAgIGxpLmFw
cGVuZENoaWxkKHBlcmNlbnQpOwoKICAgIG91dHB1dEVsZW1lbnQuYXBwZW5kQ2hpbGQobGkpOwoKICAgIGNvbnN0IGZpbGVEYXRhUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICAgIGNvbnN0IHJlYWRlciA9IG5ldyBGaWxlUmVhZGVyKCk7CiAgICAgIHJlYWRlci5vbmxvYWQgPSAoZSkgPT4gewogICAgICAgIHJlc29sdmUoZS50YXJnZXQucmVzdWx0KTsKICAgICAgfTsKICAgICAgcmVhZGVyLnJlYWRBc0FycmF5QnVmZmVyKGZpbGUpOwogICAgfSk7CiAgICAvLyBXYWl0IGZvciB0aGUgZGF0YSB0byBiZSByZWFkeS4KICAgIGxldCBmaWxlRGF0YSA9IHlpZWxkIHsKICAgICAgcHJvbWlzZTogZmlsZURhdGFQcm9taXNlLAogICAgICByZXNwb25zZTogewogICAgICAgIGFjdGlvbjogJ2NvbnRpbnVlJywKICAgICAgfQogICAgfTsKCiAgICAvLyBVc2UgYSBjaHVua2VkIHNlbmRpbmcgdG8gYXZvaWQgbWVzc2FnZSBzaXplIGxpbWl0cy4gU2VlIGIvNjIxMTU2NjAuCiAgICBsZXQgcG9zaXRpb24gPSAwOwogICAgd2hpbGUgKHBvc2l0aW9uIDwgZmlsZURhdGEuYnl0ZUxlbmd0aCkgewogICAgICBjb25zdCBsZW5ndGggPSBNYXRoLm1pbihmaWxlRGF0YS5ieXRlTGVuZ3RoIC0gcG9zaXRpb24sIE1BWF9QQVlMT0FEX1NJWkUpOwogICAgICBjb25zdCBjaHVuayA9IG5ldyBVaW50OEFycmF5KGZpbGVEYXRhLCBwb3NpdGlvbiwgbGVuZ3RoKTsKICAgICAgcG9zaXRpb24gKz0gbGVuZ3RoOwoKICAgICAgY29uc3QgYmFzZTY0ID0gYnRvYShTdHJpbmcuZnJvbUNoYXJDb2RlLmFwcGx5KG51bGwsIGNodW5rKSk7CiAgICAgIHlpZWxkIHsKICAgICAgICByZXNwb25zZTogewogICAgICAgICAgYWN0aW9uOiAnYXBwZW5kJywKICAgICAgICAgIGZpbGU6IGZpbGUubmFtZSwKICAgICAgICAgIGRhdGE6IGJhc2U2NCwKICAgICAgICB9LAogICAgICB9OwogICAgICBwZXJjZW50LnRleHRDb250ZW50ID0KICAgICAgICAgIGAke01hdGgucm91bmQoKHBvc2l0aW9uIC8gZmlsZURhdGEuYnl0ZUxlbmd0aCkgKiAxMDApfSUgZG9uZWA7CiAgICB9CiAgfQoKICAvLyBBbGwgZG9uZS4KICB5aWVsZCB7CiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICB9CiAgfTsKfQoKc2NvcGUuZ29vZ2xlID0gc2NvcGUuZ29vZ2xlIHx8IHt9OwpzY29wZS5nb29nbGUuY29sYWIgPSBzY29wZS5nb29nbGUuY29sYWIgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYi5fZmlsZXMgPSB7CiAgX3VwbG9hZEZpbGVzLAogIF91cGxvYWRGaWxlc0NvbnRpbnVlLAp9Owp9KShzZWxmKTsK", + "ok": true, + "headers": [ + [ + "content-type", + "application/javascript" + ] + ], + "status": 200, + "status_text": "" + } + }, + "base_uri": "https://localhost:8080/", + "height": 227 + }, + "id": "R2PBlW5ATiGX", + "outputId": 
"e710df31-23f1-4cd1-f1ad-110ba0f1ae1e" + }, + "source": [ + "from google.colab import files\n", + "uploaded = files.upload()" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " Upload widget is only available when the cell has been executed in the\n", + " current browser session. Please rerun this cell to enable.\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "Saving shhs.txt to shhs (2).txt\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "KMH5FqlATiIw" + }, + "source": [ + "import io\n", + "shhs = pd.read_csv(io.BytesIO(uploaded['shhs.txt']))" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 372 + }, + "id": "LkYva6sZTiLP", + "outputId": "83adc35f-7186-4f81-df82-686407840dc9" + }, + "source": [ + "shhs.head()" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pptidclinicWaistCOPD15ASTHMA15slp_lattime_bedtimest1ptimest2ptimes34ptimeremprdi4pStLOutPStOnsetPSlpPrdPStaging1Staging2Staging3Staging4Staging5RestAn1RestAn2RestAn3RestAn4HTNCVDCHDsexagesmokstatusbmi
00007418600.440.56.25832260.85219619.3075913.5818911.438083282822530000101000100155Former21.78
100094110700.225.00.82417665.6593416.75824216.75824217.8021980010920.........100178Never32.95
20028418200.431.54.88145140.30683542.81729511.9944214.85355616716721510100001000000077Never24.11
3003941850014358.52.99003329.40199352.32558115.2823930.797342548218060100001000100148Never20.19
400454176006.5477.05.67567668.64865113.37837812.2972972.75675772022200100001000100066Former23.31
\n", + "
" + ], + "text/plain": [ + " pptid clinic Waist COPD15 ASTHMA15 ... CHD sex age smokstatus bmi\n", + "0 0007 41 86 0 0 ... 0 1 55 Former 21.78\n", + "1 0009 41 107 0 0 ... 0 1 78 Never 32.95\n", + "2 0028 41 82 0 0 ... 0 0 77 Never 24.11\n", + "3 0039 41 85 0 0 ... 0 1 48 Never 20.19\n", + "4 0045 41 76 0 0 ... 0 0 66 Former 23.31\n", + "\n", + "[5 rows x 31 columns]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 4 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7FSPXxOPhffP" + }, + "source": [ + "Because bmi values are read as characters, convert it to numeric and drop NA values.\n", + " " + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "iT9uR3AHcMRQ", + "outputId": "5cf76c98-213a-4ec7-890e-85fb836bd076" + }, + "source": [ + "df = shhs[['bmi', 'rdi4p']]\n", + "df['bmi'] = pd.to_numeric(df['bmi'],errors='coerce')\n", + "df = df.dropna()\n", + "df['log_rdi4p'] = np.log(df.rdi4p + 1)" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " \n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_H96Yud2htWm" + }, + "source": [ + "Fitting linear model." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 542 + }, + "id": "hQ1Zr7xrTiZI", + "outputId": "4f84793a-4e83-440b-ebdc-53b97a0430a6" + }, + "source": [ + "fit = smf.ols('log_rdi4p ~ bmi', data = df).fit()\n", + "fit.summary()" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
OLS Regression Results
Dep. Variable: log_rdi4p R-squared: 0.133
Model: OLS Adj. R-squared: 0.133
Method: Least Squares F-statistic: 973.8
Date: Mon, 12 Apr 2021 Prob (F-statistic): 5.47e-199
Time: 19:24:45 Log-Likelihood: -8893.0
No. Observations: 6333 AIC: 1.779e+04
Df Residuals: 6331 BIC: 1.780e+04
Df Model: 1
Covariance Type: nonrobust
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
coef std err t P>|t| [0.025 0.975]
Intercept -0.3430 0.067 -5.111 0.000 -0.475 -0.211
bmi 0.0722 0.002 31.207 0.000 0.068 0.077
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
Omnibus: 202.903 Durbin-Watson: 1.967
Prob(Omnibus): 0.000 Jarque-Bera (JB): 144.575
Skew: 0.266 Prob(JB): 4.04e-32
Kurtosis: 2.485 Cond. No. 157.


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: log_rdi4p R-squared: 0.133\n", + "Model: OLS Adj. R-squared: 0.133\n", + "Method: Least Squares F-statistic: 973.8\n", + "Date: Mon, 12 Apr 2021 Prob (F-statistic): 5.47e-199\n", + "Time: 19:24:45 Log-Likelihood: -8893.0\n", + "No. Observations: 6333 AIC: 1.779e+04\n", + "Df Residuals: 6331 BIC: 1.780e+04\n", + "Df Model: 1 \n", + "Covariance Type: nonrobust \n", + "==============================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "Intercept -0.3430 0.067 -5.111 0.000 -0.475 -0.211\n", + "bmi 0.0722 0.002 31.207 0.000 0.068 0.077\n", + "==============================================================================\n", + "Omnibus: 202.903 Durbin-Watson: 1.967\n", + "Prob(Omnibus): 0.000 Jarque-Bera (JB): 144.575\n", + "Skew: 0.266 Prob(JB): 4.04e-32\n", + "Kurtosis: 2.485 Cond. No. 157.\n", + "==============================================================================\n", + "\n", + "Warnings:\n", + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", + "\"\"\"" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 6 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4_6BGfViieZl" + }, + "source": [ + "Scatter plot + fitted line. 
" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "mwrKbjqAfDjn" + }, + "source": [ + "yhat = fit.predict(df.bmi)" + ], + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 451 + }, + "id": "E5aih7x4fDmX", + "outputId": "0239b774-374e-44aa-d7d2-17f7f667ffdb" + }, + "source": [ + "sns.scatterplot(data = df, x= 'bmi', y = 'log_rdi4p' )\n", + "plt.plot(df.bmi, yhat, color='r')" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 8 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qE6KMkzqh5TD" + }, + "source": [ + "Estimation of rdi4p when bmi = 30. \n", + "\n", + "**The predicted rdi4p is 5.19241304**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Xy3b721ifDhL" + }, + "source": [ + "intercept = fit.params[0]\n", + "b1 = fit.params[1]" + ], + "execution_count": 9, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "JcdznwrkfDoi" + }, + "source": [ + "bmi_val = 30\n", + "predicted_log_rdi4p = intercept + b1 * bmi_val\n", + "predicted_rdi4p = np.exp(predicted_log_rdi4p)-1" + ], + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HlbqqZ5LhKw_", + "outputId": "3a0c86f9-fb15-4cc0-f29c-2a6ae69e46cb" + }, + "source": [ + "print(predicted_rdi4p)" + ], + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "text": [ + "5.192413044073733\n" + ], + "name": "stdout" + } + ] + } + ] +} \ No newline at end of file From 13276b3f10a3bbab3f80b65143747ce0ad3f1209 Mon Sep 17 00:00:00 2001 From: jinw00jun <79273189+jinw00jun@users.noreply.github.com> Date: Mon, 12 Apr 2021 15:33:51 -0400 Subject: [PATCH 6/6] Created using Colaboratory --- HW7_colab.ipynb | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/HW7_colab.ipynb b/HW7_colab.ipynb index 4dd366fb..43743e36 100644 --- a/HW7_colab.ipynb +++ b/HW7_colab.ipynb @@ -5,7 +5,7 @@ "colab": { "name": "Copy of Untitled0.ipynb", "provenance": [], - "authorship_tag": "ABX9TyNZ4mYUsREhwFWjwd8bPgWn", + "authorship_tag": "ABX9TyOgN9LM174c9Rddp32i5QsD", "include_colab_link": true }, "kernelspec": { @@ -658,6 +658,31 @@ "execution_count": 9, "outputs": [] }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RoG6WkCAkePy", + 
"outputId": "36d03790-2807-41bd-aa14-f1ecd94377a8" + }, + "source": [ + "print(intercept)\n", + "print(b1)" + ], + "execution_count": 12, + "outputs": [ + { + "output_type": "stream", + "text": [ + "-0.3430244378036704\n", + "0.07221164259835346\n" + ], + "name": "stdout" + } + ] + }, { "cell_type": "code", "metadata": {