diff --git a/02_activities/assignments/assignment_1.ipynb b/02_activities/assignments/assignment_1.ipynb index 73d92a3ee..a32064353 100644 --- a/02_activities/assignments/assignment_1.ipynb +++ b/02_activities/assignments/assignment_1.ipynb @@ -34,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "4a3485d6-ba58-4660-a983-5680821c5719", "metadata": {}, "outputs": [], @@ -56,10 +56,288 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "a431d282-f9ca-4d5d-8912-71ffc9d8ea19", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesprolineclass
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.00
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.00
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.00
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.00
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.00
.............................................
17313.715.652.4520.595.01.680.610.521.067.700.641.74740.02
17413.403.912.4823.0102.01.800.750.431.417.300.701.56750.02
17513.274.282.2620.0120.01.590.690.431.3510.200.591.56835.02
17613.172.592.3720.0120.01.650.680.531.469.300.601.62840.02
17714.134.102.7424.596.02.050.760.561.359.200.611.60560.02
\n", + "

178 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols \\\n", + "0 14.23 1.71 2.43 15.6 127.0 2.80 \n", + "1 13.20 1.78 2.14 11.2 100.0 2.65 \n", + "2 13.16 2.36 2.67 18.6 101.0 2.80 \n", + "3 14.37 1.95 2.50 16.8 113.0 3.85 \n", + "4 13.24 2.59 2.87 21.0 118.0 2.80 \n", + ".. ... ... ... ... ... ... \n", + "173 13.71 5.65 2.45 20.5 95.0 1.68 \n", + "174 13.40 3.91 2.48 23.0 102.0 1.80 \n", + "175 13.27 4.28 2.26 20.0 120.0 1.59 \n", + "176 13.17 2.59 2.37 20.0 120.0 1.65 \n", + "177 14.13 4.10 2.74 24.5 96.0 2.05 \n", + "\n", + " flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue \\\n", + "0 3.06 0.28 2.29 5.64 1.04 \n", + "1 2.76 0.26 1.28 4.38 1.05 \n", + "2 3.24 0.30 2.81 5.68 1.03 \n", + "3 3.49 0.24 2.18 7.80 0.86 \n", + "4 2.69 0.39 1.82 4.32 1.04 \n", + ".. ... ... ... ... ... \n", + "173 0.61 0.52 1.06 7.70 0.64 \n", + "174 0.75 0.43 1.41 7.30 0.70 \n", + "175 0.69 0.43 1.35 10.20 0.59 \n", + "176 0.68 0.53 1.46 9.30 0.60 \n", + "177 0.76 0.56 1.35 9.20 0.61 \n", + "\n", + " od280/od315_of_diluted_wines proline class \n", + "0 3.92 1065.0 0 \n", + "1 3.40 1050.0 0 \n", + "2 3.17 1185.0 0 \n", + "3 3.45 1480.0 0 \n", + "4 2.93 735.0 0 \n", + ".. ... ... ... \n", + "173 1.74 740.0 2 \n", + "174 1.56 750.0 2 \n", + "175 1.56 835.0 2 \n", + "176 1.62 840.0 2 \n", + "177 1.60 560.0 2 \n", + "\n", + "[178 rows x 14 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn.datasets import load_wine\n", "\n", @@ -91,12 +369,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 55, "id": "56916892", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "178\n" + ] + } + ], "source": [ - "# Your answer here" + "# Your answer here\n", + "num_rows = wine_df.shape[0]\n", + "print(num_rows)" ] }, { @@ -109,12 +397,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 53, "id": "df0ef103", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium',\n", + " 'total_phenols', 'flavanoids', 'nonflavanoid_phenols',\n", + " 'proanthocyanins', 'color_intensity', 'hue',\n", + " 'od280/od315_of_diluted_wines', 'proline', 'class'],\n", + " dtype='object')\n" + ] + } + ], "source": [ - "# Your answer here" + "# Your answer here\n", + "columns = wine_df.columns\n", + "print(columns)" ] }, { @@ -127,12 +429,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 57, "id": "47989426", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0 1 2]\n" + ] + } + ], "source": [ - "# Your answer here" + "# Your answer here\n", + "# the variable type is integer.\n", + "print(wine_df['class'].unique())\n" ] }, { @@ -146,12 +458,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 58, "id": "bd7b0910", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium',\n", + " 'total_phenols', 'flavanoids', 'nonflavanoid_phenols',\n", + " 'proanthocyanins', 'color_intensity', 'hue',\n", + " 'od280/od315_of_diluted_wines', 'proline'],\n", + " dtype='object')\n" + ] + } + ], "source": [ - "# Your answer here" + "# Your answer here\n", 
+    "predictor_variables = wine_df.drop(columns=['class'])\n",
+    "print(predictor_variables.columns)"
   ]
  },
  {
@@ -175,10 +501,58 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 42,
+   "id": "fff52b40",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "pandas.core.frame.DataFrame"
+      ]
+     },
+     "execution_count": 42,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "type(predictors_standardized)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
   "id": "cc899b59",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "    alcohol  malic_acid       ash  alcalinity_of_ash  magnesium  \\\n",
+      "0  1.518613   -0.562250  0.232053          -1.169593   1.913905   \n",
+      "1  0.246290   -0.499413 -0.827996          -2.490847   0.018145   \n",
+      "2  0.196879    0.021231  1.109334          -0.268738   0.088358   \n",
+      "3  1.691550   -0.346811  0.487926          -0.809251   0.930918   \n",
+      "4  0.295700    0.227694  1.840403           0.451946   1.281985   \n",
+      "\n",
+      "   total_phenols  flavanoids  nonflavanoid_phenols  proanthocyanins  \\\n",
+      "0       0.808997    1.034819             -0.659563         1.224884   \n",
+      "1       0.568648    0.733629             -0.820719        -0.544721   \n",
+      "2       0.808997    1.215533             -0.498407         2.135968   \n",
+      "3       2.491446    1.466525             -0.981875         1.032155   \n",
+      "4       0.808997    0.663351              0.226796         0.401404   \n",
+      "\n",
+      "   color_intensity       hue  od280/od315_of_diluted_wines   proline  \n",
+      "0         0.251717  0.362177                      1.847920  1.013009  \n",
+      "1        -0.293321  0.406051                      1.113449  0.965242  \n",
+      "2         0.269020  0.318304                      0.788587  1.395148  \n",
+      "3         1.186068 -0.427544                      1.184071  2.334574  \n",
+      "4        -0.319276  0.362177                      0.449601 -0.037874  \n"
+     ]
+    }
+   ],
    "source": [
     "# Select predictors (excluding the last column)\n",
     "predictors = wine_df.iloc[:, :-1]\n",
@@ -204,7 +578,7 @@
   "id": "403ef0bb",
   "metadata": {},
   "source": [
-    "> Your answer here..."
+    "Standardizing the predictors puts every feature on the same scale, so each one contributes equally to the KNN distance calculation and variables measured on larger scales (such as proline) cannot dominate the model."
   ]
  },
  {
@@ -220,7 +594,7 @@
   "id": "fdee5a15",
   "metadata": {},
   "source": [
-    "> Your answer here..."
+    "The response variable is \"class\": it encodes the wine category (cultivar) that the model is asked to predict, which is why it is kept out of the predictors."
   ]
  },
  {
@@ -236,7 +610,9 @@
   "id": "f0676c21",
   "metadata": {},
   "source": [
-    "> Your answer here..."
+    "Setting a seed makes the pseudo-random steps, such as the train/test split, reproducible.\n",
+    "It ensures that the results of the experiment can be replicated by anyone at any time.\n",
+    "The specific value does not matter, but reusing the same seed does: when comparing models or techniques, it guarantees they are evaluated under identical conditions (e.g., the same training and testing splits)."
   ]
  },
  {
@@ -252,7 +628,7 @@
 {
   "cell_type": "code",
   "execution_count": null,
-  "id": "72c101f2",
+  "id": "4b72dbac",
  "metadata": {},
  "outputs": [],
  "source": [
   "# set a seed for reproducibility\n",
   "np.random.seed(123)\n",
   "\n",
   "# split the data into a training and testing set. hint: use train_test_split !\n",
+    "wine_df_train, wine_df_test = train_test_split(\n",
+    "    wine_df, train_size = 0.75, shuffle = True, random_state=123\n",
+    ")\n",
+    "train_x = wine_df_train.iloc[:, :-1]\n",
+    "train_y = wine_df_train['class']\n",
+    "test_x = wine_df_test.iloc[:, :-1]\n",
+    "test_y = wine_df_test['class']"
   ]
  },
  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "id": "72c101f2",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(     alcohol  malic_acid       ash  alcalinity_of_ash  magnesium  \\\n",
+       " 28   1.073917   -0.391694  1.584529          -0.028510   0.509638   \n",
+       " 13   2.160950   -0.544297  0.085839          -2.430790  -0.613775   \n",
+       " 88  -1.680724   -0.248068  0.341713           0.632117  -1.105268   \n",
+       " 24   0.616869   -0.472483  0.890014           0.151661  -0.262708   \n",
+       " 100 -1.137207   -0.230114 -2.436346          -0.599052  -0.192495   \n",
+       " ..        ...         ...       ...                ...        ...   \n",
+       " 17   1.024507   -0.687923  0.926567           0.151661   1.071345   \n",
+       " 98  -0.778980   -1.136754 -0.974210          -0.298767  -0.824415   \n",
+       " 66   0.135116   -1.190614 -2.436346          -1.349764  -1.526548   \n",
+       " 126 -0.704864   -0.723829 -0.279695           0.602088  -0.964842   \n",
+       " 109 -1.717782   -0.885409  1.218995           0.151661  -0.403135   \n",
+       " \n",
+       "      total_phenols  flavanoids  nonflavanoid_phenols  proanthocyanins  \\\n",
+       " 28        1.049347    0.944462              0.065639         0.296279   \n",
+       " 13        1.289697    1.667318              0.549108         2.135968   \n",
+       " 88       -0.552984   -0.340615              0.951998        -0.422075   \n",
+       " 24        0.376368    0.583034             -0.659563         0.121071   \n",
+       " 100      -0.104332    0.141289             -0.820719        -0.334471   \n",
+       " ..             ...         ...                   ...              ...   \n",
+       " 17        1.049347    1.376168              0.307374         0.226196   \n",
+       " 98        1.962676    1.727556             -0.981875         0.629175   \n",
+       " 66        1.097417    1.155295             -0.820719         1.207363   \n",
+       " 126       0.712858    1.125176              0.226796         0.313800   \n",
+       " 109       0.712858    0.894264             -0.578985         1.575301   \n",
+       " \n",
+       "      color_intensity       hue  od280/od315_of_diluted_wines   proline  \n",
+       " 28         -0.241413  1.283518                      1.113449  0.535335  \n",
+       " 13          0.147900  1.283518                      0.167113  1.283691  \n",
+       " 88         -0.976782  0.186684                      0.195361 -0.213021  \n",
+       " 24         -0.665332  0.713164                      1.706675  0.312420  \n",
+       " 100        -0.760497  1.371265                      0.491974 -0.117486  \n",
+       " ..               ...       ...                           ...       ... 
\n", + " 17 0.666984 0.757038 -0.058878 1.220001 \n", + " 98 -0.241413 0.362177 0.223610 -0.276711 \n", + " 66 0.104643 0.713164 0.802712 -0.779861 \n", + " 126 -0.483652 -1.173391 0.322481 -1.257535 \n", + " 109 -1.041667 0.011190 0.915707 -0.213021 \n", + " \n", + " [133 rows x 13 columns],\n", + " alcohol malic_acid ash alcalinity_of_ash magnesium \\\n", + " 138 0.604516 1.125357 -0.645229 0.001518 -0.824415 \n", + " 60 -0.828391 -1.109824 -0.316249 -1.049479 0.088358 \n", + " 137 -0.581338 2.848870 0.999674 1.653086 -0.262708 \n", + " 77 -1.433671 0.496993 -0.499016 -0.448909 0.860705 \n", + " 90 -1.137207 -0.454530 -0.170035 -0.298767 -1.315908 \n", + " 159 0.592164 -0.598156 0.999674 0.902373 -0.754202 \n", + " 41 0.505695 1.349773 -0.901103 -0.208681 -0.683988 \n", + " 136 -0.927212 2.139716 0.634140 0.451946 -0.754202 \n", + " 166 0.555106 1.224100 0.853460 1.052516 0.790492 \n", + " 93 -0.877801 0.443133 -0.535569 -0.448909 -0.824415 \n", + " 154 -0.519575 -0.939268 -0.974210 0.151661 0.228785 \n", + " 141 0.443932 0.200764 -0.060375 0.151661 -0.754202 \n", + " 164 0.962743 0.380297 -0.243142 0.752231 -0.683988 \n", + " 33 0.938038 -0.723829 1.218995 0.001518 2.264972 \n", + " 31 0.715690 -0.607133 -0.023821 -0.118596 0.439425 \n", + " 152 0.135116 -0.391694 1.401762 1.803228 1.141558 \n", + " 82 -1.137207 -1.082894 0.524480 1.352801 -1.526548 \n", + " 122 -0.717217 1.879394 1.328655 2.103513 0.158572 \n", + " 42 1.086270 -0.400670 0.816907 -1.349764 0.088358 \n", + " 87 -1.668371 -0.598156 0.926567 1.953371 -0.824415 \n", + " 139 -0.198406 0.559829 0.890014 1.352801 0.088358 \n", + " 147 -0.161348 2.040973 0.414820 0.602088 -0.964842 \n", + " 151 -0.260169 0.299507 0.414820 0.752231 0.860705 \n", + " 162 -0.186053 0.838105 0.780354 0.752231 0.439425 \n", + " 79 -0.371343 1.376703 0.122392 1.052516 0.088358 \n", + " 169 0.493343 2.031997 1.803849 1.653086 0.860705 \n", + " 157 -0.680159 0.622666 0.999674 2.253656 -0.192495 \n", + " 95 -0.655454 -0.732806 -0.608676 -0.148624 4.371372 \n", + " 53 0.950391 -0.391694 1.145888 -0.719166 1.071345 \n", + " 10 1.358028 -0.158301 -0.243142 -0.448909 0.369212 \n", + " 23 -0.186053 -0.660993 0.561033 -0.508966 -0.332922 \n", + " 37 0.061000 -0.616110 0.670693 -0.448909 -0.122282 \n", + " 134 -0.606043 -0.984151 -0.425909 -0.599052 -1.035055 \n", + " 110 -1.903071 1.260006 -1.997705 0.001518 0.509638 \n", + " 167 -0.223111 0.927871 -0.243142 0.001518 -0.824415 \n", + " 120 -1.915424 0.057138 0.195499 0.151661 -0.262708 \n", + " 142 0.641574 0.748338 1.292101 1.202658 -0.192495 \n", + " 8 2.259772 -0.625086 -0.718336 -1.650049 -0.192495 \n", + " 72 0.604516 -0.607133 -0.462462 1.352801 -0.894628 \n", + " 114 -1.137207 -0.849502 0.487926 0.902373 -1.105268 \n", + " 148 0.394521 0.811175 0.049285 0.602088 -0.543562 \n", + " 163 -0.050174 0.999684 -0.060375 -0.298767 0.439425 \n", + " 36 0.345111 -0.625086 1.730742 -1.199622 0.720278 \n", + " 20 1.308617 -0.634063 -0.316249 -1.049479 1.843692 \n", + " 121 -1.779545 -0.257044 3.156325 2.704083 1.352198 \n", + " \n", + " total_phenols flavanoids nonflavanoid_phenols proanthocyanins \\\n", + " 138 -1.081754 -1.555415 1.757779 -1.245555 \n", + " 60 -0.392751 -0.942995 2.160669 -2.069034 \n", + " 137 -0.809357 -1.434939 2.160669 -0.860096 \n", + " 77 -0.921521 -0.712083 0.549108 -1.122909 \n", + " 90 -1.113800 -0.531369 1.274310 0.086029 \n", + " 159 0.488531 -0.932956 1.274310 1.224884 \n", + " 41 0.248181 0.653312 -0.740141 -0.194304 \n", + " 136 -1.466313 -1.565455 1.354888 -1.385721 \n", + " 166 
-0.953567 -1.113670 0.549108 -0.229346 \n", + " 93 0.248181 0.221606 -0.901297 0.699259 \n", + " 154 -1.306080 -1.455019 1.354888 -0.334471 \n", + " 141 -1.434267 -1.535336 0.065639 -1.666055 \n", + " 164 -1.514383 -1.354622 0.387952 -0.982742 \n", + " 33 1.049347 0.713550 1.113154 -0.422075 \n", + " 31 0.905137 1.165335 -1.143031 0.629175 \n", + " 152 -0.152402 -0.752242 -0.820719 -0.054137 \n", + " 82 -0.472868 -0.451052 0.307374 -0.334471 \n", + " 122 -0.152402 0.101130 0.549108 0.208675 \n", + " 42 1.530047 1.536802 -1.545922 0.191154 \n", + " 87 -0.601054 -0.420933 0.307374 -0.439596 \n", + " 139 0.039878 -1.434939 1.354888 -1.368201 \n", + " 147 -0.953567 -1.384741 0.871420 -1.280596 \n", + " 151 -1.306080 -0.671924 -0.981875 -0.579763 \n", + " 162 -1.033684 -1.434939 1.918935 -1.105388 \n", + " 79 0.857067 0.522796 0.549108 0.629175 \n", + " 169 -0.504914 -1.073511 -0.740141 -0.842575 \n", + " 157 -0.633101 -1.455019 2.160669 -0.790013 \n", + " 95 0.328298 0.241685 -0.337251 2.959447 \n", + " 53 1.129464 0.763748 0.226796 0.156113 \n", + " 10 1.049347 1.295850 -1.143031 1.382572 \n", + " 23 0.296251 0.342082 -0.820719 -0.229346 \n", + " 37 0.248181 0.402320 -0.578985 -0.264388 \n", + " 134 -0.472868 -1.455019 1.918935 -0.597284 \n", + " 110 1.417883 0.552915 -0.981875 3.485073 \n", + " 167 -1.306080 -1.374701 0.307374 -1.087867 \n", + " 120 0.969231 0.763748 -0.337251 0.418925 \n", + " 142 -1.193917 -1.515257 1.113154 -1.823742 \n", + " 8 0.808997 0.954502 -0.578985 0.681738 \n", + " 72 -0.665148 -0.190020 -0.740141 -0.982742 \n", + " 114 0.424438 0.261765 0.549108 -0.965221 \n", + " 148 -0.585031 -1.274305 0.710264 -0.597284 \n", + " 163 -1.450290 -1.334543 0.307374 -1.140430 \n", + " 36 0.488531 0.653312 -0.176095 -0.404554 \n", + " 20 1.129464 1.145255 -0.981875 0.891988 \n", + " 121 1.417883 3.062832 0.871420 0.489009 \n", + " \n", + " color_intensity hue od280/od315_of_diluted_wines proline \n", + " 138 0.277671 -0.646911 -1.118210 -0.531471 \n", + " 60 -0.773474 1.283518 -1.330077 -0.213021 \n", + " 137 -0.025128 -0.603037 -1.301828 -0.738463 \n", + " 77 -1.041667 0.011190 -0.129500 -0.786230 \n", + " 90 -1.149810 0.537671 -0.482611 -0.849920 \n", + " 159 2.894719 -1.699872 -1.174708 -0.404091 \n", + " 41 -0.336578 -0.208177 0.548472 0.917474 \n", + " 136 -0.522583 -0.910151 -1.895054 -0.085641 \n", + " 166 2.431870 -0.471417 -1.485445 -0.165254 \n", + " 93 -1.257952 0.844785 0.972205 -1.454974 \n", + " 154 1.099554 -1.655999 -1.499570 -0.340401 \n", + " 141 0.234414 -1.129518 -0.200123 0.105428 \n", + " 164 1.956043 -1.129518 -1.315952 -0.420013 \n", + " 33 0.147900 1.283518 0.548472 1.554373 \n", + " 31 0.796755 0.581544 0.378979 2.446031 \n", + " 152 0.883269 -1.524378 -1.810307 -1.025067 \n", + " 82 -1.236324 1.546759 0.152988 -0.372246 \n", + " 122 -1.288232 -0.164303 0.717965 -1.216137 \n", + " 42 0.160877 -0.339797 1.339440 1.108544 \n", + " 87 -1.063296 1.766126 0.845085 -0.588791 \n", + " 139 -0.059734 -0.295924 -0.652104 -0.499626 \n", + " 147 1.121183 -1.831492 -1.061713 -0.388168 \n", + " 151 2.483778 -2.094732 -1.612565 -0.849920 \n", + " 162 0.225763 -0.383670 -0.708602 -0.563315 \n", + " 79 -1.076273 1.020278 0.732090 -0.904056 \n", + " 169 1.488867 -1.261138 -0.976966 -0.372246 \n", + " 157 1.056297 -1.261138 -1.245330 0.423878 \n", + " 95 -1.063296 0.888658 0.025868 0.605394 \n", + " 53 0.537213 0.757038 0.449601 2.000202 \n", + " 10 0.299300 1.283518 0.788587 2.430109 \n", + " 23 -0.487978 0.581544 1.438311 0.853784 \n", + " 37 -0.349555 0.713164 -0.143625 
1.140389 \n", + " 134 0.169529 -0.910151 -1.556068 -0.308556 \n", + " 110 -0.933525 -0.910151 0.280108 -0.588791 \n", + " 167 2.250190 -1.041771 -1.217081 -0.197099 \n", + " 120 -0.782125 -0.690784 1.099325 -0.388168 \n", + " 142 -0.306298 -0.295924 -0.779224 -0.722540 \n", + " 8 0.061386 0.537671 0.336606 0.949319 \n", + " 72 -0.570166 0.098937 0.237735 -0.875396 \n", + " 114 -0.933525 -0.120430 0.816836 -1.152447 \n", + " 148 1.454261 -1.787619 -1.400699 -0.308556 \n", + " 163 0.095992 -1.217265 -1.217081 -0.228944 \n", + " 36 -0.198156 0.581544 0.237735 0.423878 \n", + " 20 0.256043 0.581544 1.551307 0.105428 \n", + " 121 0.407442 -0.120430 1.523058 -0.897687 ,\n", + " 28 0\n", + " 13 0\n", + " 88 1\n", + " 24 0\n", + " 100 1\n", + " ..\n", + " 17 0\n", + " 98 1\n", + " 66 1\n", + " 126 1\n", + " 109 1\n", + " Name: class, Length: 133, dtype: int64,\n", + " 138 2\n", + " 60 1\n", + " 137 2\n", + " 77 1\n", + " 90 1\n", + " 159 2\n", + " 41 0\n", + " 136 2\n", + " 166 2\n", + " 93 1\n", + " 154 2\n", + " 141 2\n", + " 164 2\n", + " 33 0\n", + " 31 0\n", + " 152 2\n", + " 82 1\n", + " 122 1\n", + " 42 0\n", + " 87 1\n", + " 139 2\n", + " 147 2\n", + " 151 2\n", + " 162 2\n", + " 79 1\n", + " 169 2\n", + " 157 2\n", + " 95 1\n", + " 53 0\n", + " 10 0\n", + " 23 0\n", + " 37 0\n", + " 134 2\n", + " 110 1\n", + " 167 2\n", + " 120 1\n", + " 142 2\n", + " 8 0\n", + " 72 1\n", + " 114 1\n", + " 148 2\n", + " 163 2\n", + " 36 0\n", + " 20 0\n", + " 121 1\n", + " Name: class, dtype: int64)" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# set a seed for reproducibility\n", + "np.random.seed(123)\n", "\n", - "# Your code here ..." + "# split the data into a training and testing set. hint: use train_test_split !\n", + " \n", + "Wine_train_X, Wine_test_X, Wine_train_y, Wine_test_y = train_test_split(\n", + " predictors_standardized, wine_df['class'], train_size = 0.75, shuffle = True,\n", + " random_state=123\n", + ")\n", + "Wine_train_X, Wine_test_X, Wine_train_y, Wine_test_y" ] }, { @@ -287,9 +935,458 @@ "execution_count": null, "id": "08818c64", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'n_neighbors': array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\n", + " 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,\n", + " 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50])}\n", + "Optimal number of neighbors: 15\n" + ] + }, + { + "data": { + "text/html": [ + "
<div>[scikit-learn HTML estimator widget for KNeighborsClassifier(n_neighbors=10); the text/plain repr below shows the same fitted estimator]</div>
" + ], + "text/plain": [ + "KNeighborsClassifier(n_neighbors=10)" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here..." + "# Your code here...\n", + "knn = KNeighborsClassifier(n_neighbors=10)\n", + "\n", + "import numpy as np\n", + "\n", + "# Define the parameter grid for n_neighbors\n", + "param_grid = {'n_neighbors': np.arange(1, 51)}\n", + "\n", + "print(param_grid)\n", + "\n", + "# GridSearchCV with 10-fold cross-validation\n", + "grid_search = GridSearchCV(estimator=knn, param_grid=param_grid, cv=10, n_jobs=-1)\n", + "\n", + "# Fitting grid search to the training data\n", + "grid_search.fit(Wine_train_X, Wine_train_y)\n", + "\n", + "# Printing optimal n_neighbors\n", + "print(f\"Optimal number of neighbors: {grid_search.best_params_['n_neighbors']}\")\n", + "\n", + "#fitting our model to the train data\n", + "\n", + "knn.fit(Wine_train_X, Wine_train_y)\n" ] }, { @@ -305,12 +1402,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 78, "id": "ffefa9f2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy on the test set: 0.9333333333333333\n" + ] + } + ], "source": [ - "# Your code here..." + "from sklearn.metrics import accuracy_score\n", + "\n", + "# Get the optimal number of neighbors from the grid search\n", + "best_n_neighbors = grid_search.best_params_['n_neighbors']\n", + "\n", + "# Initialize the KNN classifier with the best n_neighbors\n", + "knn_best = KNeighborsClassifier(n_neighbors=best_n_neighbors)\n", + "\n", + "# Fit the model on the training data\n", + "knn_best.fit(Wine_train_X, Wine_train_y)\n", + "\n", + "# Make predictions on the test set\n", + "Wine_test_pred = knn_best.predict(Wine_test_X)\n", + "\n", + "# Evaluate the model using accuracy_score\n", + "accuracy = accuracy_score(Wine_test_y, Wine_test_pred)\n", + "print(f\"Accuracy on the test set: {accuracy}\")\n" ] }, { @@ -365,7 +1486,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.10.4", + "display_name": "dsi_participant", "language": "python", "name": "python3" }, @@ -379,12 +1500,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" - }, - "vscode": { - "interpreter": { - "hash": "497a84dc8fec8cf8d24e7e87b6d954c9a18a327edc66feb9b9ea7e9e72cc5c7e" - } + "version": "3.9.20" } }, "nbformat": 4,