diff --git a/02_activities/assignments/assignment_1.ipynb b/02_activities/assignments/assignment_1.ipynb index 73d92a3ee..e6bb1e0e6 100644 --- a/02_activities/assignments/assignment_1.ipynb +++ b/02_activities/assignments/assignment_1.ipynb @@ -34,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "4a3485d6-ba58-4660-a983-5680821c5719", "metadata": {}, "outputs": [], @@ -56,10 +56,288 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "a431d282-f9ca-4d5d-8912-71ffc9d8ea19", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesprolineclass
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.00
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.00
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.00
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.00
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.00
.............................................
17313.715.652.4520.595.01.680.610.521.067.700.641.74740.02
17413.403.912.4823.0102.01.800.750.431.417.300.701.56750.02
17513.274.282.2620.0120.01.590.690.431.3510.200.591.56835.02
17613.172.592.3720.0120.01.650.680.531.469.300.601.62840.02
17714.134.102.7424.596.02.050.760.561.359.200.611.60560.02
\n", + "

178 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols \\\n", + "0 14.23 1.71 2.43 15.6 127.0 2.80 \n", + "1 13.20 1.78 2.14 11.2 100.0 2.65 \n", + "2 13.16 2.36 2.67 18.6 101.0 2.80 \n", + "3 14.37 1.95 2.50 16.8 113.0 3.85 \n", + "4 13.24 2.59 2.87 21.0 118.0 2.80 \n", + ".. ... ... ... ... ... ... \n", + "173 13.71 5.65 2.45 20.5 95.0 1.68 \n", + "174 13.40 3.91 2.48 23.0 102.0 1.80 \n", + "175 13.27 4.28 2.26 20.0 120.0 1.59 \n", + "176 13.17 2.59 2.37 20.0 120.0 1.65 \n", + "177 14.13 4.10 2.74 24.5 96.0 2.05 \n", + "\n", + " flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue \\\n", + "0 3.06 0.28 2.29 5.64 1.04 \n", + "1 2.76 0.26 1.28 4.38 1.05 \n", + "2 3.24 0.30 2.81 5.68 1.03 \n", + "3 3.49 0.24 2.18 7.80 0.86 \n", + "4 2.69 0.39 1.82 4.32 1.04 \n", + ".. ... ... ... ... ... \n", + "173 0.61 0.52 1.06 7.70 0.64 \n", + "174 0.75 0.43 1.41 7.30 0.70 \n", + "175 0.69 0.43 1.35 10.20 0.59 \n", + "176 0.68 0.53 1.46 9.30 0.60 \n", + "177 0.76 0.56 1.35 9.20 0.61 \n", + "\n", + " od280/od315_of_diluted_wines proline class \n", + "0 3.92 1065.0 0 \n", + "1 3.40 1050.0 0 \n", + "2 3.17 1185.0 0 \n", + "3 3.45 1480.0 0 \n", + "4 2.93 735.0 0 \n", + ".. ... ... ... \n", + "173 1.74 740.0 2 \n", + "174 1.56 750.0 2 \n", + "175 1.56 835.0 2 \n", + "176 1.62 840.0 2 \n", + "177 1.60 560.0 2 \n", + "\n", + "[178 rows x 14 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn.datasets import load_wine\n", "\n", @@ -96,7 +374,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your answer here" + "number_of_rows = wine_df.shape[0]" ] }, { @@ -114,7 +392,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your answer here" + "number_of_rows = wine_df.shape[0]" ] }, { @@ -130,9 +408,24 @@ "execution_count": null, "id": "47989426", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The variable type of 'class' is: int32\n", + "The unique levels of 'class' are: [0 1 2]\n" + ] + } + ], "source": [ - "# Your answer here" + "# Check type of 'class'\n", + "class_type = wine_df['class'].dtype\n", + "print(f\"The variable type of 'class' is: {class_type}\")\n", + "\n", + "# Get unique values of 'class'\n", + "class_levels = wine_df['class'].unique()\n", + "print(f\"The unique levels of 'class' are: {class_levels}\")" ] }, { @@ -151,7 +444,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Your answer here" + "total_columns = wine_df.shape[1]\n", + "num_predictor_variables = total_columns - 1" ] }, { @@ -175,10 +469,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "cc899b59", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium \\\n", + "0 1.518613 -0.562250 0.232053 -1.169593 1.913905 \n", + "1 0.246290 -0.499413 -0.827996 -2.490847 0.018145 \n", + "2 0.196879 0.021231 1.109334 -0.268738 0.088358 \n", + "3 1.691550 -0.346811 0.487926 -0.809251 0.930918 \n", + "4 0.295700 0.227694 1.840403 0.451946 1.281985 \n", + "\n", + " total_phenols flavanoids nonflavanoid_phenols proanthocyanins \\\n", + "0 0.808997 1.034819 -0.659563 1.224884 \n", + "1 0.568648 0.733629 -0.820719 -0.544721 \n", + "2 0.808997 1.215533 -0.498407 2.135968 \n", + "3 2.491446 1.466525 -0.981875 1.032155 \n", + "4 0.808997 0.663351 0.226796 0.401404 \n", + "\n", + " color_intensity hue od280/od315_of_diluted_wines proline \n", + "0 0.251717 0.362177 1.847920 1.013009 \n", + "1 -0.293321 0.406051 1.113449 0.965242 \n", + "2 0.269020 0.318304 0.788587 1.395148 \n", + "3 1.186068 -0.427544 1.184071 2.334574 \n", + "4 -0.319276 0.362177 0.449601 -0.037874 \n" + ] + } + ], "source": [ "# Select predictors (excluding the last column)\n", "predictors = wine_df.iloc[:, :-1]\n", @@ -204,7 +525,7 @@ "id": "403ef0bb", "metadata": {}, "source": [ - "> Your answer here..." + "Ans: It ensures a standard where none of the individual variables affect the rest of the model simply because of their magnitude. " ] }, { @@ -220,7 +541,7 @@ "id": "fdee5a15", "metadata": {}, "source": [ - "> Your answer here..." + "Ans: Because it represents categorical data." ] }, { @@ -236,7 +557,9 @@ "id": "f0676c21", "metadata": {}, "source": [ - "> Your answer here..." + "random.seed(0)\n", + "\n", + "Ans: Setting a random seed ensures that in the event that a random event occurs, it produces the same result every time. Essentially, ensuring reproducibility and consistency. " ] }, { @@ -251,7 +574,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "72c101f2", "metadata": {}, "outputs": [], @@ -261,7 +584,10 @@ "\n", "# split the data into a training and testing set. hint: use train_test_split !\n", "\n", - "# Your code here ..." + "X = wine_df.drop(columns=['class']) # Predictor variables\n", + "y = wine_df['class'] # Response variable\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=123)" ] }, { @@ -284,12 +610,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "08818c64", "metadata": {}, "outputs": [], "source": [ - "# Your code here..." + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "knn = KNeighborsClassifier() # step 1\n", + "\n", + "param_grid = {'n_neighbors': list(range(1, 51))} # step 2\n", + "\n", + "# step 3\n", + "grid_search = GridSearchCV(estimator=knn, param_grid=param_grid, cv=10, scoring='accuracy') \n", + "grid_search.fit(X_train, y_train)\n", + "\n", + "# step 4\n", + "best_n_neighbors = grid_search.best_params_['n_neighbors']\n", + "best_score = grid_search.best_score_" ] }, { @@ -305,12 +644,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "ffefa9f2", "metadata": {}, "outputs": [], "source": [ - "# Your code here..." + "from sklearn.metrics import accuracy_score\n", + "\n", + "best_knn = KNeighborsClassifier(n_neighbors=best_n_neighbors) # step 1\n", + "\n", + "best_knn.fit(X_train, y_train) # step 2\n", + "\n", + "y_pred = best_knn.predict(X_test) # step 3\n", + "\n", + "test_accuracy = accuracy_score(y_test, y_pred) # step 4\n" ] }, { @@ -365,7 +712,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.10.4", + "display_name": "dsi_participant", "language": "python", "name": "python3" }, @@ -379,12 +726,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" - }, - "vscode": { - "interpreter": { - "hash": "497a84dc8fec8cf8d24e7e87b6d954c9a18a327edc66feb9b9ea7e9e72cc5c7e" - } + "version": "3.9.15" } }, "nbformat": 4,