diff --git a/02_activities/assignments/assignment_1.ipynb b/02_activities/assignments/assignment_1.ipynb
index 828092657..ca4909ee4 100644
--- a/02_activities/assignments/assignment_1.ipynb
+++ b/02_activities/assignments/assignment_1.ipynb
@@ -34,7 +34,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"id": "4a3485d6-ba58-4660-a983-5680821c5719",
"metadata": {},
"outputs": [],
@@ -56,10 +56,288 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"id": "a431d282-f9ca-4d5d-8912-71ffc9d8ea19",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " alcohol | \n",
+ " malic_acid | \n",
+ " ash | \n",
+ " alcalinity_of_ash | \n",
+ " magnesium | \n",
+ " total_phenols | \n",
+ " flavanoids | \n",
+ " nonflavanoid_phenols | \n",
+ " proanthocyanins | \n",
+ " color_intensity | \n",
+ " hue | \n",
+ " od280/od315_of_diluted_wines | \n",
+ " proline | \n",
+ " class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 14.23 | \n",
+ " 1.71 | \n",
+ " 2.43 | \n",
+ " 15.6 | \n",
+ " 127.0 | \n",
+ " 2.80 | \n",
+ " 3.06 | \n",
+ " 0.28 | \n",
+ " 2.29 | \n",
+ " 5.64 | \n",
+ " 1.04 | \n",
+ " 3.92 | \n",
+ " 1065.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 13.20 | \n",
+ " 1.78 | \n",
+ " 2.14 | \n",
+ " 11.2 | \n",
+ " 100.0 | \n",
+ " 2.65 | \n",
+ " 2.76 | \n",
+ " 0.26 | \n",
+ " 1.28 | \n",
+ " 4.38 | \n",
+ " 1.05 | \n",
+ " 3.40 | \n",
+ " 1050.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 13.16 | \n",
+ " 2.36 | \n",
+ " 2.67 | \n",
+ " 18.6 | \n",
+ " 101.0 | \n",
+ " 2.80 | \n",
+ " 3.24 | \n",
+ " 0.30 | \n",
+ " 2.81 | \n",
+ " 5.68 | \n",
+ " 1.03 | \n",
+ " 3.17 | \n",
+ " 1185.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 14.37 | \n",
+ " 1.95 | \n",
+ " 2.50 | \n",
+ " 16.8 | \n",
+ " 113.0 | \n",
+ " 3.85 | \n",
+ " 3.49 | \n",
+ " 0.24 | \n",
+ " 2.18 | \n",
+ " 7.80 | \n",
+ " 0.86 | \n",
+ " 3.45 | \n",
+ " 1480.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 13.24 | \n",
+ " 2.59 | \n",
+ " 2.87 | \n",
+ " 21.0 | \n",
+ " 118.0 | \n",
+ " 2.80 | \n",
+ " 2.69 | \n",
+ " 0.39 | \n",
+ " 1.82 | \n",
+ " 4.32 | \n",
+ " 1.04 | \n",
+ " 2.93 | \n",
+ " 735.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 173 | \n",
+ " 13.71 | \n",
+ " 5.65 | \n",
+ " 2.45 | \n",
+ " 20.5 | \n",
+ " 95.0 | \n",
+ " 1.68 | \n",
+ " 0.61 | \n",
+ " 0.52 | \n",
+ " 1.06 | \n",
+ " 7.70 | \n",
+ " 0.64 | \n",
+ " 1.74 | \n",
+ " 740.0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 174 | \n",
+ " 13.40 | \n",
+ " 3.91 | \n",
+ " 2.48 | \n",
+ " 23.0 | \n",
+ " 102.0 | \n",
+ " 1.80 | \n",
+ " 0.75 | \n",
+ " 0.43 | \n",
+ " 1.41 | \n",
+ " 7.30 | \n",
+ " 0.70 | \n",
+ " 1.56 | \n",
+ " 750.0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 175 | \n",
+ " 13.27 | \n",
+ " 4.28 | \n",
+ " 2.26 | \n",
+ " 20.0 | \n",
+ " 120.0 | \n",
+ " 1.59 | \n",
+ " 0.69 | \n",
+ " 0.43 | \n",
+ " 1.35 | \n",
+ " 10.20 | \n",
+ " 0.59 | \n",
+ " 1.56 | \n",
+ " 835.0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 176 | \n",
+ " 13.17 | \n",
+ " 2.59 | \n",
+ " 2.37 | \n",
+ " 20.0 | \n",
+ " 120.0 | \n",
+ " 1.65 | \n",
+ " 0.68 | \n",
+ " 0.53 | \n",
+ " 1.46 | \n",
+ " 9.30 | \n",
+ " 0.60 | \n",
+ " 1.62 | \n",
+ " 840.0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 177 | \n",
+ " 14.13 | \n",
+ " 4.10 | \n",
+ " 2.74 | \n",
+ " 24.5 | \n",
+ " 96.0 | \n",
+ " 2.05 | \n",
+ " 0.76 | \n",
+ " 0.56 | \n",
+ " 1.35 | \n",
+ " 9.20 | \n",
+ " 0.61 | \n",
+ " 1.60 | \n",
+ " 560.0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
178 rows × 14 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols \\\n",
+ "0 14.23 1.71 2.43 15.6 127.0 2.80 \n",
+ "1 13.20 1.78 2.14 11.2 100.0 2.65 \n",
+ "2 13.16 2.36 2.67 18.6 101.0 2.80 \n",
+ "3 14.37 1.95 2.50 16.8 113.0 3.85 \n",
+ "4 13.24 2.59 2.87 21.0 118.0 2.80 \n",
+ ".. ... ... ... ... ... ... \n",
+ "173 13.71 5.65 2.45 20.5 95.0 1.68 \n",
+ "174 13.40 3.91 2.48 23.0 102.0 1.80 \n",
+ "175 13.27 4.28 2.26 20.0 120.0 1.59 \n",
+ "176 13.17 2.59 2.37 20.0 120.0 1.65 \n",
+ "177 14.13 4.10 2.74 24.5 96.0 2.05 \n",
+ "\n",
+ " flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue \\\n",
+ "0 3.06 0.28 2.29 5.64 1.04 \n",
+ "1 2.76 0.26 1.28 4.38 1.05 \n",
+ "2 3.24 0.30 2.81 5.68 1.03 \n",
+ "3 3.49 0.24 2.18 7.80 0.86 \n",
+ "4 2.69 0.39 1.82 4.32 1.04 \n",
+ ".. ... ... ... ... ... \n",
+ "173 0.61 0.52 1.06 7.70 0.64 \n",
+ "174 0.75 0.43 1.41 7.30 0.70 \n",
+ "175 0.69 0.43 1.35 10.20 0.59 \n",
+ "176 0.68 0.53 1.46 9.30 0.60 \n",
+ "177 0.76 0.56 1.35 9.20 0.61 \n",
+ "\n",
+ " od280/od315_of_diluted_wines proline class \n",
+ "0 3.92 1065.0 0 \n",
+ "1 3.40 1050.0 0 \n",
+ "2 3.17 1185.0 0 \n",
+ "3 3.45 1480.0 0 \n",
+ "4 2.93 735.0 0 \n",
+ ".. ... ... ... \n",
+ "173 1.74 740.0 2 \n",
+ "174 1.56 750.0 2 \n",
+ "175 1.56 835.0 2 \n",
+ "176 1.62 840.0 2 \n",
+ "177 1.60 560.0 2 \n",
+ "\n",
+ "[178 rows x 14 columns]"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"from sklearn.datasets import load_wine\n",
"\n",
@@ -91,12 +369,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"id": "56916892",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "178"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your answer here"
+ "# Number of rows (observations)\n",
+ "wine_df.shape[0]"
]
},
{
@@ -109,12 +399,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"id": "df0ef103",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "14"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your answer here"
+ "# Number of columns (variables)\n",
+ "wine_df.shape[1]"
]
},
{
@@ -127,12 +429,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"id": "47989426",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(dtype('int32'), array([0, 1, 2]))"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your answer here"
+ "# Checking data type and unique values of the target variable\n",
+ "wine_df['class'].dtype, wine_df['class'].unique()"
]
},
{
@@ -146,12 +460,23 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"id": "bd7b0910",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "13"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your answer here"
+ "13 "
]
},
{
@@ -175,10 +500,37 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"id": "cc899b59",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " alcohol malic_acid ash alcalinity_of_ash magnesium \\\n",
+ "0 1.518613 -0.562250 0.232053 -1.169593 1.913905 \n",
+ "1 0.246290 -0.499413 -0.827996 -2.490847 0.018145 \n",
+ "2 0.196879 0.021231 1.109334 -0.268738 0.088358 \n",
+ "3 1.691550 -0.346811 0.487926 -0.809251 0.930918 \n",
+ "4 0.295700 0.227694 1.840403 0.451946 1.281985 \n",
+ "\n",
+ " total_phenols flavanoids nonflavanoid_phenols proanthocyanins \\\n",
+ "0 0.808997 1.034819 -0.659563 1.224884 \n",
+ "1 0.568648 0.733629 -0.820719 -0.544721 \n",
+ "2 0.808997 1.215533 -0.498407 2.135968 \n",
+ "3 2.491446 1.466525 -0.981875 1.032155 \n",
+ "4 0.808997 0.663351 0.226796 0.401404 \n",
+ "\n",
+ " color_intensity hue od280/od315_of_diluted_wines proline \n",
+ "0 0.251717 0.362177 1.847920 1.013009 \n",
+ "1 -0.293321 0.406051 1.113449 0.965242 \n",
+ "2 0.269020 0.318304 0.788587 1.395148 \n",
+ "3 1.186068 -0.427544 1.184071 2.334574 \n",
+ "4 -0.319276 0.362177 0.449601 -0.037874 \n"
+ ]
+ }
+ ],
"source": [
"# Select predictors (excluding the last column)\n",
"predictors = wine_df.iloc[:, :-1]\n",
@@ -204,7 +556,7 @@
"id": "403ef0bb",
"metadata": {},
"source": [
- "> Your answer here..."
+ "> We standardize the predictors because KNN uses distance to compare data points. If features have different scales (like alcohol % vs. magnesium levels), the larger-scale ones will dominate. Standardization makes sure all features contribute equally by putting them on the same scale.\n"
]
},
{
@@ -220,7 +572,7 @@
"id": "fdee5a15",
"metadata": {},
"source": [
- "> Your answer here..."
+ "> The `class` variable is a label (like wine type 0, 1, or 2). Standardizing it would make it a meaningless number, which could confuse the model. Labels are not used in distance calculations, so we leave them as-is."
]
},
{
@@ -236,7 +588,7 @@
"id": "f0676c21",
"metadata": {},
"source": [
- "> Your answer here..."
+ "> Setting a random seed ensures we get the same results every time we run the code. This is helpful for debugging and comparing models. The actual number used as the seed doesn’t matter — what matters is that it stays the same if you want repeatable results.\n"
]
},
{
@@ -251,17 +603,32 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"id": "72c101f2",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Training set size: 133\n",
+ "Test set size: 45\n"
+ ]
+ }
+ ],
"source": [
"# set a seed for reproducibility\n",
"np.random.seed(123)\n",
"\n",
- "# split the data into a training and testing set. hint: use train_test_split !\n",
+ "# split the data into a training and testing set. \n",
+ "# 75% training and 25% testing, ensuring no overlap\n",
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
+ " predictors_standardized, wine_df['class'], test_size=0.25, random_state=123\n",
+ ")\n",
"\n",
- "# Your code here ..."
+ "# Check the number of rows in each split\n",
+ "print(\"Training set size:\", X_train.shape[0])\n",
+ "print(\"Test set size:\", X_test.shape[0])"
]
},
{
@@ -287,9 +654,33 @@
"execution_count": null,
"id": "08818c64",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'n_neighbors': 15}"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here..."
+ "# Step 1: Initialize the KNN classifier\n",
+ "knn = KNeighborsClassifier()\n",
+ "\n",
+ "# Step 2: Define the parameter grid for 'n_neighbors' from 1 to 50\n",
+ "param_grid = {'n_neighbors': range(1, 51)}\n",
+ "\n",
+ "# Step 3: Set up GridSearchCV with 10-fold cross-validation\n",
+ "grid_search = GridSearchCV(knn, param_grid, cv=10)\n",
+ "\n",
+ "# Step 4: Fit the model using the training data\n",
+ "grid_search.fit(X_train, y_train)\n",
+ "\n",
+ "# The best number of neighbors based on cross-validation\n",
+ "grid_search.best_params_"
]
},
{
@@ -305,12 +696,30 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"id": "ffefa9f2",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.9333333333333333"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here..."
+ "# The best number of neighbors from grid search to train final model\n",
+ "best_knn = grid_search.best_estimator_\n",
+ "\n",
+ "# Predict the labels of the test set\n",
+ "y_pred = best_knn.predict(X_test)\n",
+ "\n",
+ "# Evaluate the model's performance using accuracy score\n",
+ "accuracy_score(y_test, y_pred)"
]
},
{
@@ -354,10 +763,10 @@
" * Open a private window in your browser. Copy and paste the link to your pull request into the address bar. Make sure you can see your pull request properly. This helps the technical facilitator and learning support staff review your submission easily.\n",
"\n",
"Checklist:\n",
- "- [ ] Created a branch with the correct naming convention.\n",
- "- [ ] Ensured that the repository is public.\n",
- "- [ ] Reviewed the PR description guidelines and adhered to them.\n",
- "- [ ] Verify that the link is accessible in a private browser window.\n",
+ "- [X] Created a branch with the correct naming convention.\n",
+ "- [X] Ensured that the repository is public.\n",
+ "- [X] Reviewed the PR description guidelines and adhered to them.\n",
+ "- [X] Verify that the link is accessible in a private browser window.\n",
"\n",
"If you encounter any difficulties or have questions, please don't hesitate to reach out to our team via our Slack at `#cohort-6-help`. Our Technical Facilitators and Learning Support staff are here to help you navigate any challenges.\n"
]
@@ -365,7 +774,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3.10.4",
+ "display_name": "dsi_participant",
"language": "python",
"name": "python3"
},
@@ -379,12 +788,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.19"
- },
- "vscode": {
- "interpreter": {
- "hash": "497a84dc8fec8cf8d24e7e87b6d954c9a18a327edc66feb9b9ea7e9e72cc5c7e"
- }
+ "version": "3.9.15"
}
},
"nbformat": 4,