diff --git a/02_activities/assignments/assignment_1.ipynb b/02_activities/assignments/assignment_1.ipynb
index 28d4df017..1d3967d69 100644
--- a/02_activities/assignments/assignment_1.ipynb
+++ b/02_activities/assignments/assignment_1.ipynb
@@ -34,7 +34,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 22,
"id": "4a3485d6-ba58-4660-a983-5680821c5719",
"metadata": {},
"outputs": [],
@@ -56,10 +56,288 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 23,
"id": "a431d282-f9ca-4d5d-8912-71ffc9d8ea19",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " alcohol | \n",
+ " malic_acid | \n",
+ " ash | \n",
+ " alcalinity_of_ash | \n",
+ " magnesium | \n",
+ " total_phenols | \n",
+ " flavanoids | \n",
+ " nonflavanoid_phenols | \n",
+ " proanthocyanins | \n",
+ " color_intensity | \n",
+ " hue | \n",
+ " od280/od315_of_diluted_wines | \n",
+ " proline | \n",
+ " class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 14.23 | \n",
+ " 1.71 | \n",
+ " 2.43 | \n",
+ " 15.6 | \n",
+ " 127.0 | \n",
+ " 2.80 | \n",
+ " 3.06 | \n",
+ " 0.28 | \n",
+ " 2.29 | \n",
+ " 5.64 | \n",
+ " 1.04 | \n",
+ " 3.92 | \n",
+ " 1065.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 13.20 | \n",
+ " 1.78 | \n",
+ " 2.14 | \n",
+ " 11.2 | \n",
+ " 100.0 | \n",
+ " 2.65 | \n",
+ " 2.76 | \n",
+ " 0.26 | \n",
+ " 1.28 | \n",
+ " 4.38 | \n",
+ " 1.05 | \n",
+ " 3.40 | \n",
+ " 1050.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 13.16 | \n",
+ " 2.36 | \n",
+ " 2.67 | \n",
+ " 18.6 | \n",
+ " 101.0 | \n",
+ " 2.80 | \n",
+ " 3.24 | \n",
+ " 0.30 | \n",
+ " 2.81 | \n",
+ " 5.68 | \n",
+ " 1.03 | \n",
+ " 3.17 | \n",
+ " 1185.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 14.37 | \n",
+ " 1.95 | \n",
+ " 2.50 | \n",
+ " 16.8 | \n",
+ " 113.0 | \n",
+ " 3.85 | \n",
+ " 3.49 | \n",
+ " 0.24 | \n",
+ " 2.18 | \n",
+ " 7.80 | \n",
+ " 0.86 | \n",
+ " 3.45 | \n",
+ " 1480.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 13.24 | \n",
+ " 2.59 | \n",
+ " 2.87 | \n",
+ " 21.0 | \n",
+ " 118.0 | \n",
+ " 2.80 | \n",
+ " 2.69 | \n",
+ " 0.39 | \n",
+ " 1.82 | \n",
+ " 4.32 | \n",
+ " 1.04 | \n",
+ " 2.93 | \n",
+ " 735.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 173 | \n",
+ " 13.71 | \n",
+ " 5.65 | \n",
+ " 2.45 | \n",
+ " 20.5 | \n",
+ " 95.0 | \n",
+ " 1.68 | \n",
+ " 0.61 | \n",
+ " 0.52 | \n",
+ " 1.06 | \n",
+ " 7.70 | \n",
+ " 0.64 | \n",
+ " 1.74 | \n",
+ " 740.0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 174 | \n",
+ " 13.40 | \n",
+ " 3.91 | \n",
+ " 2.48 | \n",
+ " 23.0 | \n",
+ " 102.0 | \n",
+ " 1.80 | \n",
+ " 0.75 | \n",
+ " 0.43 | \n",
+ " 1.41 | \n",
+ " 7.30 | \n",
+ " 0.70 | \n",
+ " 1.56 | \n",
+ " 750.0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 175 | \n",
+ " 13.27 | \n",
+ " 4.28 | \n",
+ " 2.26 | \n",
+ " 20.0 | \n",
+ " 120.0 | \n",
+ " 1.59 | \n",
+ " 0.69 | \n",
+ " 0.43 | \n",
+ " 1.35 | \n",
+ " 10.20 | \n",
+ " 0.59 | \n",
+ " 1.56 | \n",
+ " 835.0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 176 | \n",
+ " 13.17 | \n",
+ " 2.59 | \n",
+ " 2.37 | \n",
+ " 20.0 | \n",
+ " 120.0 | \n",
+ " 1.65 | \n",
+ " 0.68 | \n",
+ " 0.53 | \n",
+ " 1.46 | \n",
+ " 9.30 | \n",
+ " 0.60 | \n",
+ " 1.62 | \n",
+ " 840.0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 177 | \n",
+ " 14.13 | \n",
+ " 4.10 | \n",
+ " 2.74 | \n",
+ " 24.5 | \n",
+ " 96.0 | \n",
+ " 2.05 | \n",
+ " 0.76 | \n",
+ " 0.56 | \n",
+ " 1.35 | \n",
+ " 9.20 | \n",
+ " 0.61 | \n",
+ " 1.60 | \n",
+ " 560.0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
178 rows × 14 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols \\\n",
+ "0 14.23 1.71 2.43 15.6 127.0 2.80 \n",
+ "1 13.20 1.78 2.14 11.2 100.0 2.65 \n",
+ "2 13.16 2.36 2.67 18.6 101.0 2.80 \n",
+ "3 14.37 1.95 2.50 16.8 113.0 3.85 \n",
+ "4 13.24 2.59 2.87 21.0 118.0 2.80 \n",
+ ".. ... ... ... ... ... ... \n",
+ "173 13.71 5.65 2.45 20.5 95.0 1.68 \n",
+ "174 13.40 3.91 2.48 23.0 102.0 1.80 \n",
+ "175 13.27 4.28 2.26 20.0 120.0 1.59 \n",
+ "176 13.17 2.59 2.37 20.0 120.0 1.65 \n",
+ "177 14.13 4.10 2.74 24.5 96.0 2.05 \n",
+ "\n",
+ " flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue \\\n",
+ "0 3.06 0.28 2.29 5.64 1.04 \n",
+ "1 2.76 0.26 1.28 4.38 1.05 \n",
+ "2 3.24 0.30 2.81 5.68 1.03 \n",
+ "3 3.49 0.24 2.18 7.80 0.86 \n",
+ "4 2.69 0.39 1.82 4.32 1.04 \n",
+ ".. ... ... ... ... ... \n",
+ "173 0.61 0.52 1.06 7.70 0.64 \n",
+ "174 0.75 0.43 1.41 7.30 0.70 \n",
+ "175 0.69 0.43 1.35 10.20 0.59 \n",
+ "176 0.68 0.53 1.46 9.30 0.60 \n",
+ "177 0.76 0.56 1.35 9.20 0.61 \n",
+ "\n",
+ " od280/od315_of_diluted_wines proline class \n",
+ "0 3.92 1065.0 0 \n",
+ "1 3.40 1050.0 0 \n",
+ "2 3.17 1185.0 0 \n",
+ "3 3.45 1480.0 0 \n",
+ "4 2.93 735.0 0 \n",
+ ".. ... ... ... \n",
+ "173 1.74 740.0 2 \n",
+ "174 1.56 750.0 2 \n",
+ "175 1.56 835.0 2 \n",
+ "176 1.62 840.0 2 \n",
+ "177 1.60 560.0 2 \n",
+ "\n",
+ "[178 rows x 14 columns]"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"from sklearn.datasets import load_wine\n",
"\n",
@@ -91,12 +369,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 24,
"id": "56916892",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "178\n"
+ ]
+ }
+ ],
"source": [
- "# Your answer here"
+ "# Your answer here\n",
+ "rows = wine_df.shape[0]\n",
+ "print(rows)"
]
},
{
@@ -109,12 +397,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 25,
"id": "df0ef103",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "14\n"
+ ]
+ }
+ ],
"source": [
- "# Your answer here"
+ "# Your answer here\n",
+ "columns = wine_df.shape[1]\n",
+ "print(columns)"
]
},
{
@@ -127,12 +425,41 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 26,
"id": "47989426",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "alcohol float64\n",
+ "malic_acid float64\n",
+ "ash float64\n",
+ "alcalinity_of_ash float64\n",
+ "magnesium float64\n",
+ "total_phenols float64\n",
+ "flavanoids float64\n",
+ "nonflavanoid_phenols float64\n",
+ "proanthocyanins float64\n",
+ "color_intensity float64\n",
+ "hue float64\n",
+ "od280/od315_of_diluted_wines float64\n",
+ "proline float64\n",
+ "class int64\n",
+ "dtype: object\n",
+ "[0 1 2]\n"
+ ]
+ }
+ ],
"source": [
- "# Your answer here"
+ "# Your answer here\n",
+ "\n",
+ "# It is an int64 type\n",
+ "print(wine_df.dtypes)\n",
+ "\n",
+ "unique_class = wine_df['class'].unique()\n",
+ "print(unique_class)"
]
},
{
@@ -146,12 +473,14 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 27,
"id": "bd7b0910",
"metadata": {},
"outputs": [],
"source": [
- "# Your answer here"
+ "# Your answer here\n",
+ "\n",
+ "# The total number of columns - \"class\" column = 13. "
]
},
{
@@ -175,10 +504,37 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 28,
"id": "cc899b59",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " alcohol malic_acid ash alcalinity_of_ash magnesium \\\n",
+ "0 1.518613 -0.562250 0.232053 -1.169593 1.913905 \n",
+ "1 0.246290 -0.499413 -0.827996 -2.490847 0.018145 \n",
+ "2 0.196879 0.021231 1.109334 -0.268738 0.088358 \n",
+ "3 1.691550 -0.346811 0.487926 -0.809251 0.930918 \n",
+ "4 0.295700 0.227694 1.840403 0.451946 1.281985 \n",
+ "\n",
+ " total_phenols flavanoids nonflavanoid_phenols proanthocyanins \\\n",
+ "0 0.808997 1.034819 -0.659563 1.224884 \n",
+ "1 0.568648 0.733629 -0.820719 -0.544721 \n",
+ "2 0.808997 1.215533 -0.498407 2.135968 \n",
+ "3 2.491446 1.466525 -0.981875 1.032155 \n",
+ "4 0.808997 0.663351 0.226796 0.401404 \n",
+ "\n",
+ " color_intensity hue od280/od315_of_diluted_wines proline \n",
+ "0 0.251717 0.362177 1.847920 1.013009 \n",
+ "1 -0.293321 0.406051 1.113449 0.965242 \n",
+ "2 0.269020 0.318304 0.788587 1.395148 \n",
+ "3 1.186068 -0.427544 1.184071 2.334574 \n",
+ "4 -0.319276 0.362177 0.449601 -0.037874 \n"
+ ]
+ }
+ ],
"source": [
"# Select predictors (excluding the last column)\n",
"predictors = wine_df.iloc[:, :-1]\n",
@@ -204,7 +560,9 @@
"id": "403ef0bb",
"metadata": {},
"source": [
- "> Your answer here..."
+ "> Your answer here...\n",
+ "\n",
+ "It is important to standardize the predictor variables to ensure they are all within the same range/a common scale and provide the same effect when utilizing k-means clustering (which is sensitive to the magnitude of a value). "
]
},
{
@@ -220,7 +578,9 @@
"id": "fdee5a15",
"metadata": {},
"source": [
- "> Your answer here..."
+ "> Your answer here...\n",
+ "\n",
+ "Our response variable is simply a tag - it is not part of the data involved in the calculations (i.e. the predictor variables)."
]
},
{
@@ -236,7 +596,19 @@
"id": "f0676c21",
"metadata": {},
"source": [
- "> Your answer here..."
+ "> Your answer here...\n",
+ "\n",
+ "In the context of k-means clustering, setting a seed ensures that the initial centroids (and ultimately, what the predicted clusters are) are the same, meaning they are reproducible between different runs of the code at different times. The particular seed value doesn't matter - but if you change the seed number, the results may differ from another seed number. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "4707e008",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "np.random.seed(42)"
]
},
{
@@ -251,7 +623,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 30,
"id": "72c101f2",
"metadata": {},
"outputs": [],
@@ -261,7 +633,11 @@
"\n",
"# split the data into a training and testing set. hint: use train_test_split !\n",
"\n",
- "# Your code here ..."
+ "# Your code here ...\n",
+ "features = wine_df.drop('class', axis = 1) #Just the predictor variables\n",
+ "target = wine_df['class'] #Just the target variables\n",
+ "\n",
+ "X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.25)"
]
},
{
@@ -284,12 +660,44 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 31,
"id": "08818c64",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Best Parameters: {'n_neighbors': 1}\n",
+ "Best Cross-validation Accuracy: 0.7901098901098901\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here..."
+ "# Your code here...\n",
+ "\n",
+ "#KNN classifer\n",
+ "knn = KNeighborsClassifier() \n",
+ "\n",
+ "#Parameter grid \n",
+ "parameters_grid = {\n",
+ " \"n_neighbors\": list(range(1,51))\n",
+ "}\n",
+ "\n",
+ "#Grid search with 10-fold cross-validation (to find optimal number of neightbors)\n",
+ "grid_search = GridSearchCV(\n",
+ " estimator = knn,\n",
+ " param_grid=parameters_grid,\n",
+ " cv=10,\n",
+ " scoring = \"accuracy\",\n",
+ " n_jobs=-1\n",
+ ")\n",
+ "\n",
+ "grid_search.fit(X_train, y_train)\n",
+ "\n",
+ "#Print \n",
+ "print(\"Best Parameters:\", grid_search.best_params_)\n",
+ "print(\"Best Cross-validation Accuracy:\", grid_search.best_score_)"
]
},
{
@@ -305,12 +713,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 32,
"id": "ffefa9f2",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.6666666666666666\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here..."
+ "# Your code here...\n",
+ "\n",
+ "knn = KNeighborsClassifier(n_neighbors = 1)\n",
+ "knn.fit(X_train, y_train)\n",
+ "y_prediction = knn.predict(X_test)\n",
+ "\n",
+ "accuracy = accuracy_score(y_test, y_prediction)\n",
+ "print(accuracy)\n"
]
},
{
@@ -365,7 +788,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3.10.4",
+ "display_name": "base",
"language": "python",
"name": "python3"
},
@@ -379,12 +802,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.19"
- },
- "vscode": {
- "interpreter": {
- "hash": "497a84dc8fec8cf8d24e7e87b6d954c9a18a327edc66feb9b9ea7e9e72cc5c7e"
- }
+ "version": "3.13.5"
}
},
"nbformat": 4,