diff --git a/Projekty/Projekt2/Spytek_Zolkowski/milestone3_pres.pdf b/Projekty/Projekt2/Spytek_Zolkowski/milestone3_pres.pdf
new file mode 100644
index 000000000..594038476
Binary files /dev/null and b/Projekty/Projekt2/Spytek_Zolkowski/milestone3_pres.pdf differ
diff --git a/Projekty/Projekt2/Spytek_Zolkowski/notebook_final2.ipynb b/Projekty/Projekt2/Spytek_Zolkowski/notebook_final2.ipynb
new file mode 100644
index 000000000..9dfdfc677
--- /dev/null
+++ b/Projekty/Projekt2/Spytek_Zolkowski/notebook_final2.ipynb
@@ -0,0 +1,835 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Projekt 2 - EDA\n",
+    "**Mikołaj Spytek, Artur Żółkowski**\n",
+    "\n",
+    "W tym projekcie zajmujemy się klasteryzacją danych dotyczących aktywności użytkowników sklepu internetowego.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import seaborn as sns\n",
+    "from sklearn.cluster import AgglomerativeClustering\n",
+    "from sklearn.mixture import GaussianMixture\n",
+    "from sklearn.cluster import DBSCAN\n",
+    "from sklearn.metrics import calinski_harabasz_score, silhouette_score, davies_bouldin_score, adjusted_mutual_info_score, normalized_mutual_info_score\n",
+    "from sklearn.manifold import TSNE\n",
+    "import sklearn\n",
+    "import seaborn as sns\n",
+    "from sklearn.cluster import KMeans\n",
+    "\n",
+    "import random\n",
+    "random.seed(42)\n",
+    "# scikit-learn estimators created without an explicit random_state draw from NumPy's\n",
+    "# global RNG, which random.seed() does not touch, so seed that generator as well.\n",
+    "np.random.seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = pd.read_csv(\"data/online_shoppers_intention.csv\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Przygotowanie danych"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def encode(data, col, max_val):\n",
+    "    data[col + '_sin'] = np.sin(2 * np.pi * data[col]/max_val)\n",
+    "    data[col + '_cos'] = np.cos(2 * np.pi * data[col]/max_val)\n",
+    "    return data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "months = {\"Jan\": 1, \"Feb\": 2, \"Mar\": 3, \"Apr\": 4, \"May\": 5, \"June\": 6, \n",
+    "          \"Jul\": 7, \"Aug\": 8, \"Sep\": 9, \"Oct\": 10, \"Nov\": 11, \"Dec\": 12}\n",
+    "data[\"Month\"] = data[\"Month\"].map(months)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = encode(data, 'Month', 12)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# set_aspect() returns None, so keep the Axes first and configure it afterwards.\n",
+    "ax = data.plot.scatter('Month_sin', 'Month_cos')\n",
+    "ax.set_aspect('equal')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_vars = [\"Administrative\", \"Administrative_Duration\", \"Informational\", \"Informational_Duration\", \"ProductRelated\", \n",
+    "            \"ProductRelated_Duration\", \"BounceRates\", \"ExitRates\", \"PageValues\", \"SpecialDay\", \"Month_sin\", \"Month_cos\"]\n",
+    "cat_vars = [\"OperatingSystems\", \"Browser\", \"Region\", \"VisitorType\", \"Weekend\", \"TrafficType\"]\n",
+    "log_vars = ['Administrative', 'Administrative_Duration', 'Informational',\n",
+    "                'Informational_Duration', 'ProductRelated', 'ProductRelated_Duration',\n",
+    "                'BounceRates', 'ExitRates', 'PageValues']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.compose import ColumnTransformer\n",
+    "from sklearn.preprocessing import FunctionTransformer, StandardScaler, OrdinalEncoder\n",
+    "\n",
+    "scaler=StandardScaler()\n",
+    "\n",
+    "preprocessor = ColumnTransformer(\n",
+    "    transformers= [\n",
+    "        ('log', FunctionTransformer(np.log1p), log_vars),\n",
+    "        ('cat', OrdinalEncoder(), cat_vars)\n",
+    "    ],\n",
+    "    remainder = 'passthrough'\n",
+    ")\n",
+    "transformed_data = preprocessor.fit_transform(data.drop(['Month', 'Revenue'], axis=1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transformed_data = scaler.fit_transform(transformed_data)\n",
+    "# ColumnTransformer emits the 'log' columns, then the 'cat' columns, then the passthrough\n",
+    "# remainder - not the input order - so the labels must be rebuilt in that same order.\n",
+    "input_cols = data.drop(['Month', 'Revenue'], axis=1).columns\n",
+    "passthrough_cols = [c for c in input_cols if c not in log_vars and c not in cat_vars]\n",
+    "transformed_data = pd.DataFrame(transformed_data, columns = log_vars + cat_vars + passthrough_cols)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Klastrowania"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def count_clustering_scores(X, cluster_num, model, score_fun):\n",
+    "    if isinstance(cluster_num, int):\n",
+    "        cluster_num_iter = [cluster_num]\n",
+    "    else:\n",
+    "        cluster_num_iter = cluster_num\n",
+    "        \n",
+    "    scores = []    \n",
+    "    for k in cluster_num_iter:\n",
+    "        model_instance = model(n_clusters=k)\n",
+    "        labels = model_instance.fit_predict(X)\n",
+    "        wcss = score_fun(X, labels)\n",
+    "        scores.append(wcss)\n",
+    "    \n",
+    "    if isinstance(cluster_num, int):\n",
+    "        return scores[0]\n",
+    "    else:\n",
+    "        return scores"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cluster_num_seq = range(2, 20)\n",
+    "# random.seed() does not reach scikit-learn, so pin the KMeans seed explicitly;\n",
+    "# otherwise the curve changes from run to run.\n",
+    "davies_vec = count_clustering_scores(\n",
+    "    transformed_data, cluster_num_seq,\n",
+    "    lambda n_clusters: KMeans(n_clusters=n_clusters, random_state=42),\n",
+    "    davies_bouldin_score)\n",
+    "plt.figure(figsize=(12,8))\n",
+    "plt.plot(cluster_num_seq, davies_vec, 'bx-')\n",
+    "plt.xlabel('k')\n",
+    "plt.ylabel('davies-bouldin score')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cluster_num_seq = range(2, 20)\n",
+    "# random.seed() does not reach scikit-learn, so pin the KMeans seed explicitly;\n",
+    "# otherwise the curve changes from run to run.\n",
+    "silhouette_vec = count_clustering_scores(\n",
+    "    transformed_data, cluster_num_seq,\n",
+    "    lambda n_clusters: KMeans(n_clusters=n_clusters, random_state=42),\n",
+    "    silhouette_score)\n",
+    "plt.figure(figsize=(12,8))\n",
+    "plt.plot(cluster_num_seq, silhouette_vec, 'bx-')\n",
+    "plt.xlabel('k')\n",
+    "plt.ylabel('Silhouette score')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Pierwszy przykładowy model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_km = KMeans(n_clusters = 12, random_state = 42)\n",
+    "labels_km = model_km.fit_predict(transformed_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep the labels out of transformed_data: that frame is reused below as the feature\n",
+    "# matrix (t-SNE, model comparison, DBSCAN), so a label column would leak into those fits.\n",
+    "# NOTE(review): downstream hyper-parameters (e.g. the DBSCAN eps values) were presumably\n",
+    "# tuned with the leaked column present and should be re-checked.\n",
+    "data[\"cluster\"] = labels_km"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tSNE = TSNE(learning_rate = 300, random_state = 42, verbose = 1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tSNE_td = tSNE.fit_transform(transformed_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(12,8))\n",
+    "sns.scatterplot(x = tSNE_td[:,0],\n",
+    "                y = tSNE_td[:,1], \n",
+    "                hue = labels_km,\n",
+    "                style = data[\"Revenue\"],\n",
+    "                alpha=0.5,\n",
+    "                palette=sns.color_palette(\"hls\", 12), \n",
+    "                legend=True)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, ax = plt.subplots(4, 3, figsize=(14, 14))\n",
+    "for i, feature in enumerate(num_vars):\n",
+    "    m, n = divmod(i, 3)\n",
+    "    sns.boxplot(x=\"cluster\", y=feature, data=data, ax = ax[m, n])\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results = data.groupby(\"cluster\").agg(['sum', 'count'])\n",
+    "results[\"Revenue\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Porównanie wyników różnych modeli"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "algorithms = {\n",
+    "    \"KMeans\": KMeans(random_state=42),\n",
+    "    \"Agglomerative - ward linkage\": AgglomerativeClustering(linkage=\"ward\"),\n",
+    "    \"Agglomerative - single linkage\": AgglomerativeClustering(linkage=\"single\"),\n",
+    "    \"GMM - spherical covariance\": GaussianMixture(covariance_type = \"spherical\", random_state = 42)\n",
+    "}\n",
+    "\n",
+    "# Cluster on the features only: an earlier cell may have stored labels in a\n",
+    "# \"cluster\" column of transformed_data, which must not act as a feature here.\n",
+    "features = transformed_data.drop(columns=\"cluster\", errors=\"ignore\")\n",
+    "\n",
+    "silhouette_scores = pd.DataFrame()\n",
+    "calinski_harabasz_scores = pd.DataFrame()\n",
+    "davies_bouldin_scores = pd.DataFrame()\n",
+    "stability_scores= pd.DataFrame()\n",
+    "indices = [k for k in range(len(features))]\n",
+    "\n",
+    "\n",
+    "for i in range (2, 13):\n",
+    "    for name in algorithms:\n",
+    "        model = algorithms[name]\n",
+    "        # GaussianMixture names its size parameter n_components; the others use n_clusters.\n",
+    "        if \"KMeans\" in name or \"Agglomerative\" in name:\n",
+    "            model.n_clusters = i\n",
+    "        else:\n",
+    "            model.n_components = i\n",
+    "        labels = model.fit_predict(features)\n",
+    "        silhouette_scores.loc[name, i] = silhouette_score(features, labels)\n",
+    "        calinski_harabasz_scores.loc[name, i] = calinski_harabasz_score(features, labels)\n",
+    "        davies_bouldin_scores.loc[name, i] = davies_bouldin_score(features, labels)\n",
+    "        # Stability: refit on bootstrap resamples and compare with the full-data labels.\n",
+    "        stability = []\n",
+    "        for j in range(5):\n",
+    "            # Seeded per round so the table is reproducible and every model / cluster\n",
+    "            # count is evaluated on the same resamples.\n",
+    "            resampled = sklearn.utils.resample(indices, random_state=j)\n",
+    "            resampled_pred = model.fit_predict(features.iloc[resampled])\n",
+    "            stability.append(normalized_mutual_info_score(labels[resampled], resampled_pred))\n",
+    "        stability_scores.loc[name,i]  = np.mean(stability)\n",
+    "        print(\"Doing {} with {} clusters\".format(name, i))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "silhouette_scores"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.heatmap(silhouette_scores)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "calinski_harabasz_scores"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.heatmap(calinski_harabasz_scores)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "davies_bouldin_scores"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.heatmap(davies_bouldin_scores)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stability_scores"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.heatmap(stability_scores)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transformed_data.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "minPts = 38\n",
+    "nbrs = sklearn.neighbors.NearestNeighbors(n_neighbors=minPts).fit(transformed_data)\n",
+    "distances, indices = nbrs.kneighbors(transformed_data)\n",
+    "distanceDec = sorted(distances[:,minPts-1], reverse=True)\n",
+    "fig = plt.figure(figsize=(9,6))\n",
+    "ax1 = fig.add_subplot()\n",
+    "\n",
+    "plt.xlabel('Indeks punktu po sortowaniu')\n",
+    "plt.ylabel('Dystans od 37 najbliższego sąsiada')\n",
+    "ax1.plot(list(range(1,transformed_data.shape[0]+1)), distanceDec)\n",
+    "plt.xscale('log')\n",
+    "plt.grid(axis='y')\n",
+    "\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "db = DBSCAN(eps=4.2, min_samples=38)\n",
+    "\n",
+    "db_labels = db.fit_predict(transformed_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "set(db_labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(12,8))\n",
+    "sns.scatterplot(x = tSNE_td[:,0],\n",
+    "                y = tSNE_td[:,1], \n",
+    "                hue = db_labels,\n",
+    "                alpha=0.5,\n",
+    "                # one colour per distinct label DBSCAN produced, instead of a hard-coded count\n",
+    "                palette=sns.color_palette(\"Set2\", len(set(db_labels))), \n",
+    "                legend=True)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.hist(db_labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "db = DBSCAN(eps=2.2, min_samples=38)\n",
+    "\n",
+    "db_labels = db.fit_predict(transformed_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "set(db_labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(12,8))\n",
+    "sns.scatterplot(x = tSNE_td[:,0],\n",
+    "                y = tSNE_td[:,1], \n",
+    "                hue = db_labels,\n",
+    "                alpha=0.5,\n",
+    "                # one colour per distinct label DBSCAN produced, instead of a hard-coded count\n",
+    "                palette=sns.color_palette(\"hls\", len(set(db_labels))), \n",
+    "                legend=True)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.hist(db_labels)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Analiza wybranego modelu"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "km = KMeans(n_clusters=5, random_state=42)\n",
+    "\n",
+    "# An earlier cell may have stored labels in transformed_data[\"cluster\"]; drop that column\n",
+    "# so the model, and the distance metrics computed from transformed_data further down,\n",
+    "# see the features only.\n",
+    "transformed_data = transformed_data.drop(columns=\"cluster\", errors=\"ignore\")\n",
+    "labels = km.fit_predict(transformed_data)\n",
+    "\n",
+    "# The labels are stored on the raw frame only (used by the plots and summaries below).\n",
+    "data[\"cluster\"] = labels"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(12,8))\n",
+    "sns.scatterplot(x = tSNE_td[:,0],\n",
+    "                y = tSNE_td[:,1], \n",
+    "                hue = labels,\n",
+    "                style = data[\"Revenue\"],\n",
+    "                alpha=0.5,\n",
+    "                palette=sns.color_palette(\"hls\", 5), \n",
+    "                legend=True)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.hist(labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results = data.groupby(\"cluster\").agg(['sum', 'count'])\n",
+    "results[\"Revenue\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, ax = plt.subplots(4, 3, figsize=(14, 14))\n",
+    "for i, feature in enumerate(num_vars):\n",
+    "    m, n = divmod(i, 3)\n",
+    "    sns.boxplot(x=\"cluster\", y=feature, data=data, ax = ax[m, n], palette=sns.color_palette(\"hls\", 5))\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "sns.countplot(x=\"VisitorType\", hue=\"cluster\", data=data, palette=sns.color_palette(\"hls\", 5))\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.countplot(x=\"Revenue\", hue=\"cluster\", data=data, palette=sns.color_palette(\"hls\", 5))\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from scipy.spatial import distance\n",
+    "\n",
+    "def min_interclust_dist(X, label):\n",
+    "    clusters = set(label)\n",
+    "    global_min_dist = np.inf\n",
+    "    for cluster_i in clusters:\n",
+    "        cluster_i_idx = np.where(label == cluster_i)\n",
+    "        for cluster_j in clusters:\n",
+    "            if cluster_i != cluster_j:\n",
+    "                cluster_j_idx = np.where(label == cluster_j)\n",
+    "                interclust_min_dist = np.min(distance.cdist(X[cluster_i_idx], X[cluster_j_idx]))\n",
+    "                global_min_dist = np.min([global_min_dist, interclust_min_dist])\n",
+    "    return global_min_dist\n",
+    "\n",
+    "def _inclust_mean_dists(X, label):\n",
+    "    clusters = set(label)\n",
+    "    inclust_dist_list = []\n",
+    "    for cluster_i in clusters:\n",
+    "        cluster_i_idx = np.where(label == cluster_i)\n",
+    "        inclust_dist = np.mean(distance.pdist(X[cluster_i_idx]))\n",
+    "        inclust_dist_list.append(inclust_dist)\n",
+    "    return inclust_dist_list\n",
+    "\n",
+    "def mean_inclust_dist(X, label):\n",
+    "    \"\"\"Average the per-cluster mean pairwise distances over all clusters.\"\"\"\n",
+    "    return np.mean(_inclust_mean_dists(X, label))\n",
+    "\n",
+    "def std_dev_of_inclust_dist(X, label):\n",
+    "    \"\"\"Standard deviation of the per-cluster mean pairwise distances.\"\"\"\n",
+    "    return np.std(_inclust_mean_dists(X, label))\n",
+    "\n",
+    "def mean_dist_to_center(X, label):\n",
+    "    clusters = set(label)\n",
+    "    inclust_dist_list = []\n",
+    "    for cluster_i in clusters:\n",
+    "        cluster_i_idx = np.where(label == cluster_i)\n",
+    "        cluster_i_mean = np.mean(X[cluster_i_idx], axis=0, keepdims=True)\n",
+    "        inclust_dist = np.mean(distance.cdist(X[cluster_i_idx], cluster_i_mean))\n",
+    "        inclust_dist_list.append(inclust_dist)\n",
+    "    return np.mean(inclust_dist_list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "min_interclust_dist(transformed_data.to_numpy(), labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mean_inclust_dist(transformed_data.to_numpy(), labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "std_dev_of_inclust_dist(transformed_data.to_numpy(), labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mean_dist_to_center(transformed_data.to_numpy(), labels)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 9 klastrów - bonus"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "km = KMeans(n_clusters=9, random_state=42)\n",
+    "\n",
+    "# An earlier cell may have stored labels in transformed_data[\"cluster\"]; drop that column\n",
+    "# so the previous clustering does not act as a feature of this one.\n",
+    "transformed_data = transformed_data.drop(columns=\"cluster\", errors=\"ignore\")\n",
+    "labels = km.fit_predict(transformed_data)\n",
+    "\n",
+    "# The labels are stored on the raw frame only (used by the plots and summaries below).\n",
+    "data[\"cluster\"] = labels"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(12,8))\n",
+    "sns.scatterplot(x = tSNE_td[:,0],\n",
+    "                y = tSNE_td[:,1], \n",
+    "                hue = labels,\n",
+    "                style = data[\"Revenue\"],\n",
+    "                alpha=0.5,\n",
+    "                palette=sns.color_palette(\"hls\", 9), \n",
+    "                legend=True)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.hist(labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results = data.groupby(\"cluster\").agg(['sum', 'count'])\n",
+    "results[\"Revenue\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, ax = plt.subplots(4, 3, figsize=(14, 14))\n",
+    "for i, feature in enumerate(num_vars):\n",
+    "    m, n = divmod(i, 3)\n",
+    "    sns.boxplot(x=\"cluster\", y=feature, data=data, ax = ax[m, n], palette=sns.color_palette(\"hls\", 9))\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.countplot(x=\"VisitorType\", hue=\"cluster\", data=data, palette=sns.color_palette(\"hls\", 9))\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.countplot(x=\"Revenue\", hue=\"cluster\", data=data, palette=sns.color_palette(\"hls\", 9))\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/Projekty/Projekt2/Spytek_Zolkowski/notebook_final2.pdf b/Projekty/Projekt2/Spytek_Zolkowski/notebook_final2.pdf
new file mode 100644
index 000000000..a9bd8986b
Binary files /dev/null and b/Projekty/Projekt2/Spytek_Zolkowski/notebook_final2.pdf differ