diff --git a/practise/KNN+Practise.ipynb b/practise/KNN+Practise.ipynb index 07a0698..aabacfe 100644 --- a/practise/KNN+Practise.ipynb +++ b/practise/KNN+Practise.ipynb @@ -19,13 +19,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2017-03-09T17:11:00.462641Z", "start_time": "2017-03-09T12:11:00.457060-05:00" - }, - "collapsed": true + } }, "outputs": [], "source": [ @@ -48,46 +47,188 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2017-03-09T17:11:00.913456Z", "start_time": "2017-03-09T12:11:00.883452-05:00" - }, - "collapsed": true + } }, "outputs": [], "source": [ "import pandas as pd\n", "\n", "# Import the data using the file path\n", - "filepath = os.sep.join(data_path + ['Orange_Telecom_Churn_Data.csv'])\n", + "#filepath = os.sep.join(data_path + ['Orange_Telecom_Churn_Data.csv'])\n", + "filepath = 'Orange_Telecom_Churn_Data.csv'\n", "data = pd.read_csv(filepath)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2017-03-09T17:11:01.087485Z", "start_time": "2017-03-09T12:11:01.075442-05:00" } }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
stateKS
account_length128
area_code415
phone_number382-4657
intl_planno
voice_mail_planyes
number_vmail_messages25
total_day_minutes265.1
total_day_calls110
total_day_charge45.07
total_eve_minutes197.4
total_eve_calls99
total_eve_charge16.78
total_night_minutes244.7
total_night_calls91
total_night_charge11.01
total_intl_minutes10
total_intl_calls3
total_intl_charge2.7
number_customer_service_calls1
churnedFalse
\n", + "
" + ], + "text/plain": [ + " 0\n", + "state KS\n", + "account_length 128\n", + "area_code 415\n", + "phone_number 382-4657\n", + "intl_plan no\n", + "voice_mail_plan yes\n", + "number_vmail_messages 25\n", + "total_day_minutes 265.1\n", + "total_day_calls 110\n", + "total_day_charge 45.07\n", + "total_eve_minutes 197.4\n", + "total_eve_calls 99\n", + "total_eve_charge 16.78\n", + "total_night_minutes 244.7\n", + "total_night_calls 91\n", + "total_night_charge 11.01\n", + "total_intl_minutes 10\n", + "total_intl_calls 3\n", + "total_intl_charge 2.7\n", + "number_customer_service_calls 1\n", + "churned False" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "data.head(1).T" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2017-03-09T17:11:01.564122Z", "start_time": "2017-03-09T12:11:01.557967-05:00" - }, - "collapsed": true + } }, "outputs": [], "source": [ @@ -97,14 +238,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2017-03-09T17:11:02.585712Z", "start_time": "2017-03-09T12:11:02.579981-05:00" } }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['account_length', 'intl_plan', 'voice_mail_plan',\n", + " 'number_vmail_messages', 'total_day_minutes', 'total_day_calls',\n", + " 'total_day_charge', 'total_eve_minutes', 'total_eve_calls',\n", + " 'total_eve_charge', 'total_night_minutes', 'total_night_calls',\n", + " 'total_night_charge', 'total_intl_minutes', 'total_intl_calls',\n", + " 'total_intl_charge', 'number_customer_service_calls', 'churned'],\n", + " dtype='object')" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "data.columns" ] @@ -121,13 +279,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": 
"2017-03-09T17:11:04.545751Z", "start_time": "2017-03-09T12:11:04.509105-05:00" - }, - "collapsed": true + } }, "outputs": [], "source": [ @@ -141,13 +298,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2017-03-09T17:11:04.736451Z", "start_time": "2017-03-09T12:11:04.718049-05:00" - }, - "collapsed": true + } }, "outputs": [], "source": [ @@ -175,13 +331,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2017-03-09T17:11:50.280188Z", "start_time": "2017-03-09T12:11:50.269326-05:00" - }, - "collapsed": true + } }, "outputs": [], "source": [ @@ -189,23 +344,22 @@ "x_cols = [x for x in data.columns if x != 'churned']\n", "\n", "# Split the data into two dataframes\n", - "X_data = data[x_cols]\n", - "y_data = data['churned']\n", + "#X_data = data[x_cols]\n", + "#y_data = data['churned']\n", "\n", "# # alternatively:\n", - "# X_data = data.copy()\n", - "# y_data = X_data.pop('churned')" + "X_data = data.copy()\n", + "y_data = X_data.pop('churned')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2017-03-09T17:11:50.989446Z", "start_time": "2017-03-09T12:11:50.498708-05:00" - }, - "collapsed": true + } }, "outputs": [], "source": [ @@ -232,32 +386,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2017-03-09T17:11:50.997204Z", "start_time": "2017-03-09T12:11:50.991392-05:00" - }, - "collapsed": true + } }, "outputs": [], "source": [ "# Function to calculate the % of values that were correctly predicted\n", "\n", "def accuracy(real, predict):\n", - " return sum(y_data == y_pred) / float(real.shape[0])" + " return sum(real == predict) / float(real.shape[0])" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": { "ExecuteTime": { 
"end_time": "2017-03-09T17:11:51.128466Z", "start_time": "2017-03-09T12:11:51.115874-05:00" } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.9422\n" + ] + } + ], "source": [ "print(accuracy(y_data, y_pred))" ] @@ -276,31 +437,57 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2017-03-09T17:11:52.047123Z", "start_time": "2017-03-09T12:11:51.538212-05:00" - }, - "collapsed": true + } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.0\n" + ] + } + ], "source": [ - "#Student writes code here" + "#Student writes code here\n", + "#q5 part 1 weights are the inverse of distances\n", + "knn2 = KNeighborsClassifier(n_neighbors=3, weights = 'distance')\n", + "knn2 = knn2.fit(X_data, y_data)\n", + "y_pred2 = knn2.predict(X_data)\n", + "print(accuracy(y_data, y_pred2))\n", + "#we get accuracy 1 because we checked the accuracy on our training set, i.e. the set which was used to learn: with distance weighting each training point is its own nearest neighbour at distance 0, so its own label wins the vote" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2017-03-09T17:11:52.755941Z", "start_time": "2017-03-09T12:11:52.049816-05:00" - }, - "collapsed": true + } }, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.9456\n" + ] + } + ], + "source": [ + "# q5 part 2 Manhattan distance (p=1)\n", + "knn3 = KNeighborsClassifier(n_neighbors=3, p =1)\n", + "knn3 = knn3.fit(X_data, y_data)\n", + "y_pred3 = knn3.predict(X_data)\n", + "print(accuracy(y_data, y_pred3))" + ] }, { "cell_type": "markdown", @@ -314,51 +501,175 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": { "ExecuteTime": { "end_time": "2017-03-09T17:12:01.329053Z", "start_time": "2017-03-09T12:11:52.759302-05:00" - }, - "collapsed": true + } }, "outputs": [], "source": [ - "#Student writes code 
here" + "#Student writes code here\n", + "# q6 starts. \n", + "k_values = [i for i in range(1,21)] # k values from 1 to 20\n", + "accuracies = [] # declare an array to store accuracies" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": { "ExecuteTime": { "end_time": "2017-03-09T17:12:01.829160Z", "start_time": "2017-03-09T12:12:01.331021-05:00" - }, - "collapsed": true + } }, "outputs": [], - "source": [] + "source": [ + "# iterate through all k values and store the accuracies\n", + "for i in k_values :\n", + " knnx = KNeighborsClassifier(n_neighbors=i)\n", + " knnx = knnx.fit(X_data, y_data)\n", + " y_predx = knnx.predict(X_data)\n", + " acc = accuracy(y_data, y_predx)\n", + " accuracies.append(acc)" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": { "ExecuteTime": { "end_time": "2017-03-09T17:12:02.238935Z", "start_time": "2017-03-09T12:12:01.831094-05:00" - }, - "collapsed": true + } }, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5,1,'accuracies vs k')" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYsAAAEWCAYAAACXGLsWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAHWRJREFUeJzt3XuYHHWd7/H3R0K4Q4AMLCaBcAloluVwGSJeIAgeNmFdsomPCosrl9WoyIruwp4g59lMwnJQLrrHRx48KNFF5KrARlYJkZvrHlAmQIAQAoEFSbiNQLgeDJfv+aN+A80w01XVM5XuTn9ez9PPVFfVt+s7PTX17d/vV12liMDMzKye9zQ7ATMza30uFmZmlsvFwszMcrlYmJlZLhcLMzPL5WJhZma5XCzM1iFJv5R0TLPzGEjSsZJ+0+w8rHWNanYCZp0kIqY3OwezRrhlYVaCMv6/sY7jnd7ajqQ5kh6S9KKk+yTNHLD885KW1yzfN82fIOkqSX2SnpH03TS/R9LFNfETJYWkUen5zZLOkPSfwCvALpKOq9nGw5K+MCCHGZLukvRCynVazWt9rma949PrPCdpkaSd0nxJ+rakp9Nr3CNpz0Hei09L6h0w72uSFqbpw9N78KKk1ZJOLvgeny3pN5K2KrK+rf9cLKwdPQQcCGwFzAMulrQDgKRPAj3AZ4EtgSOAZyRtAFwLPApMBMYBl5XY5t8As4Et0ms8DXw8beM44Ns1RWkKcBFwCjAGOAh4ZOALSpoBfB2YBXQB/wFcmhYfluJ2T7/np4BnBsnr58AekibVzPtr4JI0fSHwhYjYAtgTuLHeLynpPZK+D+wFHBYRz9db3zqHi4W1nYi4MiIej4g3I+Jy4EFgSlr8OeCsiLg9Misj4tG0/L3AKRHxckS8GhFlBnR/FBHLIuL1iHgtIv49Ih5K27gFuJ6sgAH8LbAgIhanHFdHxP2DvOYXgTMjYnlEvA78L2Dv1Lp4jawwvQ9QWueJQd6LV4B/A44CSEXjfcDCtMprwGRJW0bEcxFxR53fcUOyYrUN8Jfptc0AFwtrQ5I+m7p41khaQ/aJeWxaPIGs5THQBODRdFBuxGMDcpgu6TZJz6YcDi+Qw0A7Af+75vd4FhAwLiJuBL4LnAc8LekCSVsO8TqXkIoFWavimpoD/SdSbo9KukXSB+vksxswA5gXEWsL5G8dxMXC2kr61P194ERg24gYA9xLdpCF7KC+6yChjwE79o9DDPAysGnN8z8ZZJ23Ls8saSPgZ8A5wPYph18UyGGwnL4QEWNqHptExP8FiIjvRMR+wGSy7qhThnidxUCXpL3JikZ/FxSphTUD2A64BriiTj7LybrUfilpjwL5WwdxsbB2sxnZgbsPQNJxZC2Lfj8ATpa0Xxok3i0VmN8BTwDfkLSZpI0lfTjF3AUcJGnHNKB7ak4Oo4GNUg6vS5pONsbQ70LgOEmHpjGAcZLeN8jrfA84VdKfpt9lqzTmgqT9JX1A0oZkxexV4M3BkomI14ArgbPJupAWp9cYLeloSVuldV4Y6jVqXutSsnGUX0kqUvCsQ7hYWFuJiPuAc4FbgaeAPwP+s2b5lcAZZJ+uXyT7NL1NRLwB/CVZV8vvgVXAp1PMYuBy4G5gCdlAeL0cXgS+QvYp/Tmyrp+FNct/Rxr0Bp4HbiHrchr4OlcD3wQuk/QCWQup/3sYW5K1oJ4jG1B/hqwYDOUS4GPAlQO62v4GeCS9/heBo+v9bimvfwXmAzdKmpi3vnUG+eZHZmaWxy0LMzPL5WJhZma5XCzMzCyXi4WZmeVab646O3bs2Jg4cWKz0zAzaytLliz5Q0R05a233hSLiRMn0tvbm7+imZm9RdKjRdZzN5SZmeVysTAzs1wuFmZmlsvFwszMcrlYmJlZrsqKhaQF6ZaQ9w6xXJK+I2mlpLv77zKWlh0j6cH0OKaqHN+hp2edbMbMrB1V2bL4ETCtzvLpwKT
0mA2cDyBpG2Au8AGyu5vNlbR1hXlm5s2rfBNmZu2qsmIREb8mu/PXUGYAF6XbUt4GjEn3Uf5zYHFEPBsRz5Fdm79e0TEzs4o1c8xiHO+8VeWqNG+o+e8iabakXkm9fX195TPo6QEpe2QvmD3cJWVm9g5tPcAdERdERHdEdHd15X5b/d16eiAie2QvmD1cLMzM3qGZxWI12Y3t+41P84aab2ZmTdLMYrEQ+Gw6K+oA4PmIeAJYBBwmaes0sH1YmletuXMr34SZWbuq7EKCki4FDgbGSlpFdobThgAR8T3gF8DhwErgFbJ7FhMRz0o6Hbg9vdT8iKg3UD4y3PVkZjakyopFRByVszyALw+xbAGwoIq8zMysvLYe4DYzs3XDxcLMzHK5WJiZWS4XCzMzy+ViYWZmuVwszMwsl4uFmZnlcrEwM7NcLhZmZpbLxcLMzHK5WJiZWS4XCzMzy+ViYWZmuVwszMwsl4uFmZnlcrEwM7NcLhZmZpbLxcLMzHK5WJiZWS4XCzMzy+ViYWZmuVwszMwsl4uFmZnlcrEwM7NcLhZmZpbLxcLMzHK5WJiZWS4XCzMzy+ViYWZmuVwszMwsl4uFmZnlqrRYSJomaYWklZLmDLJ8J0k3SLpb0s2SxtcsO0vSMknLJX1HkqrM1czMhlZZsZC0AXAeMB2YDBwlafKA1c4BLoqIvYD5wJkp9kPAh4G9gD2B/YGpVeVqZmb1VdmymAKsjIiHI2ItcBkwY8A6k4Eb0/RNNcsD2BgYDWwEbAg8VWGuZmZWR5XFYhzwWM3zVWleraXArDQ9E9hC0rYRcStZ8XgiPRZFxPIKczUzszqaPcB9MjBV0p1k3UyrgTck7Qa8HxhPVmAOkXTgwGBJsyX1Surt6+tbl3mbmXWUKovFamBCzfPxad5bIuLxiJgVEfsAp6V5a8haGbdFxEsR8RLwS+CDAzcQERdERHdEdHd1dVX1e5iZdbwqi8XtwCRJO0saDRwJLKxdQdJYSf05nAosSNO/J2txjJK0IVmrw91QZmZNUlmxiIjXgROBRWQH+isiYpmk+ZKOSKsdDKyQ9ACwPXBGmv9T4CHgHrJxjaUR8fOqcjUzs/oUEc3OYUR0d3dHb29vs9MwM2srkpZERHfees0e4DYzszbgYmFmZrlcLMzMLJeLhZmZ5XKxMDOzXC4WZmaWy8XCzMxyuViYmVkuFwszM8vlYmFmZrlcLMzMLJeLhZmZ5XKxMDOzXC4WZmaWy8XCzMxyuViYmVkuFwszM8vlYmFmZrlcLMzMLJeLhZmZ5XKxMDOzXC4WZmaWy8XCzMxyuViYmVkuFwszM8vlYmFmZrlcLMzMLJeLhZmZ5XKxMDOzXC4WZmaWq1CxkHSWpC0lbSjpBkl9kj5TdXJmZtYairYsDouIF4CPA48AuwGnVJWUmZm1lqLFYlT6+RfAlRHxfJEgSdMkrZC0UtKcQZbvlFoqd0u6WdL4mmU7Srpe0nJJ90maWDBXMzMbYUWLxbWS7gf2A26Q1AW8Wi9A0gbAecB0YDJwlKTJA1Y7B7goIvYC5gNn1iy7CDg7It4PTAGeLpirmZmNsELFIiLmAB8CuiPiNeAVYEZO2BRgZUQ8HBFrgcsGiZkM3Jimb+pfnorKqIhYnLb/UkS8UiRXMzMbeUUHuDcFTgDOT7PeC3TnhI0DHqt5virNq7UUmJWmZwJbSNoW2B1YI+kqSXdKOju1VAbmNVtSr6Tevr6+Ir+KmZk1oGg31A+BtWStC4DVwD+PwPZPBqZKuhOYml73DbIxkgPT8v2BXYBjBwZHxAUR0R0R3V1dXSOQjpmZDaZosdg1Is4CXgNIXULKiVkNTKh5Pj7Ne0tEPB4RsyJiH+C0NG8NWSvkrtSF9TpwDbBvwVzNzGyEFS0WayVtAgSApF2BP+bE3A5MkrSzpNHAkcDC2hUkjZXUn8OpwIKa2DFpIB3gEOC+grmamdkIK1os5gLXARMk/QS4AfjHegGpRXAisAhYDlw
REcskzZd0RFrtYGCFpAeA7YEzUuwbZF1QN0i6h6wV8/0yv5iZmY0cRUSxFbOB5wPIDty3RcQfqkysrO7u7ujt7W12GmZmbUXSkojIO2GpfstC0vvSz32BnYAngMeBHdM8MzPrAKNylv89MBs4d5BlQTaWYGZm67m6xSIiZqefH1036ZiZWSsq+qW8L0saU/N8a0knVJeWmZm1kqJnQ30+ff8BgIh4Dvh8NSmZmVmrKVosNpD01pfw0qU3RleTkpmZtZq8Ae5+1wGXS/o/6fkX0jwzM+sARYvF/yArEF9KzxcDP6gkIzMzazmFikVEvEl2xdnz89Y1M7P1T6FiIWkS2Y2JJgMb98+PiF0qysvMzFpImUuUnw+8DnyU7C52F1eVlJmZtZaixWKTiLiB7FpSj0ZED9n9uM3MrAMUHeD+Y7qU+IOSTiS7L8Xm1aVlZmatpGjL4iRgU+ArwH7AZ4BjqkrKzMxaS27LIn0B79MRcTLwEnBc5VmZmVlLyW1ZpBsRfWQd5GJmZi2q6JjFnZIWAlcCL/fPjIirKsnKzMxaStFisTHwDO+8f0UALhZmZh2g6De4PU5hZtbBin6D+4dkLYl3iIjjRzwjMzNrOUW7oa6tmd4YmEl2L24zM+sARbuhflb7XNKlwG8qycjMzFpO0S/lDTQJ2G4kEzEzs9ZVdMziRd45ZvEk2T0uzMysAxTthtqi6kTMzKx1FeqGkjRT0lY1z8dI+qvq0jIzs1ZSdMxibkQ83/8kItYAc6tJyczMWk3RYjHYekVPu7UienqanYGZ2ZCKFoteSd+StGt6fAtYUmViHWfevGZnYGY2pKLF4u+AtcDlwGXAq8CXq0rKzMxaS6FiEREvR8SciOiOiP0j4usR8XJ+pNXV0wNS9oC3p90lZWYtpujZUIsljal5vrWkRQXipklaIWmlpDmDLN9J0g2S7pZ0s6TxA5ZvKWmVpO8WybPt9PRARPaAt6ddLMysxRTthhqbzoACICKeI+cb3OkOe+cB04HJwFGSJg9Y7RzgoojYC5gPnDlg+enArwvmaGZmFSlaLN6UtGP/E0kTGeQqtANMAVZGxMMRsZZsrGPGgHUmAzem6Ztql0vaD9geuL5gju1trs9ENrPWVbRYnAb8RtKPJV0M3AKcmhMzDnis5vmqNK/WUmBWmp4JbCFpW0nvAc4FTq63AUmzJfVK6u3r6yv4q7Qodz2ZWQsrOsB9HdANrAAuBf4B+H8jsP2TgamS7gSmAquBN4ATgF9ExKqcvC5Ig+7dXV1dI5COmZkNpuiFBD8HnASMB+4CDgBu5Z23WR1oNTCh5vn4NO8tEfE4qWUhaXPgExGxRtIHgQMlnQBsDoyW9FJEvGuQ3MzMqle0G+okYH/g0Yj4KLAPsKZ+CLcDkyTtLGk0cCSwsHYFSWNTlxNk3VoLACLi6IjYMSImkrU+LnKhMDNrnqLF4tWIeBVA0kYRcT+wR72AiHgdOBFYBCwHroiIZZLmSzoirXYwsELSA2SD2Wc08DuYmVnFFJF3UhNIuho4DvgqWdfTc8CGEXF4tekV193dHb29vc1Ow8ysrUhaEhHdeesVvZ/FzDTZI+kmYCvgumHkZ2ZmbaT0lWMj4pYqEjEzs9bV6D24zcysg7hYmJlZLhcLMzPL5WJhZma5XCzMzCyXi4WZmeVysTAzs1wuFmZmlsvFwszMcrlYmJlZLhcLMzPL5WIxUnxbVDNbj7lYjJR585qdgZlZZVwsLOOWkZnV4WIxHD09IGUPeHu6HQ+8bhmZWR2F7pTXDpp+pzwJ2vm9bPf8zawhRe+U55ZFJ1ufWkZmVqnSd8qzIcyd2+wMyuvpebswuGVhZnW4ZTFS/GnczNZjLhaWaXbLyMXWrKV5gNtag7vBzJrCA9xmZjZiXCyseXw2llnbcDeUtQZ3Q5k1hbuhzMxsxLhYWGvw2VhmLc3dUGbgbjDrWO6GMjOzEeNiYZ3LZ2OZFVZpsZA0TdIKSSslzRlk+U6SbpB
0t6SbJY1P8/eWdKukZWnZp6vM0zpUT0/W9dTf/dQ/7WJh9i6VFQtJGwDnAdOBycBRkiYPWO0c4KKI2AuYD5yZ5r8CfDYi/hSYBvyLpDFV5WpmZvVV2bKYAqyMiIcjYi1wGTBjwDqTgRvT9E39yyPigYh4ME0/DjwNdFWYq3W6Zp+NZdbiqiwW44DHap6vSvNqLQVmpemZwBaStq1dQdIUYDTwUEV5mrnrySxHswe4TwamSroTmAqsBt7oXyhpB+DHwHER8ebAYEmzJfVK6u3r61tXOZuZdZwqi8VqYELN8/Fp3lsi4vGImBUR+wCnpXlrACRtCfw7cFpE3DbYBiLigojojojuri73UpmZVaXKYnE7MEnSzpJGA0cCC2tXkDRWUn8OpwIL0vzRwNVkg98/rTBHMzMroLJiERGvAycCi4DlwBURsUzSfElHpNUOBlZIegDYHjgjzf8UcBBwrKS70mPvqnI1M7P6fLkPM7MO5st92Lrls4mGx++ftTgXi/VFsw828+Y1d/vtzu+ftTgXi/WFDzY2HM3+sGEtz8XCGucL8Q1PK71//rBhOTzA3c56egb/J587d90fcHw/iOFp9vvX7O1b03iAuxP4qqk2HK3UsrGWN6rZCdh6whfiG55mvH89PW8XBrcsLIdbFuuLZh+sm/1ptNnbH652z9/Wey4W64tOP9h4gHZ4mv1hw1qei4WZ+cOG5XKxsPblAVqzdcanztr6wQO0Zg3xqbNm1j7cGmx5Lha2fmj2AK0PdsPjExRanruhzEaCu8GGx+9f07gbysxam09QaCsuFmaN8sFueFrpcjXN/ps1e/sFuBvKbCQ0uxul9tId7ajZ718Hb9/dUGadpN0HiJt9goLlcrEwGwk+2A1Ps7qemtmN2Oztl+RuKLN21Ur3M2l37obK7YZysTBbHzT7YNfumv3+tUGxcDeUmQ1fu7dkmt2N2OztF+BiYbY+aPbBptkD7MMtVs0uds3efgHuhjKz4evgbpx2524oM6tWm53NY8PjYmFmjWn2N7BbqVh1QIF0N5SZDV+zu4E6ffvD4G4oM1t3mj3A3unWQcvGxcLMhq/Z3TDNKFat1A22Ds5GczeUmdlwNbsbahjbdzeUmdn6bB23bCotFpKmSVohaaWkOYMs30nSDZLulnSzpPE1y46R9GB6HFNlnmZmw9KsbrB1eDZaZcVC0gbAecB0YDJwlKTJA1Y7B7goIvYC5gNnpthtgLnAB4ApwFxJW1eVq5nZsDR7zGYdqLJlMQVYGREPR8Ra4DJgxoB1JgM3pumbapb/ObA4Ip6NiOeAxcC0CnM1M2tf66BlU2WxGAc8VvN8VZpXaykwK03PBLaQtG3BWCTNltQrqbevr2/EEjczaysdcOrsycBUSXcCU4HVwBtFgyPigojojojurq6uqnI0M+t4oyp87dXAhJrn49O8t0TE46SWhaTNgU9ExBpJq4GDB8TeXGGuZmZWR5Uti9uBSZJ2ljQaOBJYWLuCpLGS+nM4FViQphcBh0naOg1sH5bmmZlZE1RWLCLideBEsoP8cuCKiFgmab6kI9JqBwMrJD0AbA+ckWKfBU4nKzi3A/PTPDMzawJ/g9vMrIN13D24JfUBjw7jJcYCf3C84x3v+A6L3yki8s8Qigg/soLZ63jHO97xnRhf5NHsU2fNzKwNuFiYmVkuF4u3XeB4xzve8R0an2u9GeA2M7PquGVhZma5XCzMzCxXxxcLSQskPS3p3gZiJ0i6SdJ9kpZJOqlk/MaSfidpaYpv6Ea6kjaQdKekaxuIfUTSPZLuklT6W42Sxkj6qaT7JS2X9MESsXuk7fY/XpD01ZLb/1p67+6VdKmkjUvGn5RilxXd9mD7jKRtJC1ON+taXO/+K0PEfzLl8Kakul+QGiL+7PQ3uFvS1ZLGlIw/PcXeJel6Se8tE1+z7B8khaSxJbffI2l1zb5weNntS/q79B4sk3RWye1fXrPtRyTdVTJ+b0m39f8fSZpSMv6
/Sbo1/S/+XNKWQ8QOeswps/81rOpzc1v9ARwE7Avc20DsDsC+aXoL4AFgcol4AZun6Q2B3wIHNJDH3wOXANc2EPsIMHYY79+/Ap9L06OBMQ2+zgbAk2RfECoaMw74L2CT9PwK4NgS8XsC9wKbkl1U81fAbo3sM8BZwJw0PQf4Zsn49wN7kF0ws7uB7R8GjErT32xg+1vWTH8F+F6Z+DR/AtnlfR6tt08Nsf0e4OSCf7fB4j+a/n4bpefblc2/Zvm5wD+V3P71wPQ0fThwc8n424Gpafp44PQhYgc95pTZ/xp9dHzLIiJ+DTR03amIeCIi7kjTL5JdA+td992oEx8R8VJ6umF6lDrjQNmtaP8C+EGZuJEgaSuyHf9CgIhYGxFrGny5Q4GHIqLst/BHAZtIGkV20H+8ROz7gd9GxCuRXcvsFt6+v8qQhthnZpAVTtLPvyoTHxHLI2JFkaSHiL8+/Q4At5FdqblM/As1Tzejzn5Y53/m28A/1ovNiS9kiPgvAd+IiD+mdZ5uZPuSBHwKuLRkfAD9rYGtqLMfDhG/O/DrNL0Y+MQQsUMdcwrvf43q+GIxUiRNBPYhax2UidsgNXmfJrs7YKl44F/I/kHfLBnXL4DrJS2RNLtk7M5AH/DD1A32A0mbNZjHkdT5Bx1MRKwmuzXv74EngOcj4voSL3EvcKCkbSVtSvaJcEJOzFC2j4gn0vSTZBfGbJbjgV+WDZJ0hqTHgKOBfyoZOwNYHRFLy263xompK2xBA90ou5P9LX8r6RZJ+zeYw4HAUxHxYMm4rwJnp/fvHLKraJexjLfvFPpJCuyHA445le9/LhYjQNm9OH4GfHXAJ7RcEfFGROxN9klwiqQ9S2z348DTEbGkVMLv9JGI2JfsXulflnRQidhRZM3p8yNiH+BlsiZwKcouYX8EcGXJuK3J/sF2Bt4LbCbpM0XjI2I5WZfN9cB1wF2UuPlWndcNSrYQR4qk04DXgZ+UjY2I0yJiQoo9scQ2NwW+TskCM8D5wK7A3mSF/9yS8aOAbYADgFOAK1IroayjKPmhJfkS8LX0/n2N1Nou4XjgBElLyLqX1tZbud4xp6r9z8VimCRtSPZH+0lEXNXo66Tum5sod6/xDwNHSHqE7B7nh0i6uOR2V6efTwNXk907vahVwKqa1tBPyYpHWdOBOyLiqZJxHwP+KyL6IuI14CrgQ2VeICIujIj9IuIg4DmyPuBGPCVpB4D0c8hukKpIOhb4OHB0OmA06icM0Q0yhF3JCvbStC+OB+6Q9CdFXyAinkofnN4Evk+5/RCyffGq1LX7O7KW9pCD7INJXZmzgMtLbhvgGLL9D7IPPaXyj4j7I+KwiNiPrFg9VCfPwY45le9/LhbDkD65XAgsj4hvNRDf1X/WiqRNgP8O3F80PiJOjYjxETGRrBvnxogo/Mla0maStuifJhskLXxWWEQ8CTwmaY8061DgvqLxNRr9NPd74ABJm6a/xaFkfbiFSdou/dyR7EBxSQN5QHZjr2PS9DHAvzX4Og2RNI2sO/KIiHilgfhJNU9nUG4/vCcitouIiWlfXEU2CPtkie3vUPN0JiX2w+QaskFuJO1OdrJF2auwfgy4PyJWlYyDbIxiapo+BCjVjVWzH74H+J/A94ZYb6hjTvX730iPmLfbg+wg9QTwGtlO/rclYj9C1ty7m6wL4y7g8BLxewF3pvh7qXMGRoHXOpiSZ0MBuwBL02MZcFoD290b6E2/wzXA1iXjNwOeAbZq8PeeR3Zguxf4MelsmBLx/0FW4JYChza6zwDbAjeQHSR+BWxTMn5mmv4j8BSwqGT8SuCxmv2w3tlMg8X/LL2HdwM/B8Y1+j9Dzhl2Q2z/x8A9afsLgR1Kxo8GLk6/wx3AIWXzB34EfLHBv/9HgCVpP/otsF/J+JPIWrUPAN8gXV1jkNhBjzll9r9GH77ch5mZ5XI3lJmZ5XKxMDOzXC4WZmaWy8XCzMxyuViYmVkuFwuzCkmaONj
VWc3ajYuFmZnlcrEwW0ck7ZIuuNjoRe7MmmZUsxMw6wTpkiiXkd1vYzhXZjVrChcLs+p1kV2rZ1ZENHLtLLOmczeUWfWeJ7vo4UeanYhZo9yyMKveWrILBS6S9FJENHplW7OmcbEwWwci4uV0s6rFqWAsbHZOZmX4qrNmZpbLYxZmZpbLxcLMzHK5WJiZWS4XCzMzy+ViYWZmuVwszMwsl4uFmZnl+v9YDJ26r2CuCAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#plot the graph\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "plt.plot(k_values, accuracies,'r+')\n", + "plt.xticks(range(1,21))\n", + "plt.xlabel('k')\n", + "plt.ylabel('accuracies')\n", + "plt.title('accuracies vs k')\n", + "# for k = 1, accuracy = 1 because the model has overfit" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from sklearn.model_selection import train_test_split\n", + "X = data.copy()\n", + "y = X.pop('churned')\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state = 99)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# iterate through all k values and store the test accuracies\n", + "test_acc = []\n", + "for i in k_values :\n", + " knn_t = KNeighborsClassifier(n_neighbors=i)\n", + " #or we can get different sets every time\n", + " #X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)\n", + " knn_t = knn_t.fit(X_train, y_train)\n", + " y_pred_t = knn_t.predict(X_test)\n", + " acc = accuracy(y_test, y_pred_t)\n", + " test_acc.append(acc)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5,1,'test accuracies vs k')" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZIAAAEWCAYAAABMoxE0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XucHHWZ7/HPNwkhJFwSTHAhtwmISBSWyyxkj8gdDIFDlHNwwUFFOURUUBBdYcMqghxv6OoeEQ1yEwIYFTUiGFBAjy4oEyCBEC4BQi4EEoRwiwpJnvNH/ebQDD0z1V3T6amZ7/v16ldXVdfv0j01/XTVU/UrRQRmZmb1GtTsDpiZWbk5kJiZWSEOJGZmVogDiZmZFeJAYmZmhTiQmJlZIQ4kZiUl6XuS/r3Z/ehM0oGSVjS7H7bpOJBYnyNpqaRDe6GeEyX9oTf61BdFxCkRcX6z+2HmQGLWRJIGN7sPZkU5kFifIukqYALwS0kvSfrXtHyKpP+StFbSAkkHVpQ5UdJjkl6U9LikNkm7At8D/jnVs7aL9j4saXEq+5ikj3Z6fbqkeyW9IOlRSVPT8m0lXS7pSUnPSfp5RV/+0KmOkPSWNH2FpIsl3SjpZeAgSUdKuie1sVzSuZ3K71fx3pdLOrGiri9VrHdU6uvatP7uFa99TtLK9D4fknRIlc9iX0lPVQY3Se+VtDBN7yOpPfXzaUnf7Orv2KneT0p6QNK4POtbCUWEH370qQewFDi0Yn4s8BdgGtmPn8PS/BhgBPACsEtad3vg7Wn6ROAPPbR1JLATIOAAYB2wV3ptH+D51N6g1I+3pdd+BfwIGAVsBhzQVZtAAG9J01ekOt+Z6hwGHAjsluZ3B54G3pPWnwi8CByf2nkTsEdFXV9K03sCq4F9gcHAh9LnuDmwC7Ac2CGt2wLs1MXn8ShwWMX8j4Gz0vQdwAfS9JbAlC7qOBBYkaY/D9wNjGn2duVH4x7eI7EyOAG4MSJujIiNEXEL0E4WWAA2Au+QtEVErIqIRXkrjohfRcSjkfkdcDPwrvTyScBlEXFLandlRDwoaXvgCOCUiHguIl5NZfP6RUT8MdX5t4i4PSLuS/MLgWvJghrA+4HfRMS1qZ2/RMS9VeqcAXw/Iv4UERsi4krg78AUYANZQJksabOIWBoRj3bRt2vJghaStiL7jK9Nr70KvEXS6Ih4KSLu7OY9Ku2xHA4cFBFr8n44Vj4OJFYGE4Fj0yGbtekw1X7A9hHxMvAvwCnAKkm/kvS2vBVLOkLSnZKeTfVOA0anl8eT/ULvbDzwbEQ8V+f7Wd6pD/tKuk3SGknPk72XnvrQ2UTgzE6f0XiyvZAlwOnAucBqSddJ2qGLeq4BjpG0OXAMcHdEPJFeOwl4K/CgpLskHdVNf0aSBbcvR8TzOfpvJeZAYn1R5yGplwNXRcTIiseIiPgKQETMi4jDyA5rPQhc0kU9r5O+LH8KXAi8OSJGAjeSHebqaHenKkWXA9tKGlnltZeB4RVt/EOO93cNMBcYHxHbkOV2eupDtT5d0OkzGh4R1wJExDURsR9ZwAngq9UqiYgHgCfI9rjen/rW8dojEXE8sF0q/xNJI7roz3PAUcDlkt6Zo/9WYg4k1hc9DexYMX818N8lvVvSYEnD0rUK4yS9OSXER5AdynmJ7FBXRz3jJA3top2hZId81gDrJR1Bdiimw6XAhyUdImmQpLGS3hYRq4CbgO9KGiVpM0n7pzILgLdL2kPSMLK9gJ5sRbaH8zdJ+5B9gXeYDRwq6X2Shkh6k6Q9qtRxCXBK2ruRpBEpib+VpF0kHZwC59+Av1Z8RtVcA3wK2J8sRwKApBMkjYmIjUDHyQtd1hMRtwNtwPXpfVk/5UBifdGXgXPSIZrPRMRyYDrwb2Rf+suBz5Jtv4OATwNPAs+S5RY+luq5FVgEPCXpmc6NRMSLwCeBOWS/oN9PtmfQ8fqfgQ8D/0GWIP8d2S9
6gA+Q5QweJEtyn57KPAycB/wGeATIcx3Lx4HzJL1IlpyeU9GHZWSH285M7+9e4B+rvJd24GTgO+m9LCFL/EMWLL8CPAM8RbZHcXY3/enI0dwaEZWf21RgkaSXgG8Dx0XEX7t7Yymf9RGys/D26m5dKy9F+MZWZmZWP++RmJlZIQ4kZmZWiAOJmZkV4kBiZmaFDGl2BzaF0aNHR0tLS7O7YWZWKvPnz38mIsb0tN6ACCQtLS20t7c3uxtmZqUi6Yme1/KhLTMzK8iBxMzMCnEgMTOzQhxIzMysEAcSMzMrxIHErC+aPRtaWmDQoOx59uyB1b6VyoA4/desVGbPhhkzYN26bP6JJ7J5gLa2/t++lc6AGP23tbU1fB2JlUZLS/bl3dnEibB0af9v3/oMSfMjorWn9Xxoy6yvWbastuX9rX0rHQcSs75mwoTalve39q10HEisOidbm+eCC2D48NcvGz48Wz4Q2rfScSCxN+pItj7xBES8lmx1MNk02tpg1qwsJyFlz7NmbbpEd7Pbt9Jxst3eyMlWM8PJdivCyVYzq4EDib2Rk61mVgMHEnsjJ1ut2XyyR6k4kNgbOdlqzeSTPUrHyXYz61t8skef4WS7mZWTT/YoHQcSM+tbfLJH6TiQmDVCs5PFzW6/CJ/sUToeRt6stzV7GPZmt19URx9nzswOZ02YkAWRMvR9gHKy3ay3NTtZ3Oz2rd9wst2sWZqdLG52+zbgOJCY9bZmJ4ub3b4NOA4k1jc5WVze9m3AcSCxvqfsVzY3e2SAZrdvA46T7db3OFls1ic42W7l5WSxWak4kFjf42SxWak0NJBImirpIUlLJJ1V5fWJkn4raaGk2yWNq3jtQ5IeSY8PVSzfW9J9qc7/lKRGvgdrAieLzUqlYYFE0mDgIuAIYDJwvKTJnVa7EPhhROwOnAd8OZXdFvgCsC+wD/AFSaNSmYuBk4Gd02Nqo96DNYmTxWal0sg9kn2AJRHxWES8AlwHTO+0zmTg1jR9W8Xr7wZuiYhnI+I54BZgqqTtga0j4s7IzhL4IfCeBr4Ha5a2tiyxvnFj9uwgYtZnNTKQjAWWV8yvSMsqLQCOSdPvBbaS9KZuyo5N093VCYCkGZLaJbWvWbOm7jdhZmbda3ay/TPAAZLuAQ4AVgIbeqPiiJgVEa0R0TpmzJjeqNLMzKpoZCBZCYyvmB+Xlv1/EfFkRBwTEXsCM9Oytd2UXZmmu6zT+ogyX5luZjVpZCC5C9hZ0iRJQ4HjgLmVK0gaLamjD2cDl6XpecDhkkalJPvhwLyIWAW8IGlKOlvrg8AvGvgerB5lvzLdzGrSsEASEeuBU8mCwmJgTkQsknSepKPTagcCD0l6GHgzcEEq+yxwPlkwugs4Ly0D+DjwA2AJ8ChwU6Peg9Vp5szX7oXRYd26bLmZ9TseIsV636BB2Z5IZ1J2FpaZlYKHSLHm8ZXpZgOKA4n1vv5wZbpPFjDLzYHEel/Zr0z3yQJmNXGOxKwzD2NvBjhHYlY/D2NvVhMHErPOfLKAWU0cSKx/KpIs7w8nC1i5lexkjyHN7oBZr+tIlndcFNmRLId8Cf+OdWbOzA5nTZiQBZGynCxg5VZ0+20CJ9ut/3Gy3MqsD22/TrbbwOVkuZVZCbdfBxLrf5wstzIr4fbrQGL9j5PlzdfsZHHR9pvZ/zJuvxHR7x9777132ABz9dUREydGSNnz1Vc3u0cDx9VXRwwfHpGNC5A9hg/fdH+Dou03u/8dfegD2y/QHjm+Y51sN7Pe1exkcdH2m93/PsTJdjNrjmYni4u23+z+l5ADiZn1rmYni4u23+z+l5ADiZn1rmYni4u23+z+l5ADiZn1rmbfRqBo+83ufwk52W5mZlU52W5mZpuEA4mZmRXiQGJmZoU4kJiZWSEOJGZmVogDiZmZFeJAYmZmhTiQmJlZIQ4kZmZWSEMDiaSpkh6StETSWVVenyDpNkn3SFooaVpaPlTS5ZLuk7RA0oEVZW5Pdd6
bHts18j2YmVn3hjSqYkmDgYuAw4AVwF2S5kbEAxWrnQPMiYiLJU0GbgRagJMBImK3FChukvRPEbExlWuLCI95YmbWBzRyj2QfYElEPBYRrwDXAdM7rRPA1ml6G+DJND0ZuBUgIlYDa4Eex3sxMzM2+a2CGxlIxgLLK+ZXpGWVzgVOkLSCbG/ktLR8AXC0pCGSJgF7A+Mryl2eDmv9uyRVa1zSDEntktrXrFnTC2/HzKwEZs+GGTOyuzxGZM8zZjQ0mNQUSCSNkrR7L7Z/PHBFRIwDpgFXSRoEXEYWeNqBbwH/BWxIZdoiYjfgXenxgWoVR8SsiGiNiNYxY8b0YpfNzPqwmTNh3brXL1u3LlveID0GkpTc3lrStsDdwCWSvpmj7pW8fi9iXFpW6SRgDkBE3AEMA0ZHxPqIOCMi9oiI6cBI4OG03sr0/CJwDdkhNDMzg6bcKjjPHsk2EfECcAzww4jYFzg0R7m7gJ0lTZI0FDgOmNtpnWXAIQCSdiULJGskDZc0Ii0/DFgfEQ+kQ12j0/LNgKOA+3P0xcxsYGjCrYLzBJIhkrYH3gfckLfiiFgPnArMAxaTnZ21SNJ5ko5Oq50JnCxpAXAtcGJkd9raDrhb0mLgc7x2+GpzYJ6khcC9ZHs4l+Ttk5lZv9eEWwXnOf33PLJg8MeIuEvSjsAjeSqPiBvJkuiVyz5fMf0A8M4q5ZYCu1RZ/jJZ4t3MzKrpuCXwzJnZ4awJE7Ig0sBbBftWu2ZmVlWv3WpX0lsl/VbS/Wl+d0nn9EYnzcys/PLkSC4BzgZeBYiIhWSJczMzs1yBZHhE/LnTsvWN6IyZWb+wia8sb7Y8yfZnJO1ENpwJkv4nsKqhvTIzK6uOK8s7LgrsuLIcGprwbqY8eySfAL4PvE3SSuB04GMN7ZWZWVk14cryZutxjyQiHgMOTRcIDkpXlJuZWTVNuLK82boMJJJOiIirJX2603IAIiLPMClmZgPLhAnZ4axqy/up7g5tjUjPW3XxMDOzzppwZXmzdblHEhHfT89f3HTdMTMruSZcWd5seS5IvFLSyIr5UZIua2y3zMxKrK0Nli6FjRuz534cRCDfWVu7R8TajpmIeA7Ys3FdMjOzMskTSAZJGtUxk+5L0rB7vZuZWbnkCSTfAO6QdL6kL5HdrfBrje2WFTbArqw1s+bJcx3JDyXNBw5Ki45Jw79bXzUAr6w1s+bJPYy8pO3I7mAIQESU5uqaATeMfEtL9fPYJ07MEn9mZjn05jDyR0t6BHgc+B2wFLipcA+tcQbglbVm1jx5ciTnA1OAhyNiEtk91u9saK+smCbcs9nMBq48geTViPgL2dlbgyLiNqDHXR1rogF4Za2ZNU+e03jXStoS+D0wW9Jq4OXGdssKGYBX1ppZ8/SYbE+j/v6VbO+lDdgGmJ32UkphwCXbzcx6Qd5ke7d7JJIGAzdExEHARuDKXuqfmZn1E93mSCJiA7BR0jabqD9mZlYyeZLtLwH3SbpU0n92PBrdsQHPV6abWUnkSbZfnx62qfjKdDMrkdxXtpdZ6ZLtvjLdzPqAXkm2p4oeB94QbSJixzr7Zj3xlelmViJ5Dm1VRqNhwLHAto3pjgED8p7PZlZePSbbI+IvFY+VEfEt4Mg8lUuaKukhSUsknVXl9QmSbpN0j6SFkqal5UMlXS7pPkkLJB1YUWbvtHxJSvwr/9stCV+ZbmYlkmfQxr0qHq2STiHfIbHBwEXAEcBk4HhJkzutdg4wJyL2BI4DvpuWnwwQEbsBhwHfkNTR14vT6zunx9Se+lI6bW0wa1aWE5Gy51mznGg3sz4pz6Gtb1RMrycbBfh9OcrtAyyJiMcAJF0HTAcq72USwNZpehvgyTQ9GbgVICJWS1oLtEpaDmwdEXemOn8IvIf+OBpxW5sDh5mVQp4bWx3U0zpdGAssr5hfAezbaZ1zgZslnQaMAA5NyxcAR0u6FhgP7J2eN6Z6KuscW61xSTOAGQATnFswM2uYPIe
2/rekkRXzo9Itd3vD8cAVETEOmAZclQ5hXUYWJNqBb5Hd3ndDLRVHxKyIaI2I1jFjxvRSd83MrLM8V7YfERFrO2Yi4jmyL/2erCTbi+gwLi2rdBIwJ9V7B9lZYaMjYn1EnBERe0TEdGAk8HAqP66HOs3MbBPKE0gGS9q8Y0bSFsDm3azf4S5gZ0mTJA0lS6bP7bTOMrIbZSFpV7JAskbS8DTqMJIOA9ZHxAMRsQp4QdKUdLbWB4Ff5OiLmZk1SJ5k+2zgt5IuT/MfJscowBGxXtKpwDxgMHBZRCySdB7QHhFzgTOBSySdQZZ4PzEiIt0ffp6kjWR7HB+oqPrjwBXAFmRJ9v6XaDczK5FcQ6RImsprifBbImJeQ3vVy0o3RIqZWR/Qm0OkTAJuj4hfp/ktJLVExNLi3TQzs7LLkyP5Mdlptx02pGXWHQ8Db2YDRJ4cyZCIeKVjJiJeSclz64qHgTezASTPHskaSUd3zEiaDjzTuC71AzNnvhZEOqxbly03M+tn8uyRnALMlvQdQGRXq3+wob0qOw8Db2YDSJ4hUh4FpkjaMs2/1PBelZ2HgTezASTPHgmSjgTeDgzrGLU9Is5rYL/K7YILXp8jAQ8Db2b9Vp6xtr4H/AtwGtmhrWOBiQ3uV7l5GHgzG0B6vCBR0sKI2L3ieUvgpoh416bpYnG+INHMrHZ5L0jMc9bWX9PzOkk7AK8C2xfpnJmZ9R95ciQ3pGHkvw7cTTYm1iUN7ZWZmZVGnrO2zk+TP5V0AzAsIp5vbLfMzKwscp211SEi/g78vUF9MTOzEsqTIzEzM+uSA4mZmRWS5zqS3+ZZZmZmA1OXORJJw4DhwGhJo8guRgTYGhi7CfpmZmYl0F2y/aPA6cAOwHxeCyQvAN9pcL/MzKwkugwkEfFt4NuSTouI/7MJ+2RmZiWSJ9n+lKStACSdI+l6SXs1uF9mZlYSeQLJv0fEi5L2Aw4FLgUubmy3zMysLPIEkg3p+UhgVkT8CvCtds3MDMgXSFZK+j7ZUPI3Sto8ZzkzMxsA8gSE9wHzgHdHxFpgW+CzDe2VmZmVRo+BJCLWAauB/dKi9cAjjeyUmZmVR54r278AfA44Oy3aDLi6kZ0yM7PyyHNo673A0cDLABHxJLBVIztlZmblkSeQvBLZ/XgDQNKIxnbJzMzKJE8gmZPO2hop6WTgN8AP8lQuaaqkhyQtkXRWldcnSLpN0j2SFkqalpZvJulKSfdJWizp7IoyS9PyeyX5RuxmZk2W5w6JF0o6jGyMrV2Az0fELT2VkzQYuAg4DFgB3CVpbkQ8ULHaOcCciLhY0mTgRqAFOBbYPCJ2kzQceEDStRGxNJU7KCKeyf0uzcysYXoMJJK+GhGfA26psqw7+wBLIuKxVOY6YDpQGUiCbDRhgG2AJyuWj5A0BNgCeIUskJmZWR+T59DWYVWWHZGj3FhgecX8Ct44/Py5wAmSVpDtjZyWlv+ELLm/ClgGXBgRz6bXArhZ0nxJM7pqXNIMSe2S2tesWZOju2ZmVo8uA4mkj0m6D9gl5S86Ho8DC3up/eOBKyJiHDANuErSILK9mQ1kQ9hPAs6UtGMqs19E7EUWzD4haf9qFUfErIhojYjWMWPG9FJ3zcyss+4ObV0D3AR8GahMlL9YsXfQnZXA+Ir5cWlZpZOAqQARcUe6mdZo4P3AryPiVWC1pD8CrcBjEbEyrb9a0s/Igs7vc/THzMwaoMs9koh4PiKWRsTxEfFExSNPEAG4C9hZ0iRJQ4HjgLmd1lkGHAIgaVdgGLAmLT84LR8BTAEelDSiYkj7EcDhwP1536yZmfW+HpPt9YqI9ZJOJRunazBwWUQsknQe0B4Rc4EzgUsknUGW+zgxIkLSRcDlkhaR3Znx8ohYmA5v/UxSR9+viYhfN+o9mJlZz5Rda9i/tba2Rnu7LzkxM6uFpPkR0drTeh4O3szMCnEgMTOzQhxIzMysEAcSMzMrxIHEzMwKcSAxM7NCHEjMzKw
QBxIzMyvEgcTMzApxIDEzs0IcSMzMrBAHEjMzK8SBxMzMCnEgMTOzQhxIzMysEAcSMzMrxIHEzMwKcSAxM7NCHEjMzKwQBxIzMyvEgcTMzApxIDEzs0IcSMzMrBAHEjMzK8SBxMzMCnEgMTOzQhxIzMysEAcSMzMrpKGBRNJUSQ9JWiLprCqvT5B0m6R7JC2UNC0t30zSlZLuk7RY0tl56zQzs02rYYFE0mDgIuAIYDJwvKTJnVY7B5gTEXsCxwHfTcuPBTaPiN2AvYGPSmrJWaeZmW1Cjdwj2QdYEhGPRcQrwHXA9E7rBLB1mt4GeLJi+QhJQ4AtgFeAF3LWaWZmm1AjA8lYYHnF/Iq0rNK5wAmSVgA3Aqel5T8BXgZWAcuACyPi2Zx1AiBphqR2Se1r1qwp+FbMzKwrzU62Hw9cERHjgGnAVZIGke15bAB2ACYBZ0rasZaKI2JWRLRGROuYMWN6u99mZpYMaWDdK4HxFfPj0rJKJwFTASLiDknDgNHA+4FfR8SrwGpJfwRayfZGeqrTzMw2oUbukdwF7CxpkqShZMn0uZ3WWQYcAiBpV2AYsCYtPzgtHwFMAR7MWaeZmW1CDQskEbEeOBWYBywmOztrkaTzJB2dVjsTOFnSAuBa4MSICLIzs7aUtIgseFweEQu7qrNR78HMzHqm7Hu7f2ttbY329vZmd8PMrFQkzY+I1p7Wa3ay3czMSs6BxMzMCnEgMTOzQhxIzMysEAcSMzMrxIHEzMwKcSAxM7NCHEjMzKwQBxIzMyvEgcTMzApxIDEzs0IcSMzMrBAHEjMzK8SBxMzMCnEgMTOzQhxIzMysEAeSrsyeDS0tMGhQ9jx7drN7ZGbWJw1pdgf6pNmzYcYMWLcum3/iiWweoK2tef0yM+uDvEdSzcyZrwWRDuvWZcvNzOx1HEiqWbastuVmZgOYA0k1EybUttzMbABzIKnmggtg+PDXLxs+PFtuZmav40BSTVsbzJoFEyeClD3PmuVEu5lZFT5rqyttbQ4cZmY5eI/EzMwKcSAxM7NCHEjMzKwQBxIzMyvEgcTMzApRRDS7Dw0naQ3wRJ3FRwPPFGje5V3e5V2+rOUnRsSYHteKCD+6eQDtLu/yLu/yA7F83ocPbZmZWSEOJGZmVogDSc9mubzLu7zLD9DyuQyIZLuZmTWO90jMzKwQBxIzMyvEgaQLki6TtFrS/XWUHS/pNkkPSFok6VN11DFM0p8lLUh1fLGOOgZLukfSDbWWTeWXSrpP0r2S2usoP1LSTyQ9KGmxpH+uoewuqd2OxwuSTq+x/TPSZ3e/pGslDaux/KdS2UV52q62zUjaVtItkh5Jz6NqLH9san+jpNY62v96+vwXSvqZpJE1lj8/lb1X0s2SdqilfMVrZ0oKSaNrbP9cSSsrtoNptbYv6bT0GSyS9LUa2/9RRdtLJd1bY/k9JN3Z8T8kaZ8ay/+jpDvS/+EvJW3dTfmq3zu1bIN12xTnGJfxAewP7AXcX0fZ7YG90vRWwMPA5BrrELBlmt4M+BMwpcY6Pg1cA9xQ52ewFBhd4DO8EvhfaXooMLLOegYDT5FdHJW3zFjgcWCLND8HOLGG8u8A7geGk91u4TfAW2rdZoCvAWel6bOAr9ZYfldgF+B2oLWO9g8HhqTpr9bR/tYV058EvldL+bR8PDCP7KLgLrenLto/F/hMzr9ZtfIHpb/d5ml+u1r7X/H6N4DP19j+zcARaXoacHuN5e8CDkjTHwHO76Z81e+dWrbBeh/eI+lCRPweeLbOsqsi4u40/SKwmOyLrZY6IiJeSrObpUfuMyMkjQOOBH5QS7u9RdI2ZP8YlwJExCsRsbbO6g4BHo2IWkcnGAJsIWkIWUB4soayuwJ/ioh1EbEe+B1wTHcFuthmppMFVNLze2opHxGLI+KhPB3uovzNqf8AdwLjaiz/QsXsCLrZBrv5n/kP4F+7K9tD+Vy6KP8x4CsR8fe0zup62pck4H3AtTWWD6BjL2I
butkGuyj/VuD3afoW4H90U76r753c22C9HEgaTFILsCfZHkWtZQenXenVwC0RUUsd3yL7591Ya7sVArhZ0nxJM2osOwlYA1yeDq/9QNKIOvtxHN38A1cTESuBC4FlwCrg+Yi4uYYq7gfeJelNkoaT/ZocX0sfkjdHxKo0/RTw5jrq6C0fAW6qtZCkCyQtB9qAz9dYdjqwMiIW1NpuhVPT4bXL6jgs81ayv+OfJP1O0j/V2Yd3AU9HxCM1ljsd+Hr6/C4Ezq6x/CKyQABwLDm3wU7fOw3fBh1IGkjSlsBPgdM7/bLLJSI2RMQeZL8i95H0jpztHgWsjoj5tbbZyX4RsRdwBPAJSfvXUHYI2W76xRGxJ/Ay2W51TSQNBY4GflxjuVFk/4CTgB2AEZJOyFs+IhaTHQq6Gfg1cC+woZY+VKkzqGGvsjdJmgmsB2bXWjYiZkbE+FT21BraHA78GzUGn04uBnYC9iD7QfCNGssPAbYFpgCfBeakvYtaHU+NP2aSjwFnpM/vDNIeeg0+Anxc0nyyw1Wv9FSgu++dRm2DDiQNImkzsj/m7Ii4vkhd6ZDQbcDUnEXeCRwtaSlwHXCwpKvraHdlel4N/AzoMlFYxQpgRcVe1E/IAkutjgDujoinayx3KPB4RKyJiFeB64H/VksFEXFpROwdEfsDz5Edc67V05K2B0jPXR5aaRRJJwJHAW3pi6Res+nm0EoVO5EF8gVpWxwH3C3pH/JWEBFPpx9UG4FLqG0bhGw7vD4dKv4z2R56lwn/atKh0WOAH9XYNsCHyLY9yH4M1dT/iHgwIg6PiL3JAtmjPfS12vdOw7dBB5IGSL94LgUWR8Q366xjTMcZNpK2AA4DHsxTNiLOjohxEdFCdljo1ojI/Ws8tTlC0lYd02RJ29xnsEXEU8BySbukRYcAD9TSh6TeX4LLgCmShqe/xyFkx4xzk7Rdep5A9kVyTR39mEv2ZUJ6/kUdddRN0lSyQ5xHR8S6OsrvXDE7nZzbIEBE3BcR20VES9oWV5Alg5+qof3tK2bfSw3bYPJzsoQ7kt5KdtJHraPhHgo8GBEraiwHWU7kgDR9MFDTobGKbXAQcA7wvW7W7ep7p/HbYG9n7/vnokpOAAABr0lEQVTLg+zLaxXwKtk/wEk1lN2PbPdxIdkhkXuBaTW2vztwT6rjfro5W6SHeg6kjrO2gB2BBemxCJhZRx17AO3pPfwcGFVj+RHAX4Bt6nzvXyT74rsfuIp05k4N5f8vWfBbABxSzzYDvAn4LdkXyG+AbWss/940/XfgaWBejeWXAMsrtsPuzrqqVv6n6fNbCPwSGFvv/ww9nAXYRftXAfel9ucC29dYfihwdXoPdwMH19p/4ArglDr//vsB89M29Cdg7xrLf4psT/hh4Cuk0Ui6KF/1e6eWbbDeh4dIMTOzQnxoy8zMCnEgMTOzQhxIzMysEAcSMzMrxIHEzMwKcSAxawJJLdVGyTUrIwcSMzMrxIHErMkk7ZgGtqx3QEGzphrS7A6YDWRpCJnryO6VUmSEXLOmcSAxa54xZOMeHRMR9YxDZtYn+NCWWfM8Tza45H7N7ohZEd4jMWueV8gGZZwn6aWIqGd0YbOmcyAxa6KIeDndiOyWFEzmNrtPZrXy6L9mZlaIcyRmZlaIA4mZmRXiQGJmZoU4kJiZWSEOJGZmVogDiZmZFeJAYmZmhfw/+4bDncW4L4sAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#plot the graph\n", + "plt.plot(k_values, test_acc,'ro')\n", + "plt.xticks(range(1,21))\n", + "plt.xlabel('k')\n", + "plt.ylabel('test accuracies')\n", + "plt.title('test accuracies vs k')" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "best test accuracy 0.9033333333333333\n", + "best k: 11\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "max_ind = np.argmax(test_acc)\n", + "print ('best test accuracy',str(test_acc[max_ind]))\n", + "print( 'best k:', str(k_values[max_ind]))# this might change on changing the random_state in train_test_split\n", + "#the model overfits for low k values like k = 1, and underfits for high values like k = 20" + ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [] } @@ -380,7 +691,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.3" + "version": "3.6.3" }, "name": "Linear_Regression_and_K_Nearest_Neighbors_Exercises-ANSWERS", "notebookId": 2125319687183902