diff --git a/practise/KNN+Practise.ipynb b/practise/KNN+Practise.ipynb
index 07a0698..3fd6c9c 100644
--- a/practise/KNN+Practise.ipynb
+++ b/practise/KNN+Practise.ipynb
@@ -19,13 +19,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-09T17:11:00.462641Z",
"start_time": "2017-03-09T12:11:00.457060-05:00"
- },
- "collapsed": true
+ }
},
"outputs": [],
"source": [
@@ -48,46 +47,188 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-09T17:11:00.913456Z",
"start_time": "2017-03-09T12:11:00.883452-05:00"
- },
- "collapsed": true
+ }
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Import the data using the file path\n",
- "filepath = os.sep.join(data_path + ['Orange_Telecom_Churn_Data.csv'])\n",
+ "#filepath = os.sep.join(data_path + ['Orange_Telecom_Churn_Data.csv'])\n",
+ "filepath = 'Orange_Telecom_Churn_Data.csv'\n",
"data = pd.read_csv(filepath)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-09T17:11:01.087485Z",
"start_time": "2017-03-09T12:11:01.075442-05:00"
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | state | \n",
+ " KS | \n",
+ "
\n",
+ " \n",
+ " | account_length | \n",
+ " 128 | \n",
+ "
\n",
+ " \n",
+ " | area_code | \n",
+ " 415 | \n",
+ "
\n",
+ " \n",
+ " | phone_number | \n",
+ " 382-4657 | \n",
+ "
\n",
+ " \n",
+ " | intl_plan | \n",
+ " no | \n",
+ "
\n",
+ " \n",
+ " | voice_mail_plan | \n",
+ " yes | \n",
+ "
\n",
+ " \n",
+ " | number_vmail_messages | \n",
+ " 25 | \n",
+ "
\n",
+ " \n",
+ " | total_day_minutes | \n",
+ " 265.1 | \n",
+ "
\n",
+ " \n",
+ " | total_day_calls | \n",
+ " 110 | \n",
+ "
\n",
+ " \n",
+ " | total_day_charge | \n",
+ " 45.07 | \n",
+ "
\n",
+ " \n",
+ " | total_eve_minutes | \n",
+ " 197.4 | \n",
+ "
\n",
+ " \n",
+ " | total_eve_calls | \n",
+ " 99 | \n",
+ "
\n",
+ " \n",
+ " | total_eve_charge | \n",
+ " 16.78 | \n",
+ "
\n",
+ " \n",
+ " | total_night_minutes | \n",
+ " 244.7 | \n",
+ "
\n",
+ " \n",
+ " | total_night_calls | \n",
+ " 91 | \n",
+ "
\n",
+ " \n",
+ " | total_night_charge | \n",
+ " 11.01 | \n",
+ "
\n",
+ " \n",
+ " | total_intl_minutes | \n",
+ " 10 | \n",
+ "
\n",
+ " \n",
+ " | total_intl_calls | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | total_intl_charge | \n",
+ " 2.7 | \n",
+ "
\n",
+ " \n",
+ " | number_customer_service_calls | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | churned | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0\n",
+ "state KS\n",
+ "account_length 128\n",
+ "area_code 415\n",
+ "phone_number 382-4657\n",
+ "intl_plan no\n",
+ "voice_mail_plan yes\n",
+ "number_vmail_messages 25\n",
+ "total_day_minutes 265.1\n",
+ "total_day_calls 110\n",
+ "total_day_charge 45.07\n",
+ "total_eve_minutes 197.4\n",
+ "total_eve_calls 99\n",
+ "total_eve_charge 16.78\n",
+ "total_night_minutes 244.7\n",
+ "total_night_calls 91\n",
+ "total_night_charge 11.01\n",
+ "total_intl_minutes 10\n",
+ "total_intl_calls 3\n",
+ "total_intl_charge 2.7\n",
+ "number_customer_service_calls 1\n",
+ "churned False"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"data.head(1).T"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-09T17:11:01.564122Z",
"start_time": "2017-03-09T12:11:01.557967-05:00"
- },
- "collapsed": true
+ }
},
"outputs": [],
"source": [
@@ -97,14 +238,31 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-09T17:11:02.585712Z",
"start_time": "2017-03-09T12:11:02.579981-05:00"
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['account_length', 'intl_plan', 'voice_mail_plan',\n",
+ " 'number_vmail_messages', 'total_day_minutes', 'total_day_calls',\n",
+ " 'total_day_charge', 'total_eve_minutes', 'total_eve_calls',\n",
+ " 'total_eve_charge', 'total_night_minutes', 'total_night_calls',\n",
+ " 'total_night_charge', 'total_intl_minutes', 'total_intl_calls',\n",
+ " 'total_intl_charge', 'number_customer_service_calls', 'churned'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"data.columns"
]
@@ -121,13 +279,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-09T17:11:04.545751Z",
"start_time": "2017-03-09T12:11:04.509105-05:00"
- },
- "collapsed": true
+ }
},
"outputs": [],
"source": [
@@ -141,13 +298,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-09T17:11:04.736451Z",
"start_time": "2017-03-09T12:11:04.718049-05:00"
- },
- "collapsed": true
+ }
},
"outputs": [],
"source": [
@@ -175,13 +331,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-09T17:11:50.280188Z",
"start_time": "2017-03-09T12:11:50.269326-05:00"
- },
- "collapsed": true
+ }
},
"outputs": [],
"source": [
@@ -199,13 +354,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-09T17:11:50.989446Z",
"start_time": "2017-03-09T12:11:50.498708-05:00"
- },
- "collapsed": true
+ }
},
"outputs": [],
"source": [
@@ -232,32 +386,39 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-09T17:11:50.997204Z",
"start_time": "2017-03-09T12:11:50.991392-05:00"
- },
- "collapsed": true
+ }
},
"outputs": [],
"source": [
"# Function to calculate the % of values that were correctly predicted\n",
"\n",
"def accuracy(real, predict):\n",
- " return sum(y_data == y_pred) / float(real.shape[0])"
+ " return sum(real == predict) / float(real.shape[0])"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-09T17:11:51.128466Z",
"start_time": "2017-03-09T12:11:51.115874-05:00"
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.9422\n"
+ ]
+ }
+ ],
"source": [
"print(accuracy(y_data, y_pred))"
]
@@ -276,31 +437,57 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-09T17:11:52.047123Z",
"start_time": "2017-03-09T12:11:51.538212-05:00"
- },
- "collapsed": true
+ }
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1.0\n"
+ ]
+ }
+ ],
"source": [
- "#Student writes code here"
+ "#Student writes code here\n",
+ "# q5 part 1: neighbours are weighted by the inverse of their distance\n",
+ "knn2 = KNeighborsClassifier(n_neighbors=3, weights = 'distance')\n",
+ "knn2 = knn2.fit(X_data, y_data)\n",
+ "y_pred2 = knn2.predict(X_data)\n",
+ "print(accuracy(y_data, y_pred2))\n",
+ "# We get accuracy 1.0 because we score on the training set: every point is its own nearest neighbour at distance 0, so with inverse-distance weights its own label decides the vote"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-09T17:11:52.755941Z",
"start_time": "2017-03-09T12:11:52.049816-05:00"
- },
- "collapsed": true
+ }
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.9456\n"
+ ]
+ }
+ ],
+ "source": [
+ "# q5 part 2: Manhattan distance (Minkowski metric with p=1)\n",
+ "knn3 = KNeighborsClassifier(n_neighbors=3, p =1)\n",
+ "knn3 = knn3.fit(X_data, y_data)\n",
+ "y_pred3 = knn3.predict(X_data)\n",
+ "print(accuracy(y_data, y_pred3))"
+ ]
},
{
"cell_type": "markdown",
@@ -314,51 +501,87 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-09T17:12:01.329053Z",
"start_time": "2017-03-09T12:11:52.759302-05:00"
- },
- "collapsed": true
+ }
},
"outputs": [],
"source": [
- "#Student writes code here"
+ "#Student writes code here\n",
+ "# q6 starts.\n",
+ "k_values = list(range(1, 21))  # k values from 1 to 20\n",
+ "accuracies = []  # list that will collect the accuracy for each k"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-09T17:12:01.829160Z",
"start_time": "2017-03-09T12:12:01.331021-05:00"
- },
- "collapsed": true
+ }
},
"outputs": [],
- "source": []
+ "source": [
+ "# iterate through all k values and store the training-set accuracy for each\n",
+ "accuracies = []  # start from an empty list so re-running this cell cannot append duplicates\n",
+ "for i in k_values:\n",
+ "    knnx = KNeighborsClassifier(n_neighbors=i).fit(X_data, y_data)\n",
+ "    y_predx = knnx.predict(X_data)\n",
+ "    acc = accuracy(y_data, y_predx)\n",
+ "    accuracies.append(acc)"
+ ]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-09T17:12:02.238935Z",
"start_time": "2017-03-09T12:12:01.831094-05:00"
- },
- "collapsed": true
+ }
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Text(0.5,1,'accuracies vs k')"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# plot training-set accuracy (y axis) against the number of neighbours k (x axis)\n",
+ "import matplotlib.pyplot as plt\n",
+ "%matplotlib inline\n",
+ "plt.plot(k_values, accuracies,'r+')\n",
+ "plt.xlabel('k')\n",
+ "plt.ylabel('accuracies')\n",
+ "plt.title('accuracies vs k')\n",
+ "# for k = 1, accuracy = 1: on the training set each point is its own nearest neighbour, so the model simply memorises (overfits) the data"
+ ]
},
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": true
- },
+ "metadata": {},
"outputs": [],
"source": []
}
@@ -380,7 +603,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.5.3"
+ "version": "3.6.3"
},
"name": "Linear_Regression_and_K_Nearest_Neighbors_Exercises-ANSWERS",
"notebookId": 2125319687183902