diff --git a/student.ipynb b/student.ipynb
index d3bb34af..8854a316 100644
--- a/student.ipynb
+++ b/student.ipynb
@@ -16,17 +16,469 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Your code here - remember to use markdown cells for comments as well!\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def load_data(filepath):\n",
+ "    \"\"\"Load a CSV file into a pandas DataFrame.\n",
+ "\n",
+ "    `filepath` is handed to `pd.read_csv` unchanged; the parsed frame is returned.\n",
+ "    \"\"\"\n",
+ "    frame = pd.read_csv(filepath)\n",
+ "    return frame"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here - remember to use markdown cells for comments as well!"
+ "df = load_data(\"data/kc_house_data.csv\")"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " date | \n",
+ " price | \n",
+ " bedrooms | \n",
+ " bathrooms | \n",
+ " sqft_living | \n",
+ " sqft_lot | \n",
+ " floors | \n",
+ " waterfront | \n",
+ " view | \n",
+ " ... | \n",
+ " grade | \n",
+ " sqft_above | \n",
+ " sqft_basement | \n",
+ " yr_built | \n",
+ " yr_renovated | \n",
+ " zipcode | \n",
+ " lat | \n",
+ " long | \n",
+ " sqft_living15 | \n",
+ " sqft_lot15 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 7129300520 | \n",
+ " 10/13/2014 | \n",
+ " 221900.0 | \n",
+ " 3 | \n",
+ " 1.00 | \n",
+ " 1180 | \n",
+ " 5650 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NONE | \n",
+ " ... | \n",
+ " 7 Average | \n",
+ " 1180 | \n",
+ " 0.0 | \n",
+ " 1955 | \n",
+ " 0.0 | \n",
+ " 98178 | \n",
+ " 47.5112 | \n",
+ " -122.257 | \n",
+ " 1340 | \n",
+ " 5650 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 6414100192 | \n",
+ " 12/9/2014 | \n",
+ " 538000.0 | \n",
+ " 3 | \n",
+ " 2.25 | \n",
+ " 2570 | \n",
+ " 7242 | \n",
+ " 2.0 | \n",
+ " NO | \n",
+ " NONE | \n",
+ " ... | \n",
+ " 7 Average | \n",
+ " 2170 | \n",
+ " 400.0 | \n",
+ " 1951 | \n",
+ " 1991.0 | \n",
+ " 98125 | \n",
+ " 47.7210 | \n",
+ " -122.319 | \n",
+ " 1690 | \n",
+ " 7639 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 5631500400 | \n",
+ " 2/25/2015 | \n",
+ " 180000.0 | \n",
+ " 2 | \n",
+ " 1.00 | \n",
+ " 770 | \n",
+ " 10000 | \n",
+ " 1.0 | \n",
+ " NO | \n",
+ " NONE | \n",
+ " ... | \n",
+ " 6 Low Average | \n",
+ " 770 | \n",
+ " 0.0 | \n",
+ " 1933 | \n",
+ " NaN | \n",
+ " 98028 | \n",
+ " 47.7379 | \n",
+ " -122.233 | \n",
+ " 2720 | \n",
+ " 8062 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2487200875 | \n",
+ " 12/9/2014 | \n",
+ " 604000.0 | \n",
+ " 4 | \n",
+ " 3.00 | \n",
+ " 1960 | \n",
+ " 5000 | \n",
+ " 1.0 | \n",
+ " NO | \n",
+ " NONE | \n",
+ " ... | \n",
+ " 7 Average | \n",
+ " 1050 | \n",
+ " 910.0 | \n",
+ " 1965 | \n",
+ " 0.0 | \n",
+ " 98136 | \n",
+ " 47.5208 | \n",
+ " -122.393 | \n",
+ " 1360 | \n",
+ " 5000 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1954400510 | \n",
+ " 2/18/2015 | \n",
+ " 510000.0 | \n",
+ " 3 | \n",
+ " 2.00 | \n",
+ " 1680 | \n",
+ " 8080 | \n",
+ " 1.0 | \n",
+ " NO | \n",
+ " NONE | \n",
+ " ... | \n",
+ " 8 Good | \n",
+ " 1680 | \n",
+ " 0.0 | \n",
+ " 1987 | \n",
+ " 0.0 | \n",
+ " 98074 | \n",
+ " 47.6168 | \n",
+ " -122.045 | \n",
+ " 1800 | \n",
+ " 7503 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 21 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id date price bedrooms bathrooms sqft_living \\\n",
+ "0 7129300520 10/13/2014 221900.0 3 1.00 1180 \n",
+ "1 6414100192 12/9/2014 538000.0 3 2.25 2570 \n",
+ "2 5631500400 2/25/2015 180000.0 2 1.00 770 \n",
+ "3 2487200875 12/9/2014 604000.0 4 3.00 1960 \n",
+ "4 1954400510 2/18/2015 510000.0 3 2.00 1680 \n",
+ "\n",
+ " sqft_lot floors waterfront view ... grade sqft_above \\\n",
+ "0 5650 1.0 NaN NONE ... 7 Average 1180 \n",
+ "1 7242 2.0 NO NONE ... 7 Average 2170 \n",
+ "2 10000 1.0 NO NONE ... 6 Low Average 770 \n",
+ "3 5000 1.0 NO NONE ... 7 Average 1050 \n",
+ "4 8080 1.0 NO NONE ... 8 Good 1680 \n",
+ "\n",
+ " sqft_basement yr_built yr_renovated zipcode lat long \\\n",
+ "0 0.0 1955 0.0 98178 47.5112 -122.257 \n",
+ "1 400.0 1951 1991.0 98125 47.7210 -122.319 \n",
+ "2 0.0 1933 NaN 98028 47.7379 -122.233 \n",
+ "3 910.0 1965 0.0 98136 47.5208 -122.393 \n",
+ "4 0.0 1987 0.0 98074 47.6168 -122.045 \n",
+ "\n",
+ " sqft_living15 sqft_lot15 \n",
+ "0 1340 5650 \n",
+ "1 1690 7639 \n",
+ "2 2720 8062 \n",
+ "3 1360 5000 \n",
+ "4 1800 7503 \n",
+ "\n",
+ "[5 rows x 21 columns]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 21597 entries, 0 to 21596\n",
+ "Data columns (total 21 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 id 21597 non-null int64 \n",
+ " 1 date 21597 non-null object \n",
+ " 2 price 21597 non-null float64\n",
+ " 3 bedrooms 21597 non-null int64 \n",
+ " 4 bathrooms 21597 non-null float64\n",
+ " 5 sqft_living 21597 non-null int64 \n",
+ " 6 sqft_lot 21597 non-null int64 \n",
+ " 7 floors 21597 non-null float64\n",
+ " 8 waterfront 19221 non-null object \n",
+ " 9 view 21534 non-null object \n",
+ " 10 condition 21597 non-null object \n",
+ " 11 grade 21597 non-null object \n",
+ " 12 sqft_above 21597 non-null int64 \n",
+ " 13 sqft_basement 21597 non-null object \n",
+ " 14 yr_built 21597 non-null int64 \n",
+ " 15 yr_renovated 17755 non-null float64\n",
+ " 16 zipcode 21597 non-null int64 \n",
+ " 17 lat 21597 non-null float64\n",
+ " 18 long 21597 non-null float64\n",
+ " 19 sqft_living15 21597 non-null int64 \n",
+ " 20 sqft_lot15 21597 non-null int64 \n",
+ "dtypes: float64(6), int64(9), object(6)\n",
+ "memory usage: 3.5+ MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " bedrooms | \n",
+ " bathrooms | \n",
+ " sqft_living | \n",
+ " sqft_lot | \n",
+ " floors | \n",
+ " sqft_above | \n",
+ " sqft_living15 | \n",
+ " sqft_lot15 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 21597.000000 | \n",
+ " 21597.000000 | \n",
+ " 21597.000000 | \n",
+ " 2.159700e+04 | \n",
+ " 21597.000000 | \n",
+ " 21597.000000 | \n",
+ " 21597.000000 | \n",
+ " 21597.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 3.373200 | \n",
+ " 2.115826 | \n",
+ " 2080.321850 | \n",
+ " 1.509941e+04 | \n",
+ " 1.494096 | \n",
+ " 1788.596842 | \n",
+ " 1986.620318 | \n",
+ " 12758.283512 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 0.926299 | \n",
+ " 0.768984 | \n",
+ " 918.106125 | \n",
+ " 4.141264e+04 | \n",
+ " 0.539683 | \n",
+ " 827.759761 | \n",
+ " 685.230472 | \n",
+ " 27274.441950 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 1.000000 | \n",
+ " 0.500000 | \n",
+ " 370.000000 | \n",
+ " 5.200000e+02 | \n",
+ " 1.000000 | \n",
+ " 370.000000 | \n",
+ " 399.000000 | \n",
+ " 651.000000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 3.000000 | \n",
+ " 1.750000 | \n",
+ " 1430.000000 | \n",
+ " 5.040000e+03 | \n",
+ " 1.000000 | \n",
+ " 1190.000000 | \n",
+ " 1490.000000 | \n",
+ " 5100.000000 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 3.000000 | \n",
+ " 2.250000 | \n",
+ " 1910.000000 | \n",
+ " 7.618000e+03 | \n",
+ " 1.500000 | \n",
+ " 1560.000000 | \n",
+ " 1840.000000 | \n",
+ " 7620.000000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 4.000000 | \n",
+ " 2.500000 | \n",
+ " 2550.000000 | \n",
+ " 1.068500e+04 | \n",
+ " 2.000000 | \n",
+ " 2210.000000 | \n",
+ " 2360.000000 | \n",
+ " 10083.000000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 33.000000 | \n",
+ " 8.000000 | \n",
+ " 13540.000000 | \n",
+ " 1.651359e+06 | \n",
+ " 3.500000 | \n",
+ " 9410.000000 | \n",
+ " 6210.000000 | \n",
+ " 871200.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " bedrooms bathrooms sqft_living sqft_lot floors \\\n",
+ "count 21597.000000 21597.000000 21597.000000 2.159700e+04 21597.000000 \n",
+ "mean 3.373200 2.115826 2080.321850 1.509941e+04 1.494096 \n",
+ "std 0.926299 0.768984 918.106125 4.141264e+04 0.539683 \n",
+ "min 1.000000 0.500000 370.000000 5.200000e+02 1.000000 \n",
+ "25% 3.000000 1.750000 1430.000000 5.040000e+03 1.000000 \n",
+ "50% 3.000000 2.250000 1910.000000 7.618000e+03 1.500000 \n",
+ "75% 4.000000 2.500000 2550.000000 1.068500e+04 2.000000 \n",
+ "max 33.000000 8.000000 13540.000000 1.651359e+06 3.500000 \n",
+ "\n",
+ " sqft_above sqft_living15 sqft_lot15 \n",
+ "count 21597.000000 21597.000000 21597.000000 \n",
+ "mean 1788.596842 1986.620318 12758.283512 \n",
+ "std 827.759761 685.230472 27274.441950 \n",
+ "min 370.000000 399.000000 651.000000 \n",
+ "25% 1190.000000 1490.000000 5100.000000 \n",
+ "50% 1560.000000 1840.000000 7620.000000 \n",
+ "75% 2210.000000 2360.000000 10083.000000 \n",
+ "max 9410.000000 6210.000000 871200.000000 "
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# sqft_basement is read in as object dtype (see df.info()), so a plain describe()\n",
+ "# silently leaves it out of the numeric summary. Coerce it on a derived frame first;\n",
+ "# entries that cannot be parsed as numbers become NaN and are excluded from the stats.\n",
+ "# df itself is not modified, so this cell stays safe to re-run.\n",
+ "(\n",
+ "    df[[\"bedrooms\", \"bathrooms\", \"sqft_living\", \"sqft_lot\", \"floors\",\n",
+ "        \"sqft_above\", \"sqft_basement\", \"sqft_living15\", \"sqft_lot15\"]]\n",
+ "    .assign(sqft_basement=lambda frame: pd.to_numeric(frame[\"sqft_basement\"], errors=\"coerce\"))\n",
+ "    .describe()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -40,9 +492,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.4"
+ "version": "3.11.7"
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}