-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhousePrizePreditiction.py
84 lines (65 loc) · 2.29 KB
/
housePrizePreditiction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#house prize prediction
#supervised learning is of two types
#1. classification : predicts a class eg yes/no
#2. regression : predicts a value eg rate
#steps
#step 1 : data preprocessing
#step 2 : data analysis
#step 3 : train test split
#step 4 : XGBoosRegressor
#step 5 : Evaluation
#importing the dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.datasets
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn import metrics
#importing the Boston House Price Dataset
house_price_data = pd.read_csv('/content/housing.csv')
#print first five rows of the data sheet
print(house_price_data)
#check null values in various columns
house_price_data.isnull().sum()
#finding mathematicals features of datasheet
house_price_data.describe()
#finding correlation in various attributes
#positive correlation : values are directly proportional
correlation = house_price_data.corr()
print(correlation)
#constructing a heat map
plt.figure(figsize=(5,5))
sns.heatmap(correlation, cbar=True, fmt='.1f', annot = True, annot_kws={'size':8}, cmap ='Blues' )
#light color is negative correlation
#dividing the dataset in two parts
X = house_price_data.drop(['MEDV'], axis=1)
Y = house_price_data['MEDV']
print(X)
print(Y)
#train test split data
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size=0.2, random_state=2, shuffle =True)
#model training
model = XGBRegressor()
model.fit(X_train, Y_train)
#model evaluation
training_data_prediction = model.predict(X_train)
print(training_data_prediction)
score1 = metrics.r2_score(Y_train, training_data_prediction)
print(score1)
test_data_prediction = model.predict(X_test)
score2 = metrics.r2_score(Y_test , test_data_prediction)
print(score2)
#scatter plot
plt.scatter(Y_train, training_data_prediction)
plt.xlabel('Actual Prizes')
plt.ylabel('Predicted Prizes')
plt.show()
#changing the input data to numpy array
input_data = (6.575,4.98,15.3)
input_data_as_numpy_array = np.asarray(input_data)
#reshape the np array as we are predicting for one instance
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)
prediction = model.predict(input_data_reshaped)
print(prediction)