Predictor.py
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
import MLPGenerator as MLP
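# MLPGenerator (and KFoldValidation, imported further down) appear to be this project's own helper modules.
# Fixing the NumPy seed keeps the random train/test split reproducible from run to run.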
np.random.seed(1)
# Load the preprocessed data and normalize the features.
def dataSpecificPro(path):
    datafile = pd.read_csv(path)
    # Supervised learning label: city GDP.
    Y = datafile['GDP'].values
    # Input features: Area, GreenAreaPerPers, Polycentricity, PopulationInCore,
    # #Gov, #GovInCore, Population, Latitude, Longitude.
    # You can also drop some features here to test their importance with the final score function.
    X = datafile.drop(labels=['CityName', 'GDP'], axis=1).values

    # Z-score standardization: zero mean, unit variance per feature.
    def stdScl(F):
        F = (F - np.average(F)) / np.std(F)
        return F

    # Min-max scaling to [0, 1]; kept as an alternative, not used below.
    def FS(F):
        F = (F - min(F)) / (max(F) - min(F))
        return F

    # Scale each feature column (transpose so rows index features); Polycentricity (X[2]) is left unscaled.
    X = X.T
    X[0] = stdScl(X[0])
    X[1] = stdScl(X[1])
    X[3] = stdScl(X[3])
    X[4] = stdScl(X[4])
    X[5] = stdScl(X[5])
    X[6] = stdScl(X[6])
    # Latitude and longitude are simply divided by 180 to bring them onto a comparable scale.
    X[7] /= 180
    X[8] /= 180
    X = X.T
    return X, Y
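# A minimal sanity check of the preprocessing (a sketch; the expected shapes assume the CSV
# really has the nine feature columns listed above plus CityName and GDP):
#   X, Y = dataSpecificPro('./Data/Final229CitiesData.csv')
#   print(X.shape, Y.shape)   # roughly (n_cities, 9) and (n_cities,)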
# Score function: mean relative error between prediction and label.
# You can adjust the score function as you want.
def validation(model, xTest, yTest):
    hypo = model.predict(xTest)
    error = 0
    for i in range(len(xTest)):
        # Clamp the prediction to at least 10000 so very small outputs do not dominate the relative error.
        curHypo = max(10000, hypo[i])
        error += abs(curHypo - yTest[i]) / max(yTest[i], curHypo)
        # error += min(curHypo / yTest[i], yTest[i] / curHypo)
        print("GDP Predict Result:", float('%.1f' % hypo[i]), "GDP Real Label:", float('%.1f' % yTest[i]))
    error /= len(xTest)
    return error
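# The returned score is the mean relative error over the evaluated set:
#   error = (1 / N) * sum_i |clamp(hypo_i) - y_i| / max(y_i, clamp(hypo_i)),
# where clamp(hypo_i) = max(10000, hypo_i). Lower is better; 0 would be a perfect fit.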
X, Y = dataSpecificPro('./Data/Final229CitiesData.csv')
# Split off the test set at the start.
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.1)
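# test_size=0.1 holds out roughly 10% of the cities for the final test set
# (about 23 of the 229 cities, assuming the CSV has one row per city as its filename suggests).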
# 10-fold cross-validation loop to find appropriate C and gamma values.
import KFoldValidation as KF
dataset = KF.KFold(X_train,Y_train)
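# Assumed interface of the project's KFoldValidation module, inferred from the calls below:
# dataset.spilt(i) returns the training and validation portions for fold i (i = 1..10).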
averageError = 0
averageTrainError = 0
# SVR experiment: 10-fold cross-validation. Set RUN_SVR to True to run it.
RUN_SVR = False
if RUN_SVR:
    for i in range(1, 11):
        # Use the i-th of the 10 folds as the validation split.
        X_train, Y_train, X_validation, Y_validation = dataset.spilt(i)
        c_rbf = 90000
        c_poly = 1000
        gamma = np.power(1.3, -5.)
        rbf = svm.SVR(C=c_rbf, kernel='rbf', gamma=gamma, tol=1e-9)
        rbf.fit(X_train, Y_train)
        # poly = svm.SVR(C=c_poly, kernel='poly', gamma='auto', tol=1e-8)
        # poly.fit(X_train, Y_train)
        error = validation(rbf, X_validation, Y_validation)
        averageError += error
        trainError = validation(rbf, X_train, Y_train)
        averageTrainError += trainError
        print(error)
    print("averageTrainScore:", averageTrainError / 10)
    print("averageScore:", averageError / 10)

# Finally, fit an RBF-kernel SVR with the chosen hyperparameters and validate on the held-out test set.
# (Note: if the cross-validation loop above ran, X_train / Y_train now hold the last fold's split.)
gamma = np.power(1.3, -5.)
rbf = svm.SVR(C=90000, kernel='rbf', gamma=gamma, tol=1e-9)
rbf.fit(X_train, Y_train)
print('**Test Validation Start!**')
testRes = validation(rbf, X_test, Y_test)
print('Test result:', testRes)
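# For reference, gamma = np.power(1.3, -5.) is about 0.27. C and gamma were presumably tuned with the
# cross-validation loop above; epsilon is left at scikit-learn's SVR default (0.1).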
# MLP experiment: 10-fold cross-validation. Set RUN_MLP to False to skip it.
RUN_MLP = True
if RUN_MLP:
    # Reset the accumulators so the SVR and MLP scores are not mixed.
    averageError = 0
    averageTrainError = 0
    for i in range(1, 11):
        X_train, Y_train, X_validation, Y_validation = dataset.spilt(i)
        # Layer sizes passed to MLPGenerator.
        layer = [8, 30, 15, 1]
        # Quick sanity check: print the first training example of this fold.
        print(X_train[0], Y_train[0])
        mlp = MLP.MLPGenerator(layer, "regression", X_train, Y_train, 30000)
        mlp.trainNN()
        error = validation(mlp, X_validation, Y_validation)
        averageError += error
        trainError = validation(mlp, X_train, Y_train)
        averageTrainError += trainError
        print(trainError)
    print("averageTrainScore:", averageTrainError / 10)
    print("averageScore:", averageError / 10)
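# Note on the MLP setup (assumptions, since MLPGenerator is a project-local module):
# layer = [8, 30, 15, 1] presumably lists the layer sizes from input to the single regression output,
# "regression" selects the task/output type, and 30000 looks like the number of training iterations.
# Check MLPGenerator.py for the exact meaning of these arguments.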