-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWalletTest.py
More file actions
126 lines (107 loc) · 4.96 KB
/
WalletTest.py
File metadata and controls
126 lines (107 loc) · 4.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from __future__ import unicode_literals
import sys
import pandas as pd
import time
import numpy as np
global df1
from urllib.parse import urlencode
from urllib.request import urlopen
#This Python code requires Python 3 or higher.
#There is only one global variable df1, which contains y and its prediction.
#This code call an API that makes the prediction. The API takes a string and return a string.
#The input string to the API is a list of observations seperated by ;. The data points in the
#observations are seperated by commas.
#The model read a CSV file, and assumes the file to have all the 304 variables, and
#to have missing values represented as NA.
#The predicted value is saved in the following file walletPredictionOutput.csv
#To Run this code in your terminal type
# python3 WalletTest.py "fileNameWithPathAndExtension"
def wolfram_cloud_call(**args):
result = urlopen('http://www.wolframcloud.com/objects/5c737bf7-7339-4641-860d-5b89e89f677b', urlencode(args).encode('utf-8'))
return result.read()
def call(s):
textresult = wolfram_cloud_call(s=s)
return textresult
#Calculate RMSE: Sqrt of the mean of the square of the differences between the actual and predicted values
def rmse(predictions, targets):
return np.sqrt(((predictions - targets) ** 2).mean())
#Calculate the Mean Absolute Percentage Error
def mape(predictions, targets):
return 100* np.absolute( (targets- predictions)/targets).mean()
#Build a string to pass to
def buildTheInput(lNumb):
st = ''
for i in lNumb:
if pd.isnull(i):
t = 'Missing'
else:
t = int(i)
st = st + str(t) + ','
st = st[:-1]
return st
def prepareMultipleRowsForPrediction(df,startP,endP):
stLong = ''
for k in range(startP,endP):
tp = buildTheInput(df.values[k])
stLong = stLong+tp+';'
return stLong[:-1]
def changeString2ListofNumbers(st,startP):
t = st.split(',')
k = int(startP)
for i in t:
df1.at[k,'PredictedY'] = float(i)
k = k+1
return k
def main(argv):
global df1
infile = argv[0]
data = pd.read_csv(infile)
Var2Keep = ['x005', 'x006', 'x007', 'x008', 'x009', 'x010', 'x011', 'x012',
'x013', 'x014', 'x015', 'x016', 'x017', 'x018', 'x019', 'x020',
'x021', 'x022', 'x023', 'x024', 'x025', 'x026', 'x027', 'x028',
'x029', 'x030', 'x031', 'x032', 'x033', 'x034', 'x035', 'x036',
'x037', 'x038', 'x039', 'x040', 'x042', 'x043', 'x044', 'x045',
'x046', 'x047', 'x048', 'x049', 'x050', 'x051', 'x052', 'x053',
'x054', 'x055', 'x056', 'x059', 'x061', 'x062', 'x063', 'x064',
'x065', 'x066', 'x071', 'x072', 'x073', 'x074', 'x075', 'x076',
'x080', 'x081', 'x082', 'x088', 'x089', 'x097', 'x099', 'x104',
'x106', 'x107', 'x108', 'x110', 'x111', 'x112', 'x113', 'x114',
'x115', 'x116', 'x119', 'x120', 'x121', 'x126', 'x147', 'x168',
'x169', 'x170', 'x171', 'x172', 'x173', 'x174', 'x177', 'x178',
'x179', 'x181', 'x182', 'x183', 'x184', 'x185', 'x186', 'x187',
'x188', 'x189', 'x190', 'x191', 'x192', 'x193', 'x194', 'x195',
'x196', 'x198', 'x199', 'x200', 'x201', 'x209', 'x210', 'x211',
'x224', 'x225', 'x226', 'x227', 'x228', 'x229', 'x230', 'x231',
'x232', 'x233', 'x234', 'x236', 'x240', 'x244', 'x245', 'x246',
'x247', 'x248', 'x249', 'x250', 'x251', 'x254', 'x258', 'x260',
'x261', 'x262', 'x263', 'x264', 'x269', 'x270', 'x271', 'x272',
'x273', 'x274', 'x276', 'x277', 'x278', 'x279', 'x280', 'x281',
'x282', 'x283', 'x284', 'x285', 'x291', 'x292', 'x294', 'x296',
'x298', 'x299', 'x300', 'x301', 'x303']
#Create a dataframe with only the variables needed for prediction
#We assume that the file column names are the same as the one given
#for this test.
df = data[Var2Keep]
#As we want the result to be exported to a csv file,
#I have decided to create a dataframe that will have the actual and the predicted
#This will help me not only
df1 = pd.DataFrame({'y': data['y'], 'PredictedY': 0})
#Remove data from memory
#We want to call the API for every 100 observations
numberOfObsercations = df1.shape[0]
stepS = 1000
startT = time.time()
for j in range(0, numberOfObsercations, stepS):
s = prepareMultipleRowsForPrediction(df, j, j+stepS)
predictionVar = call(s)
predictionVar = predictionVar.decode('utf8')
predictionVar = predictionVar[2:len(predictionVar)-2]
l = changeString2ListofNumbers(predictionVar, j)
endT = time.time()
print('Total Time To Get Predictions :', endT - startT)
print('RMSE :',rmse(df1['PredictedY'],df1['y']))
print('MAPE :',mape(df1['PredictedY'],df1['y']))
#The prediction is in prediction.csv
export_csv = df1.to_csv ('walletPredictionOutput.csv', index = None, header=True)
if __name__ == "__main__":
main(sys.argv[1:])