-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlength_of_stay_utils.py
105 lines (81 loc) · 4.29 KB
/
length_of_stay_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from scipy.ndimage import imread
from revoscalepy import RxSqlServerData, rx_import, RxOdbcData, rx_write_object, rx_serialize_model, rx_read_object
from collections import OrderedDict
import numpy as np
def display_head(table_name, n_rows):
table_sql = RxSqlServerData(sql_query = "SELECT TOP({}}) * FROM {}}".format(n_rows, table_name), connection_string = connection_string)
table = rx_import(table_sql)
print(table)
def detect_table(table_name, connection_string):
detect_sql = RxSqlServerData(sql_query="IF EXISTS (select 1 from information_schema.tables where table_name = '{}') SELECT 1 AS ret FROM LengthOfStay ELSE SELECT 0 AS ret FROM LengthOfStay".format(table_name),
connection_string=connection_string)
does_exist = rx_import(detect_sql)
if does_exist.iloc[0,0] == 1: return True
else: return False
## this is not used
#def drop_view(view_name, connection_string):
# pyodbc_cnxn = pyodbc.connect(connection_string)
# pyodbc_cursor = pyodbc_cnxn.cursor()
# pyodbc_cursor.execute("IF OBJECT_ID ('{}', 'V') IS NOT NULL DROP VIEW {} ;".format(view_name, view_name))
# pyodbc_cursor.close()
# pyodbc_cnxn.commit()
# pyodbc_cnxn.close()
## this is not used anywhere in the template
#def alter_column(table, column, data_type, connection_string):
# pyodbc_cnxn = pyodbc.connect(connection_string)
# pyodbc_cursor = pyodbc_cnxn.cursor()
# pyodbc_cursor.execute("ALTER TABLE {} ALTER COLUMN {} {};".format(table, column, data_type))
# pyodbc_cursor.close()
# pyodbc_cnxn.commit()
# pyodbc_cnxn.close()
def get_num_rows(table, connection_string):
count_sql = RxSqlServerData(sql_query="SELECT COUNT(*) AS count FROM {};".format(table), connection_string=connection_string)
count = rx_import(count_sql)
count = count.iloc[0,0]
return count
def create_formula(response, features, to_remove=None):
if to_remove is None:
feats = [x for x in features if x not in [response]]
else:
feats = [x for x in features if x not in to_remove and x not in [response]]
formula = "{} ~ ".format(response) + " + ".join(feats)
return formula
def train_test_split(id, table, train_table, p, connection_string):
from revoscalepy import rx_set_temp_compute_context, RxInSqlServer, rx_data_step, RxLocalSeq
def transform(data,compute_context):
return data
data = RxSqlServerData(sql_query = "SELECT {} FROM {} WHERE ABS(CAST(CAST(HashBytes('MD5', CAST({} AS varchar(10))) AS VARBINARY(64)) AS BIGINT) % 100) < {};".format(id, table, id, p),
connection_string = connection_string)
train_table_dest = RxSqlServerData(table = 'Train_Id', connection_string = connection_string)
rx_data_step(input_data = data, output_file = train_table_dest, overwrite = True, transform_function = transform)
print("Created {}".format(train_table))
def write_rts_model(model, key, connection_string):
RTS_odbc = RxOdbcData(connection_string, table="RTS")
serialized_model = rx_serialize_model(model, realtime_scoring_only=True)
rx_write_object(RTS_odbc, key=key, value=serialized_model, serialize=False, compress=None, overwrite = True)
def insert_model(classifier, name, connection_string):
classifier_odbc = RxOdbcData(connection_string, table="Models")
rx_write_object(classifier_odbc, key=name, value=classifier, serialize=True, overwrite = True)
def retrieve_model(connection_string, name):
classifier_odbc = RxOdbcData(connection_string, table="Models")
classifier = rx_read_object(classifier_odbc, key=name, deserialize=True)
return classifier
def evaluate_model(observed, predicted, model):
mean_observed = np.mean(observed)
se = (observed - predicted)**2
ae = abs(observed - predicted)
sem = (observed - mean_observed)**2
aem = abs(observed - mean_observed)
mae = np.mean(ae)
rmse = np.sqrt(np.mean(se))
rae = sum(ae) / sum(aem)
rse = sum(se) / sum(sem)
rsq = 1 - rse
metrics = OrderedDict([ ("model_name", [model]),
("mean_absolute_error", [mae]),
("root_mean_squared_error", [rmse]),
("relative_absolute_error", [rae]),
("relative_squared_error", [rse]),
("coefficient_of_determination", [rsq]) ])
print(metrics)
return metrics