-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun_model.py
110 lines (78 loc) · 3.48 KB
/
run_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env python3
"""Reproduce your results using your saved model.
This is a script that helps reproduce your prediction results using your saved
model. This script is unfinished and you need to fill in to make this script
work. If you are using R, please use the R script template instead.
The script needs to work by typing the following commandline (file names can be
different):
python3 run_model.py -i unlabelled_sample.txt -m model.pkl -o output.txt
python run_model.py -i data/Validation_call.txt -m models/model.pkl -o output.txt
"""
# author: Chao (Cico) Zhang
# date: 31 Mar 2017
import argparse
import sys
# Start your coding
# import the library you need here
import pickle
import pandas as pd
import csv
# End your coding
def main():
    """Predict the subgroup of every sample in the input file with a saved model.

    Command-line options (all required, enforced by argparse):
        -i / --input   Tab-separated table with a header row. It must contain
                       the columns 'Chromosome', 'Start', 'End' and 'Nclone';
                       every remaining column is treated as one sample and
                       every row as one feature.
        -m / --model   Pickled model object exposing ``predict``. Its
                       predictions are used as indices into the subgroup
                       names 'HER2+', 'HR+' and 'Triple Neg'.
        -o / --output  Destination of the tab-separated predictions: a quoted
                       ``"Sample"``/``"Subgroup"`` header followed by one
                       quoted row per sample.

    Exits with an error message when the input has fewer feature rows than
    the hard-coded feature selection requires.
    """
    parser = argparse.ArgumentParser(description='Reproduce the prediction')
    parser.add_argument('-i', '--input', required=True, dest='input_file',
                        metavar='unlabelled_sample.txt', type=str,
                        help='Path of the input file')
    parser.add_argument('-m', '--model', required=True, dest='model_file',
                        metavar='model.pkl', type=str,
                        help='Path of the model file')
    parser.add_argument('-o', '--output', required=True,
                        dest='output_file', metavar='output.txt', type=str,
                        help='Path of the output file')
    # All three options are required=True, so argparse itself aborts when one
    # is missing; no additional None checks are needed afterwards.
    args = parser.parse_args()

    # Subgroup label for each class index returned by the model.
    names = ['HER2+', 'HR+', 'Triple Neg']
    # Lasso reduction: positional indices of the feature rows the model uses.
    l_features = [
        177, 192, 230, 486, 576, 621, 623, 669, 765, 818,
        1009, 1079, 1191, 1206, 1243, 1352, 1558, 1559, 1656, 1838,
        1869, 1871, 1881, 1900, 1902, 1906, 1998, 2017, 2026, 2068,
        2077, 2078, 2180, 2184, 2188, 2210, 2218, 2501, 2750, 2828,
    ]

    # Load the table and turn it into one row per sample: drop the
    # annotation columns, transpose, and expose the sample names (the former
    # column headers) as a regular 'Sample' column.
    df_valid = pd.read_csv(args.input_file, sep='\t', header=0)
    df_valid = df_valid.drop(['Chromosome', 'Start', 'End', 'Nclone'], axis=1)
    df_valid = df_valid.transpose()
    df_valid = df_valid.reset_index()
    df_valid = df_valid.rename(columns={"index": "Sample"})
    print('nr of patients: ', len(df_valid))

    # Simple preprocessing: first column holds the sample names, the rest
    # are the features.
    samples = df_valid[df_valid.columns[0]]
    X_valid = df_valid[df_valid.columns[1:]]

    # Fail with a readable message instead of an IndexError when the input
    # does not contain every feature row the selection refers to.
    n_required = max(l_features) + 1
    if X_valid.shape[1] < n_required:
        sys.exit('Input has {} features but at least {} are required!'
                 .format(X_valid.shape[1], n_required))

    # Select the same features by position.
    X_valid = X_valid.iloc[:, l_features]

    # Remove negative values by shifting every call up by one (-1..2 becomes
    # 0..3). The replacements run from the largest value down so that a
    # value that was just shifted is never shifted a second time.
    for old_value, new_value in ((2, 3), (1, 2), (0, 1), (-1, 0)):
        X_valid = X_valid.replace(old_value, new_value)

    # NOTE: unpickling executes arbitrary code from the file; only load model
    # files that come from a trusted source.
    with open(args.model_file, 'rb') as model_handle:
        best_model = pickle.load(model_handle)
    y_pred = best_model.predict(X_valid)

    # Change to the desired format: the quotes are embedded in the header and
    # in the values by hand, so quoting is disabled when writing.
    df = pd.DataFrame(data={'"Sample"': samples, '"Subgroup"': y_pred})
    df['"Sample"'] = df['"Sample"'].apply(lambda s: '"' + s + '"')
    df['"Subgroup"'] = df['"Subgroup"'].apply(
        lambda label: '"' + names[label] + '"')
    df.to_csv(args.output_file, quoting=csv.QUOTE_NONE, sep='\t', index=False)
    print(df.head())
# Run the prediction only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()