-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtrain_test_split.py
67 lines (61 loc) · 1.93 KB
/
train_test_split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from sklearn.model_selection import train_test_split
from Get_class import decode, encode
import os
import pickle
import numpy as np
def normalize(x):
    """Min-max scale ``x`` using its own global minimum and maximum.

    Args:
        x: Array-like object exposing ``.min()``/``.max()`` and supporting
            arithmetic (e.g. a NumPy array).

    Returns:
        ``(x - min) / (max - min)``. When every element of ``x`` is equal
        (the range is zero) an all-zero result is returned instead of NaN.
    """
    xmin, xmax = x.min(), x.max()
    span = xmax - xmin
    # A constant input would otherwise produce 0/0 = NaN. In that case every
    # (x - xmin) is already 0, so dividing by 1 yields zeros of the right
    # shape. The scalar check keeps non-scalar ranges on the original path.
    if np.ndim(span) == 0 and span == 0:
        span = 1
    return (x - xmin) / span
def get_label_code():
    """Load the label dictionary and return the encoded form of its keys.

    Reads the pickled mapping stored at ``dict/dict_e.pkl`` (relative to the
    current working directory), collects its keys into a list and passes
    them to ``encode`` with ``False`` as the second argument.

    Returns:
        Whatever ``encode`` returns for the list of keys (presumably one
        label code per key, in key order -- confirm against ``Get_class``).
    """
    dict_path = os.path.join("dict", "dict_e.pkl")
    # NOTE: pickle can execute arbitrary code while loading; this file must
    # come from a trusted source.
    # The context manager guarantees the handle is closed even if loading fails.
    with open(dict_path, "rb") as dict_file:
        label_dict = pickle.load(dict_file)
    return encode(list(label_dict.keys()), False)
def class_balance(x, y):
    """Randomly down-sample ``x``/``y`` so every class has the same count.

    The target count is the smallest per-column number of entries equal to 1
    in ``y``. For each class ``j`` the rows of ``y`` that are element-wise
    equal to ``get_label_code()[j]`` are the candidates, and exactly that many
    distinct rows are drawn uniformly at random from them.

    Args:
        x: Samples, indexable by integer row index and aligned with ``y``.
        y: 2-D label array of shape ``(n_samples, n_classes)`` (assumed to be
            one-hot style rows -- confirm against the caller).

    Returns:
        Tuple ``(x_b, y_b)`` of NumPy arrays holding the selected samples and
        their labels, interleaved round-robin by class.

    Raises:
        ValueError: If a class has fewer rows matching its label code than
            the target count (rejection sampling would never terminate in
            that situation).
    """
    from random import sample

    labels = np.asarray(y)
    n_classes = y.shape[1]

    # Count the entries equal to 1 in every column. Kept as float so the
    # printed minimum is formatted exactly as before (e.g. "3.0").
    counts = (labels == 1).sum(axis=0).astype(float)
    min_ = counts.min()

    # Report the first class that attains the minimum, via its one-hot code.
    rarest = np.zeros(n_classes)
    rarest[int(np.argmin(counts))] = 1
    print(f"{decode([rarest])[0]} is having minimum data of {min_}")

    classes = get_label_code()
    per_class = int(min_)

    # Draw the indices for each class directly instead of guessing random
    # rows until one happens to match; `used` keeps rows unique overall.
    used = set()
    picks = []
    for j in range(n_classes):
        code = np.asarray(classes[j])
        matching = np.flatnonzero((labels == code).all(axis=1))
        pool = [int(i) for i in matching if int(i) not in used]
        if len(pool) < per_class:
            raise ValueError(
                f"class index {j} has only {len(pool)} unused rows matching "
                f"its label code, fewer than the required {per_class}"
            )
        chosen = sample(pool, per_class)
        used.update(chosen)
        picks.append(chosen)

    # Interleave the classes (one sample of each class per pass) so the
    # output is not grouped by class.
    x_b = []
    y_b = []
    for k in range(per_class):
        for chosen in picks:
            x_b.append(x[chosen[k]])
            y_b.append(y[chosen[k]])

    return np.asarray(x_b), np.asarray(y_b)
def tts(x, y, test_size=0.2, val_size=0.2, random_state=None):
    """Split data into train/test/validation sets and normalize each part.

    The data is first split into train and test; the resulting train part is
    then split again into train and validation. ``val_size`` is therefore a
    fraction of the remaining training data, not of the full data set.

    Note that every returned array -- labels included -- is passed through
    ``normalize`` independently, so each part is scaled by its own min/max.

    Args:
        x: Samples.
        y: Labels aligned with ``x``.
        test_size: Forwarded as ``test_size`` to the first split.
        val_size: Forwarded as ``test_size`` to the second split.
        random_state: Forwarded to both splits; pass an int for a
            reproducible shuffle. ``None`` keeps the unseeded behaviour.

    Returns:
        Tuple ``(x_train, x_test, x_val, y_train, y_test, y_val)``, each
        element normalized.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=random_state
    )
    x_train, x_val, y_train, y_val = train_test_split(
        x_train, y_train, test_size=val_size, random_state=random_state
    )
    return (
        normalize(x_train),
        normalize(x_test),
        normalize(x_val),
        normalize(y_train),
        normalize(y_test),
        normalize(y_val),
    )