-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathData_Generation.py
205 lines (161 loc) · 6.98 KB
/
Data_Generation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import os
import h5py
import ast
from collections import deque
import numpy as np
from collections import Counter
from numpy import savez_compressed
# Collect the path of every game log found in the per-source sub-folders.
# Paths are built by plain string concatenation, so both the root and the
# sub-folder names carry their own trailing '/'.
game_folders = ['MO/', 'PLAY/', 'LIU/']
folder_name = 'output2017/'
all_game_files = []
for game_folder in game_folders:
    source_dir = folder_name + game_folder
    all_game_files.extend(source_dir + file_name for file_name in os.listdir(source_dir))
# Tile code -> column index (0-33) used by the 34-column tile encoding.
w_dict = {f'W{rank}': rank - 1 for rank in range(1, 10)}   # characters (wan)
b_dict = {f'B{rank}': rank + 8 for rank in range(1, 10)}   # dots (bing)
t_dict = {f'T{rank}': rank + 17 for rank in range(1, 10)}  # bamboo (tiao)
f_dict = {f'F{rank}': rank + 26 for rank in range(1, 5)}   # winds: east, south, west, north
j_dict = {f'J{rank}': rank + 30 for rank in range(1, 4)}   # dragons: red, green, white
# Merged lookup covering all 34 tile codes, in suit order.
total_dict = {
    code: column
    for suit in (w_dict, b_dict, t_dict, f_dict, j_dict)
    for code, column in suit.items()
}
def serialize(array):
    """Encode a collection of tile codes as one (1, 4, 34) integer plane.

    Each column corresponds to the ``total_dict`` index of a tile code.  For a
    code that occurs ``k`` times in ``array``, rows ``0 .. k-1`` of its column
    are set to 1; everything else stays 0.  Codes whose first character is
    ``'H'`` are ignored (presumably flower tiles -- confirm against the log
    format).

    Raises:
        KeyError: a non-'H' code is missing from ``total_dict``.
        IndexError: a code occurs more than four times.
    """
    plane = np.zeros((4, 34), int)
    for tile, copies in Counter(array).items():
        if tile[0] == 'H':
            continue
        column = total_dict[tile]
        for row in range(copies):
            plane[row][column] = 1
    # Add the leading axis so planes can be stacked with np.concatenate.
    return plane[np.newaxis, ...]
def serialize_y(tile):
    """Return the length-34 one-hot integer vector for a single tile code.

    The hot position is ``total_dict[tile]``; a code that is not in
    ``total_dict`` raises ``KeyError``.
    """
    hot_column = total_dict[tile]
    return (np.arange(34) == hot_column).astype(int)
def update_history(past_game_history, player_tile, discard_tile_list, stealing_tile_list, round_tile):
    """Push a snapshot of the current state onto the history queue.

    The snapshot stacks these planes along axis 0, in order:
    the plane for ``player_tile``, the plane for the single ``round_tile``,
    one plane per entry of ``discard_tile_list`` and one plane per entry of
    ``stealing_tile_list``.

    The snapshot is appended on the right of ``past_game_history`` and the
    leftmost entry is then dropped, so the queue keeps its length.  The queue
    is modified in place and also returned.
    """
    planes = [serialize(player_tile), serialize([round_tile])]
    # One plane per discard pile, then one per set of claimed ("stealing") tiles.
    planes.extend(serialize(discards) for discards in discard_tile_list)
    planes.extend(serialize(claimed) for claimed in stealing_tile_list)
    snapshot = np.concatenate(planes, axis=0)

    # Append first, then drop from the left (same order as before, so an
    # empty queue stays empty instead of raising).
    past_game_history.append(snapshot)
    past_game_history.popleft()
    return past_game_history
def extract_feature_X(player_tile, discard_tile, stealing_tile, past_game_history, player_num):
    """Build the stacked feature array for one decision point.

    Planes are concatenated along axis 0 in this order:
    the plane for ``player_tile``; one plane per entry of ``discard_tile``;
    one plane per entry of ``stealing_tile``; every array stored in
    ``past_game_history`` (left to right); and a final (1, 4, 34) plane that
    is all ones when ``player_num == 0`` and all zeros otherwise.
    """
    planes = [serialize(player_tile)]
    planes += [serialize(discards) for discards in discard_tile]
    planes += [serialize(claimed) for claimed in stealing_tile]
    planes.extend(past_game_history)

    # Constant plane flagging seat 0 (named "wind feature" in the original).
    make_plane = np.ones if player_num == 0 else np.zeros
    planes.append(make_plane((1, 4, 34), int))
    return np.concatenate(planes, axis=0)
def generate_training_set(file_name, player_num, history_num = 4):
    """Replay one game log and emit a (features, label) pair for every discard.

    Feature planes per sample (each plane is 4 x 34), in stacking order:
        player hand                      : 1
        discard piles, one per seat      : 4
        claimed tiles, one per seat      : 4
        history snapshots                : history_num * 10
            (each snapshot = hand 1 + discarded tile 1 + discards 4 + claimed 4)
        seat-0 flag ("wind feature")     : 1
    With the default ``history_num = 4`` that is 50 planes.

    Args:
        file_name: Path of a UTF-8 game log.  Line index ``2 + player_num`` is
            read as that seat's starting hand (a list literal between the
            first '[' and the first ']').  Lines from index 6 onward are
            tab-separated action records: seat number, action name, a list
            literal of tiles and, in 5-field records, the claimed tile in
            field 3.
        player_num: Seat (0-3) whose hand is tracked.
        history_num: Number of past snapshots kept in the history queue.

    Returns:
        ``(master_X, master_Y)``: parallel lists of feature arrays and
        length-34 one-hot labels, one entry per discard record.

    Raises:
        IndexError: ``player_num`` is not in 0-3.
    """
    # Explicit range check; replaces the unused ``other_player`` list whose
    # ``pop(player_num)`` was the only thing rejecting seats >= 4.
    if player_num not in range(4):
        raise IndexError(f'player_num must be in 0-3, got {player_num!r}')

    # Initialize the empty containers for data storage
    master_X = []
    master_Y = []
    discard_tile_list = [[] for _ in range(4)]
    stealing_tile_list = [[] for _ in range(4)]
    past_game_history = deque(np.zeros((10, 4, 34), int) for _ in range(history_num))

    # Read the whole log
    with open(file_name, 'r', encoding='utf8') as f:
        all_data_str = f.readlines()

    # Only the tracked player's own starting tiles are parsed
    player_hand = all_data_str[2 + player_num]
    start = player_hand.find('[')
    end = player_hand.find(']') + 1
    player_tile = ast.literal_eval(player_hand[start:end])

    for line in all_data_str[6:]:
        # A blank line (e.g. at end of file) carries no record; skip it
        # instead of failing on int('').
        if not line.strip():
            continue
        round_info = line.split('\t')

        # Extracting basic information
        round_player_num = int(round_info[0])
        action = round_info[1]
        tiles = ast.literal_eval(round_info[2])
        if len(round_info) == 5:
            # 5-field record: keep the whole tile list plus the claimed tile.
            round_tile = tiles
            stealing_tile = round_info[3]
        else:
            round_tile = tiles[0]
            # Reset so a value from an earlier record can never be reused.
            stealing_tile = None

        if action == '打牌':  # discard
            # NOTE(review): a sample is recorded for every seat's discard,
            # while the hand plane always belongs to ``player_num`` --
            # confirm this is intended.
            # Features must be taken BEFORE the discard is applied.
            X = extract_feature_X(player_tile, discard_tile_list, stealing_tile_list, past_game_history, player_num)
            Y = serialize_y(round_tile)
            master_X.append(X)
            master_Y.append(Y)

            # Apply the discard to the table state
            discard_tile_list[round_player_num].append(round_tile)
            if player_num == round_player_num:
                player_tile.remove(round_tile)

            # Finally record the post-discard state in the history queue
            past_game_history = update_history(past_game_history, player_tile, discard_tile_list, stealing_tile_list, round_tile)
        elif action in ('摸牌', '补花后摸牌', '杠后摸牌'):  # draw / draw after flower / draw after kong
            if player_num == round_player_num:
                player_tile.append(round_tile)
        elif action in ('吃', '碰', '明杠'):  # chow / pung / exposed kong
            # Record every tile of the claimed set for that seat
            stealing_tile_list[round_player_num].extend(round_tile)
            if player_num == round_player_num:
                # The claimed tile came from another seat; only the remaining
                # tiles leave the tracked player's hand.
                round_tile.remove(stealing_tile)
                for tile in round_tile:
                    player_tile.remove(tile)

    return master_X, master_Y
if __name__ == '__main__':
    # Script entry point: build samples for every game file and every seat,
    # then pack the accumulated arrays to disk with bloscpack.
    # NOTE(review): indentation was lost in this copy of the file; the nesting
    # below (size check once per file, `break` inside the `if`) is a
    # reconstruction -- confirm against the original source.
    master_X = []
    master_Y = []
    old_file_count = 0  # NOTE(review): never read in the visible code
    saving_file_count = 0  # running sample total including the chunk being saved
    file_count = 0  # running sample total before the chunk being saved
    for file_name in all_game_files:
        for player_num in range(0,4):
            X, Y = generate_training_set(file_name, player_num, history_num = 4)
            # print('initial x',X[0][0])
            # print('initial y',Y[0])
            # print('shape of X', np.array(X).shape)
            master_X += X
            master_Y += Y
        # print(np.array(master_X).shape)
        # print('mx',master_X[0][0:2])
        # print('my',master_Y[0:2])
        # Flush once more than 100000 samples have accumulated.
        if len(master_X) > 100000:
            import bloscpack as bp
            master_X = np.array(master_X)
            master_Y = np.array(master_Y)
            saving_file_count += len(master_X)
            # File names carry the sample range: <count before>_<count after>.
            bp.pack_ndarray_to_file(master_X, f'processed_data_blp/input_X_{file_count}_{saving_file_count}.nosync.blp')
            bp.pack_ndarray_to_file(master_Y, f'processed_data_blp/input_Y_{file_count}_{saving_file_count}.nosync.blp')
            file_count += len(master_X)
            print('Saved with the file size', saving_file_count)
            master_X = []
            master_Y = []
            # NOTE(review): as placed here, this stops after the first chunk is
            # written; samples still buffered when the loop ends are not
            # written by any visible code -- confirm this is intended.
            break