-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_reader.py
219 lines (196 loc) · 8.75 KB
/
data_reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# from https://github.com/corgiTrax/Gaze-Data-Processor.git
#
# data_reader.py
#
# Read gaze dataset from asc file or csv file
# -----------------------
import os, re, threading, time
import numpy as np
from IPython import embed
from scipy import misc
def _pause_until_enter(prompt):
    """Wait for the user to press Enter, but only on an interactive terminal.

    When stdin is missing, closed, or not a TTY (batch jobs, pipes, test
    runners) nobody can answer the prompt, so the pause is skipped instead
    of blocking or raising.
    """
    import sys  # local import: only this helper needs it

    try:
        interactive = sys.stdin is not None and sys.stdin.isatty()
    except ValueError:  # stdin has already been closed
        interactive = False
    if not interactive:
        return

    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3 renamed raw_input() to input()
    try:
        read_line(prompt)
    except EOFError:
        # stdin hit end-of-file while waiting; just carry on.
        pass


def read_gaze_data_asc_file(fname):
    """Read an ASC gaze recording and group its contents by frame.

    Every line is tried against a fixed set of patterns (gaze sample,
    frame start, action, reward, episode, score, calibration validation);
    lines that match none of them are ignored. Data seen before the first
    frame-start message is filed under the key ``'BEFORE-FIRST-FRAME'``;
    afterwards the key is whatever ``make_unique_frame_id`` returns for the
    frame-start message.

    Args:
        fname: path of the ASC file to read.

    Returns:
        An 8-tuple:

        * ``frameid2pos`` -- frame id -> list of ``(x, y)`` float gaze positions.
          The list of the last frame id seen is emptied before returning.
        * ``frameid2action`` -- frame id -> int action, or ``None`` if none seen.
        * ``frameid2duration`` -- frame id -> difference between the timestamp
          of the next frame-start message and the previous one (the first
          entry is measured from 0). The last frame gets no entry.
        * ``frameid2unclipped_reward`` -- frame id -> int reward.
        * ``frameid2episode`` -- frame id -> int episode.
        * ``frameid2score`` -- frame id -> int score.
        * ``frameid_list`` -- frame ids in file order, without
          ``'BEFORE-FIRST-FRAME'``.
        * ``file_meta_data`` -- dict with ``avg_error`` / ``max_error`` (from
          the last validation message), ``low_sample_rate`` (string such as
          ``"3.2%"``) and ``total_frame``.

    Raises:
        AssertionError: if one frame has more than one episode or score message.
    """
    frameid = 'BEFORE-FIRST-FRAME'
    frameid2pos = {frameid: []}
    frameid2action = {frameid: None}
    frameid2duration = {frameid: None}
    frameid2unclipped_reward = {frameid: None}
    frameid2episode = {frameid: None}
    frameid2score = {frameid: None}
    file_meta_data = {'avg_error': None, 'max_error': None, 'low_sample_rate': None, 'total_frame': None}
    # frame id list (excludes 'BEFORE-FIRST-FRAME')
    frameid_list = []
    start_timestamp = 0

    # regex for floating point numbers
    freg = r"[-+]?[0-9]*\.?[0-9]+"
    # frame-start message
    scr_msg = re.compile(r"MSG\s+(\d+)\s+SCR_RECORDER FRAMEID (\d+) UTID (\w+)")
    # gaze sample: timestamp, x, y
    gaze_msg = re.compile(r"(\d+)\s+(%s)\s+(%s)" % (freg, freg))
    # action message
    act_msg = re.compile(r"MSG\s+(\d+)\s+key_pressed atari_action (\d+)")
    # reward message
    # NOTE(review): (\d+) cannot match a negative reward -- confirm the
    # recorder never writes one.
    reward_msg = re.compile(r"MSG\s+(\d+)\s+reward (\d+)")
    # episode message
    episode_msg = re.compile(r"MSG\s+(\d+)\s+episode (\d+)")
    # score message
    score_msg = re.compile(r"MSG\s+(\d+)\s+score (\d+)")
    # meta data (calibration validation)
    validation_msg = re.compile(r"MSG\s+(\d+)\s+!CAL\sVALIDATION.+ERROR\s+(%s)\s+avg\.\s+(%s)\s+max\s+OFFSET.+" % (freg, freg))

    with open(fname, 'r') as f:
        for line in f:
            match_sample = gaze_msg.match(line)
            if match_sample:
                gaze_xy = (float(match_sample.group(2)), float(match_sample.group(3)))
                frameid2pos[frameid].append(gaze_xy)
                continue

            match_scr_msg = scr_msg.match(line)
            if match_scr_msg:
                # A new frame starts: the frame that was current until now
                # lasted from its own start up to this timestamp.
                frame_start = int(match_scr_msg.group(1))
                frameid2duration[frameid] = frame_start - start_timestamp
                start_timestamp = frame_start
                frameid = make_unique_frame_id(match_scr_msg.group(3), match_scr_msg.group(2))
                frameid_list.append(frameid)
                frameid2pos[frameid] = []
                frameid2action[frameid] = None
                continue

            match_action = act_msg.match(line)
            if match_action:
                if frameid2action[frameid] is None:
                    frameid2action[frameid] = int(match_action.group(2))
                else:
                    print ("Warning: there is more than 1 action for frame id %s. Not supposed to happen." % str(frameid))
                continue

            # The three checks below test for "no value yet" instead of
            # "key absent": the dicts are pre-seeded with
            # 'BEFORE-FIRST-FRAME': None, and a message arriving before the
            # first frame is not a duplicate.
            match_reward = reward_msg.match(line)
            if match_reward:
                if frameid2unclipped_reward.get(frameid) is None:
                    frameid2unclipped_reward[frameid] = int(match_reward.group(2))
                else:
                    print ("Warning: there is more than 1 reward for frame id %s. Not supposed to happen." % str(frameid))
                continue

            match_episode = episode_msg.match(line)
            if match_episode:
                if frameid2episode.get(frameid) is not None:
                    # Raised explicitly so the check survives `python -O`.
                    raise AssertionError("ERROR: there is more than 1 episode for frame id %s. Not supposed to happen." % str(frameid))
                frameid2episode[frameid] = int(match_episode.group(2))
                continue

            match_score = score_msg.match(line)
            if match_score:
                if frameid2score.get(frameid) is not None:
                    raise AssertionError("ERROR: there is more than 1 score for frame id %s. Not supposed to happen." % str(frameid))
                frameid2score[frameid] = int(match_score.group(2))
                continue

            match_validation = validation_msg.match(line)
            if match_validation:
                # replace the old value since we will only use the validation data after the last frame
                file_meta_data['avg_error'] = float(match_validation.group(2))
                file_meta_data['max_error'] = float(match_validation.group(3))
                continue

    # throw out gazes after the last frame, because the game has ended but eye tracker keeps recording
    frameid2pos[frameid] = []

    if len(frameid2pos) < 1000:  # simple sanity check
        print ("Warning: did you provide the correct ASC file? Because the data for only %d frames is detected" % (len(frameid2pos)))
        _pause_until_enter("Press any key to continue")

    n_frame = len(frameid_list)
    # Counted over every entry of frameid2pos, i.e. including
    # 'BEFORE-FIRST-FRAME' and the last frame that was just emptied.
    few_cnt = sum(1 for samples in frameid2pos.values() if len(samples) < 10)
    # A file without any frame message has no meaningful rate; report 0
    # instead of dividing by zero.
    low_rate = 100.0 * few_cnt / n_frame if n_frame else 0.0
    print ("Warning: %d frames have less than 10 gaze samples. (%.1f%%, total frame: %d)" %
           (few_cnt, low_rate, n_frame))

    # save the values to meta data
    file_meta_data['low_sample_rate'] = "{:.1f}".format(low_rate) + "%"
    file_meta_data['total_frame'] = n_frame
    return frameid2pos, frameid2action, frameid2duration, frameid2unclipped_reward, frameid2episode, frameid2score, frameid_list, file_meta_data
def make_unique_frame_id(UTID, frameid):
    """Build the key used to identify one frame: the pair ``(UTID, int(frameid))``.

    ``frameid`` is converted with ``int`` so a numeric string and the
    matching integer produce the same key; ``UTID`` is kept as given.
    """
    frame_number = int(frameid)
    unique_id = UTID, frame_number
    return unique_id
def _parse_optional_int(field):
    """Convert one CSV field to ``int``; the literal ``null`` becomes ``None``."""
    field = field.strip()
    return None if field == 'null' else int(field)


def read_gaze_data_csv_file(fname, separator=',', pos_separator=','):
    """Read a per-frame CSV file into dictionaries keyed by frame id.

    Each data line is expected to hold, in this order:
    ``frameid, episode_id, score, duration, unclipped_reward, action, pos``
    where ``pos`` is a flat run of ``x, y`` values. Any of the fields after
    the frame id may be the literal ``null``. If the first line contains the
    text ``frame`` it is treated as a header and skipped; blank lines are
    skipped as well.

    Args:
        fname: path of the CSV file to read.
        separator: string that separates the columns.
        pos_separator: string that separates the gaze coordinates inside the
            ``pos`` column. When it equals ``separator`` every field from the
            seventh onwards is taken as a coordinate.

    Returns:
        A 7-tuple ``(frameid2pos, frameid2action, frameid2duration,
        frameid2unclipped_reward, frameid2episode, frameid2score,
        frameid_list)``. Frame ids are the strings found in the first column.
        ``frameid2pos`` maps to a list of ``(x, y)`` float tuples, or to
        ``None`` when the column is ``null``; the other dictionaries map to
        ``int`` or ``None``. Every dictionary also carries the initial
        ``'BEFORE-FIRST-FRAME'`` entry; ``frameid_list`` does not.

    Raises:
        IndexError: if a data line has fewer than seven fields.
        ValueError: if a field is neither ``null`` nor a valid number.
    """
    frameid = 'BEFORE-FIRST-FRAME'
    frameid2pos = {frameid: []}
    frameid2action = {frameid: None}
    frameid2duration = {frameid: None}
    frameid2unclipped_reward = {frameid: None}
    frameid2episode = {frameid: None}
    frameid2score = {frameid: None}
    # frame id list (excludes 'BEFORE-FIRST-FRAME')
    frameid_list = []

    with open(fname, 'r') as f:
        for i, raw_line in enumerate(f):
            # for the first line, check if titles (column names) are included
            if i == 0 and 'frame' in raw_line:
                continue

            # Drop the line ending first: otherwise the last field keeps its
            # '\n' and a trailing 'null' is never recognised.
            line = raw_line.rstrip('\r\n')
            if not line.strip():
                # e.g. an empty line at the end of the file
                continue

            # parse each section: frameid,episode_id,score,duration,unclipped_reward,action,pos
            data_line = line.split(separator)
            frameid = data_line[0]
            frameid_list.append(frameid)

            frameid2episode[frameid] = _parse_optional_int(data_line[1])
            frameid2score[frameid] = _parse_optional_int(data_line[2])
            frameid2duration[frameid] = _parse_optional_int(data_line[3])
            frameid2unclipped_reward[frameid] = _parse_optional_int(data_line[4])
            frameid2action[frameid] = _parse_optional_int(data_line[5])

            pos_data = data_line[6]
            if pos_data.strip() == 'null':
                frameid2pos[frameid] = None
                continue

            if separator == pos_separator:
                # The coordinates were already split apart together with the columns.
                pos_data_list = data_line[6:]
            else:
                pos_data_list = pos_data.split(pos_separator)

            # Coordinates come in x, y pairs; a dangling odd value is ignored.
            n_pos = len(pos_data_list) // 2
            frameid2pos[frameid] = [
                (float(pos_data_list[2 * j]), float(pos_data_list[2 * j + 1]))
                for j in range(n_pos)
            ]

    return frameid2pos, frameid2action, frameid2duration, frameid2unclipped_reward, frameid2episode, frameid2score, frameid_list
if __name__ == '__main__':
    # Manual smoke run: parse one sample CSV file from the author's machine.
    # The returned dictionaries are not used; this only checks that the file
    # can be read without raising.
    sample_csv_path = '/Users/lguan/Documents/Study/Research/Gaze-Dataset/data_cleaning/csv/191_JAW_9955253_Jun-25-14-35-04.txt'
    read_gaze_data_csv_file(sample_csv_path)