# learn.py (forked from donjpierce/traffic)
# python=3.6 requires the Qt4Agg backend for animation saving (must be set before pyplot is imported)
import matplotlib
matplotlib.use('Qt4Agg')
from environment import Env
from keras import Sequential, layers
import matplotlib.pyplot as plt
import numpy as np
import osmnx as ox
dt = 1 / 1000  # simulation time step
N = 1  # number of cars in the simulation (passed to Env as n)
agent = 0  # index of the car used as the learning agent
"""Lower Manhattan"""
# G = ox.load_graphml('lowermanhattan.graphml')
# G = ox.project_graph(G)
# fig, ax = ox.plot_graph(G, node_size=0, edge_linewidth=0.5)
"""San Francisco"""
# G = ox.load_graphml('sanfrancisco.graphml')
# G = ox.project_graph(G)
# fig, ax = ox.plot_graph(G, node_size=0, edge_linewidth=0.5)
"""Piedmont, California"""
G = ox.load_graphml('piedmont.graphml')
G = ox.project_graph(G)
fig, ax = ox.plot_graph(G, node_size=0, edge_linewidth=0.5)
# initialize the environment for the learning agent
env = Env(n=N, fig=fig, ax=ax, agent=agent, dt=dt, animate=False)
# initialize the Keras training model
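# the network maps a one-hot encoded state (10 possible states) to Q-value estimates
# for the 2 available actions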
model = Sequential()
model.add(layers.InputLayer(batch_input_shape=(1, 10)))
model.add(layers.Dense(10, activation='sigmoid'))
model.add(layers.Dense(2, activation='linear'))
model.compile(loss='mse', optimizer='adam', metrics=['mae'])
# now execute Q learning
y = 0.95  # discount factor for future rewards
eps = 0.5  # initial exploration probability for epsilon-greedy action selection
decay_factor = 0.999  # multiplicative decay applied to eps each episode
num_episodes = 10
r_avg_list = []  # total reward collected in each episode
r_sum_list = []  # running average of the per-episode totals
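# log per-step actions/rewards and per-episode totals to a diagnostics file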
file = open('diag.txt', 'w')
for i in range(num_episodes):
    print("Episode {} of {}".format(i + 1, num_episodes))
    eps *= decay_factor
    r_sum = 0
    done = False
    diag_action = 0
    diag_reward = 0
    state = env.reset((i, num_episodes))
    while not done:
        env.reset((i, num_episodes))
        # epsilon-greedy action selection: explore with probability eps,
        # otherwise take the action with the highest predicted Q-value
        rand = np.random.random()
        if rand < eps:
            action = np.random.randint(0, 2)
        else:
            action = np.argmax(model.predict(np.identity(10)[state:state + 1]))
        new_s, r, done, _ = env.step(action=action, num=(i, num_episodes))
        # Q-learning target: immediate reward plus discounted value of the best next action
        target = r + y * np.max(model.predict(np.identity(10)[new_s:new_s + 1]))
        target_vec = model.predict(np.identity(10)[state:state + 1])[0]
        target_vec[action] = target
        # fit the network on the one-hot encoded current state and the updated Q-value vector
        model.fit(np.identity(10)[state:state + 1], target_vec.reshape(-1, 2), epochs=1, verbose=0)
        state = new_s
        r_sum += r
        print('Action: {}, Reward: {}'.format(action, r))
        file.write('Action: {}, Reward: {}\n'.format(action, round(r, 2)))
        diag_action += action
        diag_reward += r
    r_avg_list.append(r_sum)
    r_sum_list.append(sum(r_avg_list) / (i + 1))
    file.write('Episode: {}, Total Rewards: {} \n'.format(i, round(r_sum, 2)))
file.close()
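# plot the running-average reward per episode for the learning agent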
plt.plot(np.arange(num_episodes), r_sum_list)
plt.xlabel('Game number')
plt.ylabel('Average reward per game')
plt.suptitle('Average reward per game for car no. {}'.format(agent))
plt.savefig('avg_rewards.png')