Hotfix 0.3.0b (#519)
* Fixes internal brain for Banana Imitation.
* Fixes Discrete Control training for Imitation Learning.
* Fixes Visual Observations in internal brain with non-square inputs.
awjuliani authored Mar 21, 2018
1 parent 862543e commit 72835e8
Showing 9 changed files with 155 additions and 169 deletions.
4 changes: 3 additions & 1 deletion python/unitytrainers/bc/models.py
@@ -22,7 +22,9 @@ def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
 
         if brain.vector_action_space_type == "discrete":
             self.action_probs = tf.nn.softmax(self.policy)
-            self.sample_action = tf.cast(tf.multinomial(self.policy, 1, name="action"), tf.int32)
+            self.sample_action_float = tf.multinomial(self.policy, 1)
+            self.sample_action_float = tf.identity(self.sample_action_float, name="action")
+            self.sample_action = tf.cast(self.sample_action_float, tf.int32)
             self.true_action = tf.placeholder(shape=[None], dtype=tf.int32, name="teacher_action")
             self.action_oh = tf.one_hot(self.true_action, self.a_size)
             self.loss = tf.reduce_sum(-tf.log(self.action_probs + 1e-10) * self.action_oh)
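Note: the change above splits sampling, naming, and casting into three separate ops, so the graph node named "action" is an identity wrapper around the raw multinomial sample rather than the op created by tf.multinomial itself. A minimal sketch of the same pattern, assuming TensorFlow 1.x (the placeholder and variable names here are illustrative, not from the repository):

    import tensorflow as tf

    # Logits for a 4-way discrete action space (illustrative shape).
    policy_logits = tf.placeholder(tf.float32, shape=[None, 4], name="policy_logits")

    sample_float = tf.multinomial(policy_logits, 1)          # int64 sample, auto-named op
    sample_named = tf.identity(sample_float, name="action")  # stable, fetchable node name
    sample_action = tf.cast(sample_named, tf.int32)          # int32 view for Python-side use

    with tf.Session() as sess:
        # The identity wrapper is what makes fetching by the literal name "action" work.
        print(sess.run("action:0", feed_dict={policy_logits: [[0.1, 0.2, 0.3, 0.4]]}))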
15 changes: 10 additions & 5 deletions python/unitytrainers/bc/trainer.py
@@ -54,7 +54,8 @@ def __init__(self, sess, env, brain_name, trainer_parameters, training, seed):
         self.stats = {'losses': [], 'episode_length': [], 'cumulative_reward': []}
 
         self.training_buffer = Buffer()
-        self.is_continuous = (env.brains[brain_name].vector_action_space_type == "continuous")
+        self.is_continuous_action = (env.brains[brain_name].vector_action_space_type == "continuous")
+        self.is_continuous_observation = (env.brains[brain_name].vector_observation_space_type == "continuous")
         self.use_observations = (env.brains[brain_name].number_visual_observations > 0)
         if self.use_observations:
             logger.info('Cannot use observations with imitation learning')
@@ -286,12 +287,16 @@ def update_model(self):
             end = (j + 1) * self.n_sequences
             batch_states = np.array(_buffer['vector_observations'][start:end])
             batch_actions = np.array(_buffer['actions'][start:end])
-            feed_dict = {self.model.true_action: batch_actions.reshape([-1, self.brain.vector_action_space_size]),
-                         self.model.dropout_rate: 0.5,
+
+            feed_dict = {self.model.dropout_rate: 0.5,
                          self.model.batch_size: self.n_sequences,
                          self.model.sequence_length: self.sequence_length}
-            if not self.is_continuous:
-                feed_dict[self.model.vector_in] = batch_states.reshape([-1, 1])
+            if self.is_continuous_action:
+                feed_dict[self.model.true_action] = batch_actions.reshape([-1, self.brain.vector_action_space_size])
+            else:
+                feed_dict[self.model.true_action] = batch_actions.reshape([-1])
+            if not self.is_continuous_observation:
+                feed_dict[self.model.vector_in] = batch_states.reshape([-1, self.brain.num_stacked_vector_observations])
             else:
                 feed_dict[self.model.vector_in] = batch_states.reshape([-1, self.brain.vector_observation_space_size *
                                                                         self.brain.num_stacked_vector_observations])
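Note: the updated update_model now feeds teacher actions in two different shapes: continuous actions as a [-1, action_size] float matrix and discrete actions as a flat [-1] vector of indices, matching the shape=[None] true_action placeholder defined for the discrete case in models.py. A rough numpy-only illustration with made-up batch dimensions:

    import numpy as np

    n_sequences, seq_len, action_size = 2, 3, 4

    # Continuous control: one float vector per step -> reshape to [-1, action_size].
    continuous_actions = np.zeros((n_sequences, seq_len, action_size), dtype=np.float32)
    print(continuous_actions.reshape([-1, action_size]).shape)  # (6, 4)

    # Discrete control: one integer index per step -> reshape to a flat [-1] vector.
    discrete_actions = np.zeros((n_sequences, seq_len, 1), dtype=np.int32)
    print(discrete_actions.reshape([-1]).shape)                  # (6,)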

Large diffs are not rendered by default.

@@ -83,11 +83,35 @@ public void MoveAgent(float[] act)
         Vector3 dirToGo = Vector3.zero;
         Vector3 rotateDir = Vector3.zero;
 
+
         if (!frozen)
         {
-            dirToGo = transform.forward * Mathf.Clamp(act[0], -1f, 1f);
-            rotateDir = transform.up * Mathf.Clamp(act[1], -1f, 1f);
-            if (Mathf.Clamp(act[2], 0f, 1f) > 0.5f)
+            bool shootCommand = false;
+            if (brain.brainParameters.vectorActionSpaceType == SpaceType.continuous)
+            {
+                dirToGo = transform.forward * Mathf.Clamp(act[0], -1f, 1f);
+                rotateDir = transform.up * Mathf.Clamp(act[1], -1f, 1f);
+                shootCommand = Mathf.Clamp(act[2], 0f, 1f) > 0.5f;
+            }
+            else
+            {
+                switch ((int)(act[0]))
+                {
+                    case 1:
+                        dirToGo = transform.forward;
+                        break;
+                    case 2:
+                        shootCommand = true;
+                        break;
+                    case 3:
+                        rotateDir = -transform.up;
+                        break;
+                    case 4:
+                        rotateDir = transform.up;
+                        break;
+                }
+            }
+            if (shootCommand)
             {
                 shoot = true;
                 dirToGo *= 0.5f;
@@ -121,9 +145,9 @@ public void MoveAgent(float[] act)
             myLaser.transform.localScale = new Vector3(0f, 0f, 0f);
 
         }
-
     }
 
+
     void Freeze()
     {
         gameObject.tag = "frozenAgent";
@@ -182,8 +206,8 @@ public override void AgentReset()
         agentRB.velocity = Vector3.zero;
         bananas = 0;
         myLaser.transform.localScale = new Vector3(0f, 0f, 0f);
-        transform.position = new Vector3(Random.Range(-myArea.range, myArea.range),
-                                         2f, Random.Range(-myArea.range, myArea.range))
+        transform.position = new Vector3(Random.Range(-myArea.range, myArea.range),
+                                         2f, Random.Range(-myArea.range, myArea.range))
             + area.transform.position;
         transform.rotation = Quaternion.Euler(new Vector3(0f, Random.Range(0, 360)));
     }
Binary file not shown.
Binary file not shown.

Some generated files are not rendered by default.

@@ -513,7 +513,7 @@ public void OnInspector()
             pixels = 1;
         else
             pixels = 3;
-        float[,,,] result = new float[batchSize, width, height, pixels];
+        float[,,,] result = new float[batchSize, height, width, pixels];
 
         for (int b = 0; b < batchSize; b++)
         {
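Note: this hunk fixes visual observations with non-square inputs by allocating the batch tensor as [batch, height, width, channels], so the first image axis is rows (height). With square textures the swapped order was invisible; with non-square ones the image no longer fits. A small numpy illustration of the same point (the sizes are made up):

    import numpy as np

    batch_size, height, width, pixels = 1, 48, 64, 3
    image = np.zeros((height, width, pixels))            # rows (height) come first

    correct = np.zeros((batch_size, height, width, pixels))
    correct[0] = image                                   # shapes line up

    wrong = np.zeros((batch_size, width, height, pixels))
    try:
        wrong[0] = image                                 # only works when height == width
    except ValueError as err:
        print("shape mismatch:", err)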
38 changes: 1 addition & 37 deletions unity-environment/ProjectSettings/EditorBuildSettings.asset
@@ -4,40 +4,4 @@
 EditorBuildSettings:
   m_ObjectHideFlags: 0
   serializedVersion: 2
-  m_Scenes:
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/GridWorld/GridWorld.unity
-    guid: 7c777442467e245108558a5155153927
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Tennis/Tennis.unity
-    guid: 25c0c9e81e55c4e129e1a5c0ac254100
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Banana/BananaImitation.unity
-    guid: 3ae10073cde7641f488ef7c87862333a
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/PushBlock/Scenes/PushBlock.unity
-    guid: ae8cc75939e3e4d07a79c8c6a08b54f4
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/3DBall/3DScene.unity
-    guid: 6f62a2ccb3830437ea4e85a617e856b3
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/3DBall/3DHardScene.unity
-    guid: 35c41099ceec44889bdbe95ed86c97ac
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Banana/BananaRL.unity
-    guid: 11583205ab5b74bb4bb1b9951cf9e437
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Basic/Scene.unity
-    guid: cf1d119a8748d406e90ecb623b45f92f
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Bouncer/Bouncer.unity
-    guid: 2c29359d4c9fe49219b21cd83e246596
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Crawler/Crawler.unity
-    guid: 4cf841b0478fb4b33971627b40c6420b
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Hallway/Scenes/Hallway.unity
-    guid: d6d6a33ed0e18459a8d61817d600978a
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Reacher/Scene.unity
-    guid: e58a3c10c43de4b6b91b7149838d1dfb
+  m_configObjects: {}
