Skip to content

Commit

Permalink
Added new hopper and swimmer examples
Browse files Browse the repository at this point in the history
  • Loading branch information
aravindr93 committed Aug 19, 2016
1 parent 9d7e925 commit 45ed7c1
Show file tree
Hide file tree
Showing 74 changed files with 6,137 additions and 584 deletions.
30 changes: 1 addition & 29 deletions examples/hopper/MDP_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,9 @@
def make_train_MDP():
return _standard_hopper()


def make_test_MDP():
return _heavy_hopper()


def make_custom_MDP():
return _ensemble_hopper()

# ===================================================================
# Local functions to create envs

Expand All @@ -35,40 +30,17 @@ def _heavy_hopper():
e.env.model.body_mass = bm; e.env.model.geom_size = gs;
return e

def _ensemble_hopper():
return GymEnv("UncertainHopper-v0")

def _standard_walker():
return GymEnv('Walker2d-v1')

def _heavy_walker():
# Make the torso heavy
e = GymEnv('Walker2d-v1')
bm = np.array(e.env.model.body_mass)
gs = np.array(e.env.model.geom_size)
bm[1] = 7; gs[1][0] = 0.1;
e.env.model.body_mass = bm; e.env.model.geom_size = gs;
return e

# =======================================================================================
# Generate environment corresponding to the given mode

def get_environment(env_mode):

modes = ['train', 'test', 'custom', 'standard', 'heavy', 'random']
modes = ['train', 'test']

if env_mode == 'train':
env = make_train_MDP()
elif env_mode == 'test':
env = make_test_MDP()
elif env_mode == 'custom':
env = make_custom_MDP()
elif env_mode == 'standard':
env = _make_standard_MDP()
elif env_mode == 'heavy':
env = _make_heavy_MDP()
elif env_mode == 'random':
env = _make_random_MDP()
else:
print "ERROR: Unknown environment mode specified. Allowed modes are ", modes

Expand Down
2 changes: 1 addition & 1 deletion examples/hopper/README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
### Example 1
In this example, we train a (32,32) net policy on the standard hopper environment and test it on a heavy hopper (twice the torso mass). This is just to show an example. Typically, we need to train longer (for approx 250 or 300 iterations).
In this example, we train a (64,64) net policy on the standard hopper environment and test it on a heavy hopper (twice the torso mass). This is just to show an example. Typically, we need to train longer (for approx 250 or 300 iterations) and with a lower learning rate (max_kl=0.01).

Here, we see that training on the standard hopper which has a torso mass of approx 3.5 (calculated using density=1000 in MuJoCo) and testing it on a hopper with heavier torso (mass=7, twice of standard) fails badly. However, if we train on an ensemble, the performance on test MDPs improve dramatically. See paper for more details.

Expand Down
53 changes: 53 additions & 0 deletions examples/hopper/experiment_0/iterations/baseline_101.pickle
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
ccopy_reg
_reconstructor
p0
(crllab.baselines.linear_feature_baseline
LinearFeatureBaseline
p1
c__builtin__
object
p2
Ntp3
Rp4
(dp5
S'_coeffs'
p6
cnumpy.core.multiarray
_reconstruct
p7
(cnumpy
ndarray
p8
(I0
tp9
S'b'
p10
tp11
Rp12
(I1
(I26
tp13
cnumpy
dtype
p14
(S'f8'
p15
I0
I1
tp16
Rp17
(I3
S'<'
p18
NNNI-1
I-1
I0
tp19
bI00
S'\xcd\xe7\x98\xcb\xce|\xa9\xc0\xba\x13\xc8\x19,\xf0\x80\xc0\x9ej\x9f\xfd.\x84\\@e\x11"m\xa9\x99o@\x90y\x06^+\x07J@\xc0\x92v7\n\xa9q@\xd1\x84yo\xe3%D@:\xa1H\x83\xe2\x13=\xc0\x14\x13@\x86i\x0b@@*{\x8b\xa4\xf1\x001@\xf6\xea \xec\xdf\xbc\x00\xc0\xf9\xf6\xd4\x87Y\xdf\x92@\x1c6|IQ\xf7\x19\xc0\xf5\xf8\x9b\xc0\x15\x9ay\xc0\xf6\xddS\x08<J\x86\xc0\x8d\x05\xd87\xb6\x1cD\xc0\x99\xe1\xff``\rJ\xc0\xf92\x96FMI\x1a@/\x8e\xaa^,\xa8\x06\xc0\xf8F(\xda-\x9a\xf8?+\xaf\xd2\x8b\xde5\xed\xbf\x80^3\x9a\xa1W\xc2\xbf\x12\\U0dFY\xc0\xbc\x0eWV\x00\x118@\x00:\x83\x8d\xa6\x92\x00\xc0\xcaU\x135\x08!\xa5@'
p20
tp21
bsS'_reg_coeff'
p22
F1e-05
sb.
53 changes: 53 additions & 0 deletions examples/hopper/experiment_0/iterations/baseline_11.pickle
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
ccopy_reg
_reconstructor
p0
(crllab.baselines.linear_feature_baseline
LinearFeatureBaseline
p1
c__builtin__
object
p2
Ntp3
Rp4
(dp5
S'_coeffs'
p6
cnumpy.core.multiarray
_reconstruct
p7
(cnumpy
ndarray
p8
(I0
tp9
S'b'
p10
tp11
Rp12
(I1
(I26
tp13
cnumpy
dtype
p14
(S'f8'
p15
I0
I1
tp16
Rp17
(I3
S'<'
p18
NNNI-1
I-1
I0
tp19
bI00
S'\x86\x1aGk\xb5\x18r\xc0\xa5b\x811\x92\x16Z\xc0\xf6T0vb?N@r\x86H=\xbc\x00N@\x0b\rpv\xdd\x16D@V*\xba\x92\xec89\xc0\x18\x8bo\xccZ\x04&@\x18v\xe2\x15\xa5\x7f\x16\xc0\x9a\x83h\xccw\xc5\x1c@\xaaX\xc9\xa4\'\x16\x0e@\x80IP3\xeeJ\xd2?2\xb4.\xde\x97\xe4^@}o\xb5\xc4\xb1\xe3\x90\xc0\xb2\xb2\x80\t\xf66H\xc0m\xf1"\xc6Jl_@6\x1aT\xc1y-%@\x1bu\x14\xe9{$ @d\xdf\xe4}\xc5\xee\xe9?|Y\x08\xbcX\xef\xf4\xbf \xb3\x0c\xd5H\xc9\xdc?\x002]\xfd\x13R\x87\xbf\x00\xe0\xe5\xb9\x14\xb0\xb2?\x17\xfa\x12%\x00\x14K@\x1e\xcf!\xad\xd5\x92K\xc0\x81\x11\xf4\xa9\xc1\xb0+@b\xe1\xfd\xf7\xab\xd9x@'
p20
tp21
bsS'_reg_coeff'
p22
F1e-05
sb.
53 changes: 53 additions & 0 deletions examples/hopper/experiment_0/iterations/baseline_21.pickle
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
ccopy_reg
_reconstructor
p0
(crllab.baselines.linear_feature_baseline
LinearFeatureBaseline
p1
c__builtin__
object
p2
Ntp3
Rp4
(dp5
S'_coeffs'
p6
cnumpy.core.multiarray
_reconstruct
p7
(cnumpy
ndarray
p8
(I0
tp9
S'b'
p10
tp11
Rp12
(I1
(I26
tp13
cnumpy
dtype
p14
(S'f8'
p15
I0
I1
tp16
Rp17
(I3
S'<'
p18
NNNI-1
I-1
I0
tp19
bI00
S'D\xb8\xdf\xb0o\xf1\x89\xc0`\xcd\xce%myc\xc0\x91\x02\x91\x85\xf8\xf9Q@T\x01Z\xdd\xd1`I@\xab:\xf8\xb31\x92\x15@\x95 \xc06\xda\xa0W@\xab4\x8c]\r-/@#}${\xf5t$@%\x1c\xb7\x81L\xdf\x0f\xc0\xf2\x04\xc0cB~\x01\xc0p\xfa]*\x00i\xfc\xbfWN\n\x16| t@7\x92\x1f\x94z\xc0\x9f\xc0\xfa\xd6\xe8\xc7z\xb3a\xc0\xa6\x08\x13\xf2_X{\xc0\n\xf0\xdc\x9d\x88>A@\x85\xfb\xeeg\xbf\xc9?\xc0K\xc0\x80\xf5\x9cg\xfe\xbf.\xcf?k\x99\xa4\x02\xc0\xd8p\xb2\xcd\xa3D\xcd?\xa8\xaf\x87\xa1Q\xc9\xbf\xbf\x00\xd9\xbd\xb5\x00%\xcd?\xe8\xba(Y\x8a\xce\x07\xc0\xd8\xb7\xe3\xa0,\xe1\x05@\x00\x9e\xa6\x8c\xb1\xe3\xdc\xbf\x19\x8aV\xfe\x90\n\x8d@'
p20
tp21
bsS'_reg_coeff'
p22
F1e-05
sb.
53 changes: 53 additions & 0 deletions examples/hopper/experiment_0/iterations/baseline_31.pickle
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
ccopy_reg
_reconstructor
p0
(crllab.baselines.linear_feature_baseline
LinearFeatureBaseline
p1
c__builtin__
object
p2
Ntp3
Rp4
(dp5
S'_coeffs'
p6
cnumpy.core.multiarray
_reconstruct
p7
(cnumpy
ndarray
p8
(I0
tp9
S'b'
p10
tp11
Rp12
(I1
(I26
tp13
cnumpy
dtype
p14
(S'f8'
p15
I0
I1
tp16
Rp17
(I3
S'<'
p18
NNNI-1
I-1
I0
tp19
bI00
S"+)K2f\xbf\xa5\xc0\xb7\xc2X1jit\xc0c\x8e\x8fU'\x9bI\xc0\x13\xa4t\xfd\xd5\xa2Y@?\xb0&\x87\x84b-@*TP3\x16\x03a@\xed\xf9<\x87\xac.0@\x01\x84\xfb\x9a\x19\x1c-\xc0\xc7V?`\xfbJ/@\xb4Z\x01U\xe9\x98\x1c@\xe8\xd2_\xd8\x99\xec\xd6\xbf<\xdd\x94K\xddV\x8f@(\x1e\xf0C\xfbL\x8c\xc0\xa7\x84\xe8\xb9(jx\xc0\x8f\xe7P\x0b\xfd\xb4i@\xb9`\xe0\x0f(\xca@@\xe8\xcd\x9e\x08\xc5\xa4A\xc0P\xb9#\x99%.\xd7\xbf\x8fC\xe9\xd6T\xdd\xe4?\xc0f#\x02\x95\xdf\xb9?(\x9cD~\xf1\xf7\xd3?\xc0*\xdfp\xc7\xc5\xd4?\x8fO\x98\xe9s\xf1S\xc0\xe2\xf19z\xdf\x195@\x00M\x0f\xfb\xaf\xf2\xfb\xbf7\xe2\x13\x85Q\x8b\xa2@"
p20
tp21
bsS'_reg_coeff'
p22
F1e-05
sb.
53 changes: 53 additions & 0 deletions examples/hopper/experiment_0/iterations/baseline_41.pickle
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
ccopy_reg
_reconstructor
p0
(crllab.baselines.linear_feature_baseline
LinearFeatureBaseline
p1
c__builtin__
object
p2
Ntp3
Rp4
(dp5
S'_coeffs'
p6
cnumpy.core.multiarray
_reconstruct
p7
(cnumpy
ndarray
p8
(I0
tp9
S'b'
p10
tp11
Rp12
(I1
(I26
tp13
cnumpy
dtype
p14
(S'f8'
p15
I0
I1
tp16
Rp17
(I3
S'<'
p18
NNNI-1
I-1
I0
tp19
bI00
S"\xc2\xd2u\xe9v\xed\x9f\xc0$!\x08\xb9R\xa8\x80\xc0\x15W\xc1 \xa2\x8d9@p\x9b\x08\x8b\xec\xf4w@\x80\x0e\x84\xc7Et\t\xc0\xc3\x99\x83\xf1\x1c\xf1j@&\xde\xf4\xd1?q$@\xdb\xed\\2\x85i%\xc0\xbfv, \xd3:0@\xee\xf3-A\x1c%%@p\xc2\x8b\xbfwn\xfa\xbf\xea~@\xa7(,\x88@\xea\xf1\xb0\x03\xf4\xf8\x98\xc0'\xf4\x00G,@q\xc0m,MS\x84\x1db@,\xbe\xcdb,\x88\x0f@\x9d\x9f$+\xec\xc5J\xc0\x89\x90\x93\xd3f\xb3\n\xc0\x14\x88\xf8%\xfd\x98\xff\xbf`\x86\x9b\xbf\xba\x82\xee?$\xbf\xfeTn\xaf\xe1\xbf\x00\xacG[\x93\xa4\xb7?\xf61\x9e\x14\x9dy`\xc0\\Fi\xfe\xb4\x7f@@\x00\xcd\x97\xff2\xb9\x03\xc0F\x198yh\xeb\x9b@"
p20
tp21
bsS'_reg_coeff'
p22
F1e-05
sb.
53 changes: 53 additions & 0 deletions examples/hopper/experiment_0/iterations/baseline_51.pickle
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
ccopy_reg
_reconstructor
p0
(crllab.baselines.linear_feature_baseline
LinearFeatureBaseline
p1
c__builtin__
object
p2
Ntp3
Rp4
(dp5
S'_coeffs'
p6
cnumpy.core.multiarray
_reconstruct
p7
(cnumpy
ndarray
p8
(I0
tp9
S'b'
p10
tp11
Rp12
(I1
(I26
tp13
cnumpy
dtype
p14
(S'f8'
p15
I0
I1
tp16
Rp17
(I3
S'<'
p18
NNNI-1
I-1
I0
tp19
bI00
S'\x1b\xceP\xf2\xf4\xf2\xa5\xc0\xa6\xc1\x8b\xde\'\xd0\x83\xc0\xf4C\xef\xff\x1a$=@~\xc6-\xd0\xe3ri@\xdd\xc7U\x16\x07\x83-\xc0\x1e@\xfe\xca\x0c\x7fr@\x10B\x810\x02\xa0&@\xad\nek\xe2$A\xc0%V>O?\x9e@@\xc1<Lv\x96\xb74@(\xad\xd3\x89\xddk\xff\xbf\xec\x94`\xb0F\xc1\x8e@[av\xd0\xe5\xbd\x85\xc0\x92 \x0bW\xdd\x0b|\xc0\xa7MV\xb7\x8e\x1dk\xc09\xff\xcc\x18\x88\xf7+\xc0Z\x17\xa5\x9e\x8e)Q\xc0\x8b\x01\x97\xfa*\xd7"\xc0\xc6l\x060\x9b\xe6\xf6\xbfhs\x8e\x9e:=\xfd?\xe8g\xeb\xa1\xfaw\xd2\xbf\x80F\x15\xe8\'\x81\xd0?\nu\xdf\xc3\xf7\x8da\xc0x`\x84\xdcj(A@\x00\xadeMQ\x03\x05\xc0A\x19\x88\xe0._\xa3@'
p20
tp21
bsS'_reg_coeff'
p22
F1e-05
sb.
Loading

0 comments on commit 45ed7c1

Please sign in to comment.