Commit a38d013
Unpack archives
1 parent f21d672
File tree: 277 files changed (+48291 / -0 lines)

course1/Bandits/.ipynb_checkpoints/Assignment1-checkpoint.ipynb (+1,615 lines; large diff not rendered)
course1/Bandits/Assignment1.ipynb (+1,801 lines; large diff not rendered)
3 binary files not shown.

course1/Bandits/dependencies.tar.gz (5.5 KB; binary file not shown)
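The commit message says the archives were unpacked into the tree. The exact commands used are not recorded here; as a rough sketch, Python's standard tarfile module could perform the same extraction (the destination path below is an assumption based on the file listing above):

import tarfile

# Extract dependencies.tar.gz alongside the assignment notebook
# (source and destination paths assumed from the listing above).
with tarfile.open("course1/Bandits/dependencies.tar.gz", "r:gz") as archive:
    archive.extractall(path="course1/Bandits")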
+82 lines added
@@ -0,0 +1,82 @@
#!/usr/bin/env python

from rlglue.agent import BaseAgent

import numpy as np


class Agent(BaseAgent):
    """agent does *no* learning, selects a random action on every step"""
    def __init__(self):
        self.last_action = None
        self.num_actions = None
        self.q_values = None
        self.step_size = None
        self.epsilon = None
        self.initial_value = 0.0
        self.arm_count = [0.0 for _ in range(10)]

    def agent_init(self, agent_info={}):
        """Setup for the agent called when the experiment first starts."""

        # if "actions" in agent_info:
        #     self.num_actions = agent_info["actions"]

        # if "state_array" in agent_info:
        #     self.q_values = agent_info["state_array"]

        self.num_actions = agent_info.get("num_actions", 2)
        self.initial_value = agent_info.get("initial_value", 0.0)
        self.q_values = np.ones(agent_info.get("num_actions", 2)) * self.initial_value
        self.step_size = agent_info.get("step_size", 0.1)
        self.epsilon = agent_info.get("epsilon", 0.0)

        self.last_action = 0

    def agent_start(self, observation):
        """The first method called when the experiment starts, called after
        the environment starts.
        Args:
            observation (Numpy array): the state observation from the
                environment's env_start function.
        Returns:
            The first action the agent takes.
        """
        self.last_action = np.random.choice(self.num_actions)  # pick a random first action

        return self.last_action

    def agent_step(self, reward, observation):
        """A step taken by the agent.
        Args:
            reward (float): the reward received for taking the last action
            observation (Numpy array): the state observation from the
                environment's step function, where the agent ended up after
                the last step
        Returns:
            The action the agent is taking.
        """
        # local_action = 0  # choose the action here
        self.last_action = np.random.choice(self.num_actions)

        return self.last_action

    def agent_end(self, reward):
        """Run when the agent terminates.
        Args:
            reward (float): the reward the agent received for entering the
                terminal state.
        """
        pass

    def agent_cleanup(self):
        """Cleanup done after the agent ends."""
        pass

    def agent_message(self, message):
        """A function used to pass information from the agent to the experiment.
        Args:
            message: The message passed to the agent.
        Returns:
            The response (or answer) to the message.
        """
        pass
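For orientation, here is a minimal sketch (not part of the commit) of how this starter agent can be driven through the agent_init / agent_start / agent_step calls it defines. The import name and the 10-armed Gaussian reward model are assumptions for illustration only; the commit's actual environment and experiment code are not shown in this excerpt.

import numpy as np
from main_agent import Agent   # hypothetical module name for the file above

agent = Agent()
agent.agent_init({"num_actions": 10, "epsilon": 0.1, "step_size": 0.1})

arm_means = np.random.randn(10)            # assumed stand-in bandit problem
action = agent.agent_start(observation=0)  # bandits have a single dummy observation

total_reward = 0.0
for _ in range(1000):
    reward = arm_means[action] + np.random.randn()   # assumed reward model
    total_reward += reward
    action = agent.agent_step(reward, observation=0)

print("average reward:", total_reward / 1000)

Because the starter agent picks arms uniformly at random, the average reward converges to the mean of arm_means rather than to the value of the best arm.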
@@ -0,0 +1,66 @@
#!/usr/bin/env python

"""An abstract class that specifies the Agent API for RL-Glue-py.
"""

from __future__ import print_function
from abc import ABCMeta, abstractmethod


class BaseAgent:
    """Implements the agent for an RL-Glue environment.
    Note:
        agent_init, agent_start, agent_step, agent_end, agent_cleanup, and
        agent_message are required methods.
    """

    __metaclass__ = ABCMeta

    def __init__(self):
        pass

    @abstractmethod
    def agent_init(self, agent_info={}):
        """Setup for the agent called when the experiment first starts."""

    @abstractmethod
    def agent_start(self, observation):
        """The first method called when the experiment starts, called after
        the environment starts.
        Args:
            observation (Numpy array): the state observation from the environment's env_start function.
        Returns:
            The first action the agent takes.
        """

    @abstractmethod
    def agent_step(self, reward, observation):
        """A step taken by the agent.
        Args:
            reward (float): the reward received for taking the last action
            observation (Numpy array): the state observation from the
                environment's step function, where the agent ended up after
                the last step
        Returns:
            The action the agent is taking.
        """

    @abstractmethod
    def agent_end(self, reward):
        """Run when the agent terminates.
        Args:
            reward (float): the reward the agent received for entering the terminal state.
        """

    @abstractmethod
    def agent_cleanup(self):
        """Cleanup done after the agent ends."""

    @abstractmethod
    def agent_message(self, message):
        """A function used to pass information from the agent to the experiment.
        Args:
            message: The message passed to the agent.
        Returns:
            The response (or answer) to the message.
        """
@@ -0,0 +1,70 @@
#!/usr/bin/env python

"""Abstract environment base class for RL-Glue-py.
"""

from __future__ import print_function

from abc import ABCMeta, abstractmethod


class BaseEnvironment:
    """Implements the environment for an RL-Glue experiment.

    Note:
        env_init, env_start, env_step, env_cleanup, and env_message are required
        methods.
    """

    __metaclass__ = ABCMeta

    def __init__(self):
        reward = None
        observation = None
        termination = None
        self.reward_obs_term = (reward, observation, termination)

    @abstractmethod
    def env_init(self, env_info={}):
        """Setup for the environment called when the experiment first starts.

        Note:
            Initialize a tuple with the reward, first state observation, and a
            boolean indicating whether the state is terminal.
        """

    @abstractmethod
    def env_start(self):
        """The first method called when the experiment starts, called before the
        agent starts.

        Returns:
            The first state observation from the environment.
        """

    @abstractmethod
    def env_step(self, action):
        """A step taken by the environment.

        Args:
            action: The action taken by the agent

        Returns:
            (float, state, Boolean): a tuple of the reward, state observation,
                and boolean indicating if it's terminal.
        """

    @abstractmethod
    def env_cleanup(self):
        """Cleanup done after the environment ends"""

    @abstractmethod
    def env_message(self, message):
        """A message asking the environment for information

        Args:
            message: the message passed to the environment

        Returns:
            the response (or answer) to the message
        """
