Commit a38d013
Unpack archives
1 parent f21d672
File tree: 277 files changed (+48291 / -0 lines)

course1/Bandits/.ipynb_checkpoints/Assignment1-checkpoint.ipynb (+1,615 lines; large diff not rendered)
course1/Bandits/Assignment1.ipynb (+1,801 lines; large diff not rendered)
3 binary files not shown.

course1/Bandits/dependencies.tar.gz (5.5 KB; binary file not shown)
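The commit message says the archives were unpacked into the tree. The exact commands used are not recorded here; as a rough sketch, Python's standard tarfile module could perform the same extraction (the destination path below is an assumption based on the file listing above):

import tarfile

# Extract dependencies.tar.gz alongside the assignment notebook
# (source and destination paths assumed from the listing above).
with tarfile.open("course1/Bandits/dependencies.tar.gz", "r:gz") as archive:
    archive.extractall(path="course1/Bandits")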
+82 lines added
@@ -0,0 +1,82 @@
#!/usr/bin/env python

from rlglue.agent import BaseAgent

import numpy as np


class Agent(BaseAgent):
    """agent does *no* learning, selects a random action on every step"""
    def __init__(self):
        self.last_action = None
        self.num_actions = None
        self.q_values = None
        self.step_size = None
        self.epsilon = None
        self.initial_value = 0.0
        self.arm_count = [0.0 for _ in range(10)]

    def agent_init(self, agent_info={}):
        """Setup for the agent called when the experiment first starts."""

        # if "actions" in agent_info:
        #     self.num_actions = agent_info["actions"]

        # if "state_array" in agent_info:
        #     self.q_values = agent_info["state_array"]

        self.num_actions = agent_info.get("num_actions", 2)
        self.initial_value = agent_info.get("initial_value", 0.0)
        self.q_values = np.ones(agent_info.get("num_actions", 2)) * self.initial_value
        self.step_size = agent_info.get("step_size", 0.1)
        self.epsilon = agent_info.get("epsilon", 0.0)

        self.last_action = 0

    def agent_start(self, observation):
        """The first method called when the experiment starts, called after
        the environment starts.
        Args:
            observation (Numpy array): the state observation from the
                environment's env_start function.
        Returns:
            The first action the agent takes.
        """
        self.last_action = np.random.choice(self.num_actions)  # pick a random first action

        return self.last_action

    def agent_step(self, reward, observation):
        """A step taken by the agent.
        Args:
            reward (float): the reward received for taking the last action
            observation (Numpy array): the state observation from the
                environment's step function, where the agent ended up after
                the last step
        Returns:
            The action the agent is taking.
        """
        # local_action = 0  # choose the action here
        self.last_action = np.random.choice(self.num_actions)

        return self.last_action

    def agent_end(self, reward):
        """Run when the agent terminates.
        Args:
            reward (float): the reward the agent received for entering the
                terminal state.
        """
        pass

    def agent_cleanup(self):
        """Cleanup done after the agent ends."""
        pass

    def agent_message(self, message):
        """A function used to pass information from the agent to the experiment.
        Args:
            message: The message passed to the agent.
        Returns:
            The response (or answer) to the message.
        """
        pass
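For orientation, here is a minimal sketch (not part of the commit) of how this starter agent can be driven through the agent_init / agent_start / agent_step calls it defines. The import name and the 10-armed Gaussian reward model are assumptions for illustration only; the commit's actual environment and experiment code are not shown in this excerpt.

import numpy as np
from main_agent import Agent   # hypothetical module name for the file above

agent = Agent()
agent.agent_init({"num_actions": 10, "epsilon": 0.1, "step_size": 0.1})

arm_means = np.random.randn(10)            # assumed stand-in bandit problem
action = agent.agent_start(observation=0)  # bandits have a single dummy observation

total_reward = 0.0
for _ in range(1000):
    reward = arm_means[action] + np.random.randn()   # assumed reward model
    total_reward += reward
    action = agent.agent_step(reward, observation=0)

print("average reward:", total_reward / 1000)

Because the starter agent picks arms uniformly at random, the average reward converges to the mean of arm_means rather than to the value of the best arm.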
@@ -0,0 +1,66 @@
#!/usr/bin/env python

"""An abstract class that specifies the Agent API for RL-Glue-py.
"""

from __future__ import print_function
from abc import ABCMeta, abstractmethod


class BaseAgent:
    """Implements the agent for an RL-Glue environment.
    Note:
        agent_init, agent_start, agent_step, agent_end, agent_cleanup, and
        agent_message are required methods.
    """

    __metaclass__ = ABCMeta

    def __init__(self):
        pass

    @abstractmethod
    def agent_init(self, agent_info={}):
        """Setup for the agent called when the experiment first starts."""

    @abstractmethod
    def agent_start(self, observation):
        """The first method called when the experiment starts, called after
        the environment starts.
        Args:
            observation (Numpy array): the state observation from the environment's env_start function.
        Returns:
            The first action the agent takes.
        """

    @abstractmethod
    def agent_step(self, reward, observation):
        """A step taken by the agent.
        Args:
            reward (float): the reward received for taking the last action
            observation (Numpy array): the state observation from the
                environment's step function, where the agent ended up after
                the last step
        Returns:
            The action the agent is taking.
        """

    @abstractmethod
    def agent_end(self, reward):
        """Run when the agent terminates.
        Args:
            reward (float): the reward the agent received for entering the terminal state.
        """

    @abstractmethod
    def agent_cleanup(self):
        """Cleanup done after the agent ends."""

    @abstractmethod
    def agent_message(self, message):
        """A function used to pass information from the agent to the experiment.
        Args:
            message: The message passed to the agent.
        Returns:
            The response (or answer) to the message.
        """
@@ -0,0 +1,70 @@
#!/usr/bin/env python

"""Abstract environment base class for RL-Glue-py.
"""

from __future__ import print_function

from abc import ABCMeta, abstractmethod


class BaseEnvironment:
    """Implements the environment for an RL-Glue experiment.

    Note:
        env_init, env_start, env_step, env_cleanup, and env_message are required
        methods.
    """

    __metaclass__ = ABCMeta

    def __init__(self):
        reward = None
        observation = None
        termination = None
        self.reward_obs_term = (reward, observation, termination)

    @abstractmethod
    def env_init(self, env_info={}):
        """Setup for the environment called when the experiment first starts.

        Note:
            Initialize a tuple with the reward, first state observation, and a
            boolean indicating whether the state is terminal.
        """

    @abstractmethod
    def env_start(self):
        """The first method called when the experiment starts, called before the
        agent starts.

        Returns:
            The first state observation from the environment.
        """

    @abstractmethod
    def env_step(self, action):
        """A step taken by the environment.

        Args:
            action: The action taken by the agent

        Returns:
            (float, state, Boolean): a tuple of the reward, state observation,
                and boolean indicating if it's terminal.
        """

    @abstractmethod
    def env_cleanup(self):
        """Cleanup done after the environment ends"""

    @abstractmethod
    def env_message(self, message):
        """A message asking the environment for information

        Args:
            message: the message passed to the environment

        Returns:
            the response (or answer) to the message
        """
