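Update tutorial: seed the network, drop the explicit LIF neuron type, refresh GUI layout configs, and remove debug prints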

pblouw · pblouw · commit 75cd65153fd4 · 2019-02-13T14:11:27.000-08:00
diff --git a/Reinforcement Learning in Nengo.ipynb b/Reinforcement Learning in Nengo.ipynb
@@ -84,7 +84,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 3,
    "metadata": {
     "slideshow": {
      "slide_type": "subslide"
@@ -134,7 +134,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -153,11 +153,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
-    "with nengo.Network() as model:\n",
+    "with nengo.Network(seed=2) as model:\n",
     "    env = td_grid.GridNode(environment, dt=0.001)\n",
     "    \n",
     "    # define nodes for plotting data, managing agent's interface with environment\n",
@@ -171,7 +171,7 @@
     "                           intercepts=nengo.dists.Choice([0.15]), radius=2)\n",
     "    \n",
     "    # define neurons that compute the learning signal\n",
-    "    learn_signal = nengo.Ensemble(n_neurons=1000, dimensions=4, neuron_type=nengo.LIF())\n",
+    "    learn_signal = nengo.Ensemble(n_neurons=1000, dimensions=4)\n",
     "                 \n",
     "    # connect the sensor to state ensemble\n",
     "    nengo.Connection(sensor_node, state, synapse=None)\n",
@@ -197,7 +197,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 6,
    "metadata": {
     "slideshow": {
      "slide_type": "subslide"
@@ -208,9 +208,9 @@
      "data": {
       "text/html": [
        "\n",
-       "                <div id=\"0f6d33cf-7e1f-457b-9444-f1e3f778dd12\">\n",
+       "                <div id=\"70bbc289-1bbc-4cfc-bd1f-4bc3db75e00f\">\n",
        "                    <iframe\n",
-       "                        src=\"http://localhost:60833/?token=90d6ab7ca7678f0607c082d335727f5c53810a699d4bcc03\"\n",
+       "                        src=\"http://localhost:53191/?token=8eb3f65da8a76e6cf31820ef8613cda506f3eeefb3632442\"\n",
        "                        width=\"100%\"\n",
        "                        height=\"600\"\n",
        "                        frameborder=\"0\"\n",
@@ -255,7 +255,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -280,7 +280,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -318,7 +318,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 9,
    "metadata": {
     "slideshow": {
      "slide_type": "subslide"
@@ -365,7 +365,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 10,
    "metadata": {
     "slideshow": {
      "slide_type": "subslide"
@@ -376,9 +376,9 @@
      "data": {
       "text/html": [
        "\n",
-       "                <div id=\"392796a8-b128-421f-920a-a6ecb6efdadb\">\n",
+       "                <div id=\"bf33c426-004c-4dac-a9bc-93ae7b00137b\">\n",
        "                    <iframe\n",
-       "                        src=\"http://localhost:61025/?token=441e808a24bcd81a6497f66689838f3785b6ead1e0d41334\"\n",
+       "                        src=\"http://localhost:53264/?token=3cd1018bbdcfa030679f51c0bde3a6a54ac1001f9101ec8c\"\n",
        "                        width=\"100%\"\n",
        "                        height=\"600\"\n",
        "                        frameborder=\"0\"\n",
diff --git a/configs/default.py.cfg b/configs/default.py.cfg
@@ -22,8 +22,8 @@ _viz_config[_viz_1].height = 0.40030392521829083
 _viz_config[_viz_1].label_visible = True
 _viz_2 = nengo_gui.components.Raster(learn_signal)
 _viz_config[_viz_2].n_neurons = 100
-_viz_config[_viz_2].x = 0.9562372057490022
-_viz_config[_viz_2].y = 0.261286048718174
+_viz_config[_viz_2].x = 0.9151104251757648
+_viz_config[_viz_2].y = 0.22700986118226302
 _viz_config[_viz_2].width = 0.2455550152884971
 _viz_config[_viz_2].height = 0.35734453744254163
 _viz_config[_viz_2].label_visible = True
@@ -45,8 +45,8 @@ _viz_config[_viz_4].width = 0.555808142243912
 _viz_config[_viz_4].height = 0.5104425418379376
 _viz_config[_viz_4].label_visible = True
 _viz_8 = nengo_gui.components.SpikeGrid(state)
-_viz_config[_viz_8].x = 1.5088625183590774
-_viz_config[_viz_8].y = 0.18595803714975423
+_viz_config[_viz_8].x = 1.4912367552562615
+_viz_config[_viz_8].y = 0.15168184961384323
 _viz_config[_viz_8].width = 0.2552559706112676
 _viz_config[_viz_8].height = 0.3554230847816174
 _viz_config[_viz_8].label_visible = True
@@ -64,13 +64,13 @@ _viz_config[env].pos=(2.0688976229564138, 0.3195690695620929)
 _viz_config[env].size=(0.1, 0.1)
 _viz_config[learn_signal].pos=(0.8142521273436952, 0.8820053350648869)
 _viz_config[learn_signal].size=(0.1, 0.1)
-_viz_config[model].pos=(1.0815485124596864, 1.1291685346823868)
-_viz_config[model].size=(0.3459459281303725, 0.3459459281303725)
+_viz_config[model].pos=(1.3653844099521146, 1.336242672340802)
+_viz_config[model].size=(0.30168332401368336, 0.30168332401368336)
 _viz_config[model].expanded=True
 _viz_config[model].has_layout=True
 _viz_config[qvalue_node].pos=(1.5219768231749249, 0.9078091928089596)
 _viz_config[qvalue_node].size=(0.1, 0.1)
-_viz_config[reward_node].pos=(-0.8019821637475498, 1.8231801626252788)
+_viz_config[reward_node].pos=(-0.5682603312530061, 2.485424050468169)
 _viz_config[reward_node].size=(0.1, 0.1)
 _viz_config[sensor_node].pos=(0.8029007404374915, 1.336993998970692)
 _viz_config[sensor_node].size=(0.1, 0.12514175766245555)
diff --git a/configs/learning6-value.py.cfg b/configs/learning6-value.py.cfg
@@ -1,41 +1,30 @@
 _viz_0 = nengo_gui.components.HTMLView(env)
-_viz_config[_viz_0].x = 0.779824356063676
-_viz_config[_viz_0].y = 0.2994841745479973
-_viz_config[_viz_0].width = 0.13850415512465375
-_viz_config[_viz_0].height = 0.18083182640144665
+_viz_config[_viz_0].x = 0.8774793077580895
+_viz_config[_viz_0].y = 0.42583851046818516
+_viz_config[_viz_0].width = 0.18482451594727958
+_viz_config[_viz_0].height = 0.2415716648986474
 _viz_config[_viz_0].label_visible = True
 _viz_1 = nengo_gui.components.XYValue(state)
 _viz_config[_viz_1].max_value = 1
 _viz_config[_viz_1].min_value = -1
 _viz_config[_viz_1].index_x = 0
 _viz_config[_viz_1].index_y = 1
-_viz_config[_viz_1].x = 0.10813922988152525
-_viz_config[_viz_1].y = 0.7138886698485432
-_viz_config[_viz_1].width = 0.13850415512465375
-_viz_config[_viz_1].height = 0.18083182640144665
+_viz_config[_viz_1].x = 0.02613411018942154
+_viz_config[_viz_1].y = 0.7770658378086368
+_viz_config[_viz_1].width = 0.1679226973847849
+_viz_config[_viz_1].height = 0.3047488328587414
 _viz_config[_viz_1].label_visible = True
-_viz_2 = nengo_gui.components.Value(reward)
-_viz_config[_viz_2].max_value = 10
-_viz_config[_viz_2].min_value = -10
+_viz_2 = nengo_gui.components.Value(value)
+_viz_config[_viz_2].max_value = 3
+_viz_config[_viz_2].min_value = -3
 _viz_config[_viz_2].show_legend = False
 _viz_config[_viz_2].legend_labels = ['label_0']
 _viz_config[_viz_2].synapse = 0.01
-_viz_config[_viz_2].x = 0.3803227744877454
-_viz_config[_viz_2].y = 0.7064717582566888
-_viz_config[_viz_2].width = 0.10964955033075563
-_viz_config[_viz_2].height = 0.16543033364861992
+_viz_config[_viz_2].x = 0.3151722259100631
+_viz_config[_viz_2].y = 0.8123759102576043
+_viz_config[_viz_2].width = 0.12519865601847896
+_viz_config[_viz_2].height = 0.24298910753882308
 _viz_config[_viz_2].label_visible = True
-_viz_3 = nengo_gui.components.Value(value)
-_viz_config[_viz_3].max_value = 1.5
-_viz_config[_viz_3].min_value = -1.5
-_viz_config[_viz_3].show_legend = False
-_viz_config[_viz_3].legend_labels = ['label_0']
-_viz_config[_viz_3].synapse = 0.01
-_viz_config[_viz_3].x = 0.8016667532806668
-_viz_config[_viz_3].y = 0.7106255243594504
-_viz_config[_viz_3].width = 0.13850415512465375
-_viz_config[_viz_3].height = 0.18083182640144665
-_viz_config[_viz_3].label_visible = True
 _viz_net_graph = nengo_gui.components.NetGraph()
 _viz_progress = nengo_gui.components.Progress()
 _viz_config[_viz_progress].x = 0
@@ -48,21 +37,21 @@ _viz_config[_viz_sim_control].shown_time = 0.5
 _viz_config[_viz_sim_control].kept_time = 4.0
 _viz_config[env].pos=(0.49863439098327234, 1.2535465790590687)
 _viz_config[env].size=(0.29494106379918655, 0.05045044512354506)
-_viz_config[model].pos=(0.0634570655505291, -0.08615261133288868)
-_viz_config[model].size=(0.8954611734951313, 0.8954611734951313)
+_viz_config[model].pos=(0.19267060726875407, 0.016160128715177136)
+_viz_config[model].size=(0.7858385976587554, 0.7858385976587554)
 _viz_config[model].expanded=True
 _viz_config[model].has_layout=True
-_viz_config[movement].pos=(0.8255734843067148, 1.0690267739030497)
-_viz_config[movement].size=(0.075181055478224, 0.050450445123545046)
-_viz_config[position].pos=(0.12604643094326723, 0.19045870165051)
-_viz_config[position].size=(0.0723440345167816, 0.05045044512354506)
-_viz_config[radar].pos=(0.4843601683374062, 1.071451576862655)
-_viz_config[radar].size=(0.09397631934778002, 0.06306305640443131)
-_viz_config[reward].pos=(0.3215221418227118, 0.4370894765073292)
-_viz_config[reward].size=(0.027346424993232085, 0.040376746672174134)
-_viz_config[state].pos=(0.3771058783570984, 0.1908731433935878)
+_viz_config[movement].pos=(0.15548512716379476, 0.24996069309428318)
+_viz_config[movement].size=(0.1, 0.1)
+_viz_config[position].pos=(0.5752419773806918, 1.0152372365661286)
+_viz_config[position].size=(0.1, 0.1)
+_viz_config[radar].pos=(0.1168014893583144, 0.37145651012735564)
+_viz_config[radar].size=(0.1, 0.1)
+_viz_config[reward].pos=(0.5452335256050307, 0.28120210217205893)
+_viz_config[reward].size=(0.1, 0.1)
+_viz_config[state].pos=(0.4759960334248104, 0.7624876369995743)
 _viz_config[state].size=(0.090430043145977, 0.06306305640443133)
-_viz_config[stim_radar].pos=(0.08183101895225513, 1.073876379822259)
-_viz_config[stim_radar].size=(0.075181055478224, 0.04560083920433565)
-_viz_config[value].pos=(0.49391319325690364, 0.437965664396405)
-_viz_config[value].size=(0.090430043145977, 0.06306305640443133)
+_viz_config[stim_radar].pos=(-0.05698205482833595, 0.3237229331568389)
+_viz_config[stim_radar].size=(0.1, 0.1)
+_viz_config[value].pos=(0.30774080340442267, 0.31389243854735405)
+_viz_config[value].size=(0.1, 0.1)
diff --git a/td_grid.py b/td_grid.py
@@ -533,9 +533,7 @@ def take_action(self, action_idx, epsilon=0.1):
             action_idx = np.random.choice(np.arange(4))    
         
         x_pos, y_pos = self.compute_position(action_idx)
-        print('TYPE')
         self.agent.set_position(x_pos, y_pos)
-        print('TEST')
 
         return action_idx
 
@@ -565,9 +563,7 @@ def step(self, t, x):
             
             qs = self.output[8:]
             idx = np.argmax(qs)
-            print('TEST')
             self.current_action_index = self.take_action(idx)
-            print('PASSED')
 
         # then on next step store new qvalues
         elif int(t * 1000) % self.stepsize == 1:
diff --git a/utils.py b/utils.py
@@ -102,4 +102,4 @@ def step(self, t, x):
             self.output = np.concatenate(
                 (c_output, f_output, qvalues))
             
-        return self.output
+        return self.output