%% Using the PILCO toolbox for solving the cart pole problem
clear all;
close all;
clc;
%% 0. Settings of the learning scenario
N_episode=10; % Number of episodes
flag.printing=1; % Printing optimization information: intensity 0-3
flag.plotting=2; % Plotting information: intensity 0-3
%% 1. Defining the system
% For each scenario, we need the following set of scenario-specific files:
% - settings.m: a file that contains scenario-specific settings and initializations
% - loss.m: the cost function
% - dynamics.m: a file that implements the ODE governing the dynamics
% - (optional) a visualization script
% First, the environment and the actual system are specified.
cp_settings; % Load settings for the cart-pole system
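% For reference, cp_settings is expected to define quantities such as (a hedged,
% non-exhaustive list based on the standard PILCO cart-pole scenario; names may
% differ in this variant):
%   dt, H          - sampling time and rollout horizon (number of time steps)
%   mu0, S0        - mean and covariance of the initial state distribution
%   plant, policy  - structures describing the dynamics interface and the controller
%   cost           - structure holding the saturating immediate cost function and its parameters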
%% 2. Initialization
% Here we conduct initial trials without a controller to gather data in
% terms of states, actions, and resulting responses. Trajectories have
% length H (see settings).
% When interacting with a real setup, this corresponds to experiments run
% with random/preliminary controllers.
x = []; % used to gather initial data
y = [];
initfiles = {'initdata_1.txt', 'initdata_2.txt', 'initdata_3.txt', 'initdata_4.txt'};
for i = 1:length(initfiles)
  % Read a recorded random-policy experiment from file (the initial condition of each
  % trial is drawn from a Gaussian distribution)
  [xx, yy, realCost{i}, latent{i}] = readArdData(initfiles{i}, dt, H, cost);
  x = [x; xx]; y = [y; yy]; % gather the obtained data into x and y
  if plotting.verbosity > 0 % visualization of the trajectory
    if ~ishandle(1)
      figure(1);
    else
      set(0,'CurrentFigure',1);
    end
    clf(1);
    cp_display_rollout; % plot the experiment (stored in xx, yy)
  end
end
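% Assumed data layout (hedged; readArdData is the authoritative source): each row of
% xx holds the state (and applied input) at one time step, and the corresponding row
% of yy holds the successor state, so that (x, y) form the input/target pairs for the
% GP dynamics model trained below.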
N_initial = length(initfiles);
J = N_initial; % used by the toolbox (offsets indices of the controlled trials past the initial data)
j = 1;         % episode counter; the loop below is commented out, so Sections 3-4 are run cell-by-cell and j is advanced manually at the end of Section 4
%% 3. Policy optimization
% for j = 1:N_episode % Note that the loop variable "j" is used in the "display_rollout" script
trainDynModel; % Step 1. train GP model of the process dynamics [off line]
% The script that takes care of training the GP executes the following high-level steps
% (a hedged sketch of steps a-c is given after this list):
% a. Extract the states and control inputs to be matched from x and y
% b. Define the training inputs and targets of the GP
% c. Train the GP
% d. Display the GP hyper-parameters, the learned noise hyper-parameters, and the signal-to-noise ratios. This information is very valuable for debugging purposes.
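% A hedged sketch of steps a-c, following the standard PILCO trainDynModel script
% (index vectors such as dyni/dyno, the number of controls Du, and trainOpt come
% from the settings and may differ in this variant):
%   dynmodel.inputs  = [x(:,dyni) x(:,end-Du+1:end)];    % GP inputs: selected state dims + controls
%   dynmodel.targets = y(:,dyno);                        % GP targets: successor-state dims
%   dynmodel = dynmodel.train(dynmodel, plant, trainOpt); % optimize the GP hyper-parameters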
learnPolicy; % Step 2. optimization of the policy based on the learnt model [off line, simulation phase]
% a. Learn the policy by calling minimize. This uses the whole
% inference toolchain to compute the gradients required by the
% minimization
% b. (optional) Plot overall optimization progress. (line search etc.)
% c. Long-term prediction of a state trajectory from p(x0) using the
% learned policy by calling pred. This prediction is equivalent to
% the last predicted trajectory during policy learning, i.e.,
% the predicted state trajectory that belongs to the learned controller.
% d. The predicted state trajectory is used to compute the corresponding
% distribution over immediate costs by calling calcCost. => stored
% in "fantasy"
% e. (optional) Plot the predicted immediate cost distribution as a
% function of the time steps.
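% A hedged sketch of the core calls inside learnPolicy (standard PILCO; the exact
% argument lists depend on the toolbox version):
%   [policy.p, fX3] = minimize(policy.p, 'value', opt, mu0Sim, S0Sim, ...
%                              dynmodel, policy, plant, cost, H);            % a. policy search
%   [M{j}, Sigma{j}] = pred(policy, plant, dynmodel, mu0Sim(:,1), S0Sim, H); % c. long-term prediction
%   [fantasy.mean{j}, fantasy.std{j}] = calcCost(cost, M{j}, Sigma{j});      % d. predicted cost distribution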
%
writePolicyArd(policy) % display the necessary vectors and matrices
% for applying the controller
%% 4. Execute policy, read and plot measurement results
filename = ['ardmeas/policydata_' num2str(j) '.txt'];
[xx, yy, realCost{j+J}, latent{j}] = readArdData(filename, dt, H,cost);
x = [x; xx]; y = [y; yy]; % Gathering obtained data into x and y
filename = ['pilco_realrobot_' num2str(j)]; save(filename);
% Step 3. apply controller to the system and gather data [on line, experiment phase]
% The specific steps of this generic script are
% (a) determine start state
% (b) generate rollout
%     i.   compute control signal u_t = π(x_t) [this uses the policy object]
%     ii.  simulate dynamics (or apply control to real robot) [this will use the function handle that describes the dynamics]
%     iii. transition to state x_{t+1} to continue the interaction
% This script also plots the response using figures already set up by the
% display scripts. Furthermore, it stores the whole trajectory
% information in a saved file.
% When interacting with a real setup, Step 3 means that a full experiment
% is run with the learned control policy. The rest of the steps remain
% unchanged.
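% A hedged sketch of the interaction loop described above (simplified from the
% standard PILCO rollout script; the state indexing and the exact policy call
% differ between versions):
%   state = start;                                                  % (a) start state
%   for t = 1:H                                                     % (b) generate rollout
%     u(t,:) = policy.fcn(policy, state(:), zeros(length(state)));  % i.  control signal π(x_t)
%     state  = simulate(state, u(t,:), plant);                      % ii./iii. apply dynamics, get x_{t+1}
%   end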
% Plotting the most recent trajectory (stored in xx, yy)
disp(['controlled trial # ' num2str(j)]);
if plotting.verbosity > 0 % visualization of the trajectory
  if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end
  clf(1);
  cp_display_rollout;
end
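% Compare the long-term prediction of the learned model with the measurement for two
% state dimensions (dimensions 1 and 4 of the state vector): M{j} is the predicted mean
% trajectory computed in learnPolicy, xx the measured trajectory of this trial.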
figure()
hold on
plot(M{j}(1,:))
plot(xx(:,1))
hold off
figure()
hold on
plot(M{j}(4,:))
plot(xx(:,4))
hold off
j = j + 1; % advance the episode counter before re-running Sections 3 and 4
%%
% end