diff --git a/examples/mpc/config_overrides/quadrotor_3D/mpc_acados_quadrotor_3D_tracking.yaml b/examples/mpc/config_overrides/quadrotor_3D/mpc_acados_quadrotor_3D_tracking.yaml index b6254f0a2..8c58f97ba 100644 --- a/examples/mpc/config_overrides/quadrotor_3D/mpc_acados_quadrotor_3D_tracking.yaml +++ b/examples/mpc/config_overrides/quadrotor_3D/mpc_acados_quadrotor_3D_tracking.yaml @@ -25,4 +25,5 @@ algo_config: randomize_prior_prop: False prior_prop_rand_info: null warmstart: True + warmstart_type: "tracking_goal" # "tracking_goal", "lqr", "ipopt" use_RTI: False diff --git a/examples/mpc/mpc_experiment.py b/examples/mpc/mpc_experiment.py index 5d69738a3..d35d962e2 100644 --- a/examples/mpc/mpc_experiment.py +++ b/examples/mpc/mpc_experiment.py @@ -64,11 +64,11 @@ def run(gui=False, plot=True, n_episodes=1, n_steps=None, save_data=False): def post_analysis(state_stack, input_stack, env): - '''Plots the input and states to determine iLQR's success. + '''Plots the input and states to determine MPC's success. Args: - state_stack (ndarray): The list of observations of iLQR in the latest run. - input_stack (ndarray): The list of inputs of iLQR in the latest run. + state_stack (ndarray): The list of observations of MPC in the latest run. + input_stack (ndarray): The list of inputs of MPC in the latest run. ''' model = env.symbolic stepsize = model.dt @@ -81,29 +81,33 @@ def post_analysis(state_stack, input_stack, env): reference = np.tile(reference.reshape(1, model.nx), (plot_length, 1)) # Plot states - fig, axs = plt.subplots(model.nx) + fig, axs = plt.subplots(model.nx, figsize=(6, 9), sharex=True) for k in range(model.nx): axs[k].plot(times, np.array(state_stack).transpose()[k, 0:plot_length], label='actual') axs[k].plot(times, reference.transpose()[k, 0:plot_length], color='r', label='desired') axs[k].set(ylabel=env.STATE_LABELS[k] + f'\n[{env.STATE_UNITS[k]}]') axs[k].yaxis.set_major_formatter(FormatStrFormatter('%.1f')) - if k != model.nx - 1: - axs[k].set_xticks([]) + axs[k].set_xlim(times[0], times[-1]) axs[0].set_title('State Trajectories') axs[-1].legend(ncol=3, bbox_transform=fig.transFigure, bbox_to_anchor=(1, 0), loc='lower right') axs[-1].set(xlabel='time (sec)') - + fig.tight_layout() # Plot inputs - _, axs = plt.subplots(model.nu) + fig, axs = plt.subplots(model.nu, sharex=True) if model.nu == 1: axs = [axs] for k in range(model.nu): axs[k].plot(times, np.array(input_stack).transpose()[k, 0:plot_length]) axs[k].set(ylabel=f'input {k}') axs[k].set(ylabel=env.ACTION_LABELS[k] + f'\n[{env.ACTION_UNITS[k]}]') - axs[k].yaxis.set_major_formatter(FormatStrFormatter('%.1f')) + axs[k].yaxis.set_major_formatter(FormatStrFormatter('%.2f')) + axs[k].set_ylim(0.8*env.physical_action_bounds[0][k], 1.2*env.physical_action_bounds[1][k]) + axs[k].axhline(env.physical_action_bounds[0][k], color="r", alpha=0.5, linestyle="--") + axs[k].axhline(env.physical_action_bounds[1][k], color="r", alpha=0.5, linestyle="--") + axs[k].set_xlim(times[0], times[-1]) axs[0].set_title('Input Trajectories') axs[-1].set(xlabel='time (sec)') + fig.tight_layout() plt.show() diff --git a/safe_control_gym/controllers/mpc/mpc.py b/safe_control_gym/controllers/mpc/mpc.py index 61287b92f..686ba1e90 100644 --- a/safe_control_gym/controllers/mpc/mpc.py +++ b/safe_control_gym/controllers/mpc/mpc.py @@ -216,6 +216,9 @@ def compute_initial_guess(self, init_state, goal_states=None): u = self.lqr_gain @ (x_guess[:, i] - goal_states[:, i]) + np.atleast_2d(self.model.U_EQ)[0, :].T u_guess[:, i] = u x_guess[:, i + 1, None] = self.dynamics_func(x0=x_guess[:, i], p=u)['xf'].toarray() + elif self.compute_initial_guess_method == 'tracking_goal': + x_guess = goal_states + u_guess = np.repeat(np.atleast_2d(self.U_EQ), self.T, axis=0).T else: raise Exception('Initial guess method not implemented.') @@ -223,7 +226,7 @@ def compute_initial_guess(self, init_state, goal_states=None): self.u_prev = u_guess # set the solver back - self.setup_optimizer(solver=self.solver) + self.setup_optimizer(solver=self.solver) # TODO why? this method is also called from mpc_acados! at least move to line 209! return x_guess, u_guess diff --git a/safe_control_gym/controllers/mpc/mpc_acados.py b/safe_control_gym/controllers/mpc/mpc_acados.py index 83d4b47a2..8677e8621 100644 --- a/safe_control_gym/controllers/mpc/mpc_acados.py +++ b/safe_control_gym/controllers/mpc/mpc_acados.py @@ -13,6 +13,7 @@ try: from acados_template import AcadosModel, AcadosOcp, AcadosOcpSolver + from acados_template.mpc_utils import detect_constraint_structure except ImportError as e: print(colored(f'Error: {e}', 'red')) print(colored('acados not installed, cannot use acados-based controller. Exiting.', 'red')) @@ -44,6 +45,7 @@ def __init__( seed: int = 0, use_RTI: bool = False, use_lqr_gain_and_terminal_cost: bool = False, + warmstart_type: str = "ipopt", **kwargs ): '''Creates task and controller. @@ -68,6 +70,7 @@ def __init__( for k, v in locals().items(): if k != 'self' and k != 'kwargs' and '__' not in k: self.__dict__.update({k: v}) + assert (warmstart_type == "ipopt" or warmstart_type=="tracking_goal" or warmstart_type == "lqr") super().__init__( env_func, horizon=horizon, @@ -80,7 +83,7 @@ def __init__( constraint_tol=constraint_tol, output_dir=output_dir, additional_constraints=additional_constraints, - # compute_initial_guess_method='ipopt', # use ipopt initial guess by default + compute_initial_guess_method=warmstart_type, use_lqr_gain_and_terminal_cost=use_lqr_gain_and_terminal_cost, use_gpu=use_gpu, seed=seed, @@ -88,6 +91,7 @@ def __init__( ) # acados settings self.use_RTI = use_RTI + self.ocp_solver_exist = False @timing def reset(self): @@ -96,14 +100,16 @@ def reset(self): self.x_guess = None self.u_guess = None super().reset() - self.acados_model = None - self.ocp = None - self.acados_ocp_solver = None - # Dynamics model. - self.setup_acados_model() - # Acados optimizer. - self.setup_acados_optimizer() - self.acados_ocp_solver = AcadosOcpSolver(self.ocp) + if not self.ocp_solver_exist: + self.ocp_solver_exist = True + self.acados_model = None + self.ocp = None + self.acados_ocp_solver = None + # Dynamics model. + self.setup_acados_model() + # Acados optimizer. + self.setup_acados_optimizer() + self.acados_ocp_solver = AcadosOcpSolver(self.ocp) def setup_acados_model(self) -> AcadosModel: '''Sets up symbolic model for acados. @@ -123,14 +129,7 @@ def setup_acados_model(self) -> AcadosModel: acados_model.u = self.model.u_sym acados_model.name = self.env.NAME - # set up rk4 (acados need symbolic expression of dynamics, not function) - k1 = self.model.fc_func(acados_model.x, acados_model.u) - k2 = self.model.fc_func(acados_model.x + self.dt / 2 * k1, acados_model.u) - k3 = self.model.fc_func(acados_model.x + self.dt / 2 * k2, acados_model.u) - k4 = self.model.fc_func(acados_model.x + self.dt * k3, acados_model.u) - f_disc = acados_model.x + self.dt / 6 * (k1 + 2 * k2 + 2 * k3 + k4) - - acados_model.disc_dyn_expr = f_disc + acados_model.f_expl_expr = self.model.fc_func(acados_model.x, acados_model.u) # store meta information # NOTE: unit is missing acados_model.x_labels = self.env.STATE_LABELS @@ -162,12 +161,12 @@ def setup_acados_optimizer(self): ocp.model = self.acados_model # set dimensions - ocp.dims.N = self.T # prediction horizon + ocp.solver_options.N_horizon = self.T # prediction horizon # set cost (NOTE: safe-control-gym uses quadratic cost) ocp.cost.cost_type = 'LINEAR_LS' ocp.cost.cost_type_e = 'LINEAR_LS' - ocp.cost.W = scipy.linalg.block_diag(self.Q, self.R) + ocp.cost.W = scipy.linalg.block_diag(self.Q/self.dt, self.R/self.dt) # scaling by dt as Q, R are given at discrete time ocp.cost.W_e = self.Q if not self.use_lqr_gain_and_terminal_cost else self.P ocp.cost.Vx = np.zeros((ny, nx)) ocp.cost.Vx[:nx, :nx] = np.eye(nx) @@ -223,11 +222,12 @@ def setup_acados_optimizer(self): # set up solver options ocp.solver_options.qp_solver = 'PARTIAL_CONDENSING_HPIPM' ocp.solver_options.hessian_approx = 'GAUSS_NEWTON' - ocp.solver_options.integrator_type = 'DISCRETE' + ocp.solver_options.integrator_type = 'ERK' ocp.solver_options.nlp_solver_type = 'SQP' if not self.use_RTI else 'SQP_RTI' - ocp.solver_options.nlp_solver_max_iter = 25 if not self.use_RTI else 1 - ocp.solver_options.globalization = 'MERIT_BACKTRACKING' - # ocp.solver_options.globalization = 'FUNNEL_L1PEN_LINESEARCH' if not self.use_RTI else 'MERIT_BACKTRACKING' + ocp.solver_options.nlp_solver_max_iter = 200 + # toggle globalization only if convergence problems are encountered. + # if not self.use_RTI: + # ocp.solver_options.globalization = 'FUNNEL_L1PEN_LINESEARCH' # 'MERIT_BACKTRACKING' ocp.solver_options.tf = self.T * self.dt # prediction horizon # c code generation @@ -260,38 +260,120 @@ def processing_acados_constraints_expression(self, ocp.constraints.ubu = self.env.constraints.input_constraints[0].upper_bounds ocp.constraints.idxbu = idxbu # active constraints dimension ''' - - ub = {'h': set_acados_constraint_bound(h_expr, 'ub', self.constraint_tol), - 'h0': set_acados_constraint_bound(h0_expr, 'ub', self.constraint_tol), - 'he': set_acados_constraint_bound(he_expr, 'ub', self.constraint_tol), } - - lb = {'h': set_acados_constraint_bound(h_expr, 'lb'), - 'h0': set_acados_constraint_bound(h0_expr, 'lb'), - 'he': set_acados_constraint_bound(he_expr, 'lb'), } - - # make sure all the ub and lb are 1D numpy arrays - # (see: https://discourse.acados.org/t/infeasible-qps-when-using-nonlinear-casadi-constraint-expressions/1595/5?u=mxche) - for key in ub.keys(): - ub[key] = ub[key].flatten() if ub[key].ndim != 1 else ub[key] - lb[key] = lb[key].flatten() if lb[key].ndim != 1 else lb[key] - # check ub and lb dimensions - for key in ub.keys(): - assert ub[key].ndim == 1, f'ub[{key}] is not 1D numpy array' - assert lb[key].ndim == 1, f'lb[{key}] is not 1D numpy array' - assert ub['h'].shape == lb['h'].shape, 'h_ub and h_lb have different shapes' - - # pass the constraints to the ocp object - ocp.model.con_h_expr_0, ocp.model.con_h_expr, ocp.model.con_h_expr_e = \ - h0_expr, h_expr, he_expr - ocp.dims.nh_0, ocp.dims.nh, ocp.dims.nh_e = \ - h0_expr.shape[0], h_expr.shape[0], he_expr.shape[0] - # assign constraints upper and lower bounds - ocp.constraints.uh_0 = ub['h0'] - ocp.constraints.lh_0 = lb['h0'] - ocp.constraints.uh = ub['h'] - ocp.constraints.lh = lb['h'] - ocp.constraints.uh_e = ub['he'] - ocp.constraints.lh_e = lb['he'] + ub = {} + lb = {} + # Check if given constraints are double-sided with heuristics based on the constraint jacobian: + h_list = [h0_expr, h_expr, he_expr] + stage_type = ["initial", "path", "terminal"] + for h_s, s_type in zip(h_list, stage_type): + jac_h_expr = cs.jacobian(h_s, cs.vertcat(ocp.model.x, ocp.model.u)) + jac_h_fn = cs.Function("jac_h_fn", [ocp.model.x, ocp.model.u], [jac_h_expr]) + jac_eval = jac_h_fn(0, 0).full() + + # Check jacobian by blocks: + nx = ocp.model.x.shape[0] + nu = ocp.model.u.shape[0] + if jac_eval.shape[0] == nx * 2: + if np.all(jac_eval[:nx, :nx] == -jac_eval[nx:, :nx]): + h_double_bounded = True + case = 1 + elif jac_eval.shape[0] == nu * 2: + if np.all(jac_eval[:nu, nx:] == -jac_eval[nu:, nx:]): + h_double_bounded = True + case = 2 + elif jac_eval.shape[0] == nx * 2 + nu * 2: + if np.all(jac_eval[:nx, :nx] == -jac_eval[nx:2*nx, :nx]) and \ + np.all (jac_eval[nx*2:nx*2+nu, nx:] == -jac_eval[nx*2+nu:, nx:]): + h_double_bounded = True + case = 3 + else: + h_double_bounded = False + + if h_double_bounded: + h_fn = cs.Function("h_fn", [ocp.model.x, ocp.model.u], [h_s]) + if all(jac_eval[0, :] >= 0): + start_with_ub = True + else: + start_with_ub = False + if case == 1: + if start_with_ub: + uh = np.array(-h_fn(0,0)[:nx]) + lh = np.array(h_fn(0,0)[nx:]) + h_s_short = h_s[:nx] + uh + else: + lh = np.array(h_fn(0,0)[:nx]) + uh = np.array(-h_fn(0,0)[nx:]) + h_s_short = h_s[:nx] - lh + elif case == 2: + if start_with_ub: + uh = np.array(-h_fn(0,0)[:nu]) + lh = np.array(h_fn(0,0)[nu:]) + h_s_short = h_s[:nu] + uh + else: + lh = np.array(h_fn(0,0)[:nu]) + uh = np.array(-h_fn(0,0)[nu:]) + h_s_short = h_s[:nu] - lh + elif case == 3: + if start_with_ub: + uh = np.concatenate([-h_fn(0,0)[:nx], -h_fn(0,0)[nx*2:nx*2+nu]]) + lh = np.concatenate([h_fn(0,0)[nx:nx*2], h_fn(0,0)[nx*2+nu:]]) + h_s_short = cs.vertcat(h_s[:nx], h_s[2*nx:nx*2+nu]) + uh + else: + lh = np.concatenate([h_fn(0,0)[:nx], h_fn(0,0)[nx*2:nx*2+nu]]) + uh = np.concatenate([-h_fn(0,0)[nx:nx*2], -h_fn(0,0)[nx*2+nu:]]) + h_s_short = cs.vertcat(h_s[:nx], h_s[2*nx:nx*2+nu]) - lh + if s_type == "initial": + ocp.model.con_h_expr_0 = -h_s_short + ocp.constraints.lh_0 = lh + ocp.constraints.uh_0 = uh + elif s_type == "path": + ocp.model.con_h_expr = -h_s_short + ocp.constraints.lh = lh + ocp.constraints.uh = uh + elif s_type == "terminal": + ocp.model.con_h_expr_e = -h_s_short + ocp.constraints.lh_e = lh + ocp.constraints.uh_e = uh + detect_constraint_structure(ocp.model, ocp.constraints, stage_type=s_type) + else: + if s_type == "initial": + ub.update({'h0': set_acados_constraint_bound(h0_expr, 'ub', self.constraint_tol)}) + lb.update({'h0': set_acados_constraint_bound(h0_expr, 'lb')}) + elif s_type == "path": + ub.update({'h': set_acados_constraint_bound(h_expr, 'ub', self.constraint_tol)}) + lb.update({'h': set_acados_constraint_bound(h_expr, 'lb')}) + elif s_type == "terminal": + ub.update({'he': set_acados_constraint_bound(he_expr, 'ub', self.constraint_tol)}) + lb.update({'he': set_acados_constraint_bound(he_expr, 'lb')}) + + if ub != {}: + # make sure all the ub and lb are 1D numpy arrays + # (see: https://discourse.acados.org/t/infeasible-qps-when-using-nonlinear-casadi-constraint-expressions/1595/5?u=mxche) + for key in ub.keys(): + ub[key] = ub[key].flatten() if ub[key].ndim != 1 else ub[key] + lb[key] = lb[key].flatten() if lb[key].ndim != 1 else lb[key] + # check ub and lb dimensions + for key in ub.keys(): + assert ub[key].ndim == 1, f'ub[{key}] is not 1D numpy array' + assert lb[key].ndim == 1, f'lb[{key}] is not 1D numpy array' + assert ub['h'].shape == lb['h'].shape, 'h_ub and h_lb have different shapes' + # update acados ocp constraints + for key in ub.keys(): + if key == 'h0': + ocp.model.con_h_expr_0 = h0_expr + ocp.dims.nh_0 = h0_expr.shape[0] + ocp.constraints.uh_0 = ub['h0'] + ocp.constraints.lh_0 = lb['h0'] + elif key == 'h': + ocp.model.con_h_expr = h_expr + ocp.dims.nh = h_expr.shape[0] + ocp.constraints.uh = ub['h'] + ocp.constraints.lh = lb['h'] + elif key == 'he': + ocp.model.con_h_expr_e = he_expr + ocp.dims.nh_e = he_expr.shape[0] + ocp.constraints.uh_e = ub['he'] + ocp.constraints.lh_e = lb['he'] return ocp @@ -319,17 +401,17 @@ def select_action(self, # has a built-in warm-starting mechanism. if self.warmstart: if self.x_guess is None or self.u_guess is None: - # compute initial guess with IPOPT + # compute initial guess with the method specified in 'warmstart_type' self.compute_initial_guess(obs) - for idx in range(self.T + 1): - init_x = self.x_guess[:, idx] - self.acados_ocp_solver.set(idx, 'x', init_x) - for idx in range(self.T): - if nu == 1: - init_u = np.array([self.u_guess[idx]]) - else: - init_u = self.u_guess[:, idx] - self.acados_ocp_solver.set(idx, 'u', init_u) + for idx in range(self.T + 1): + init_x = self.x_guess[:, idx] + self.acados_ocp_solver.set(idx, 'x', init_x) + for idx in range(self.T): + if nu == 1: + init_u = np.array([self.u_guess[idx]]) + else: + init_u = self.u_guess[:, idx] + self.acados_ocp_solver.set(idx, 'u', init_u) # set reference for the control horizon goal_states = self.get_references() @@ -344,31 +426,14 @@ def select_action(self, self.acados_ocp_solver.set(self.T, 'yref', y_ref_e) # solve the optimization problem - if self.use_RTI: - # preparation phase - self.acados_ocp_solver.options_set('rti_phase', 1) - status = self.acados_ocp_solver.solve() - - # feedback phase - self.acados_ocp_solver.options_set('rti_phase', 2) - status = self.acados_ocp_solver.solve() - - if status not in [0, 2]: - self.acados_ocp_solver.print_statistics() - raise Exception(f'acados returned status {status}. Exiting.') - if status == 2: - print(f'acados returned status {status}. ') - - action = self.acados_ocp_solver.get(0, 'u') - - elif not self.use_RTI: - status = self.acados_ocp_solver.solve() - if status not in [0, 2]: - self.acados_ocp_solver.print_statistics() - raise Exception(f'acados returned status {status}. Exiting.') - if status == 2: - print(f'acados returned status {status}. ') - action = self.acados_ocp_solver.get(0, 'u') + status = self.acados_ocp_solver.solve() + if status not in [0, 2]: + self.acados_ocp_solver.print_statistics() + raise Exception(f'acados returned status {status}. Exiting.') + if status == 2: + self.acados_ocp_solver.print_statistics() + print(f'acados returned status {status}. ') + action = self.acados_ocp_solver.get(0, 'u') # get the open-loop solution if self.x_prev is None and self.u_prev is None: @@ -388,6 +453,7 @@ def select_action(self, self.results_dict['horizon_states'].append(deepcopy(self.x_prev)) self.results_dict['horizon_inputs'].append(deepcopy(self.u_prev)) self.results_dict['goal_states'].append(deepcopy(goal_states)) + self.results_dict['t_wall'].append(self.acados_ocp_solver.get_stats("time_tot")) self.prev_action = action if self.use_lqr_gain_and_terminal_cost: diff --git a/safe_control_gym/experiments/base_experiment.py b/safe_control_gym/experiments/base_experiment.py index d8f86c536..89e4f0aa1 100644 --- a/safe_control_gym/experiments/base_experiment.py +++ b/safe_control_gym/experiments/base_experiment.py @@ -409,10 +409,19 @@ def compute_metrics(self, data, verbose=False): 'average_constraint_violation': np.asarray(self.get_episode_constraint_violation_steps()).mean(), 'constraint_violation_std': np.asarray(self.get_episode_constraint_violation_steps()).std(), 'constraint_violation': np.asarray(self.get_episode_constraint_violation_steps()) if len(self.get_episode_constraint_violation_steps()) > 1 else self.get_episode_constraint_violation_steps()[0], + 'mean_t_wall_ms': np.asarray(self.get_t_wall()).mean()*1e3, + 'median_t_wall_ms': np.median(np.asarray(self.get_t_wall()))*1e3, + 'max_t_wall_ms': np.max(np.asarray(self.get_t_wall()))*1e3, # others ??? } return metrics + def get_t_wall(self): + try: + return self.data['controller_data'][0]['t_wall'][0] + except: + return np.nan + def get_episode_data(self, key, postprocess_func=lambda x: x): '''Extract data field from recorded trajectory data, optionally postprocess each episode data (e.g. get sum). diff --git a/safe_control_gym/math_and_models/symbolic_systems.py b/safe_control_gym/math_and_models/symbolic_systems.py index 0bcc50f70..0aae261c5 100644 --- a/safe_control_gym/math_and_models/symbolic_systems.py +++ b/safe_control_gym/math_and_models/symbolic_systems.py @@ -69,7 +69,7 @@ def setup_model(self): # Discrete time dynamics. self.fd_func = cs.integrator('fd', self.integration_algo, {'x': self.x_sym, 'p': self.u_sym, - 'ode': self.x_dot}, {'tf': self.dt} + 'ode': self.x_dot}, 0, self.dt ) # Observation model. self.g_func = cs.Function('g', [self.x_sym, self.u_sym], [self.y_sym], ['x', 'u'], ['g']) @@ -101,7 +101,7 @@ def setup_linearization(self): 'x': self.x_eval, 'p': cs.vertcat(self.u_eval, self.x_sym, self.u_sym), 'ode': self.x_dot_linear - }, {'tf': self.dt}) + }, 0, self.dt) # Linearized observation model. self.y_linear = self.y_sym + self.dgdx @ ( self.x_eval - self.x_sym) + self.dgdu @ (self.u_eval - self.u_sym) diff --git a/safe_control_gym/safety_filters/mpsc/linear_mpsc.py b/safe_control_gym/safety_filters/mpsc/linear_mpsc.py index 4f095a89e..e930d639f 100644 --- a/safe_control_gym/safety_filters/mpsc/linear_mpsc.py +++ b/safe_control_gym/safety_filters/mpsc/linear_mpsc.py @@ -101,7 +101,7 @@ def set_dynamics(self): 'x': delta_x, 'p': delta_u, 'ode': x_dot_lin_vec - }, {'tf': self.dt} + }, 0, self.dt ) self.dynamics_func = dynamics_func