diff --git a/examples/lqr/config_overrides/cartpole/cartpole_stabilization.yaml b/examples/lqr/config_overrides/cartpole/cartpole_stabilization.yaml index 0cbb7226e..eefdf5dca 100644 --- a/examples/lqr/config_overrides/cartpole/cartpole_stabilization.yaml +++ b/examples/lqr/config_overrides/cartpole/cartpole_stabilization.yaml @@ -30,4 +30,6 @@ task_config: episode_len_sec: 6 cost: quadratic + rew_state_weight: [1, 1, 1, 1] # Match LQR weights + rew_act_weight: [0.1] done_on_out_of_bound: True diff --git a/examples/lqr/config_overrides/cartpole/cartpole_tracking.yaml b/examples/lqr/config_overrides/cartpole/cartpole_tracking.yaml index 7eefd0fb0..11b956747 100644 --- a/examples/lqr/config_overrides/cartpole/cartpole_tracking.yaml +++ b/examples/lqr/config_overrides/cartpole/cartpole_tracking.yaml @@ -33,4 +33,6 @@ task_config: episode_len_sec: 6 cost: quadratic + rew_state_weight: [1, 0.1, 0.1, 0.1] # Match LQR weights + rew_act_weight: [0.1] done_on_out_of_bound: True diff --git a/examples/lqr/config_overrides/quadrotor_2D/quadrotor_2D_stabilization.yaml b/examples/lqr/config_overrides/quadrotor_2D/quadrotor_2D_stabilization.yaml index 134575710..8132fbca0 100644 --- a/examples/lqr/config_overrides/quadrotor_2D/quadrotor_2D_stabilization.yaml +++ b/examples/lqr/config_overrides/quadrotor_2D/quadrotor_2D_stabilization.yaml @@ -42,4 +42,6 @@ task_config: episode_len_sec: 6 cost: quadratic + rew_state_weight: [1, 1, 1, 1, 1, 1] # Match LQR weights + rew_act_weight: [0.1] done_on_out_of_bound: True diff --git a/examples/lqr/config_overrides/quadrotor_2D/quadrotor_2D_tracking.yaml b/examples/lqr/config_overrides/quadrotor_2D/quadrotor_2D_tracking.yaml index aa68b6912..8cf000795 100644 --- a/examples/lqr/config_overrides/quadrotor_2D/quadrotor_2D_tracking.yaml +++ b/examples/lqr/config_overrides/quadrotor_2D/quadrotor_2D_tracking.yaml @@ -45,4 +45,6 @@ task_config: episode_len_sec: 6 cost: quadratic + rew_state_weight: [1, 0.1, 1, 0.1, 0.1, 0.1] # Match LQR weights + rew_act_weight: [0.1] done_on_out_of_bound: True diff --git a/examples/lqr/config_overrides/quadrotor_3D/quadrotor_3D_stabilization.yaml b/examples/lqr/config_overrides/quadrotor_3D/quadrotor_3D_stabilization.yaml index 8ebffccb7..878726f3f 100644 --- a/examples/lqr/config_overrides/quadrotor_3D/quadrotor_3D_stabilization.yaml +++ b/examples/lqr/config_overrides/quadrotor_3D/quadrotor_3D_stabilization.yaml @@ -68,4 +68,7 @@ task_config: episode_len_sec: 6 cost: quadratic + # Match LQR weights + rew_state_weight: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + rew_act_weight: [0.1] done_on_out_of_bound: True diff --git a/examples/lqr/config_overrides/quadrotor_3D/quadrotor_3D_tracking.yaml b/examples/lqr/config_overrides/quadrotor_3D/quadrotor_3D_tracking.yaml index 4089e5359..42fab5a12 100644 --- a/examples/lqr/config_overrides/quadrotor_3D/quadrotor_3D_tracking.yaml +++ b/examples/lqr/config_overrides/quadrotor_3D/quadrotor_3D_tracking.yaml @@ -71,4 +71,7 @@ task_config: episode_len_sec: 6 cost: quadratic + # Match LQR weights + rew_state_weight: [1, 0.1, 1, 0.1, 1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1] + rew_act_weight: [0.1] done_on_out_of_bound: True diff --git a/examples/lqr/lqr_experiment.py b/examples/lqr/lqr_experiment.py index 4b6b46d86..e21807185 100644 --- a/examples/lqr/lqr_experiment.py +++ b/examples/lqr/lqr_experiment.py @@ -15,11 +15,12 @@ from safe_control_gym.utils.registration import make -def run(gui=True, n_episodes=1, n_steps=None, save_data=False): +def run(gui=False, plot=True, n_episodes=1, n_steps=None, save_data=False): '''The main function running LQR and iLQR experiments. Args: - gui (bool): Whether to display the gui and plot graphs. + gui (bool): Whether to display the gui. + plot (bool): Whether to plot graphs. n_episodes (int): The number of episodes to execute. n_steps (int): The total number of steps to execute. save_data (bool): Whether to save the collected experiment data. @@ -61,7 +62,7 @@ def run(gui=True, n_episodes=1, n_steps=None, save_data=False): else: trajs_data, _ = experiment.run_evaluation(training=True, n_steps=n_steps) - if gui: + if plot: post_analysis(trajs_data['obs'][0], trajs_data['action'][0], ctrl.env) # Close environments @@ -132,20 +133,5 @@ def post_analysis(state_stack, input_stack, env): plt.show() -def wrap2pi_vec(angle_vec): - '''Wraps a vector of angles between -pi and pi. - - Args: - angle_vec (ndarray): A vector of angles. - ''' - for k, angle in enumerate(angle_vec): - while angle > np.pi: - angle -= np.pi - while angle <= -np.pi: - angle += np.pi - angle_vec[k] = angle - return angle_vec - - if __name__ == '__main__': run() diff --git a/examples/mpc/config_overrides/cartpole/cartpole_stabilization.yaml b/examples/mpc/config_overrides/cartpole/cartpole_stabilization.yaml index b942acb4a..4e800fa31 100644 --- a/examples/mpc/config_overrides/cartpole/cartpole_stabilization.yaml +++ b/examples/mpc/config_overrides/cartpole/cartpole_stabilization.yaml @@ -30,6 +30,8 @@ task_config: episode_len_sec: 6 cost: quadratic + rew_state_weight: [5.0, 0.1, 5.0, 0.1] # Match MPC weights + rew_act_weight: [0.1] done_on_out_of_bound: True constraints: diff --git a/examples/mpc/config_overrides/cartpole/cartpole_tracking.yaml b/examples/mpc/config_overrides/cartpole/cartpole_tracking.yaml index 37c07aa09..cd4164f41 100644 --- a/examples/mpc/config_overrides/cartpole/cartpole_tracking.yaml +++ b/examples/mpc/config_overrides/cartpole/cartpole_tracking.yaml @@ -33,6 +33,8 @@ task_config: episode_len_sec: 6 cost: quadratic + rew_state_weight: [5.0, 0.1, 5.0, 0.1] # Match MPC weights + rew_act_weight: [0.1] done_on_out_of_bound: True constraints: diff --git a/examples/mpc/config_overrides/quadrotor_2D/linear_mpc_quadrotor_2D_tracking.yaml b/examples/mpc/config_overrides/quadrotor_2D/linear_mpc_quadrotor_2D_tracking.yaml index ee1853730..3786a1db4 100644 --- a/examples/mpc/config_overrides/quadrotor_2D/linear_mpc_quadrotor_2D_tracking.yaml +++ b/examples/mpc/config_overrides/quadrotor_2D/linear_mpc_quadrotor_2D_tracking.yaml @@ -5,9 +5,9 @@ algo_config: - 0.1 - 0.1 q_mpc: - - 1.0 + - 5.0 - 0.1 - - 1.0 + - 5.0 - 0.1 - 0.1 - 0.1 diff --git a/examples/mpc/config_overrides/quadrotor_2D/quadrotor_2D_stabilization.yaml b/examples/mpc/config_overrides/quadrotor_2D/quadrotor_2D_stabilization.yaml index 494c9aefa..8d380fc11 100644 --- a/examples/mpc/config_overrides/quadrotor_2D/quadrotor_2D_stabilization.yaml +++ b/examples/mpc/config_overrides/quadrotor_2D/quadrotor_2D_stabilization.yaml @@ -42,6 +42,8 @@ task_config: episode_len_sec: 6 cost: quadratic + rew_state_weight: [5.0, 0.1, 5.0, 0.1, 0.1, 0.1] # Match MPC weights + rew_act_weight: [0.1, 0.1] done_on_out_of_bound: True constraints: diff --git a/examples/mpc/config_overrides/quadrotor_2D/quadrotor_2D_tracking.yaml b/examples/mpc/config_overrides/quadrotor_2D/quadrotor_2D_tracking.yaml index 2405a2238..e3eb0959b 100644 --- a/examples/mpc/config_overrides/quadrotor_2D/quadrotor_2D_tracking.yaml +++ b/examples/mpc/config_overrides/quadrotor_2D/quadrotor_2D_tracking.yaml @@ -45,6 +45,8 @@ task_config: episode_len_sec: 6 cost: quadratic + rew_state_weight: [5.0, 0.1, 5.0, 0.1, 0.1, 0.1] # Match MPC weights + rew_act_weight: [0.1, 0.1] done_on_out_of_bound: True constraints: diff --git a/examples/mpc/config_overrides/quadrotor_3D/linear_mpc_quadrotor_3D_tracking.yaml b/examples/mpc/config_overrides/quadrotor_3D/linear_mpc_quadrotor_3D_tracking.yaml index 1102acce3..009f561b2 100644 --- a/examples/mpc/config_overrides/quadrotor_3D/linear_mpc_quadrotor_3D_tracking.yaml +++ b/examples/mpc/config_overrides/quadrotor_3D/linear_mpc_quadrotor_3D_tracking.yaml @@ -7,11 +7,11 @@ algo_config: - 0.1 - 0.1 q_mpc: - - 1.0 + - 5.0 - 0.1 - - 1.0 + - 5.0 - 0.1 - - 1.0 + - 5.0 - 0.1 - 0.1 - 0.1 diff --git a/examples/mpc/config_overrides/quadrotor_3D/quadrotor_3D_stabilization.yaml b/examples/mpc/config_overrides/quadrotor_3D/quadrotor_3D_stabilization.yaml index 4c652d130..305cda6eb 100644 --- a/examples/mpc/config_overrides/quadrotor_3D/quadrotor_3D_stabilization.yaml +++ b/examples/mpc/config_overrides/quadrotor_3D/quadrotor_3D_stabilization.yaml @@ -68,6 +68,9 @@ task_config: episode_len_sec: 6 cost: quadratic + # Match MPC weights + rew_state_weight: [5.0, 0.1, 5.0, 0.1, 5.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1] + rew_act_weight: [0.1, 0.1, 0.1, 0.1] done_on_out_of_bound: True constraints: - constraint_form: default_constraint diff --git a/examples/mpc/config_overrides/quadrotor_3D/quadrotor_3D_tracking.yaml b/examples/mpc/config_overrides/quadrotor_3D/quadrotor_3D_tracking.yaml index e4263c249..1890b5251 100644 --- a/examples/mpc/config_overrides/quadrotor_3D/quadrotor_3D_tracking.yaml +++ b/examples/mpc/config_overrides/quadrotor_3D/quadrotor_3D_tracking.yaml @@ -70,6 +70,9 @@ task_config: proj_normal: [0, 1, 1] episode_len_sec: 6 cost: quadratic + # Match MPC weights + rew_state_weight: [5.0, 0.1, 5.0, 0.1, 5.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1] + rew_act_weight: [0.1, 0.1, 0.1, 0.1] done_on_out_of_bound: True constraints: - constraint_form: default_constraint diff --git a/examples/mpc/mpc_experiment.py b/examples/mpc/mpc_experiment.py index 27b8874b7..5d69738a3 100644 --- a/examples/mpc/mpc_experiment.py +++ b/examples/mpc/mpc_experiment.py @@ -2,7 +2,6 @@ import os import pickle -from collections import defaultdict from functools import partial import matplotlib.pyplot as plt @@ -15,11 +14,12 @@ from safe_control_gym.utils.registration import make -def run(gui=True, n_episodes=1, n_steps=None, save_data=False): +def run(gui=False, plot=True, n_episodes=1, n_steps=None, save_data=False): '''The main function running MPC and Linear MPC experiments. Args: - gui (bool): Whether to display the gui and plot graphs. + gui (bool): Whether to display the gui. + plot (bool): Whether to plot graphs. n_episodes (int): The number of episodes to execute. n_steps (int): The total number of steps to execute. save_data (bool): Whether to save the collected experiment data. @@ -34,7 +34,7 @@ def run(gui=True, n_episodes=1, n_steps=None, save_data=False): config.task, **config.task_config ) - random_env = env_func(gui=False) + env = env_func(gui=gui) # Create controller. ctrl = make(config.algo, @@ -42,43 +42,19 @@ def run(gui=True, n_episodes=1, n_steps=None, save_data=False): **config.algo_config ) - all_trajs = defaultdict(list) - n_episodes = 1 if n_episodes is None else n_episodes - # Run the experiment. - for _ in range(n_episodes): - # Get initial state and create environments - init_state, _ = random_env.reset() - static_env = env_func(gui=gui, randomized_init=False, init_state=init_state) - static_train_env = env_func(gui=False, randomized_init=False, init_state=init_state) - - # Create experiment, train, and run evaluation - experiment = BaseExperiment(env=static_env, ctrl=ctrl, train_env=static_train_env) - experiment.launch_training() - - if n_steps is None: - trajs_data, _ = experiment.run_evaluation(training=True, n_episodes=1) - else: - trajs_data, _ = experiment.run_evaluation(training=True, n_steps=n_steps) - - if gui: - post_analysis(trajs_data['obs'][0], trajs_data['action'][0], ctrl.env) - - # Close environments - static_env.close() - static_train_env.close() + experiment = BaseExperiment(env=env, ctrl=ctrl) + trajs_data, metrics = experiment.run_evaluation(training=True, n_episodes=n_episodes, n_steps=n_steps) - # Merge in new trajectory data - for key, value in trajs_data.items(): - all_trajs[key] += value + if plot: + for i in range(len(trajs_data['obs'])): + post_analysis(trajs_data['obs'][i], trajs_data['action'][i], ctrl.env) ctrl.close() - random_env.close() - metrics = experiment.compute_metrics(all_trajs) - all_trajs = dict(all_trajs) + env.close() if save_data: - results = {'trajs_data': all_trajs, 'metrics': metrics} + results = {'trajs_data': trajs_data, 'metrics': metrics} path_dir = os.path.dirname('./temp-data/') os.makedirs(path_dir, exist_ok=True) with open(f'./temp-data/{config.algo}_data_{config.task}_{config.task_config.task}.pkl', 'wb') as file: @@ -132,20 +108,5 @@ def post_analysis(state_stack, input_stack, env): plt.show() -def wrap2pi_vec(angle_vec): - '''Wraps a vector of angles between -pi and pi. - - Args: - angle_vec (ndarray): A vector of angles. - ''' - for k, angle in enumerate(angle_vec): - while angle > np.pi: - angle -= np.pi - while angle <= -np.pi: - angle += np.pi - angle_vec[k] = angle - return angle_vec - - if __name__ == '__main__': run() diff --git a/safe_control_gym/controllers/lqr/ilqr.py b/safe_control_gym/controllers/lqr/ilqr.py index 3ebc484c7..7407a923f 100644 --- a/safe_control_gym/controllers/lqr/ilqr.py +++ b/safe_control_gym/controllers/lqr/ilqr.py @@ -64,7 +64,6 @@ def __init__( self.model = self.get_prior(self.env) self.Q = get_cost_weight_matrix(self.q_lqr, self.model.nx) self.R = get_cost_weight_matrix(self.r_lqr, self.model.nu) - self.env.set_cost_function_param(self.Q, self.R) self.gain = compute_lqr_gain(self.model, self.model.X_EQ, self.model.U_EQ, self.Q, self.R, self.discrete_dynamics) diff --git a/safe_control_gym/controllers/lqr/lqr.py b/safe_control_gym/controllers/lqr/lqr.py index 5e9597d51..f03069525 100644 --- a/safe_control_gym/controllers/lqr/lqr.py +++ b/safe_control_gym/controllers/lqr/lqr.py @@ -33,7 +33,6 @@ def __init__( self.discrete_dynamics = discrete_dynamics self.Q = get_cost_weight_matrix(q_lqr, self.model.nx) self.R = get_cost_weight_matrix(r_lqr, self.model.nu) - self.env.set_cost_function_param(self.Q, self.R) self.gain = compute_lqr_gain(self.model, self.model.X_EQ, self.model.U_EQ, self.Q, self.R, self.discrete_dynamics) diff --git a/safe_control_gym/controllers/mpc/mpc.py b/safe_control_gym/controllers/mpc/mpc.py index ac2ed4e59..61287b92f 100644 --- a/safe_control_gym/controllers/mpc/mpc.py +++ b/safe_control_gym/controllers/mpc/mpc.py @@ -454,10 +454,7 @@ def run(self, self.x_prev = None self.u_prev = None - if not env.initial_reset: - env.set_cost_function_param(self.Q, self.R) - # obs, info = env.reset() - obs = env.reset() + obs, info = env.reset() print('Init State:') print(obs) ep_returns, ep_lengths = [], [] diff --git a/safe_control_gym/controllers/mpc/mpc_acados.py b/safe_control_gym/controllers/mpc/mpc_acados.py index 83d4b47a2..4232de638 100644 --- a/safe_control_gym/controllers/mpc/mpc_acados.py +++ b/safe_control_gym/controllers/mpc/mpc_acados.py @@ -13,6 +13,7 @@ try: from acados_template import AcadosModel, AcadosOcp, AcadosOcpSolver + from acados_template.mpc_utils import detect_constraint_structure except ImportError as e: print(colored(f'Error: {e}', 'red')) print(colored('acados not installed, cannot use acados-based controller. Exiting.', 'red')) @@ -123,14 +124,7 @@ def setup_acados_model(self) -> AcadosModel: acados_model.u = self.model.u_sym acados_model.name = self.env.NAME - # set up rk4 (acados need symbolic expression of dynamics, not function) - k1 = self.model.fc_func(acados_model.x, acados_model.u) - k2 = self.model.fc_func(acados_model.x + self.dt / 2 * k1, acados_model.u) - k3 = self.model.fc_func(acados_model.x + self.dt / 2 * k2, acados_model.u) - k4 = self.model.fc_func(acados_model.x + self.dt * k3, acados_model.u) - f_disc = acados_model.x + self.dt / 6 * (k1 + 2 * k2 + 2 * k3 + k4) - - acados_model.disc_dyn_expr = f_disc + acados_model.f_expl_expr = self.model.fc_func(acados_model.x, acados_model.u) # store meta information # NOTE: unit is missing acados_model.x_labels = self.env.STATE_LABELS @@ -162,12 +156,12 @@ def setup_acados_optimizer(self): ocp.model = self.acados_model # set dimensions - ocp.dims.N = self.T # prediction horizon + ocp.solver_options.N_horizon = self.T # prediction horizon # set cost (NOTE: safe-control-gym uses quadratic cost) ocp.cost.cost_type = 'LINEAR_LS' ocp.cost.cost_type_e = 'LINEAR_LS' - ocp.cost.W = scipy.linalg.block_diag(self.Q, self.R) + ocp.cost.W = scipy.linalg.block_diag(self.Q/self.dt, self.R/self.dt) # scaling by dt as Q, R are given at discrete time ocp.cost.W_e = self.Q if not self.use_lqr_gain_and_terminal_cost else self.P ocp.cost.Vx = np.zeros((ny, nx)) ocp.cost.Vx[:nx, :nx] = np.eye(nx) @@ -223,9 +217,9 @@ def setup_acados_optimizer(self): # set up solver options ocp.solver_options.qp_solver = 'PARTIAL_CONDENSING_HPIPM' ocp.solver_options.hessian_approx = 'GAUSS_NEWTON' - ocp.solver_options.integrator_type = 'DISCRETE' + ocp.solver_options.integrator_type = 'ERK' ocp.solver_options.nlp_solver_type = 'SQP' if not self.use_RTI else 'SQP_RTI' - ocp.solver_options.nlp_solver_max_iter = 25 if not self.use_RTI else 1 + ocp.solver_options.nlp_solver_max_iter = 200 ocp.solver_options.globalization = 'MERIT_BACKTRACKING' # ocp.solver_options.globalization = 'FUNNEL_L1PEN_LINESEARCH' if not self.use_RTI else 'MERIT_BACKTRACKING' ocp.solver_options.tf = self.T * self.dt # prediction horizon @@ -260,38 +254,120 @@ def processing_acados_constraints_expression(self, ocp.constraints.ubu = self.env.constraints.input_constraints[0].upper_bounds ocp.constraints.idxbu = idxbu # active constraints dimension ''' - - ub = {'h': set_acados_constraint_bound(h_expr, 'ub', self.constraint_tol), - 'h0': set_acados_constraint_bound(h0_expr, 'ub', self.constraint_tol), - 'he': set_acados_constraint_bound(he_expr, 'ub', self.constraint_tol), } - - lb = {'h': set_acados_constraint_bound(h_expr, 'lb'), - 'h0': set_acados_constraint_bound(h0_expr, 'lb'), - 'he': set_acados_constraint_bound(he_expr, 'lb'), } - - # make sure all the ub and lb are 1D numpy arrays - # (see: https://discourse.acados.org/t/infeasible-qps-when-using-nonlinear-casadi-constraint-expressions/1595/5?u=mxche) - for key in ub.keys(): - ub[key] = ub[key].flatten() if ub[key].ndim != 1 else ub[key] - lb[key] = lb[key].flatten() if lb[key].ndim != 1 else lb[key] - # check ub and lb dimensions - for key in ub.keys(): - assert ub[key].ndim == 1, f'ub[{key}] is not 1D numpy array' - assert lb[key].ndim == 1, f'lb[{key}] is not 1D numpy array' - assert ub['h'].shape == lb['h'].shape, 'h_ub and h_lb have different shapes' - - # pass the constraints to the ocp object - ocp.model.con_h_expr_0, ocp.model.con_h_expr, ocp.model.con_h_expr_e = \ - h0_expr, h_expr, he_expr - ocp.dims.nh_0, ocp.dims.nh, ocp.dims.nh_e = \ - h0_expr.shape[0], h_expr.shape[0], he_expr.shape[0] - # assign constraints upper and lower bounds - ocp.constraints.uh_0 = ub['h0'] - ocp.constraints.lh_0 = lb['h0'] - ocp.constraints.uh = ub['h'] - ocp.constraints.lh = lb['h'] - ocp.constraints.uh_e = ub['he'] - ocp.constraints.lh_e = lb['he'] + ub = {} + lb = {} + # Check if given constraints are double-sided with heuristics based on the constraint jacobian: + h_list = [h0_expr, h_expr, he_expr] + stage_type = ["initial", "path", "terminal"] + for h_s, s_type in zip(h_list, stage_type): + jac_h_expr = cs.jacobian(h_s, cs.vertcat(ocp.model.x, ocp.model.u)) + jac_h_fn = cs.Function("jac_h_fn", [ocp.model.x, ocp.model.u], [jac_h_expr]) + jac_eval = jac_h_fn(0, 0).full() + + # Check jacobian by blocks: + nx = ocp.model.x.shape[0] + nu = ocp.model.u.shape[0] + if jac_eval.shape[0] == nx * 2: + if np.all(jac_eval[:nx, :nx] == -jac_eval[nx:, :nx]): + h_double_bounded = True + case = 1 + elif jac_eval.shape[0] == nu * 2: + if np.all(jac_eval[:nu, nx:] == -jac_eval[nu:, nx:]): + h_double_bounded = True + case = 2 + elif jac_eval.shape[0] == nx * 2 + nu * 2: + if np.all(jac_eval[:nx, :nx] == -jac_eval[nx:2*nx, :nx]) and \ + np.all (jac_eval[nx*2:nx*2+nu, nx:] == -jac_eval[nx*2+nu:, nx:]): + h_double_bounded = True + case = 3 + else: + h_double_bounded = False + + if h_double_bounded: + h_fn = cs.Function("h_fn", [ocp.model.x, ocp.model.u], [h_s]) + if all(jac_eval[0, :] >= 0): + start_with_ub = True + else: + start_with_ub = False + if case == 1: + if start_with_ub: + uh = np.array(-h_fn(0,0)[:nx]) + lh = np.array(h_fn(0,0)[nx:]) + h_s_short = h_s[:nx] + uh + else: + lh = np.array(h_fn(0,0)[:nx]) + uh = np.array(-h_fn(0,0)[nx:]) + h_s_short = h_s[:nx] - lh + elif case == 2: + if start_with_ub: + uh = np.array(-h_fn(0,0)[:nu]) + lh = np.array(h_fn(0,0)[nu:]) + h_s_short = h_s[:nu] + uh + else: + lh = np.array(h_fn(0,0)[:nu]) + uh = np.array(-h_fn(0,0)[nu:]) + h_s_short = h_s[:nu] - lh + elif case == 3: + if start_with_ub: + uh = np.concatenate([-h_fn(0,0)[:nx], -h_fn(0,0)[nx*2:nx*2+nu]]) + lh = np.concatenate([h_fn(0,0)[nx:nx*2], h_fn(0,0)[nx*2+nu:]]) + h_s_short = cs.vertcat(h_s[:nx], h_s[2*nx:nx*2+nu]) + uh + else: + lh = np.concatenate([h_fn(0,0)[:nx], h_fn(0,0)[nx*2:nx*2+nu]]) + uh = np.concatenate([-h_fn(0,0)[nx:nx*2], -h_fn(0,0)[nx*2+nu:]]) + h_s_short = cs.vertcat(h_s[:nx], h_s[2*nx:nx*2+nu]) - lh + if s_type == "initial": + ocp.model.con_h_expr_0 = -h_s_short + ocp.constraints.lh_0 = lh + ocp.constraints.uh_0 = uh + elif s_type == "path": + ocp.model.con_h_expr = -h_s_short + ocp.constraints.lh = lh + ocp.constraints.uh = uh + elif s_type == "terminal": + ocp.model.con_h_expr_e = -h_s_short + ocp.constraints.lh_e = lh + ocp.constraints.uh_e = uh + detect_constraint_structure(ocp.model, ocp.constraints, stage_type=s_type) + else: + if s_type == "initial": + ub.update({'h0': set_acados_constraint_bound(h0_expr, 'ub', self.constraint_tol)}) + lb.update({'h0': set_acados_constraint_bound(h0_expr, 'lb')}) + elif s_type == "path": + ub.update({'h': set_acados_constraint_bound(h_expr, 'ub', self.constraint_tol)}) + lb.update({'h': set_acados_constraint_bound(h_expr, 'lb')}) + elif s_type == "terminal": + ub.update({'he': set_acados_constraint_bound(he_expr, 'ub', self.constraint_tol)}) + lb.update({'he': set_acados_constraint_bound(he_expr, 'lb')}) + + if ub != {}: + # make sure all the ub and lb are 1D numpy arrays + # (see: https://discourse.acados.org/t/infeasible-qps-when-using-nonlinear-casadi-constraint-expressions/1595/5?u=mxche) + for key in ub.keys(): + ub[key] = ub[key].flatten() if ub[key].ndim != 1 else ub[key] + lb[key] = lb[key].flatten() if lb[key].ndim != 1 else lb[key] + # check ub and lb dimensions + for key in ub.keys(): + assert ub[key].ndim == 1, f'ub[{key}] is not 1D numpy array' + assert lb[key].ndim == 1, f'lb[{key}] is not 1D numpy array' + assert ub['h'].shape == lb['h'].shape, 'h_ub and h_lb have different shapes' + # update acados ocp constraints + for key in ub.keys(): + if key == 'h0': + ocp.model.con_h_expr_0 = h0_expr + ocp.dims.nh_0 = h0_expr.shape[0] + ocp.constraints.uh_0 = ub['h0'] + ocp.constraints.lh_0 = lb['h0'] + elif key == 'h': + ocp.model.con_h_expr = h_expr + ocp.dims.nh = h_expr.shape[0] + ocp.constraints.uh = ub['h'] + ocp.constraints.lh = lb['h'] + elif key == 'he': + ocp.model.con_h_expr_e = he_expr + ocp.dims.nh_e = he_expr.shape[0] + ocp.constraints.uh_e = ub['he'] + ocp.constraints.lh_e = lb['he'] return ocp @@ -344,31 +420,14 @@ def select_action(self, self.acados_ocp_solver.set(self.T, 'yref', y_ref_e) # solve the optimization problem - if self.use_RTI: - # preparation phase - self.acados_ocp_solver.options_set('rti_phase', 1) - status = self.acados_ocp_solver.solve() - - # feedback phase - self.acados_ocp_solver.options_set('rti_phase', 2) - status = self.acados_ocp_solver.solve() - - if status not in [0, 2]: - self.acados_ocp_solver.print_statistics() - raise Exception(f'acados returned status {status}. Exiting.') - if status == 2: - print(f'acados returned status {status}. ') - - action = self.acados_ocp_solver.get(0, 'u') - - elif not self.use_RTI: - status = self.acados_ocp_solver.solve() - if status not in [0, 2]: - self.acados_ocp_solver.print_statistics() - raise Exception(f'acados returned status {status}. Exiting.') - if status == 2: - print(f'acados returned status {status}. ') - action = self.acados_ocp_solver.get(0, 'u') + status = self.acados_ocp_solver.solve() + if status not in [0, 2]: + self.acados_ocp_solver.print_statistics() + raise Exception(f'acados returned status {status}. Exiting.') + if status == 2: + self.acados_ocp_solver.print_statistics() + print(f'acados returned status {status}. ') + action = self.acados_ocp_solver.get(0, 'u') # get the open-loop solution if self.x_prev is None and self.u_prev is None: @@ -388,6 +447,7 @@ def select_action(self, self.results_dict['horizon_states'].append(deepcopy(self.x_prev)) self.results_dict['horizon_inputs'].append(deepcopy(self.u_prev)) self.results_dict['goal_states'].append(deepcopy(goal_states)) + self.results_dict['t_wall'].append(self.acados_ocp_solver.get_stats("time_tot")) self.prev_action = action if self.use_lqr_gain_and_terminal_cost: diff --git a/safe_control_gym/envs/benchmark_env.py b/safe_control_gym/envs/benchmark_env.py index c60220974..61df37a61 100644 --- a/safe_control_gym/envs/benchmark_env.py +++ b/safe_control_gym/envs/benchmark_env.py @@ -176,10 +176,6 @@ def __init__(self, self.state_dim = self.state_space.shape[0] else: self.state_dim = self.obs_dim - # Default Q and R matrices for quadratic cost. - if self.COST == Cost.QUADRATIC: - self.Q = np.eye(self.observation_space.shape[0]) - self.R = np.eye(self.action_space.shape[0]) # Set constraint info. self.CONSTRAINTS = constraints self.DONE_ON_VIOLATION = done_on_violation @@ -221,25 +217,6 @@ def seed(self, disturbs.seed(self) return [seed] - def set_cost_function_param(self, - Q, - R - ): - '''Set the cost function parameters. - - Args: - Q (ndarray): State weight matrix (nx by nx). - R (ndarray): Input weight matrix (nu by nu). - ''' - - if not self.initial_reset: - self.Q = Q - self.R = R - else: - raise RuntimeError( - '[ERROR] env.set_cost_function_param() cannot be called after the first reset of the environment.' - ) - def set_adversary_control(self, action): '''Sets disturbance by an adversary controller, called before (each) step(). diff --git a/safe_control_gym/envs/gym_control/cartpole.py b/safe_control_gym/envs/gym_control/cartpole.py index 6a1c91317..7ed6ea442 100644 --- a/safe_control_gym/envs/gym_control/cartpole.py +++ b/safe_control_gym/envs/gym_control/cartpole.py @@ -150,7 +150,9 @@ def __init__(self, self.obs_goal_horizon = obs_goal_horizon self.obs_wrap_angle = obs_wrap_angle self.rew_state_weight = np.array(rew_state_weight, ndmin=1, dtype=float) + self.Q = np.diag(self.rew_state_weight) self.rew_act_weight = np.array(rew_act_weight, ndmin=1, dtype=float) + self.R = np.diag(self.rew_act_weight) self.rew_exponential = rew_exponential self.done_on_out_of_bound = done_on_out_of_bound # BenchmarkEnv constructor, called after defining the custom args, diff --git a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py index 2e2cb1887..2619bdfe5 100644 --- a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py +++ b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py @@ -178,7 +178,9 @@ def __init__(self, self.norm_act_scale = norm_act_scale self.obs_goal_horizon = obs_goal_horizon self.rew_state_weight = np.array(rew_state_weight, ndmin=1, dtype=float) + self.Q = np.diag(self.rew_state_weight) self.rew_act_weight = np.array(rew_act_weight, ndmin=1, dtype=float) + self.R = np.diag(self.rew_act_weight) self.rew_exponential = rew_exponential self.done_on_out_of_bound = done_on_out_of_bound if info_mse_metric_state_weight is None: diff --git a/safe_control_gym/experiments/base_experiment.py b/safe_control_gym/experiments/base_experiment.py index d8f86c536..d7542832d 100644 --- a/safe_control_gym/experiments/base_experiment.py +++ b/safe_control_gym/experiments/base_experiment.py @@ -409,10 +409,16 @@ def compute_metrics(self, data, verbose=False): 'average_constraint_violation': np.asarray(self.get_episode_constraint_violation_steps()).mean(), 'constraint_violation_std': np.asarray(self.get_episode_constraint_violation_steps()).std(), 'constraint_violation': np.asarray(self.get_episode_constraint_violation_steps()) if len(self.get_episode_constraint_violation_steps()) > 1 else self.get_episode_constraint_violation_steps()[0], + 'mean_t_wall_ms': np.asarray(self.get_t_wall()).mean()*1e3, + 'median_t_wall_ms': np.median(np.asarray(self.get_t_wall()))*1e3, + 'max_t_wall_ms': np.max(np.asarray(self.get_t_wall()))*1e3, # others ??? } return metrics + def get_t_wall(self): + return self.data['controller_data'][0]['t_wall'][0] + def get_episode_data(self, key, postprocess_func=lambda x: x): '''Extract data field from recorded trajectory data, optionally postprocess each episode data (e.g. get sum). diff --git a/safe_control_gym/math_and_models/symbolic_systems.py b/safe_control_gym/math_and_models/symbolic_systems.py index 0bcc50f70..0aae261c5 100644 --- a/safe_control_gym/math_and_models/symbolic_systems.py +++ b/safe_control_gym/math_and_models/symbolic_systems.py @@ -69,7 +69,7 @@ def setup_model(self): # Discrete time dynamics. self.fd_func = cs.integrator('fd', self.integration_algo, {'x': self.x_sym, 'p': self.u_sym, - 'ode': self.x_dot}, {'tf': self.dt} + 'ode': self.x_dot}, 0, self.dt ) # Observation model. self.g_func = cs.Function('g', [self.x_sym, self.u_sym], [self.y_sym], ['x', 'u'], ['g']) @@ -101,7 +101,7 @@ def setup_linearization(self): 'x': self.x_eval, 'p': cs.vertcat(self.u_eval, self.x_sym, self.u_sym), 'ode': self.x_dot_linear - }, {'tf': self.dt}) + }, 0, self.dt) # Linearized observation model. self.y_linear = self.y_sym + self.dgdx @ ( self.x_eval - self.x_sym) + self.dgdu @ (self.u_eval - self.u_sym) diff --git a/safe_control_gym/safety_filters/mpsc/linear_mpsc.py b/safe_control_gym/safety_filters/mpsc/linear_mpsc.py index 4f095a89e..e930d639f 100644 --- a/safe_control_gym/safety_filters/mpsc/linear_mpsc.py +++ b/safe_control_gym/safety_filters/mpsc/linear_mpsc.py @@ -101,7 +101,7 @@ def set_dynamics(self): 'x': delta_x, 'p': delta_u, 'ode': x_dot_lin_vec - }, {'tf': self.dt} + }, 0, self.dt ) self.dynamics_func = dynamics_func