diff --git a/cli.py b/cli.py index b7f8d82..449d2c7 100644 --- a/cli.py +++ b/cli.py @@ -20,7 +20,7 @@ def run_optimize(args, logger): from lib.RLTrader import RLTrader trader = RLTrader(**vars(args), logger=logger, reward_strategy=reward_strategy) - trader.optimize(n_trials=args.trials, n_prune_evals_per_trial=args.prune_evals, n_tests_per_eval=args.eval_tests) + trader.optimize(n_trials=args.trials) if __name__ == '__main__': diff --git a/data/params.db b/data/params.db index 58a552e..b69ba53 100644 Binary files a/data/params.db and b/data/params.db differ diff --git a/lib/RLTrader.py b/lib/RLTrader.py index c223c8a..4a8ccc0 100644 --- a/lib/RLTrader.py +++ b/lib/RLTrader.py @@ -185,9 +185,9 @@ def optimize_params(self, trial, n_prune_evals_per_trial: int = 2, n_tests_per_e return -1 * last_reward - def optimize(self, n_trials: int = 20, **optimize_params): + def optimize(self, n_trials: int = 20): try: - self.optuna_study.optimize(self.optimize_params, n_trials=n_trials, n_jobs=1, **optimize_params) + self.optuna_study.optimize(self.optimize_params, n_trials=n_trials, n_jobs=1) except KeyboardInterrupt: pass @@ -278,7 +278,7 @@ def test(self, model_epoch: int = 0, render_env: bool = True, render_report: boo if done: net_worths = pd.DataFrame({ 'Date': info[0]['timestamps'], - 'Balance': info[0]['networths'], + 'Balance': info[0]['net_worths'], }) net_worths.set_index('Date', drop=True, inplace=True) diff --git a/lib/env/TradingEnv.py b/lib/env/TradingEnv.py index e0fe796..18dba3d 100644 --- a/lib/env/TradingEnv.py +++ b/lib/env/TradingEnv.py @@ -94,6 +94,7 @@ def _get_trade(self, action: int): def _take_action(self, action: int): amount_asset_to_buy, amount_asset_to_sell = self._get_trade(action) + asset_bought, asset_sold, purchase_cost, sale_revenue = self.trade_strategy.trade(buy_amount=amount_asset_to_buy, sell_amount=amount_asset_to_sell, balance=self.balance, @@ -104,15 +105,20 @@ def _take_action(self, action: int): self.asset_held += asset_bought self.balance -= purchase_cost - self.trades.append({'step': self.current_step, 'amount': asset_bought, - 'total': purchase_cost, 'type': 'buy'}) + self.trades.append({'step': self.current_step, + 'amount': asset_bought, + 'total': purchase_cost, + 'type': 'buy'}) elif asset_sold: self.asset_held -= asset_sold self.balance += sale_revenue + self.reward_strategy.reset_reward() - self.trades.append({'step': self.current_step, 'amount': asset_sold, - 'total': sale_revenue, 'type': 'sell'}) + self.trades.append({'step': self.current_step, + 'amount': asset_sold, + 'total': sale_revenue, + 'type': 'sell'}) current_net_worth = round(self.balance + self.asset_held * self._current_price(), self.base_precision) self.net_worths.append(current_net_worth) @@ -132,7 +138,7 @@ def _done(self): def _reward(self): reward = self.reward_strategy.get_reward(current_step=self.current_step, - current_price=self._current_price(), + current_price=self._current_price, observations=self.observations, account_history=self.account_history, net_worths=self.net_worths) @@ -214,7 +220,8 @@ def step(self, action): obs = self._next_observation() reward = self._reward() done = self._done() - return obs, reward, done, {'networths': self.net_worths, 'timestamps': self.timestamps} + + return obs, reward, done, {'net_worths': self.net_worths, 'timestamps': self.timestamps} def render(self, mode='human'): if mode == 'system': diff --git a/lib/env/reward/WeightedUnrealizedProfit.py b/lib/env/reward/WeightedUnrealizedProfit.py index ae18aca..41254f1 100644 --- a/lib/env/reward/WeightedUnrealizedProfit.py +++ b/lib/env/reward/WeightedUnrealizedProfit.py @@ -36,6 +36,6 @@ def get_reward(self, if account_history['asset_sold'].values[-1] > 0: reward = self.calc_reward(account_history['sale_revenue'].values[-1]) else: - reward = self.calc_reward(account_history['asset_held'].values[-1] * current_price) + reward = self.calc_reward(account_history['asset_held'].values[-1] * current_price()) return reward diff --git a/lib/env/trade/SimulatedTradeStrategy.py b/lib/env/trade/SimulatedTradeStrategy.py index 5053ff8..acbd985 100644 --- a/lib/env/trade/SimulatedTradeStrategy.py +++ b/lib/env/trade/SimulatedTradeStrategy.py @@ -30,7 +30,7 @@ def trade(self, commission = self.commissionPercent / 100 slippage = np.random.uniform(0, self.maxSlippagePercent) / 100 - asset_bought, asset_sold, purchase_cost, sale_revenue = 0, 0, 0, 0 + asset_bought, asset_sold, purchase_cost, sale_revenue = buy_amount, sell_amount, 0, 0 if buy_amount > 0 and balance >= self.min_cost_limit: price_adjustment = (1 + commission) * (1 + slippage)