From 745e91bdda6babdd78008393cea488614ec2e811 Mon Sep 17 00:00:00 2001 From: Sahil Sharma Date: Mon, 11 Mar 2024 09:02:55 +0530 Subject: [PATCH 1/4] debug subprocess run output --- src/distributed_rl.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/distributed_rl.py b/src/distributed_rl.py index 3c73bad5..ce12f4f0 100644 --- a/src/distributed_rl.py +++ b/src/distributed_rl.py @@ -516,8 +516,9 @@ def interrupt_learning_server(args: argparse.Namespace) -> None: pass def cancel_workers(args: argparse.Namespace) -> None: - subprocess.run([f"scancel -u$USER -n drl-all-{args.output_file}"], - shell=True, check=True) + res = subprocess.run([f"scancel -u$USER -n drl-all-{args.output_file}"], + shell=True, check=True, capture_output=True) + print(res) def build_final_save(args: argparse.Namespace, steps_done: int) -> None: save_path = latest_common_save(args) From de582dcb993c3feaa5be0d55b60d3a0f1b73107a Mon Sep 17 00:00:00 2001 From: Sahil Sharma Date: Thu, 21 Mar 2024 11:21:35 +0530 Subject: [PATCH 2/4] fix divide by zero --- src/distributed_rl_learning_server.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/distributed_rl_learning_server.py b/src/distributed_rl_learning_server.py index 7a8911a6..3fe1849b 100755 --- a/src/distributed_rl_learning_server.py +++ b/src/distributed_rl_learning_server.py @@ -33,6 +33,8 @@ import torch.distributed as dist import torch.optim.lr_scheduler as scheduler +EPSILON = 0.0000001 + print("Finished torch imports", file=sys.stderr) # pylint: disable=wrong-import-position sys.path.append(str(Path(os.getcwd()) / "src")) @@ -179,7 +181,7 @@ def serve_parameters(args: argparse.Namespace, backend='mpi') -> None: loss_buffer.append(loss) iters_trained += 1 if len(loss_buffer) == args.loss_smoothing: - smoothed_loss = sum(loss_buffer) / args.loss_smoothing + smoothed_loss = sum(loss_buffer) / (args.loss_smoothing + EPSILON) lr = optimizer.param_groups[0]['lr'] eprint(f"Loss: {smoothed_loss} (learning rate {lr/20:.3e})") if vsample_changed and args.reset_on_updated_sample: @@ -785,7 +787,7 @@ def print_vvalue_errors(gamma: float, vnetwork: nn.Module, predicted_v_values = vnetwork( torch.cat([obl.local_context.view(1, -1) for obl, _ in items], dim=0), torch.LongTensor([obl.previous_tactic for obl, _ in items]).to(device)).view(-1) - predicted_steps = torch.log(predicted_v_values) / math.log(gamma) + predicted_steps = torch.log(predicted_v_values) / (math.log(gamma) + EPSILON) num_predicted_zeros = torch.count_nonzero(predicted_steps == float("inf")) target_steps: FloatTensor = torch.tensor([steps for _, steps in items]).to(device) #type: ignore step_errors = torch.abs(predicted_steps - target_steps) @@ -793,7 +795,7 @@ def print_vvalue_errors(gamma: float, vnetwork: nn.Module, total_error = torch.sum(torch.where(predicted_steps == float("inf"), torch.zeros_like(step_errors), step_errors)).item() - avg_error = total_error / (len(items) - num_predicted_zeros) + avg_error = total_error / (len(items) - num_predicted_zeros + EPSILON) eprint(f"Average step error across {len(items) - num_predicted_zeros} " f"initial states with finite predictions: {avg_error:.6f}") eprint(f"{num_predicted_zeros} predicted as infinite steps (impossible)") From 751ba038e7b39686d9be3327de438568458fa2aa Mon Sep 17 00:00:00 2001 From: Sahil Sharma Date: Thu, 21 Mar 2024 11:33:16 +0530 Subject: [PATCH 3/4] fix torch log nan --- src/distributed_rl_learning_server.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/distributed_rl_learning_server.py b/src/distributed_rl_learning_server.py index 3fe1849b..41046bcb 100755 --- a/src/distributed_rl_learning_server.py +++ b/src/distributed_rl_learning_server.py @@ -33,7 +33,7 @@ import torch.distributed as dist import torch.optim.lr_scheduler as scheduler -EPSILON = 0.0000001 +EPSILON = 1e-7 print("Finished torch imports", file=sys.stderr) # pylint: disable=wrong-import-position @@ -394,7 +394,7 @@ def train(args: argparse.Namespace, v_model: VModel, loss = F.mse_loss(actual_values, target_values) else: assert args.loss == "log" - loss = F.mse_loss(torch.log(actual_values), torch.log(target_values)) + loss = F.mse_loss(torch.log(F.relu(actual_values) + EPSILON), torch.log(F.relu(target_values) + EPSILON)) if args.verbose >= 1: eprint("Training obligations to values:") for idx, (context, prev_tactic, output, actual_value) \ @@ -787,7 +787,7 @@ def print_vvalue_errors(gamma: float, vnetwork: nn.Module, predicted_v_values = vnetwork( torch.cat([obl.local_context.view(1, -1) for obl, _ in items], dim=0), torch.LongTensor([obl.previous_tactic for obl, _ in items]).to(device)).view(-1) - predicted_steps = torch.log(predicted_v_values) / (math.log(gamma) + EPSILON) + predicted_steps = torch.log(F.relu(predicted_v_values) + EPSILON) / (math.log(gamma) + EPSILON) num_predicted_zeros = torch.count_nonzero(predicted_steps == float("inf")) target_steps: FloatTensor = torch.tensor([steps for _, steps in items]).to(device) #type: ignore step_errors = torch.abs(predicted_steps - target_steps) From d89c10162956e8302cddc14c8a0b2d84f88c0e35 Mon Sep 17 00:00:00 2001 From: Sahil Sharma Date: Tue, 9 Apr 2024 14:18:53 -0400 Subject: [PATCH 4/4] Revert changes to distributed_rl.py related to output --- src/distributed_rl.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/distributed_rl.py b/src/distributed_rl.py index ce12f4f0..3c73bad5 100644 --- a/src/distributed_rl.py +++ b/src/distributed_rl.py @@ -516,9 +516,8 @@ def interrupt_learning_server(args: argparse.Namespace) -> None: pass def cancel_workers(args: argparse.Namespace) -> None: - res = subprocess.run([f"scancel -u$USER -n drl-all-{args.output_file}"], - shell=True, check=True, capture_output=True) - print(res) + subprocess.run([f"scancel -u$USER -n drl-all-{args.output_file}"], + shell=True, check=True) def build_final_save(args: argparse.Namespace, steps_done: int) -> None: save_path = latest_common_save(args)