From 745e91bdda6babdd78008393cea488614ec2e811 Mon Sep 17 00:00:00 2001
From: Sahil Sharma <sahilsharma@umass.edu>
Date: Mon, 11 Mar 2024 09:02:55 +0530
Subject: [PATCH 1/4] debug subprocess run output

---
 src/distributed_rl.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/distributed_rl.py b/src/distributed_rl.py
index 3c73bad5..ce12f4f0 100644
--- a/src/distributed_rl.py
+++ b/src/distributed_rl.py
@@ -516,8 +516,9 @@ def interrupt_learning_server(args: argparse.Namespace) -> None:
     pass
 
 def cancel_workers(args: argparse.Namespace) -> None:
-    subprocess.run([f"scancel -u$USER -n drl-all-{args.output_file}"],
-                   shell=True, check=True)
+    res = subprocess.run([f"scancel -u$USER -n drl-all-{args.output_file}"],
+                   shell=True, check=True, capture_output=True)
+    print(res)
 
 def build_final_save(args: argparse.Namespace, steps_done: int) -> None:
     save_path = latest_common_save(args)

From de582dcb993c3feaa5be0d55b60d3a0f1b73107a Mon Sep 17 00:00:00 2001
From: Sahil Sharma <sahilsharma@umass.edu>
Date: Thu, 21 Mar 2024 11:21:35 +0530
Subject: [PATCH 2/4] fix divide by zero

---
 src/distributed_rl_learning_server.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/distributed_rl_learning_server.py b/src/distributed_rl_learning_server.py
index 7a8911a6..3fe1849b 100755
--- a/src/distributed_rl_learning_server.py
+++ b/src/distributed_rl_learning_server.py
@@ -33,6 +33,8 @@
 import torch.distributed as dist
 import torch.optim.lr_scheduler as scheduler
 
+EPSILON = 0.0000001
+
 print("Finished torch imports", file=sys.stderr)
 # pylint: disable=wrong-import-position
 sys.path.append(str(Path(os.getcwd()) / "src"))
@@ -179,7 +181,7 @@ def serve_parameters(args: argparse.Namespace, backend='mpi') -> None:
             loss_buffer.append(loss)
           iters_trained += 1
         if len(loss_buffer) == args.loss_smoothing:
-          smoothed_loss = sum(loss_buffer) / args.loss_smoothing
+          smoothed_loss = sum(loss_buffer) / (args.loss_smoothing + EPSILON)
           lr = optimizer.param_groups[0]['lr']
           eprint(f"Loss: {smoothed_loss} (learning rate {lr/20:.3e})")
         if vsample_changed and args.reset_on_updated_sample:
@@ -785,7 +787,7 @@ def print_vvalue_errors(gamma: float, vnetwork: nn.Module,
   predicted_v_values = vnetwork(
     torch.cat([obl.local_context.view(1, -1) for obl, _ in items], dim=0),
     torch.LongTensor([obl.previous_tactic for obl, _ in items]).to(device)).view(-1)
-  predicted_steps = torch.log(predicted_v_values) / math.log(gamma)
+  predicted_steps = torch.log(predicted_v_values) / (math.log(gamma) + EPSILON)
   num_predicted_zeros = torch.count_nonzero(predicted_steps == float("inf"))
   target_steps: FloatTensor = torch.tensor([steps for _, steps in items]).to(device) #type: ignore
   step_errors = torch.abs(predicted_steps - target_steps)
@@ -793,7 +795,7 @@ def print_vvalue_errors(gamma: float, vnetwork: nn.Module,
   total_error = torch.sum(torch.where(predicted_steps == float("inf"),
                                       torch.zeros_like(step_errors),
                                       step_errors)).item()
-  avg_error = total_error / (len(items) - num_predicted_zeros)
+  avg_error = total_error / (len(items) - num_predicted_zeros + EPSILON)
   eprint(f"Average step error across {len(items) - num_predicted_zeros} "
          f"initial states with finite predictions: {avg_error:.6f}")
   eprint(f"{num_predicted_zeros} predicted as infinite steps (impossible)")

From 751ba038e7b39686d9be3327de438568458fa2aa Mon Sep 17 00:00:00 2001
From: Sahil Sharma <sahilsharma@umass.edu>
Date: Thu, 21 Mar 2024 11:33:16 +0530
Subject: [PATCH 3/4] fix torch log nan

---
 src/distributed_rl_learning_server.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/distributed_rl_learning_server.py b/src/distributed_rl_learning_server.py
index 3fe1849b..41046bcb 100755
--- a/src/distributed_rl_learning_server.py
+++ b/src/distributed_rl_learning_server.py
@@ -33,7 +33,7 @@
 import torch.distributed as dist
 import torch.optim.lr_scheduler as scheduler
 
-EPSILON = 0.0000001
+EPSILON = 1e-7
 
 print("Finished torch imports", file=sys.stderr)
 # pylint: disable=wrong-import-position
@@ -394,7 +394,7 @@ def train(args: argparse.Namespace, v_model: VModel,
     loss = F.mse_loss(actual_values, target_values)
   else:
     assert args.loss == "log"
-    loss = F.mse_loss(torch.log(actual_values), torch.log(target_values))
+    loss = F.mse_loss(torch.log(F.relu(actual_values) + EPSILON), torch.log(F.relu(target_values) + EPSILON))
   if args.verbose >= 1:
     eprint("Training obligations to values:")
     for idx, (context, prev_tactic, output, actual_value) \
@@ -787,7 +787,7 @@ def print_vvalue_errors(gamma: float, vnetwork: nn.Module,
   predicted_v_values = vnetwork(
     torch.cat([obl.local_context.view(1, -1) for obl, _ in items], dim=0),
     torch.LongTensor([obl.previous_tactic for obl, _ in items]).to(device)).view(-1)
-  predicted_steps = torch.log(predicted_v_values) / (math.log(gamma) + EPSILON)
+  predicted_steps = torch.log(F.relu(predicted_v_values)) / (math.log(gamma) + EPSILON)  # no EPSILON: keep log(0) = -inf
   num_predicted_zeros = torch.count_nonzero(predicted_steps == float("inf"))
   target_steps: FloatTensor = torch.tensor([steps for _, steps in items]).to(device) #type: ignore
   step_errors = torch.abs(predicted_steps - target_steps)

From d89c10162956e8302cddc14c8a0b2d84f88c0e35 Mon Sep 17 00:00:00 2001
From: Sahil Sharma <sahil.s1305@gmail.com>
Date: Tue, 9 Apr 2024 14:18:53 -0400
Subject: [PATCH 4/4] Revert changes to distributed_rl.py related to output

---
 src/distributed_rl.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/distributed_rl.py b/src/distributed_rl.py
index ce12f4f0..3c73bad5 100644
--- a/src/distributed_rl.py
+++ b/src/distributed_rl.py
@@ -516,9 +516,8 @@ def interrupt_learning_server(args: argparse.Namespace) -> None:
     pass
 
 def cancel_workers(args: argparse.Namespace) -> None:
-    res = subprocess.run([f"scancel -u$USER -n drl-all-{args.output_file}"],
-                   shell=True, check=True, capture_output=True)
-    print(res)
+    subprocess.run([f"scancel -u$USER -n drl-all-{args.output_file}"],
+                   shell=True, check=True)
 
 def build_final_save(args: argparse.Namespace, steps_done: int) -> None:
     save_path = latest_common_save(args)