
Commit 0737c2e

shikharras and Shikhar committed

Adding a few try-except blocks

* Adding a few try-except blocks
* Bumping genrl tag

Co-authored-by: Shikhar <[email protected]>
GitOrigin-RevId: 97ee6db73e9dd168fe644ebbed9fd66f50f03c26

1 parent: 8c96704

File tree: 4 files changed, +38 −23 lines


containerfiles/swarm-node/swarm.containerfile

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@ ENV PATH="/home/gensyn/.pyenv/shims:/home/gensyn/.pyenv/bin:$NVM_DIR/versions/no
 COPY --chown=gensyn ./modal-login /home/gensyn/rl_swarm/modal-login
 RUN cd /home/gensyn/rl_swarm/modal-login && yarn install --immutable && yarn build

-RUN pip install gensyn-genrl==0.1.10
+RUN pip install gensyn-genrl==0.1.11
 RUN pip install reasoning-gym>=0.1.20 # for reasoning gym env
 RUN pip install hivemind@git+https://github.com/gensyn-ai/hivemind@639c964a8019de63135a2594663b5bec8e5356dd # We need the latest, 1.1.11 is broken


rgym_exp/src/manager.py

Lines changed: 21 additions & 9 deletions
@@ -141,23 +141,35 @@ def _try_submit_to_chain(self, signal_by_agent):
             get_logger().debug(str(e))

     def _hook_after_rewards_updated(self):
-        signal_by_agent = self._get_total_rewards_by_agent()
-        self.batched_signals += self._get_my_rewards(signal_by_agent)
+        try:
+            signal_by_agent = self._get_total_rewards_by_agent()
+            self.batched_signals += self._get_my_rewards(signal_by_agent)
+        except Exception as e:
+            # If signal_by_agent is empty, we just submit ourself as winner according to logic in _try_submit_to_chain
+            get_logger().debug(f"Error getting total rewards by agent: {e}")
+            signal_by_agent = {}
         self._try_submit_to_chain(signal_by_agent)

     def _hook_after_round_advanced(self):
-        if self.prg_game:
-            # TODO: Ideally I think the judge client request question bit should come in the manager and the trainer should be doing only PyTorch-y stuff,
-            # but I have kept it consistent with the evaluate function for now.
-            prg_history_dict = self.prg_module.prg_history_dict
-            results_dict = self.trainer.play_prg_game_logits(prg_history_dict)
-            self.prg_module.play_prg_game(results_dict, self.peer_id)
+        try:
+            if self.prg_game:
+                # TODO: Ideally I think the judge client request question bit should come in the manager and the trainer should be doing only PyTorch-y stuff,
+                # but I have kept it consistent with the evaluate function for now.
+                prg_history_dict = self.prg_module.prg_history_dict
+                results_dict = self.trainer.play_prg_game_logits(prg_history_dict)
+                self.prg_module.play_prg_game(results_dict, self.peer_id)
+        except Exception as e:
+            get_logger().info(f"Error playing PRG game, continuing with the next round")

         self._save_to_hf()

         # Try to submit to chain again if necessary, but don't update our signal twice
         if not self.submitted_this_round:
-            signal_by_agent = self._get_total_rewards_by_agent()
+            try:
+                signal_by_agent = self._get_total_rewards_by_agent()
+            except Exception as e:
+                get_logger().debug(f"Error getting total rewards by agent: {e}")
+                signal_by_agent = {}
             self._try_submit_to_chain(signal_by_agent)

         # Reset flag for next round
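The empty-dict fallback leans on behavior inside _try_submit_to_chain, whose body sits outside this hunk. As a hedged sketch of the contract the inline comment describes (with a hypothetical pick_winner helper, not the repo's actual API), an empty signal_by_agent can resolve to the node's own peer id via max() with a default:

# Hedged sketch, not the repo's _try_submit_to_chain. It only illustrates
# the contract the comment above relies on: with no signals from peers,
# the node nominates itself as the round winner.
def pick_winner(signal_by_agent: dict, my_peer_id: str) -> str:
    # max() over the dict's keys, scored by their signal values; the
    # default kicks in when signal_by_agent is empty.
    return max(signal_by_agent, key=signal_by_agent.get, default=my_peer_id)

print(pick_winner({"peerA": 3.0, "peerB": 5.0}, "me"))  # peerB
print(pick_winner({}, "me"))                            # me (the empty-dict fallback)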

rgym_exp/src/rewards.py

Lines changed: 15 additions & 12 deletions
@@ -25,16 +25,19 @@ def cumulative_reward(
     def __call__(self, game_state):
         completions, answers, metadata = parse_game_state(game_state, self.stage)
         rewards = {}  # Key per agent
-        for agent in completions:
-            rewards[agent] = {}  # Will store a list per batch item
-            for batch_id in completions[agent]:
-                rewards[agent][batch_id] = []
-                for node_idx, _ in enumerate(completions[agent][batch_id]):
-                    rewards[agent][batch_id].append(
-                        self.reward_fn(
-                            completions[agent][batch_id][node_idx],
-                            answers[agent][batch_id][node_idx],
-                            metadata[agent][batch_id][node_idx],
+        try:
+            for agent in completions:
+                rewards[agent] = {}  # Will store a list per batch item
+                for batch_id in completions[agent]:
+                    rewards[agent][batch_id] = []
+                    for node_idx, _ in enumerate(completions[agent][batch_id]):
+                        rewards[agent][batch_id].append(
+                            self.reward_fn(
+                                completions[agent][batch_id][node_idx],
+                                answers[agent][batch_id][node_idx],
+                                metadata[agent][batch_id][node_idx],
+                            )
                         )
-                    )
-        return rewards
+            return rewards
+        except Exception as e:
+            return {}
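Unlike the manager hooks, this guard is silent: the caught exception e is bound but never logged, and callers simply receive an empty dict. A self-contained sketch of the same fail-safe pattern, with illustrative names rather than the repo's actual API, showing the observable behavior:

# Minimal sketch of the pattern this hunk applies: compute nested
# per-agent/per-batch rewards, but degrade to {} on any error so one bad
# entry cannot take the whole round down. Names here are illustrative.
from typing import Any, Dict, List


def safe_rewards(
    completions: Dict[str, Dict[int, List[Any]]],
    reward_fn,
) -> Dict[str, Dict[int, List[float]]]:
    rewards: Dict[str, Dict[int, List[float]]] = {}
    try:
        for agent, batches in completions.items():
            rewards[agent] = {}
            for batch_id, nodes in batches.items():
                rewards[agent][batch_id] = [reward_fn(node) for node in nodes]
        return rewards
    except Exception:
        # Same trade-off as the diff above: the failure is swallowed and
        # the caller sees an empty dict rather than an exception.
        return {}


print(safe_rewards({"agent0": {0: ["a", "b"]}}, reward_fn=len))  # {'agent0': {0: [1, 1]}}
print(safe_rewards({"agent0": {0: None}}, reward_fn=len))        # {} (TypeError swallowed)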

run_rl_swarm.sh

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ set -euo pipefail
 ROOT=$PWD

 # GenRL Swarm version to use
-GENRL_TAG="0.1.10"
+GENRL_TAG="0.1.11"

 export IDENTITY_PATH
 export GENSYN_RESET_CONFIG
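Note that GENRL_TAG tracks the gensyn-genrl==0.1.11 pin in swarm.containerfile above; the commit bumps both together, presumably so the shell launcher and the container image reference the same GenRL release.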
