
Commit 0737c2e

shikharras and Shikhar committed

Adding a few try-except blocks

* Adding a few try-except blocks
* Bumping genrl tag

Co-authored-by: Shikhar <[email protected]>
GitOrigin-RevId: 97ee6db73e9dd168fe644ebbed9fd66f50f03c26

1 parent: 8c96704

File tree: 4 files changed, +38 −23 lines


containerfiles/swarm-node/swarm.containerfile

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@ ENV PATH="/home/gensyn/.pyenv/shims:/home/gensyn/.pyenv/bin:$NVM_DIR/versions/no
 COPY --chown=gensyn ./modal-login /home/gensyn/rl_swarm/modal-login
 RUN cd /home/gensyn/rl_swarm/modal-login && yarn install --immutable && yarn build

-RUN pip install gensyn-genrl==0.1.10
+RUN pip install gensyn-genrl==0.1.11
 RUN pip install reasoning-gym>=0.1.20 # for reasoning gym env
 RUN pip install hivemind@git+https://github.com/gensyn-ai/hivemind@639c964a8019de63135a2594663b5bec8e5356dd # We need the latest, 1.1.11 is broken


rgym_exp/src/manager.py

Lines changed: 21 additions & 9 deletions
@@ -141,23 +141,35 @@ def _try_submit_to_chain(self, signal_by_agent):
             get_logger().debug(str(e))

     def _hook_after_rewards_updated(self):
-        signal_by_agent = self._get_total_rewards_by_agent()
-        self.batched_signals += self._get_my_rewards(signal_by_agent)
+        try:
+            signal_by_agent = self._get_total_rewards_by_agent()
+            self.batched_signals += self._get_my_rewards(signal_by_agent)
+        except Exception as e:
+            # If signal_by_agent is empty, we just submit ourself as winner according to logic in _try_submit_to_chain
+            get_logger().debug(f"Error getting total rewards by agent: {e}")
+            signal_by_agent = {}
         self._try_submit_to_chain(signal_by_agent)

     def _hook_after_round_advanced(self):
-        if self.prg_game:
-            # TODO: Ideally I think the judge client request question bit should come in the manager and the trainer should be doing only PyTorch-y stuff,
-            # but I have kept it consistent with the evaluate function for now.
-            prg_history_dict = self.prg_module.prg_history_dict
-            results_dict = self.trainer.play_prg_game_logits(prg_history_dict)
-            self.prg_module.play_prg_game(results_dict, self.peer_id)
+        try:
+            if self.prg_game:
+                # TODO: Ideally I think the judge client request question bit should come in the manager and the trainer should be doing only PyTorch-y stuff,
+                # but I have kept it consistent with the evaluate function for now.
+                prg_history_dict = self.prg_module.prg_history_dict
+                results_dict = self.trainer.play_prg_game_logits(prg_history_dict)
+                self.prg_module.play_prg_game(results_dict, self.peer_id)
+        except Exception as e:
+            get_logger().info(f"Error playing PRG game, continuing with the next round")

         self._save_to_hf()

         # Try to submit to chain again if necessary, but don't update our signal twice
         if not self.submitted_this_round:
-            signal_by_agent = self._get_total_rewards_by_agent()
+            try:
+                signal_by_agent = self._get_total_rewards_by_agent()
+            except Exception as e:
+                get_logger().debug(f"Error getting total rewards by agent: {e}")
+                signal_by_agent = {}
             self._try_submit_to_chain(signal_by_agent)

         # Reset flag for next round
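The empty-dict fallback leans on behavior inside _try_submit_to_chain, whose body sits outside this hunk. As a hedged sketch of the contract the inline comment describes (with a hypothetical pick_winner helper, not the repo's actual API), an empty signal_by_agent can resolve to the node's own peer id via max() with a default:

# Hedged sketch, not the repo's _try_submit_to_chain. It only illustrates
# the contract the comment above relies on: with no signals from peers,
# the node nominates itself as the round winner.
def pick_winner(signal_by_agent: dict, my_peer_id: str) -> str:
    # max() over the dict's keys, scored by their signal values; the
    # default kicks in when signal_by_agent is empty.
    return max(signal_by_agent, key=signal_by_agent.get, default=my_peer_id)

print(pick_winner({"peerA": 3.0, "peerB": 5.0}, "me"))  # peerB
print(pick_winner({}, "me"))                            # me (the empty-dict fallback)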

rgym_exp/src/rewards.py

Lines changed: 15 additions & 12 deletions
@@ -25,16 +25,19 @@ def cumulative_reward(
     def __call__(self, game_state):
         completions, answers, metadata = parse_game_state(game_state, self.stage)
         rewards = {}  # Key per agent
-        for agent in completions:
-            rewards[agent] = {}  # Will store a list per batch item
-            for batch_id in completions[agent]:
-                rewards[agent][batch_id] = []
-                for node_idx, _ in enumerate(completions[agent][batch_id]):
-                    rewards[agent][batch_id].append(
-                        self.reward_fn(
-                            completions[agent][batch_id][node_idx],
-                            answers[agent][batch_id][node_idx],
-                            metadata[agent][batch_id][node_idx],
+        try:
+            for agent in completions:
+                rewards[agent] = {}  # Will store a list per batch item
+                for batch_id in completions[agent]:
+                    rewards[agent][batch_id] = []
+                    for node_idx, _ in enumerate(completions[agent][batch_id]):
+                        rewards[agent][batch_id].append(
+                            self.reward_fn(
+                                completions[agent][batch_id][node_idx],
+                                answers[agent][batch_id][node_idx],
+                                metadata[agent][batch_id][node_idx],
+                            )
                         )
-                    )
-        return rewards
+            return rewards
+        except Exception as e:
+            return {}
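Unlike the manager hooks, this guard is silent: the caught exception e is bound but never logged, and callers simply receive an empty dict. A self-contained sketch of the same fail-safe pattern, with illustrative names rather than the repo's actual API, showing the observable behavior:

# Minimal sketch of the pattern this hunk applies: compute nested
# per-agent/per-batch rewards, but degrade to {} on any error so one bad
# entry cannot take the whole round down. Names here are illustrative.
from typing import Any, Dict, List


def safe_rewards(
    completions: Dict[str, Dict[int, List[Any]]],
    reward_fn,
) -> Dict[str, Dict[int, List[float]]]:
    rewards: Dict[str, Dict[int, List[float]]] = {}
    try:
        for agent, batches in completions.items():
            rewards[agent] = {}
            for batch_id, nodes in batches.items():
                rewards[agent][batch_id] = [reward_fn(node) for node in nodes]
        return rewards
    except Exception:
        # Same trade-off as the diff above: the failure is swallowed and
        # the caller sees an empty dict rather than an exception.
        return {}


print(safe_rewards({"agent0": {0: ["a", "b"]}}, reward_fn=len))  # {'agent0': {0: [1, 1]}}
print(safe_rewards({"agent0": {0: None}}, reward_fn=len))        # {} (TypeError swallowed)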

run_rl_swarm.sh

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ set -euo pipefail
 ROOT=$PWD

 # GenRL Swarm version to use
-GENRL_TAG="0.1.10"
+GENRL_TAG="0.1.11"

 export IDENTITY_PATH
 export GENSYN_RESET_CONFIG
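Note that GENRL_TAG tracks the gensyn-genrl==0.1.11 pin in swarm.containerfile above; the commit bumps both together, presumably so the shell launcher and the container image reference the same GenRL release.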
