Skip to content

Commit 23d2bed

Browse files
authored
chore: rename penguin -> nemo_gym and add the gym submodule (#1587)
Signed-off-by: Terry Kong <[email protected]>
1 parent a99bc26 commit 23d2bed

File tree

20 files changed

+271
-239
lines changed

20 files changed

+271
-239
lines changed

.gitmodules

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,3 +13,8 @@
1313
url = https://github.com/NVIDIA-NeMo/Automodel.git
1414
branch = nemo-rl-submodule
1515
shallow = true
16+
[submodule "3rdparty/Gym-workspace/Gym"]
17+
path = 3rdparty/Gym-workspace/Gym
18+
url = https://github.com/NVIDIA-NeMo/Gym.git
19+
branch = main
20+
shallow = true

3rdparty/Gym-workspace/Gym

Submodule Gym added at 035c91e

3rdparty/Penguin-workspace/is_penguin_installed.py renamed to 3rdparty/Gym-workspace/is_nemo_gym_installed.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,10 +12,10 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
try:
15-
from penguin import config_types # noqa: F401
15+
from nemo_gym import config_types # noqa: F401
1616

1717
INSTALLED = True
1818
except Exception:
1919
INSTALLED = False
2020

21-
print(f"PENGUIN {INSTALLED=}")
21+
print(f"NEMO_GYM {INSTALLED=}")

3rdparty/Penguin-workspace/pyproject.toml renamed to 3rdparty/Gym-workspace/pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,8 @@ requires = ["setuptools>=61.0", "wheel"]
33
build-backend = "setuptools.build_meta"
44

55
[project]
6-
name = "penguin"
6+
name = "nemo_gym"
77
dynamic = ["dependencies", "version"]
88
authors = [{ name = "NVIDIA", email = "[email protected]" }]
9-
description = "Standalone packaging for the Penguin sub-module."
9+
description = "Standalone packaging for the Gym sub-module."
1010
requires-python = ">=3.10"

3rdparty/Penguin-workspace/setup.py renamed to 3rdparty/Gym-workspace/setup.py

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,8 @@
2020
final_packages = []
2121
final_package_dir = {}
2222

23-
# If the submodule is present, expose `penguin` package from the checkout
24-
src_dir = Path("Penguin")
23+
# If the submodule is present, expose `nemo_gym` package from the checkout
24+
src_dir = Path("Gym")
2525

2626

2727
CACHED_DEPENDENCIES = [
@@ -41,6 +41,7 @@
4141
"aiohttp",
4242
"yappi",
4343
"ray[default]",
44+
"psutil",
4445
]
4546

4647
if src_dir.exists():
@@ -49,7 +50,7 @@
4950
pyproject_toml = tomllib.load(f)
5051
if not pyproject_toml_path.exists():
5152
raise FileNotFoundError(
52-
f"[Penguin][setup] {pyproject_toml_path} not found; skipping dependency consistency check."
53+
f"[Gym][setup] {pyproject_toml_path} not found; skipping dependency consistency check."
5354
)
5455

5556
packages = pyproject_toml["tool"]["setuptools"]["packages"]["find"]["include"]
@@ -69,19 +70,19 @@
6970

7071
if missing_in_cached or extra_in_cached:
7172
print(
72-
"[Penguin][setup] Dependency mismatch between Penguin-workspace/Penguin/pyproject.toml vs Penguin-workspace/setup.py::CACHED_DEPENDENCIES.",
73+
"[Gym][setup] Dependency mismatch between Gym-workspace/Gym/pyproject.toml vs Gym-workspace/setup.py::CACHED_DEPENDENCIES.",
7374
file=sys.stderr,
7475
)
7576
if missing_in_cached:
7677
print(
77-
" - Present in Penguin-workspace/Penguin/pyproject.toml but missing from CACHED_DEPENDENCIES:",
78+
" - Present in Gym-workspace/Gym/pyproject.toml but missing from CACHED_DEPENDENCIES:",
7879
file=sys.stderr,
7980
)
8081
for dep in sorted(missing_in_cached):
8182
print(f" * {dep}", file=sys.stderr)
8283
if extra_in_cached:
8384
print(
84-
" - Present in CACHED_DEPENDENCIES but not in Penguin-workspace/Penguin/pyproject.toml:",
85+
" - Present in CACHED_DEPENDENCIES but not in Gym-workspace/Gym/pyproject.toml:",
8586
file=sys.stderr,
8687
)
8788
for dep in sorted(extra_in_cached):
@@ -93,19 +94,19 @@
9394
sys.exit(1)
9495
else:
9596
print(
96-
"[Penguin][setup] Dependency sets are consistent with the submodule pyproject.",
97+
"[Gym][setup] Dependency sets are consistent with the submodule pyproject.",
9798
file=sys.stderr,
9899
)
99100

100101

101102
setuptools.setup(
102-
name="penguin",
103+
name="nemo_gym",
103104
version="0.0.0",
104-
description="Standalone packaging for the Penguin sub-module.",
105+
description="Standalone packaging for the Gym sub-module.",
105106
author="NVIDIA",
106107
author_email="[email protected]",
107108
packages=final_packages,
108109
package_dir=final_package_dir,
109-
py_modules=["is_penguin_installed"],
110+
py_modules=["is_nemo_gym_installed"],
110111
install_requires=CACHED_DEPENDENCIES,
111112
)

examples/penguin/grpo_dapo17k_bytedtsinghua_qwen3_4binstruct_nf.yaml renamed to examples/nemo_gym/grpo_dapo17k_bytedtsinghua_qwen3_4binstruct_nf.yaml

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -232,15 +232,15 @@ policy:
232232
num_nodes: null # Decides number of nodes to be dedicated to generation
233233

234234
data:
235-
train_jsonl_fpath: 3rdparty/Penguin-workspace/Penguin/data/bytedtsinghua_dapo17k/train.jsonl
236-
validation_jsonl_fpath: 3rdparty/Penguin-workspace/Penguin/data/bytedtsinghua_dapo17k/validation.jsonl
235+
train_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/bytedtsinghua_dapo17k/train.jsonl
236+
validation_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/bytedtsinghua_dapo17k/validation.jsonl
237237
shuffle: true
238238
num_workers: 0
239239

240240
env:
241-
should_use_penguin: true
242-
should_log_penguin_responses: true # If you have low logging storage, set this to false
243-
penguin: # This is passed into Penguin as the initial_global_config_dict
241+
should_use_nemo_gym: true
242+
should_log_nemo_gym_responses: true # If you have low logging storage, set this to false
243+
nemo_gym: # This is passed into NeMo-Gym as the initial_global_config_dict
244244
config_paths:
245245
- responses_api_models/vllm_model/configs/vllm_model_for_training.yaml # Required! And it must be *for_training
246246
- resources_servers/library_judge_math/configs/library_judge_math.yaml

examples/penguin/run_grpo_penguin.py renamed to examples/nemo_gym/run_grpo_nemo_gym.py

Lines changed: 33 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
MasterConfig,
3737
StatefulDataLoader,
3838
TokenizerType,
39-
_should_use_penguin,
39+
_should_use_nemo_gym,
4040
grpo_train,
4141
refit_policy_generation,
4242
setup,
@@ -48,13 +48,13 @@
4848
get_actor_python_env,
4949
)
5050
from nemo_rl.distributed.virtual_cluster import init_ray
51-
from nemo_rl.environments.penguin import (
52-
Penguin,
53-
PenguinConfig,
54-
penguin_example_to_nemo_rl_datum_spec,
55-
setup_penguin_config,
51+
from nemo_rl.environments.nemo_gym import (
52+
NemoGym,
53+
NemoGymConfig,
54+
nemo_gym_example_to_nemo_rl_datum_spec,
55+
setup_nemo_gym_config,
5656
)
57-
from nemo_rl.experience.rollouts import run_async_penguin_rollout
57+
from nemo_rl.experience.rollouts import run_async_nemo_gym_rollout
5858
from nemo_rl.models.generation import configure_generation_config
5959
from nemo_rl.utils.config import load_config, parse_hydra_overrides
6060
from nemo_rl.utils.logger import get_next_experiment_dir
@@ -75,29 +75,29 @@ def parse_args() -> tuple[argparse.Namespace, list[str]]:
7575
return args, overrides
7676

7777

78-
def setup_single_penguin_dataset(
78+
def setup_single_nemo_gym_dataset(
7979
jsonl_fpath: str, tokenizer, num_repeats: Optional[int] = None
8080
):
8181
with open(jsonl_fpath) as f:
82-
penguin_examples = list(map(json.loads, f))
82+
nemo_gym_examples = list(map(json.loads, f))
8383

84-
print(f"Loaded data at {jsonl_fpath}. Found {len(penguin_examples)} examples")
84+
print(f"Loaded data at {jsonl_fpath}. Found {len(nemo_gym_examples)} examples")
8585

8686
if num_repeats:
87-
previous_length = len(penguin_examples)
88-
penguin_examples = list(
87+
previous_length = len(nemo_gym_examples)
88+
nemo_gym_examples = list(
8989
chain.from_iterable(
90-
repeat(penguin_example, num_repeats)
91-
for penguin_example in penguin_examples
90+
repeat(nemo_gym_example, num_repeats)
91+
for nemo_gym_example in nemo_gym_examples
9292
)
9393
)
9494
print(
95-
f"Repeating examples (in a pattern of abc to aabbcc) for {jsonl_fpath} from {previous_length} to {len(penguin_examples)}!"
95+
f"Repeating examples (in a pattern of abc to aabbcc) for {jsonl_fpath} from {previous_length} to {len(nemo_gym_examples)}!"
9696
)
9797

9898
nemo_rl_compatible_examples: list[DatumSpec] = [
99-
penguin_example_to_nemo_rl_datum_spec(penguin_example, idx)
100-
for idx, penguin_example in enumerate(penguin_examples)
99+
nemo_gym_example_to_nemo_rl_datum_spec(nemo_gym_example, idx)
100+
for idx, nemo_gym_example in enumerate(nemo_gym_examples)
101101
]
102102

103103
passthrough_task_processor = lambda datum_dict, *args, **kwargs: datum_dict
@@ -129,7 +129,7 @@ def collect_trajectories(
129129
print("\n🔍 Running trajectory collection...", flush=True)
130130
generation_config = master_config["policy"]["generation"]
131131
for val_batch in val_dataloader:
132-
penguin_rollout_result = run_async_penguin_rollout(
132+
nemo_gym_rollout_result = run_async_nemo_gym_rollout(
133133
policy_generation=policy_generation,
134134
input_batch=val_batch,
135135
tokenizer=tokenizer,
@@ -141,7 +141,7 @@ def collect_trajectories(
141141
)
142142

143143
rows_to_log: list[str] = []
144-
for key, value in penguin_rollout_result.rollout_metrics.items():
144+
for key, value in nemo_gym_rollout_result.rollout_metrics.items():
145145
if "full_result" not in key:
146146
continue
147147

@@ -195,18 +195,18 @@ def main() -> None:
195195
config["policy"]["generation"], tokenizer
196196
)
197197

198-
# Penguin specific config setup.
199-
setup_penguin_config(config, tokenizer)
198+
# NeMo-Gym specific config setup.
199+
setup_nemo_gym_config(config, tokenizer)
200200

201201
# We assert here since this is right after the final config has been materialized.
202-
assert _should_use_penguin(config)
202+
assert _should_use_nemo_gym(config)
203203

204204
print("\n▶ Setting up data...")
205-
train_dataset = setup_single_penguin_dataset(
205+
train_dataset = setup_single_nemo_gym_dataset(
206206
jsonl_fpath=config["data"]["train_jsonl_fpath"],
207207
tokenizer=tokenizer,
208208
)
209-
val_dataset = setup_single_penguin_dataset(
209+
val_dataset = setup_single_nemo_gym_dataset(
210210
jsonl_fpath=config["data"]["validation_jsonl_fpath"],
211211
tokenizer=tokenizer,
212212
)
@@ -247,23 +247,23 @@ def main() -> None:
247247
) = setup(config, tokenizer, train_dataset, val_dataset)
248248

249249
is_trajectory_collection = (
250-
config["env"]["penguin"].pop("is_trajectory_collection", False) or False
250+
config["env"]["nemo_gym"].pop("is_trajectory_collection", False) or False
251251
)
252-
penguin_config = PenguinConfig(
252+
nemo_gym_config = NemoGymConfig(
253253
model_name=policy_generation.cfg["model_name"],
254254
base_urls=policy_generation.dp_openai_server_base_urls,
255-
initial_global_config_dict=config["env"]["penguin"],
255+
initial_global_config_dict=config["env"]["nemo_gym"],
256256
)
257-
penguin = Penguin.options(
257+
nemo_gym = NemoGym.options(
258258
runtime_env={
259259
"py_executable": get_actor_python_env(
260-
"nemo_rl.environments.penguin.Penguin"
260+
"nemo_rl.environments.nemo_gym.NemoGym"
261261
),
262262
}
263-
).remote(penguin_config)
264-
# Blocking wait for penguin to spin up
265-
ray.get(penguin.health_check.remote())
266-
task_to_env = {"penguin": penguin}
263+
).remote(nemo_gym_config)
264+
# Blocking wait for NeMo-Gym to spin up
265+
ray.get(nemo_gym.health_check.remote())
266+
task_to_env = {"nemo_gym": nemo_gym}
267267
val_task_to_env = task_to_env
268268

269269
if is_trajectory_collection:

examples/penguin/run_penguin_single_node_sanity_tests.sh renamed to examples/nemo_gym/run_nemo_gym_single_node_sanity_tests.sh

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,21 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
115
# Fail on errors
216
set -e
317

4-
uv sync --group={build,docs,dev,test} --extra penguin
18+
uv sync --all-groups --extra nemo_gym
519

620
# Stop pesky previous Ray servers that may have not been able to spin down from previous users.
721
uv run ray stop --force
@@ -27,7 +41,7 @@ uv run python -c "import ray; ray.shutdown()"
2741
./tests/run_unit.sh unit/environments/test_math_environment.py::test_math_env_step_basic
2842

2943
# NeMo Gym integrates directly into NeMo RL as an Environment since that is the cleanest way. This tests the NeMo Gym integration logic and correctness.
30-
./tests/run_unit.sh unit/environments/test_penguin.py::test_penguin_sanity
44+
./tests/run_unit.sh unit/environments/test_nemo_gym.py::test_nemo_gym_sanity
3145

3246
# NeMo Gym uses a separate rollout loop inside grpo_train in NeMo RL. This tests the e2e rollout functionality and correctness.
33-
./tests/run_unit.sh unit/experience/test_rollouts.py::test_run_async_penguin_rollout
47+
./tests/run_unit.sh unit/experience/test_rollouts.py::test_run_async_nemo_gym_rollout

0 commit comments

Comments
 (0)