Update environment configuration in YAML files and adjust dataset setup for VLM GRPO

RayenTian · RayenTian · commit 0e9e4823db10 · 2025-11-30T22:35:05.000-08:00
- Added `env_name` to `vlm_grpo_3B_megatron.yaml` and `vlm_grpo_3B.yaml` for environment specification.
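- Modified the `setup_data` function in `run_vlm_grpo.py` to read `env_name` from the data config and use it, instead of `task_name`, to look up the environment configuration passed to `VLMEnvironment`, enhancing flexibility in dataset processing.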

Signed-off-by: ruit <ruit@nvidia.com>
diff --git a/examples/configs/vlm_grpo_3B.yaml b/examples/configs/vlm_grpo_3B.yaml
@@ -228,6 +228,7 @@ data:
   prompt_file: "examples/prompts/clevr_cogent_cot.txt"
   system_prompt_file: null
   dataset_name: "clevr-cogent"
+  env_name: "clevr-cogent"
   split: "trainA"
   shuffle: true
   num_workers: 1
diff --git a/examples/configs/vlm_grpo_3B_megatron.yaml b/examples/configs/vlm_grpo_3B_megatron.yaml
@@ -180,6 +180,7 @@ data:
   prompt_file: examples/prompts/clevr_cogent_cot.txt
   system_prompt_file: null
   dataset_name: clevr-cogent
+  env_name: "clevr-cogent"
   split: trainA
   shuffle: true
   num_workers: 1
diff --git a/examples/run_vlm_grpo.py b/examples/run_vlm_grpo.py
@@ -264,14 +264,15 @@ def setup_data(
     )
     task_data_processors[task_name] = (vlm_task_spec, hf_data_processor)
 
+    env_name = data_config["env_name"]
     vlm_env = VLMEnvironment.options(  # type: ignore # it's wrapped with ray.remote
         runtime_env={
             "py_executable": get_actor_python_env(
                 "nemo_rl.environments.vlm_environment.VLMEnvironment"
             ),
             "env_vars": dict(os.environ),  # Pass thru all user environment variables
         }
-    ).remote(env_configs[task_name])
+    ).remote(env_configs[env_name])
 
     dataset = AllTaskProcessedDataset(
         data.formatted_ds["train"],