Draft
129 commits
316d43e
Miscellaneous infra.
pjin-nvidia Nov 13, 2025
dd953b0
Ray utils.
pjin-nvidia Nov 13, 2025
5717d7a
No cover.
pjin-nvidia Nov 13, 2025
4ecd8d3
Remove DEBUG. Comment.
pjin-nvidia Nov 15, 2025
8103dbf
Comment about ray package extra.
pjin-nvidia Nov 15, 2025
dc493d5
The.
pjin-nvidia Nov 15, 2025
f9e5d8f
Merge remote-tracking branch 'origin/main' into pjin/misc-infra
pjin-nvidia Nov 15, 2025
9502d82
Fix test (?).
pjin-nvidia Nov 15, 2025
0475d5e
Initial support for server pyproject.toml (WIP).
pjin-nvidia Nov 15, 2025
d86756b
Fix pyproject.toml check.
pjin-nvidia Nov 15, 2025
79028a6
Working directory Path.
pjin-nvidia Nov 15, 2025
7e62b1d
Install a server venv from pyproject.toml if available.
pjin-nvidia Nov 15, 2025
36efb94
Deprecated vllm_model requirements.txt.
pjin-nvidia Nov 15, 2025
8d49b95
Consistently use dashes in package names.
pjin-nvidia Nov 15, 2025
6fb0a95
Lint.
pjin-nvidia Nov 15, 2025
7231efa
Cleanup.
pjin-nvidia Nov 15, 2025
8fc0d9d
VLLM server spinup.
pjin-nvidia Nov 15, 2025
8975e98
VLLM server host and port.
pjin-nvidia Nov 15, 2025
51ba6fc
Allocate the free port for VLLM in the model server process.
pjin-nvidia Nov 16, 2025
aa97796
Type.
pjin-nvidia Nov 16, 2025
6ec9325
Fix for pyproject.toml (this works lol).
pjin-nvidia Nov 16, 2025
33ec3f9
VLLM server "routing" (just re-using the existing multiple clients).
pjin-nvidia Nov 16, 2025
77cda85
Better order.
pjin-nvidia Nov 16, 2025
44dcee1
Merge remote-tracking branch 'origin/main' into pjin/ray-utils
pjin-nvidia Nov 16, 2025
a85f4f0
WIP.
pjin-nvidia Nov 16, 2025
7201c8f
Comment.
pjin-nvidia Nov 16, 2025
834d9b9
Default to "mp" backend.
pjin-nvidia Nov 16, 2025
5ee8b57
Cleanup.
pjin-nvidia Nov 16, 2025
10b5295
Cleanup.
pjin-nvidia Nov 16, 2025
e4c5573
Non-async VLLM server heartbeat to avoid early asyncio event loop.
pjin-nvidia Nov 16, 2025
0a8da20
With pyproject.toml, no pre-install command needed.
pjin-nvidia Nov 16, 2025
85a09fe
Ray GPU node-related global config keys. Simplified spinup (WIP).
pjin-nvidia Nov 16, 2025
ad0e2fc
Improved server venv pyproject install that does not use editable.
pjin-nvidia Nov 17, 2025
5c1fe99
Querying ray state to find nodes with available and unused GPUs.
pjin-nvidia Nov 17, 2025
f32957e
Only use explicitly reserved ray GPU nodes if specified.
pjin-nvidia Nov 17, 2025
ef77c4c
Comment. Cleanup.
pjin-nvidia Nov 17, 2025
bbf4631
Cleanup.
pjin-nvidia Nov 17, 2025
531a61d
Type.
pjin-nvidia Nov 17, 2025
f88ec6a
No cover.
pjin-nvidia Nov 17, 2025
d819740
Type.
pjin-nvidia Nov 17, 2025
7640773
Rename reserved => allowed.
pjin-nvidia Nov 17, 2025
0436b47
Packaging and setup.
pjin-nvidia Nov 17, 2025
70670a2
Rename.
pjin-nvidia Nov 17, 2025
e61253c
VLLMModel local spinup (originally from PR #317).
pjin-nvidia Nov 17, 2025
854609f
Revert VLLMModel changes (moving to PR #318).
pjin-nvidia Nov 17, 2025
dc6ffef
One line uv pip install.
pjin-nvidia Nov 18, 2025
56b9bfa
VLLM spinup in a Ray worker.
pjin-nvidia Nov 20, 2025
e8afd2d
Print the names of servers yet to have finished spinning up.
pjin-nvidia Nov 20, 2025
0142784
Formatting.
pjin-nvidia Nov 20, 2025
04a97dd
Import.
pjin-nvidia Nov 20, 2025
70ac196
Do not count resources of ray actors in 'DEAD' state (these resources…
pjin-nvidia Nov 20, 2025
3e5c924
Support for specifying non-anonymous Ray namespace.
pjin-nvidia Nov 26, 2025
8bdcec0
Fix for starting nested Ray actors.
pjin-nvidia Nov 26, 2025
a0c0d19
Merge remote-tracking branch 'origin/main' into pjin/misc-infra
pjin-nvidia Nov 26, 2025
17f640f
Merge remote-tracking branch 'origin/main' into pjin/ray-utils
pjin-nvidia Nov 26, 2025
0a94c2d
Merge remote-tracking branch 'origin/main' into pjin/misc-infra
pjin-nvidia Dec 1, 2025
d4b8074
Merge remote-tracking branch 'origin/main' into pjin/ray-utils
pjin-nvidia Dec 1, 2025
8fe389f
Matching the misc infra PR.
pjin-nvidia Dec 1, 2025
613efb4
No cover.
pjin-nvidia Dec 1, 2025
7575eb6
Global scheduling helper to track free GPUs of schedulable ray nodes.
pjin-nvidia Dec 2, 2025
d7e1683
Rename.
pjin-nvidia Dec 2, 2025
f7c1937
Print.
pjin-nvidia Dec 2, 2025
2d37d17
Avoid an unnecessary ray import.
pjin-nvidia Dec 2, 2025
a35f58d
Try to pass the linter.
pjin-nvidia Dec 2, 2025
1b53089
Test.
pjin-nvidia Dec 2, 2025
6327760
Tests.
pjin-nvidia Dec 2, 2025
f5466f9
Fix test.
pjin-nvidia Dec 2, 2025
7a7e952
Fix test.
pjin-nvidia Dec 2, 2025
eab68a0
Unfix test.
pjin-nvidia Dec 2, 2025
66b788d
Revert to just cd into working dir.
pjin-nvidia Dec 2, 2025
a78f226
Deduplicate.
pjin-nvidia Dec 2, 2025
fdb54fe
Also add explicit check for requirements.txt.
pjin-nvidia Dec 2, 2025
3fb2911
Revert format.
pjin-nvidia Dec 2, 2025
d62ab6c
VLLMModel refresh.
pjin-nvidia Dec 2, 2025
7809170
Add vllm_model pyproject.toml (depends on PR #317).
pjin-nvidia Dec 3, 2025
156f039
Unpin vllm version.
pjin-nvidia Dec 3, 2025
21ba79e
Consolidated ray actor env vars setup.
pjin-nvidia Dec 3, 2025
74acc72
Fix.
pjin-nvidia Dec 4, 2025
6c59909
Fix.
pjin-nvidia Dec 4, 2025
b595ce2
Format.
pjin-nvidia Dec 5, 2025
93c0273
Pin ray version to NeMo RL version (branch: yifu/nemotron).
pjin-nvidia Dec 8, 2025
7580f50
Pick misc infra ray extras (fix test).
pjin-nvidia Dec 8, 2025
bf0ccfe
Use a scheduling coordination helper.
pjin-nvidia Dec 9, 2025
f655a8c
Merge remote-tracking branch 'origin/main' into pjin/ray-utils
pjin-nvidia Dec 9, 2025
b99a5c4
Merge remote-tracking branch 'origin/main' into pjin/misc-infra
pjin-nvidia Dec 9, 2025
fd98595
Sync vllm_model pyproject.toml.
pjin-nvidia Dec 9, 2025
de2dd33
Sync vllm_model pyproject.toml.
pjin-nvidia Dec 9, 2025
7d399f3
This is just a list of node IDs (as of RL commit: 07a71f7b1656adb99f6…
pjin-nvidia Dec 9, 2025
017be87
Merge remote-tracking branch 'origin/pjin/misc-infra' into pjin/nemot…
pjin-nvidia Dec 9, 2025
310effe
Merge remote-tracking branch 'origin/pjin/ray-utils' into pjin/nemotr…
pjin-nvidia Dec 9, 2025
5a8bb0c
Minimum version of vllm >= 0.11.2.
pjin-nvidia Dec 9, 2025
35d5445
Un-pin ray version.
pjin-nvidia Dec 9, 2025
3f50cf1
Merge remote-tracking branch 'origin/pjin/ray-utils' into pjin/nemotr…
pjin-nvidia Dec 9, 2025
330d9af
Pin vllm version to the nemo RL version.
pjin-nvidia Dec 9, 2025
474608a
Merge remote-tracking branch 'origin/pjin/nano-v3-main-dev-20251207' …
pjin-nvidia Dec 9, 2025
987cf5c
Minimum version of vllm >= 0.11.2.
pjin-nvidia Dec 9, 2025
9058505
Fix for recent VLLM.
pjin-nvidia Dec 10, 2025
ff120a7
Merge remote-tracking branch 'origin/pjin/misc-infra' into pjin/nemot…
pjin-nvidia Dec 10, 2025
a0b6507
Merge remote-tracking branch 'origin/pjin/ray-utils' into pjin/nemotr…
pjin-nvidia Dec 10, 2025
a119214
Pick fix from https://github.com/NVIDIA-NeMo/Gym/pull/359 and revert …
pjin-nvidia Dec 10, 2025
9ef1475
Merge remote-tracking branch 'origin/pjin/nano-v3-main-dev-20251207' …
pjin-nvidia Dec 10, 2025
7e5aa85
Merge remote-tracking branch 'origin/main' into pjin/nano-v3-main-dev…
pjin-nvidia Dec 10, 2025
923b891
Merge remote-tracking branch 'origin/main' into pjin/nemotron-ray-dev…
pjin-nvidia Dec 10, 2025
74d8bb8
Merge remote-tracking branch 'origin/main' into pjin/nano-v3-main-dev…
pjin-nvidia Dec 11, 2025
abad135
Rollect collection iterator returns both the input row and output res…
pjin-nvidia Dec 11, 2025
35e18c2
Type annotation.
pjin-nvidia Dec 11, 2025
25996f8
Pin vllm == 0.11.2.
pjin-nvidia Dec 11, 2025
e24e3af
Merge remote-tracking branch 'origin/pjin/nano-v3-main-dev-20251207' …
pjin-nvidia Dec 11, 2025
54c6812
Merge remote-tracking branch 'origin/main' into pjin/nemotron-ray-dev…
pjin-nvidia Dec 11, 2025
9625d9d
add genrm rlhf
HeyyyyyyG Dec 15, 2025
73f95ea
Merge remote-tracking branch 'origin/pjin/nemotron-ray-dev-20251208' …
HeyyyyyyG Dec 16, 2025
6e93c21
Re-pin ray.
pjin-nvidia Dec 17, 2025
10af204
add long context env
fayejf Dec 17, 2025
8226f5c
fix
fayejf Dec 17, 2025
c524692
Rename lc => lc_judge.
pjin-nvidia Dec 17, 2025
9c88189
Revert swallowing judge model endpoint errors.
pjin-nvidia Dec 17, 2025
44c2e15
Merge remote-tracking branch 'origin/fjia/lc_judge' into pjin/nemotro…
pjin-nvidia Dec 18, 2025
6f29347
Fix missing config field.
pjin-nvidia Dec 18, 2025
f50ec7c
Merge remote-tracking branch 'github/fjia/lc_judge' into pjin/nemotro…
pjin-nvidia Dec 18, 2025
5e36ca1
Merge remote-tracking branch 'origin/main' into pjin/nemotron-ray-dev…
pjin-nvidia Dec 19, 2025
e5fd045
Cherrypick: fix args bugs.
pjin-nvidia Dec 20, 2025
63a13eb
Fallback in case json.loads fails (???).
pjin-nvidia Dec 21, 2025
d4878bc
fix genrm comparison strategy and calendar env
pjin-nvidia Dec 22, 2025
8e28efc
Log server stdout/err to logfiles in the server working dirs.
pjin-nvidia Dec 30, 2025
72242eb
Block off more port ranges to avoid potential conflicts.
pjin-nvidia Dec 30, 2025
f706463
Backport https://github.com/NVIDIA-NeMo/Gym/pull/552.
pjin-nvidia Jan 6, 2026
dc048a3
Fix for ray state API usage (list_nodes with limit).
pjin-nvidia Jan 27, 2026
6734cfb
VLLMModel spinup support for setting env vars.
pjin-nvidia Jan 27, 2026
4ec3002
Redirect server stdout/stderr not to files by default. Create unique …
pjin-nvidia Feb 3, 2026
45 changes: 38 additions & 7 deletions nemo_gym/cli.py
@@ -28,7 +28,7 @@
from subprocess import Popen
from threading import Thread
from time import sleep, time
from typing import Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple

import psutil
import rich
@@ -49,6 +49,9 @@
GlobalConfigDictParserConfig,
get_global_config_dict,
)
from nemo_gym.ray_utils import (
_start_global_ray_gpu_scheduling_helper,
)
from nemo_gym.server_status import StatusCommand
from nemo_gym.server_utils import (
HEAD_SERVER_KEY_NAME,
@@ -60,10 +63,15 @@
)


def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str: # pragma: no cover
def _setup_env_command(dir_path: Path, global_config_dict: DictConfig, top_level_name: Optional[str] = None) -> str: # pragma: no cover
head_server_deps = global_config_dict[HEAD_SERVER_DEPS_KEY_NAME]

uv_venv_cmd = f"uv venv --seed --allow-existing --python {global_config_dict[PYTHON_VERSION_KEY_NAME]} .venv"
if top_level_name is not None:
venv = f".venv-{top_level_name}"
else:
venv = ".venv"

uv_venv_cmd = f"uv venv --seed --allow-existing --python {global_config_dict[PYTHON_VERSION_KEY_NAME]} {venv}"

has_pyproject_toml = exists(f"{dir_path / 'pyproject.toml'}")
has_requirements_txt = exists(f"{dir_path / 'requirements.txt'}")
@@ -74,29 +82,49 @@ def _setup_env_command(dir_path: Path, global_config_dict: DictConfig) -> str:
)
elif has_pyproject_toml:
install_cmd = f"""uv pip install '-e .' {" ".join(head_server_deps)}"""
if dir_path.name == "vllm_model":
# NB: --no-deps is a workaround for installing vllm (current version: 0.11.2) on a cpu target,
# b/c `uv pip install` resolves dependencies differently vs `pip install`.
install_cmd = f"""uv pip install --no-deps 'vllm==0.11.2' && {install_cmd}"""
elif has_requirements_txt:
install_cmd = f"""uv pip install -r requirements.txt {" ".join(head_server_deps)}"""
else:
raise RuntimeError(f"Missing pyproject.toml or requirements.txt for uv venv setup in server dir: {dir_path}")

if top_level_name is not None:
uv_venv_cmd = f"{uv_venv_cmd} > >(sed 's/^/({top_level_name}) /') 2> >(sed 's/^/({top_level_name}) /' >&2)"
install_cmd = f"{install_cmd} > >(sed 's/^/({top_level_name}) /') 2> >(sed 's/^/({top_level_name}) /' >&2)"

cmd = f"""cd {dir_path} \\
&& {uv_venv_cmd} \\
&& source .venv/bin/activate \\
&& source {venv}/bin/activate \\
&& {install_cmd} \\
"""

return cmd
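
For orientation, here is a minimal standalone sketch of the shell command this function assembles for a hypothetical server directory that ships a pyproject.toml. The directory layout, Python version, and extra deps below are illustrative, not taken from the PR, and the real code additionally pipes output through the sed process substitutions shown above to prefix each line with the server name:

# Sketch of the per-server venv setup command built by _setup_env_command.
# All concrete values here are assumptions for illustration.
from pathlib import Path
from typing import List, Optional


def sketch_setup_env_command(
    dir_path: Path,
    python_version: str,
    head_server_deps: List[str],
    top_level_name: Optional[str] = None,
) -> str:
    # Per-top-level venv, matching the ".venv-{top_level_name}" scheme above.
    venv = f".venv-{top_level_name}" if top_level_name else ".venv"
    uv_venv = f"uv venv --seed --allow-existing --python {python_version} {venv}"
    install = f"uv pip install '-e .' {' '.join(head_server_deps)}"
    if dir_path.name == "vllm_model":
        # Mirrors the --no-deps workaround for installing vllm on a CPU target.
        install = f"uv pip install --no-deps 'vllm==0.11.2' && {install}"
    return f"cd {dir_path} && {uv_venv} && source {venv}/bin/activate && {install}"


print(sketch_setup_env_command(Path("servers/vllm_model"), "3.12", ["fastapi"], "policy_model"))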


def _run_command(command: str, working_dir_path: Path) -> Popen: # pragma: no cover
def _run_command(command: str, working_dir_path: Path, top_level_name: Optional[str] = None) -> Popen: # pragma: no cover
work_dir = f"{working_dir_path.absolute()}"
custom_env = environ.copy()
py_path = custom_env.get("PYTHONPATH", None)
if py_path is not None:
custom_env["PYTHONPATH"] = f"{work_dir}:{py_path}"
else:
custom_env["PYTHONPATH"] = work_dir
return Popen(command, executable="/bin/bash", shell=True, env=custom_env)
redirect_stdout = sys.stdout
redirect_stderr = sys.stderr
if top_level_name is not None:
redirect_stdout = open(f"{work_dir}/run-{top_level_name}.out.log", "a")
redirect_stderr = open(f"{work_dir}/run-{top_level_name}.err.log", "a")
return Popen(
command,
executable="/bin/bash",
shell=True,
env=custom_env,
stdout=redirect_stdout,
stderr=redirect_stderr,
)
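
The redirection added here can be summarized with a small self-contained sketch (hypothetical paths; note the logs are opened in append mode, so repeated runs accumulate in the same per-server files):

# Sketch of _run_command's per-server logfile redirection.
import sys
from subprocess import Popen


def run_with_logs(command: str, work_dir: str, top_level_name: str | None = None) -> Popen:
    stdout, stderr = sys.stdout, sys.stderr
    if top_level_name is not None:
        # Same naming as run-{top_level_name}.out.log / .err.log above.
        stdout = open(f"{work_dir}/run-{top_level_name}.out.log", "a")
        stderr = open(f"{work_dir}/run-{top_level_name}.err.log", "a")
    return Popen(command, executable="/bin/bash", shell=True, stdout=stdout, stderr=stderr)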


class RunConfig(BaseNeMoGymCLIConfig):
@@ -152,6 +180,7 @@ class RunHelper: # pragma: no cover
_head_server: uvicorn.Server
_head_server_thread: Thread
_head_server_instance: HeadServer
_head_ray_gpu_helper: Any

_processes: Dict[str, Popen]
_server_instance_display_configs: List[ServerInstanceDisplayConfig]
@@ -164,6 +193,8 @@ def start(self, global_config_dict_parser_config: GlobalConfigDictParserConfig)
# Note: This function will modify the global config dict - update `ray_head_node_address`
initialize_ray()

self._head_ray_gpu_helper = _start_global_ray_gpu_scheduling_helper()

# Assume Nemo Gym Run is for a single agent.
escaped_config_dict_yaml_str = shlex.quote(OmegaConf.to_yaml(global_config_dict))

@@ -201,7 +232,7 @@ def start(self, global_config_dict_parser_config: GlobalConfigDictParserConfig)

dir_path = PARENT_DIR / Path(first_key, second_key)

command = f"""{_setup_env_command(dir_path, global_config_dict)} \\
command = f"""{_setup_env_command(dir_path, global_config_dict, top_level_path)} \\
&& {NEMO_GYM_CONFIG_DICT_ENV_VAR_NAME}={escaped_config_dict_yaml_str} \\
{NEMO_GYM_CONFIG_PATH_ENV_VAR_NAME}={shlex.quote(top_level_path)} \\
python {str(entrypoint_fpath)}"""
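
_start_global_ray_gpu_scheduling_helper is internal to this PR and its interface is not shown in this diff. As a rough, hypothetical illustration of the kind of Ray state query such a helper performs (cf. commits 5c1fe99 "Querying ray state to find nodes with available and unused GPUs" and dc048a3), alive GPU nodes can be enumerated like this:

# Hypothetical sketch only: list ALIVE nodes exposing GPUs via the Ray state
# API. list_nodes is a real Ray API; everything else here is illustrative.
import ray
from ray.util.state import list_nodes

ray.init(ignore_reinit_error=True)

# Pass limit explicitly; the default caps the number of returned nodes
# (cf. the "list_nodes with limit" fix, commit dc048a3).
nodes = list_nodes(detail=True, limit=10_000)
gpus_by_node = {
    n.node_id: n.resources_total.get("GPU", 0)
    for n in nodes
    if n.state == "ALIVE" and n.resources_total.get("GPU", 0) > 0
}
print(gpus_by_node)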
150 changes: 150 additions & 0 deletions nemo_gym/comparison_strategies.py
@@ -0,0 +1,150 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""
Comparison strategies for multi-generation reward computation.
"""
import hashlib
import json
from typing import Any, Dict, List, Optional, Protocol, Tuple, runtime_checkable

from pydantic import BaseModel, Field

from nemo_gym.server_utils import ServerClient, raise_for_status


@runtime_checkable
class ComparisonStrategy(Protocol):
"""Protocol for comparison strategies that compute rewards from multiple generations."""

agent_names: List[str]
num_generations_per_prompt: int
policy_model_server_name: str

async def compare(
self,
conversation_history: List[Dict[str, str]],
responses: List[str],
server_client: ServerClient,
principle: Optional[str] = None,
) -> Tuple[List[float], Dict[str, float]]:
"""Compare N responses and return (rewards, metrics)."""
...


class GenRMStrategyConfig(BaseModel):
"""Configuration for GenRM comparison strategy."""
agent_names: List[str] = Field(default_factory=lambda: ["genrm_simple_agent"])
num_generations_per_prompt: int = 16
genrm_compare_server_name: str = "genrm_compare"
policy_model_server_name: str = "policy_model"


class GenRMStrategy:
"""GenRM comparison strategy using pairwise comparisons."""

def __init__(self, config: GenRMStrategyConfig):
self.config = config
self.agent_names = config.agent_names
self.num_generations_per_prompt = config.num_generations_per_prompt
self.policy_model_server_name = config.policy_model_server_name

async def compare(
self,
conversation_history: List[Dict[str, str]],
response_objs: List[Dict],
server_client: ServerClient,
principle: Optional[str] = None,
) -> Tuple[List[float], Dict[str, float]]:
"""Call genrm_compare server to get rewards for each response.

Args:
conversation_history: The conversation context
response_objs: List of raw Response API objects
server_client: The server client for making requests
principle: Optional principle for principle-based GenRM comparison

Returns:
Tuple of (rewards, metrics) from GenRM comparison
"""
payload = {
"conversation_history": conversation_history,
"response_objs": response_objs,
}

if principle is not None:
payload["principle"] = principle

res = await server_client.post(
server_name=self.config.genrm_compare_server_name,
url_path="/compare",
json=payload,
)
await raise_for_status(res)
result = await res.json()

rewards = result.get("rewards", [0.0] * len(response_objs))
metrics = result.get("metrics", {})

return rewards, metrics


def get_prompt_key(example: Dict) -> str:
"""Get stable key for grouping examples by prompt and principle.

Examples with the same conversation history but different principles
should be in separate groups, so we include principle in the hash.
"""
if "prompt_id" in example:
# If prompt_id exists, combine it with principle for uniqueness
prompt_id = str(example["prompt_id"])
principle = example.get("principle")
if principle is not None:
return f"{prompt_id}::{principle}"
return prompt_id

# Hash both conversation history and principle together
conv = extract_conversation_history(example)
principle = example.get("principle")
key_data = {
"conversation": conv,
"principle": principle,
}
return hashlib.sha256(json.dumps(key_data, sort_keys=True).encode()).hexdigest()
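
A quick runnable check of the grouping contract: the same prompt_id under two different principles yields two distinct keys (values below are illustrative):

from collections import defaultdict

from nemo_gym.comparison_strategies import get_prompt_key

examples = [
    {"prompt_id": 7, "principle": "helpfulness"},
    {"prompt_id": 7, "principle": "harmlessness"},
    {"prompt_id": 7, "principle": "helpfulness"},
]
groups = defaultdict(list)
for ex in examples:
    groups[get_prompt_key(ex)].append(ex)
# Two groups: "7::helpfulness" (2 examples) and "7::harmlessness" (1 example).
assert sorted(len(g) for g in groups.values()) == [1, 2]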


def extract_conversation_history(example: Dict) -> List[Dict]:
"""Extract conversation history from example.

Gym examples store history in responses_create_params.input
"""
responses_create_params = example.get("responses_create_params")
if responses_create_params is None:
raise ValueError(f"Example missing 'responses_create_params': {list(example.keys())}")
if "input" not in responses_create_params:
raise ValueError(f"responses_create_params missing 'input': {list(responses_create_params.keys())}")
return responses_create_params["input"]


def extract_generated_text(gen_result: Dict) -> str:
"""Extract generated text from generation result."""
if not isinstance(gen_result, dict):
raise ValueError(f"Expected dict, got {type(gen_result)}")
if "output" in gen_result:
output = gen_result["output"]
if isinstance(output, list) and output:
return output[0].get("content", "")
if isinstance(output, str):
return output
if "response" in gen_result:
return gen_result["response"]
raise ValueError(f"Cannot extract generated text from: {list(gen_result.keys())}")


async def generate_response(example: Dict, server_client: ServerClient, model_server: str) -> Dict:
"""Generate a single response using the policy model."""
params = example.get("responses_create_params")
if params is None:
raise ValueError(f"Example missing 'responses_create_params': {list(example.keys())}")
res = await server_client.post(server_name=model_server, url_path="/v1/responses", json=params)
await raise_for_status(res)
return await res.json()
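
Hypothetical wiring of the strategy (a real ServerClient comes from the Gym runtime, so it is left abstract here); this also shows that GenRMStrategy structurally satisfies the runtime-checkable ComparisonStrategy protocol:

from nemo_gym.comparison_strategies import (
    ComparisonStrategy,
    GenRMStrategy,
    GenRMStrategyConfig,
)

strategy = GenRMStrategy(GenRMStrategyConfig(num_generations_per_prompt=4))
assert isinstance(strategy, ComparisonStrategy)  # attribute/method presence check


async def score(server_client, history, response_objs):
    # compare() POSTs to the genrm_compare server's /compare endpoint.
    rewards, metrics = await strategy.compare(history, response_objs, server_client)
    return rewards, metrics

In the Gym runtime, score(...) would be awaited once per group of num_generations_per_prompt responses.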
1 change: 1 addition & 0 deletions nemo_gym/config_types.py
@@ -393,6 +393,7 @@ class Domain(str, Enum):
GAMES = "games"
TRANSLATION = "translation"
E2E = "e2e"
RLHF = "rlhf"
OTHER = "other"


18 changes: 17 additions & 1 deletion nemo_gym/global_config.py
@@ -45,6 +45,10 @@
DISALLOWED_PORTS_KEY_NAME = "disallowed_ports"
HEAD_SERVER_DEPS_KEY_NAME = "head_server_deps"
PYTHON_VERSION_KEY_NAME = "python_version"
RAY_HEAD_NODE_ADDRESS_KEY_NAME = "ray_head_node_address"
RAY_NAMESPACE_KEY_NAME = "ray_namespace"
RAY_GPU_NODES_KEY_NAME = "ray_gpu_nodes"
RAY_NUM_GPUS_PER_NODE_KEY_NAME = "ray_num_gpus_per_node"
USE_ABSOLUTE_IP = "use_absolute_ip"
NEMO_GYM_RESERVED_TOP_LEVEL_KEYS = [
CONFIG_PATHS_KEY_NAME,
@@ -54,6 +58,10 @@
DISALLOWED_PORTS_KEY_NAME,
HEAD_SERVER_DEPS_KEY_NAME,
PYTHON_VERSION_KEY_NAME,
RAY_HEAD_NODE_ADDRESS_KEY_NAME,
RAY_NAMESPACE_KEY_NAME,
RAY_GPU_NODES_KEY_NAME,
RAY_NUM_GPUS_PER_NODE_KEY_NAME,
USE_ABSOLUTE_IP,
]

@@ -371,11 +379,19 @@ def get_first_server_config_dict(global_config_dict: DictConfig, top_level_path:

def find_open_port(
disallowed_ports: Optional[List[int]] = None,
max_retries: int = 50,
max_retries: int = 100,
) -> int: # pragma: no cover
if disallowed_ports is None:
disallowed_ports = []

default_disallowed_ports = set(
list(range(53000, 53010+1)) +
list(range(54000, 60000+1)) +
[10001, 8265, 52365, 52365+1]
)

disallowed_ports = default_disallowed_ports | set(disallowed_ports)

# Find an open port that doesn't conflict with disallowed ports.
for _ in range(max_retries):
with socket() as s:
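
The loop body is cut off in this diff, so the following self-contained sketch of the pattern fills in the retry/raise tail as an assumption; the blocked ranges match the defaults added above (8265, 10001, and 52365/52366 are, plausibly, Ray dashboard, client, and dashboard-agent ports):

from socket import socket


def find_open_port_sketch(disallowed_ports=None, max_retries=100):
    blocked = set(range(53000, 53011)) | set(range(54000, 60001)) | {10001, 8265, 52365, 52366}
    blocked |= set(disallowed_ports or [])
    for _ in range(max_retries):
        with socket() as s:
            s.bind(("", 0))  # port 0: the kernel assigns a currently free port
            port = s.getsockname()[1]
        if port not in blocked:
            return port
    raise RuntimeError(f"No open port found after {max_retries} attempts")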