diff --git a/resources_servers/grl_tetris/README.md b/resources_servers/grl_tetris/README.md new file mode 100644 index 000000000..14321d429 --- /dev/null +++ b/resources_servers/grl_tetris/README.md @@ -0,0 +1,70 @@ +# GRL Tetris Resource Server + +FastAPI-based resource server that exposes the GRL Tetris environment through NeMo Gym conventions. The environment logic lives under `resources_servers/grl_tetris/tetris_env` and is a standalone adaptation of the upstream GRL implementation. + +## Why it exists +- **Domain**: Classic falling-block Tetris on a configurable grid. +- **Evaluation**: Agents must clear at least one line; `/verify` returns the cumulative episode reward (which may be negative) together with a `success` flag taken from the environment's last step info. +- **Independence**: No runtime dependency on the GRL repository—the environment is vendored and self-contained. + +## Setup + +Please follow the setup instructions as outlined in: https://github.com/NVIDIA-NeMo/Gym/blob/main/docs/tutorials/02-setup.md#step-1-clone-and-install. + +## Running +Spin up the server alongside a compatible agent: +```bash +config_paths="responses_api_models/openai_model/configs/openai_model.yaml,\ +resources_servers/grl_tetris/configs/grl_tetris.yaml" +ng_run "+config_paths=[$config_paths]" +``` + +Collect trajectories: +```bash +ng_collect_rollouts +agent_name=grl_tetris_simple_agent \ + +input_jsonl_fpath=resources_servers/grl_tetris/data/example.jsonl \ + +output_jsonl_fpath=resources_servers/grl_tetris/data/example_rollouts.jsonl \ + +limit=5 +``` + +Launch the rollout viewer: +```bash +ng_viewer +jsonl_fpath=resources_servers/grl_tetris/data/example_rollouts.jsonl +``` + +## Tests +```bash +pytest resources_servers/grl_tetris/tests +``` + +## Licensing +- Code: Apache 2.0 +- Data: Apache 2.0 + +--- + +## Reward Profiling Results + +### Qwen3-4B + +**Dataset**: 3,200 rollouts (200 prompts × 16 repeats) + +**Performance Metrics**: +- **Success Rate**: 5.09% (163/3,200 rollouts) +- **Mean Reward**: -0.29 (range: -2.00 to 19.20) +- 
**Median Reward**: -0.80 + +**Key Findings**: +- The most common outcome (21% of rollouts) was a reward of -0.90 (piece dropped without clearing lines) +- Successful line clears achieved rewards of ~9.0-9.2 +- Average 7.48 tool calls per rollout +- Weak negative correlation between tool calls and reward (-0.06) + +**Top Reward Distribution**: +- `-0.9`: 672 rollouts (21.0%) - piece dropped, no line clear +- `-0.8`: 603 rollouts (18.8%) +- `-0.7`: 495 rollouts (15.5%) +- `9.1`: 29 rollouts (0.9%) - successful line clear +- `8.9`: 26 rollouts (0.8%) + +The relatively low success rate (5.09%) suggests that Tetris line-clearing is challenging for the model, requiring precise spatial reasoning and action sequencing. Most rollouts result in pieces dropping without clearing lines (negative rewards accumulate from the -0.1 penalty applied per action step). diff --git a/resources_servers/grl_tetris/app.py b/resources_servers/grl_tetris/app.py new file mode 100644 index 000000000..12d2bb01b --- /dev/null +++ b/resources_servers/grl_tetris/app.py @@ -0,0 +1,226 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# Default symbol used to render each grid cell value, and default label for
# each action id. Both are handed to the environment through ``env_config``.
DEFAULT_GRID_LOOKUP = {0: "_", 1: "#", 2: "X"}
DEFAULT_ACTION_LOOKUP = {0: "Left", 1: "Right", 2: "Down"}


class GrlTetrisResourcesServerConfig(BaseResourcesServerConfig):
    """Server configuration; ``env_config`` is passed as-is to ``TetrisEnv``."""

    env_config: Dict[str, Any] = Field(
        default_factory=lambda: {
            # Copy the lookup tables so that mutating one config instance can
            # never leak into the module-level defaults or into other configs.
            "grid_lookup": dict(DEFAULT_GRID_LOOKUP),
            "action_lookup": dict(DEFAULT_ACTION_LOOKUP),
            "render_mode": "text",
            "dim_x": 4,
            "dim_y": 4,
            "box_type": 3,
        }
    )


class GrlTetrisSeedSessionRequest(BaseSeedSessionRequest):
    """Body of ``/seed_session``; ``seed`` is forwarded to ``env.reset``."""

    # NOTE(review): only ``seed`` is read by this server; the board always
    # comes from the server config. Confirm that per-example fields such as
    # ``dim_board`` / ``box_type`` in the dataset are meant to be ignored.
    seed: Optional[int] = None


class GrlTetrisSeedSessionResponse(BaseSeedSessionResponse):
    """Initial observation returned by ``env.reset``."""

    observation: str


class GrlTetrisStepRequest(BaseModel):
    """Body of ``/step``: actions given as labels or integer ids.

    An empty list is valid and simply returns the current state.
    """

    actions: List[Union[str, int]] = Field(default_factory=list)


class GrlTetrisStepTrace(BaseModel):
    """Record of one executed environment step."""

    action_id: int
    action_label: str
    reward: float
    done: bool
    info: Dict[str, Any]


class GrlTetrisStepResponse(BaseModel):
    """Result of one ``/step`` call.

    ``reward`` and ``steps`` cover only this call; ``total_reward`` and
    ``history`` cover the whole session so far.
    """

    observation: str
    reward: float
    total_reward: float
    done: bool
    steps: List[GrlTetrisStepTrace]
    history: List[GrlTetrisStepTrace] = Field(default_factory=list)


class GrlTetrisVerifyResponse(BaseVerifyResponse):
    """Verify response extended with the environment's ``success`` flag."""

    success: bool


@dataclass
class TetrisSessionState:
    """Mutable per-session bookkeeping kept between HTTP calls."""

    env: Any
    observation: str
    total_reward: float = 0.0
    done: bool = False
    last_info: Dict[str, Any] = field(default_factory=dict)
    history: List[GrlTetrisStepTrace] = field(default_factory=list)


class GrlTetrisResourcesServer(SimpleResourcesServer):
    """Resource server exposing one ``TetrisEnv`` per session.

    ``/seed_session`` creates the environment, ``/step`` advances it and
    ``/verify`` reports the accumulated reward and releases the session.
    """

    config: GrlTetrisResourcesServerConfig
    server_client: ServerClient
    session_id_to_state: Dict[str, TetrisSessionState] = Field(default_factory=dict)

    def setup_webserver(self) -> FastAPI:
        """Return the base application with the extra ``/step`` route added."""
        app = super().setup_webserver()
        app.post("/step")(self.step)
        return app

    def _create_env(self) -> TetrisEnv:
        """Build a fresh environment from the server's ``env_config``."""
        return TetrisEnv(self.config.env_config)

    @staticmethod
    def _close_env(env: Any) -> None:
        """Close ``env`` on a best-effort basis; cleanup must never fail a request."""
        try:
            env.close()
        except Exception:  # pragma: no cover - defensive cleanup
            pass

    async def seed_session(self, request: Request, body: GrlTetrisSeedSessionRequest) -> GrlTetrisSeedSessionResponse:
        """Create and reset an environment for the caller's session.

        Re-seeding an existing session replaces its state; the environment
        that was attached before is closed instead of being left dangling.
        """
        session_id = request.session[SESSION_ID_KEY]
        env = self._create_env()
        observation = env.reset(seed=body.seed)

        # Only drop the old environment once the new one is ready, so a
        # failing reset does not destroy a still-usable session.
        previous = self.session_id_to_state.get(session_id)
        if previous is not None:
            self._close_env(previous.env)

        self.session_id_to_state[session_id] = TetrisSessionState(
            env=env,
            observation=observation,
        )
        return GrlTetrisSeedSessionResponse(observation=observation)

    async def step(self, request: Request, body: GrlTetrisStepRequest) -> GrlTetrisStepResponse:
        """Execute the requested actions in order and report the new state.

        Every action is parsed and validated before the first one is
        executed, so a request containing an invalid action is rejected
        without advancing the environment. Execution stops early as soon as
        the environment reports ``done``.

        Raises:
            HTTPException: 400 when the session was never seeded or when any
                action cannot be parsed or is not in ``env.ACTION_LOOKUP``.
        """
        session_id = request.session.get(SESSION_ID_KEY)
        if session_id is None or session_id not in self.session_id_to_state:
            raise HTTPException(status_code=400, detail="Session not initialized. Call /seed_session first.")

        session_state = self.session_id_to_state[session_id]

        # A finished episode is read-only: echo the final state.
        if session_state.done:
            return GrlTetrisStepResponse(
                observation=session_state.observation,
                reward=0.0,
                total_reward=session_state.total_reward,
                done=True,
                steps=[],
                history=list(session_state.history),
            )

        env = session_state.env
        reverse_lookup = {label.lower(): idx for idx, label in env.ACTION_LOOKUP.items()}

        # Validate the whole batch first so a bad entry cannot leave the
        # environment partially advanced behind an error response.
        action_ids: List[int] = []
        for action in body.actions:
            action_id = self._parse_action(action, reverse_lookup)
            if action_id not in env.ACTION_LOOKUP:
                raise HTTPException(status_code=400, detail=f"Invalid action identifier: {action}")
            action_ids.append(action_id)

        total_step_reward = 0.0
        steps: List[GrlTetrisStepTrace] = []

        for action_id in action_ids:
            next_obs, reward, done, info = env.step(action_id)
            # Normalise possible NumPy values before they are stored or serialised.
            reward = float(reward)
            info = self._to_python_types(info)
            total_step_reward += reward
            session_state.total_reward += reward
            session_state.observation = next_obs
            session_state.last_info = info
            session_state.done = bool(done)

            step = GrlTetrisStepTrace(
                action_id=action_id,
                action_label=env.ACTION_LOOKUP[action_id],
                reward=reward,
                done=session_state.done,
                info=info,
            )
            session_state.history.append(step)
            steps.append(step)

            if session_state.done:
                break

        return GrlTetrisStepResponse(
            observation=session_state.observation,
            reward=total_step_reward,
            total_reward=session_state.total_reward,
            done=session_state.done,
            steps=steps,
            history=list(session_state.history),
        )

    async def verify(self, request: Request, body: BaseVerifyRequest) -> GrlTetrisVerifyResponse:
        """Report the session's cumulative reward and release the session.

        The reward is the accumulated total regardless of success;
        ``success`` mirrors the ``success`` entry of the last step's info.
        An unknown session yields ``reward=0.0`` and ``success=False``.
        """
        session_id = request.session.get(SESSION_ID_KEY)
        # Verification ends the session, so remove it while looking it up.
        session_state = self.session_id_to_state.pop(session_id, None)

        success = False
        reward = 0.0
        if session_state is not None:
            success = bool(session_state.last_info.get("success"))
            reward = session_state.total_reward
            self._close_env(session_state.env)

        return GrlTetrisVerifyResponse(
            **body.model_dump(),
            reward=reward,
            success=success,
        )

    @staticmethod
    def _parse_action(action: Union[str, int], reverse_lookup: Dict[str, int]) -> int:
        """Turn an action label or numeric string into an action id.

        Integers are returned unchanged. Strings are matched
        case-insensitively against ``reverse_lookup`` and otherwise parsed
        as an integer. Range checking is left to the caller.

        Raises:
            HTTPException: 400 when a string is neither a known label nor an
                integer literal.
        """
        if isinstance(action, int):
            return action

        candidate = action.strip()
        lower_candidate = candidate.lower()
        if lower_candidate in reverse_lookup:
            return reverse_lookup[lower_candidate]

        try:
            return int(candidate)
        except ValueError as exc:
            raise HTTPException(status_code=400, detail=f"Unable to parse action: {action}") from exc

    @staticmethod
    def _to_python_types(obj: Any) -> Any:
        """Recursively replace NumPy values with plain Python equivalents.

        Dict values, lists and tuples are traversed (tuples come back as
        lists), arrays become nested lists and NumPy scalars become Python
        scalars. Anything else is returned unchanged.
        """
        convert = GrlTetrisResourcesServer._to_python_types
        if isinstance(obj, dict):
            return {k: convert(v) for k, v in obj.items()}
        if isinstance(obj, (list, tuple)):
            return [convert(v) for v in obj]
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.generic):
            return obj.item()
        return obj


if __name__ == "__main__":
    GrlTetrisResourcesServer.run_webserver()
mode 100644 index 000000000..2eb162fde --- /dev/null +++ b/resources_servers/grl_tetris/data/example.jsonl @@ -0,0 +1,5 @@ +{"game_id": 1, "seed": 93810, "dim_board": [5, 5], "box_type": 0, "responses_create_params": {"input": [{"role": "developer", "content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... summarizing all moves you made."}, {"role": "user", "content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible."}], "tools": [{"name": "step", "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving.", "strict": true, "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}}]}} +{"game_id": 2, "seed": 46185, "dim_board": [4, 6], "box_type": 1, "responses_create_params": {"input": [{"role": "developer", "content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... 
summarizing all moves you made."}, {"role": "user", "content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible."}], "tools": [{"name": "step", "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving.", "strict": true, "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}}]}} +{"game_id": 3, "seed": 28563, "dim_board": [5, 5], "box_type": 0, "responses_create_params": {"input": [{"role": "developer", "content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... summarizing all moves you made."}, {"role": "user", "content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible."}], "tools": [{"name": "step", "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving.", "strict": true, "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. 
Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}}]}} +{"game_id": 4, "seed": 87808, "dim_board": [6, 5], "box_type": 0, "responses_create_params": {"input": [{"role": "developer", "content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... summarizing all moves you made."}, {"role": "user", "content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible."}], "tools": [{"name": "step", "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving.", "strict": true, "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}}]}} +{"game_id": 5, "seed": 14453, "dim_board": [5, 5], "box_type": 1, "responses_create_params": {"input": [{"role": "developer", "content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... 
summarizing all moves you made."}, {"role": "user", "content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible."}], "tools": [{"name": "step", "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving.", "strict": true, "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}}]}} diff --git a/resources_servers/grl_tetris/data/example_metrics.json b/resources_servers/grl_tetris/data/example_metrics.json new file mode 100644 index 000000000..88030ce73 --- /dev/null +++ b/resources_servers/grl_tetris/data/example_metrics.json @@ -0,0 +1,8 @@ +{ + "name": "example", + "type": "example", + "jsonl_fpath": "resources_servers/grl_tetris/data/example.jsonl", + "gitlab_identifier": null, + "license": "Apache 2.0", + "Number of examples": 5 +} diff --git a/resources_servers/grl_tetris/data/example_rollouts.jsonl b/resources_servers/grl_tetris/data/example_rollouts.jsonl new file mode 100644 index 000000000..3ee989b4c --- /dev/null +++ b/resources_servers/grl_tetris/data/example_rollouts.jsonl @@ -0,0 +1,5 @@ +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... 
summarizing all moves you made.", "role": "developer", "type": "message"}, {"content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible.", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving."}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_0652b8fcf1e2dce400696565e6b2e881959ed1de12f162997e", "created_at": 1768252902.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "gpt-4o-2024-08-06", "object": "response", "output": [{"arguments": "{\"actions\":[]}", "call_id": "call_KiPwWiCxtkOEN1RDK4wi52Sp", "name": "step", "type": "function_call", "id": "fc_0652b8fcf1e2dce400696565e47d2c8195bb1da44b843ec537", "status": "completed"}, {"call_id": "call_KiPwWiCxtkOEN1RDK4wi52Sp", "output": "{\"observation\":\"__XX\\n____\\n____\\n____\",\"reward\":0.0,\"total_reward\":0.0,\"done\":false,\"steps\":[],\"history\":[]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_g5iqpJJHzJnhmDa3btuotPF3", "name": "step", "type": "function_call", "id": "fc_0652b8fcf1e2dce400696565e6273881959577dc95640f8566", "status": "completed"}, {"arguments": 
"{\"actions\":[\"Down\"]}", "call_id": "call_2q9Elms7fMTWkIhRWR5OldMv", "name": "step", "type": "function_call", "id": "fc_0652b8fcf1e2dce400696565e6401881958e2569ad4f42dcef", "status": "completed"}, {"call_id": "call_g5iqpJJHzJnhmDa3btuotPF3", "output": "{\"observation\":\"__XX\\n__XX\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.1,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"call_id": "call_2q9Elms7fMTWkIhRWR5OldMv", "output": "{\"observation\":\"____\\n__XX\\n__XX\\n____\",\"reward\":-0.1,\"total_reward\":-0.2,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"id": "msg_0652b8fcf1e2dce400696565e73d4c81958bf7c73d4d24fa8f", "content": [{"annotations": [], "text": "Down || Down", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": 1.0, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": 
"string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving."}], "top_p": 1.0, "background": false, "conversation": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "prompt_cache_key": null, "reasoning": {"effort": null, "generate_summary": null, "summary": null}, "safety_identifier": null, "service_tier": "default", "status": "completed", "text": {"format": {"type": "text"}, "verbosity": "medium"}, "top_logprobs": 0, "truncation": "disabled", "usage": {"input_tokens": 625, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 11, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 636}, "user": null, "billing": {"payer": "developer"}, "completed_at": 1768252903, "frequency_penalty": 0.0, "presence_penalty": 0.0, "prompt_cache_retention": null, "store": true}, "reward": -0.2, "success": false} +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... 
summarizing all moves you made.", "role": "developer", "type": "message"}, {"content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible.", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving."}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_0690794e9bb8139d00696565e9fc20819eb9128e7907a19bd3", "created_at": 1768252906.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "gpt-4o-2024-08-06", "object": "response", "output": [{"arguments": "{\"actions\":[]}", "call_id": "call_uL4WuAX2lvwJOdGVdqlezO8y", "name": "step", "type": "function_call", "id": "fc_0690794e9bb8139d00696565e4c938819eba8c692fc8aba62e", "status": "completed"}, {"call_id": "call_uL4WuAX2lvwJOdGVdqlezO8y", "output": "{\"observation\":\"_X__\\n____\\n____\\n____\",\"reward\":0.0,\"total_reward\":0.0,\"done\":false,\"steps\":[],\"history\":[]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_0Ny2wn272A6wZIy2SSExmBxS", "name": "step", "type": "function_call", "id": "fc_0690794e9bb8139d00696565e651ac819ea88648ddd7f7e4a0", "status": "completed"}, {"arguments": 
"{\"actions\":[\"Down\"]}", "call_id": "call_ykybl3HByrFr34weNdftZxBr", "name": "step", "type": "function_call", "id": "fc_0690794e9bb8139d00696565e66ea8819e845eab68c6b7fab8", "status": "completed"}, {"call_id": "call_0Ny2wn272A6wZIy2SSExmBxS", "output": "{\"observation\":\"_X__\\n_X__\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.1,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"call_id": "call_ykybl3HByrFr34weNdftZxBr", "output": "{\"observation\":\"____\\n_X__\\n_X__\\n____\",\"reward\":-0.1,\"total_reward\":-0.2,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_r9hSUcSt8qx9OTLE8158e0Rt", "name": "step", "type": "function_call", "id": "fc_0690794e9bb8139d00696565e7aec4819eb416d637c8a04d4e", "status": "completed"}, {"call_id": "call_r9hSUcSt8qx9OTLE8158e0Rt", "output": 
"{\"observation\":\"__XX\\n____\\n_#__\\n_#__\",\"reward\":-0.1,\"total_reward\":-0.30000000000000004,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_oZwMj5SDCFo9oQEllqrmjBNk", "name": "step", "type": "function_call", "id": "fc_0690794e9bb8139d00696565e9b808819eb791aa22730c445f", "status": "completed"}, {"call_id": "call_oZwMj5SDCFo9oQEllqrmjBNk", "output": 
"{\"observation\":\"____\\n__XX\\n_#__\\n_#__\",\"reward\":-0.1,\"total_reward\":-0.4,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"id": "msg_0690794e9bb8139d00696565ea6138819eb58f850f635e9b0b", "content": [{"annotations": [], "text": "Down || Down || Down || Down || Down", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": 1.0, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. 
Call with empty array [] to see current board state without moving."}], "top_p": 1.0, "background": false, "conversation": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "prompt_cache_key": null, "reasoning": {"effort": null, "generate_summary": null, "summary": null}, "safety_identifier": null, "service_tier": "default", "status": "completed", "text": {"format": {"type": "text"}, "verbosity": "medium"}, "top_logprobs": 0, "truncation": "disabled", "usage": {"input_tokens": 1179, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 17, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 1196}, "user": null, "billing": {"payer": "developer"}, "completed_at": 1768252906, "frequency_penalty": 0.0, "presence_penalty": 0.0, "prompt_cache_retention": null, "store": true}, "reward": -0.4, "success": false} +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... 
summarizing all moves you made.", "role": "developer", "type": "message"}, {"content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible.", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving."}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_045dc8dead221e0500696565ea3c6c81a08481f9287fae7c5c", "created_at": 1768252906.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "gpt-4o-2024-08-06", "object": "response", "output": [{"arguments": "{\"actions\":[]}", "call_id": "call_AKpurldhUA7kkDmHHTk9bvq7", "name": "step", "type": "function_call", "id": "fc_045dc8dead221e0500696565e4912481a08f2bff033e7f7cc3", "status": "completed"}, {"call_id": "call_AKpurldhUA7kkDmHHTk9bvq7", "output": "{\"observation\":\"___X\\n____\\n____\\n____\",\"reward\":0.0,\"total_reward\":0.0,\"done\":false,\"steps\":[],\"history\":[]}", "type": "function_call_output", "id": null, "status": null}, {"id": "msg_045dc8dead221e0500696565e5d2fc81a092444caabf20fff3", "content": [{"annotations": [], "text": "The current board state is:\n\n```\n___X\n____\n____\n____\n```\n\nThe active piece ('X') is in the top-right corner. 
I'll move it down to try and clear a line.", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}, {"arguments": "{\"actions\":[\"Left\"]}", "call_id": "call_f9yuUc5kBntRpWGckYNy9NIR", "name": "step", "type": "function_call", "id": "fc_045dc8dead221e0500696565e7543481a0bd34b836a71328c5", "status": "completed"}, {"call_id": "call_f9yuUc5kBntRpWGckYNy9NIR", "output": "{\"observation\":\"__X_\\n____\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.1,\"done\":false,\"steps\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Left\"]}", "call_id": "call_0ZU1Ffo5TaYfQ7Rc4SdBYFHk", "name": "step", "type": "function_call", "id": "fc_045dc8dead221e0500696565e86dd481a09155bae8b20354f9", "status": "completed"}, {"call_id": "call_0ZU1Ffo5TaYfQ7Rc4SdBYFHk", "output": "{\"observation\":\"_X__\\n____\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.2,\"done\":false,\"steps\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Left\"]}", 
"call_id": "call_vOxbvT3jQz5cxquUrBbOScQi", "name": "step", "type": "function_call", "id": "fc_045dc8dead221e0500696565e91f6881a0a4e5d2323a1f998e", "status": "completed"}, {"call_id": "call_vOxbvT3jQz5cxquUrBbOScQi", "output": "{\"observation\":\"X___\\n____\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.30000000000000004,\"done\":false,\"steps\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\",\"Down\",\"Down\"]}", "call_id": "call_mqgGieUPMttGAlOVkRTJ7TM6", "name": "step", "type": "function_call", "id": "fc_045dc8dead221e0500696565e9d2b481a0934a7e539636b6cd", "status": "completed"}, {"call_id": "call_mqgGieUPMttGAlOVkRTJ7TM6", "output": 
"{\"observation\":\"__XX\\n____\\n#___\\n#___\",\"reward\":-0.30000000000000004,\"total_reward\":-0.6,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"id": "msg_045dc8dead221e0500696565eaafc881a0bb0db1f9c9c5815d", "content": [{"annotations": [], "text": "I moved the piece, but no line was cleared. 
Here's the sequence of actions taken:\n\nLeft || Left || Left || Down || Down || Down", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": 1.0, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving."}], "top_p": 1.0, "background": false, "conversation": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "prompt_cache_key": null, "reasoning": {"effort": null, "generate_summary": null, "summary": null}, "safety_identifier": null, "service_tier": "default", "status": "completed", "text": {"format": {"type": "text"}, "verbosity": "medium"}, "top_logprobs": 0, "truncation": "disabled", "usage": {"input_tokens": 1420, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 37, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 1457}, "user": null, "billing": {"payer": "developer"}, "completed_at": 1768252907, "frequency_penalty": 0.0, "presence_penalty": 0.0, "prompt_cache_retention": null, "store": true}, "reward": -0.6, "success": false} +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. 
Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... summarizing all moves you made.", "role": "developer", "type": "message"}, {"content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible.", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. 
Call with empty array [] to see current board state without moving."}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_0d5a2c8365850b8e00696565eab4288192b2da66afb906d98e", "created_at": 1768252906.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "gpt-4o-2024-08-06", "object": "response", "output": [{"arguments": "{\"actions\":[]}", "call_id": "call_KgJRph2MXPBIZpi0YTtUUBGc", "name": "step", "type": "function_call", "id": "fc_0d5a2c8365850b8e00696565e48c00819282ccef6c4a83d6b9", "status": "completed"}, {"call_id": "call_KgJRph2MXPBIZpi0YTtUUBGc", "output": "{\"observation\":\"XX__\\n____\\n____\\n____\",\"reward\":0.0,\"total_reward\":0.0,\"done\":false,\"steps\":[],\"history\":[]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_erUkQxpj6GAIbwEwYPk6TRse", "name": "step", "type": "function_call", "id": "fc_0d5a2c8365850b8e00696565e55df08192988609846746dba4", "status": "completed"}, {"call_id": "call_erUkQxpj6GAIbwEwYPk6TRse", "output": "{\"observation\":\"____\\nXX__\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.1,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_0PIqBadc3XMyvboNUnDvschd", "name": "step", "type": "function_call", "id": "fc_0d5a2c8365850b8e00696565e645d081929b23c8104eb226d7", "status": "completed"}, {"call_id": "call_0PIqBadc3XMyvboNUnDvschd", "output": 
"{\"observation\":\"____\\n____\\nXX__\\n____\",\"reward\":-0.1,\"total_reward\":-0.2,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_hdtWjww3kRgedkTackhU9wBK", "name": "step", "type": "function_call", "id": "fc_0d5a2c8365850b8e00696565e721a8819291340dca9bafe5e9", "status": "completed"}, {"call_id": "call_hdtWjww3kRgedkTackhU9wBK", "output": "{\"observation\":\"XX__\\n____\\n____\\n##__\",\"reward\":-0.1,\"total_reward\":-0.30000000000000004,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_eiAPDwgSwieMKVJrNgCQO4sX", "name": "step", 
"type": "function_call", "id": "fc_0d5a2c8365850b8e00696565e83f208192a9d24d3ae0a5a550", "status": "completed"}, {"call_id": "call_eiAPDwgSwieMKVJrNgCQO4sX", "output": "{\"observation\":\"XX__\\nXX__\\n____\\n##__\",\"reward\":-0.1,\"total_reward\":-0.4,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_gUYouOo1KFFZfIOW2T31ooRi", "name": "step", "type": "function_call", "id": "fc_0d5a2c8365850b8e00696565e945888192837561cdb478f81a", "status": "completed"}, {"call_id": "call_gUYouOo1KFFZfIOW2T31ooRi", "output": 
"{\"observation\":\"__XX\\n##__\\n##__\\n##__\",\"reward\":-0.1,\"total_reward\":-0.5,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Left\"]}", "call_id": "call_GObmJdA0qMGI0cniT0VAv9mU", "name": "step", "type": "function_call", "id": "fc_0d5a2c8365850b8e00696565ea51148192bef30ae7c3576d74", "status": "completed"}, {"call_id": "call_GObmJdA0qMGI0cniT0VAv9mU", "output": 
"{\"observation\":\"_XX_\\n##__\\n##__\\n##__\",\"reward\":-0.1,\"total_reward\":-0.6,\"done\":true,\"steps\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":true,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":true,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"id": "msg_0d5a2c8365850b8e00696565eb619c8192a650c33c2aeacd37", "content": [{"annotations": [], "text": "Down || Down || Down || Down || Down || Left", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": 1.0, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. 
Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving."}], "top_p": 1.0, "background": false, "conversation": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "prompt_cache_key": null, "reasoning": {"effort": null, "generate_summary": null, "summary": null}, "safety_identifier": null, "service_tier": "default", "status": "completed", "text": {"format": {"type": "text"}, "verbosity": "medium"}, "top_logprobs": 0, "truncation": "disabled", "usage": {"input_tokens": 1911, "input_tokens_details": {"cached_tokens": 1408}, "output_tokens": 19, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 1930}, "user": null, "billing": {"payer": "developer"}, "completed_at": 1768252907, "frequency_penalty": 0.0, "presence_penalty": 0.0, "prompt_cache_retention": null, "store": true}, "reward": -0.6, "success": false} +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... 
summarizing all moves you made.", "role": "developer", "type": "message"}, {"content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible.", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving."}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_0ab7221f9fb73bd200696565ec54088197b9f83d678139f026", "created_at": 1768252908.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "gpt-4o-2024-08-06", "object": "response", "output": [{"arguments": "{\"actions\":[]}", "call_id": "call_fobyRyvvP30QTkyCq2FOdcXr", "name": "step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565e4bfa88197aa8aa2203c6f21d1", "status": "completed"}, {"call_id": "call_fobyRyvvP30QTkyCq2FOdcXr", "output": "{\"observation\":\"__XX\\n____\\n____\\n____\",\"reward\":0.0,\"total_reward\":0.0,\"done\":false,\"steps\":[],\"history\":[]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_0Je4QPq9mXY39BORQxE7D8OR", "name": "step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565e6640881979fea8f9521117bcd", "status": "completed"}, {"call_id": 
"call_0Je4QPq9mXY39BORQxE7D8OR", "output": "{\"observation\":\"____\\n__XX\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.1,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_Gxat5KDZc8yUZwxnBoMWOhul", "name": "step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565e713cc8197865d18096bbb121f", "status": "completed"}, {"call_id": "call_Gxat5KDZc8yUZwxnBoMWOhul", "output": "{\"observation\":\"____\\n____\\n__XX\\n____\",\"reward\":-0.1,\"total_reward\":-0.2,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_P8BpK5nYCCcTwxF0TqeQ3AKi", "name": "step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565e80afc8197955bf941b8fe6c6e", "status": "completed"}, {"call_id": "call_P8BpK5nYCCcTwxF0TqeQ3AKi", "output": 
"{\"observation\":\"__X_\\n____\\n____\\n__##\",\"reward\":-0.1,\"total_reward\":-0.30000000000000004,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[]}", "call_id": "call_CANNEhbdjIMwbhkVMRU906i5", "name": "step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565e9c2548197916632847ec8a644", "status": "completed"}, {"call_id": "call_CANNEhbdjIMwbhkVMRU906i5", "output": "{\"observation\":\"__X_\\n____\\n____\\n__##\",\"reward\":0.0,\"total_reward\":-0.30000000000000004,\"done\":false,\"steps\":[],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_bnjsIqzspnvRyq8MtsEt3vxS", "name": 
"step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565eabcc08197be40e1b0fc1e021f", "status": "completed"}, {"call_id": "call_bnjsIqzspnvRyq8MtsEt3vxS", "output": "{\"observation\":\"__X_\\n__X_\\n____\\n__##\",\"reward\":-0.1,\"total_reward\":-0.4,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_V3is9Vdy5aJlTpknfBqWp1aN", "name": "step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565ebd490819790fc411b3af2fc6a", "status": "completed"}, {"call_id": "call_V3is9Vdy5aJlTpknfBqWp1aN", "output": 
"{\"observation\":\"__XX\\n__#_\\n__#_\\n__##\",\"reward\":-0.1,\"total_reward\":-0.5,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"id": "msg_0ab7221f9fb73bd200696565ecc7b481978ebd3a1369d5396b", "content": [{"annotations": [], "text": "Down || Down || Down || Down || Down", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": 1.0, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. 
Call with empty array [] to see current board state without moving."}], "top_p": 1.0, "background": false, "conversation": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "prompt_cache_key": null, "reasoning": {"effort": null, "generate_summary": null, "summary": null}, "safety_identifier": null, "service_tier": "default", "status": "completed", "text": {"format": {"type": "text"}, "verbosity": "medium"}, "top_logprobs": 0, "truncation": "disabled", "usage": {"input_tokens": 1724, "input_tokens_details": {"cached_tokens": 1280}, "output_tokens": 17, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 1741}, "user": null, "billing": {"payer": "developer"}, "completed_at": 1768252908, "frequency_penalty": 0.0, "presence_penalty": 0.0, "prompt_cache_retention": null, "store": true}, "reward": -0.5, "success": false} diff --git a/resources_servers/grl_tetris/generate_test_examples.py b/resources_servers/grl_tetris/generate_test_examples.py new file mode 100755 index 000000000..ac46d2d1f --- /dev/null +++ b/resources_servers/grl_tetris/generate_test_examples.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Generate test examples for GRL Tetris environment."""

import json
import random
from pathlib import Path
from typing import Any, Dict, List, Optional


def generate_tetris_example(game_id: int, seed: int, dim_board: List[int], box_type: int) -> Dict[str, Any]:
    """Generate a single Tetris test example in the expected JSONL format.

    A fresh dictionary (including fresh nested prompt/tool structures) is built
    on every call, so callers may mutate the result without affecting others.

    Args:
        game_id: Unique identifier for the game
        seed: Random seed for reproducible game generation
        dim_board: Board dimensions as [width, height]
        box_type: Type of Tetris pieces (0=single, 1=single, 2=I and -, 3=I, -, and O)

    Returns:
        Dictionary containing the game configuration and prompt
    """
    return {
        "game_id": game_id,
        "seed": seed,
        "dim_board": dim_board,
        "box_type": box_type,
        "responses_create_params": {
            "input": [
                {
                    "role": "developer",
                    "content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... summarizing all moves you made.",
                },
                {
                    "role": "user",
                    "content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible.",
                },
            ],
            "tools": [
                {
                    "name": "step",
                    "type": "function",
                    "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving.",
                    "strict": True,
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "actions": {
                                "type": "array",
                                "items": {"type": "string"},
                                "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state.",
                            }
                        },
                        "required": ["actions"],
                        "additionalProperties": False,
                    },
                }
            ],
        },
    }


def generate_test_examples(
    num_examples: int = 500,
    output_file: str = "data/test_examples.jsonl",
    seed: Optional[int] = None,
) -> None:
    """Generate diverse test examples for Tetris environment.

    Args:
        num_examples: Number of examples to generate (default: 500)
        output_file: Output JSONL file path. Relative paths are resolved against
            the directory containing this script; absolute paths are used as-is.
        seed: Optional seed for a private random generator. When omitted, the
            module-level ``random`` state is used, so callers that seed it
            themselves keep getting the same output as before.
    """
    # A private generator seeded with ``seed`` yields the same sequence as
    # ``random.seed(seed)`` followed by module-level calls, without touching
    # global state.
    rng = random.Random(seed) if seed is not None else random

    # Define parameter ranges for diversity
    board_sizes = [
        [4, 4],  # Small square
        [5, 5],  # Medium square
        [6, 6],  # Large square
        [4, 6],  # Narrow tall
        [6, 4],  # Wide short
        [5, 6],  # Medium tall
        [6, 5],  # Medium wide
        [4, 5],  # Small tall
        [5, 4],  # Small wide
    ]

    box_types = [0, 1, 2, 3]  # All available piece types

    examples = []
    for i in range(num_examples):
        game_id = i + 1

        # Random base plus a per-index offset so seeds are spread out.
        game_seed = rng.randint(10000, 99999) + i * 137

        # Board size and piece set are each sampled uniformly at random.
        # Copy the board size so examples do not share one list object.
        dim_board = list(rng.choice(board_sizes))
        box_type = rng.choice(box_types)

        examples.append(generate_tetris_example(game_id, game_seed, dim_board, box_type))

    # Write to JSONL file
    output_path = Path(__file__).parent / output_file
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(example) + "\n" for example in examples)

    print(f"Generated {num_examples} test examples")
    print(f"Output file: {output_path}")

    # min()/max() raise ValueError on an empty sequence, so skip the summary
    # when nothing was generated.
    if not examples:
        return

    seeds = [e["seed"] for e in examples]
    print("\nParameter distribution:")
    print(f"  Board sizes: {set(tuple(e['dim_board']) for e in examples)}")
    print(f"  Box types: {set(e['box_type'] for e in examples)}")
    print(f"  Seed range: {min(seeds)} - {max(seeds)}")


def main() -> None:
    """Parse command-line arguments and write the example file."""
    import argparse

    parser = argparse.ArgumentParser(description="Generate Tetris test examples")
    parser.add_argument("--num-examples", type=int, default=500, help="Number of examples to generate (default: 500)")
    parser.add_argument(
        "--output",
        type=str,
        default="data/test_examples.jsonl",
        help="Output JSONL file path (default: data/test_examples.jsonl)",
    )
    parser.add_argument("--seed", type=int, default=42, help="Random seed for generation (default: 42)")

    args = parser.parse_args()

    # Pass the seed explicitly for reproducibility
    generate_test_examples(args.num_examples, args.output, seed=args.seed)


if __name__ == "__main__":
    main()
from unittest.mock import MagicMock, patch

from fastapi.testclient import TestClient

from nemo_gym.openai_utils import (
    NeMoGymResponse,
    NeMoGymResponseCreateParamsNonStreaming,
)
from nemo_gym.server_utils import ServerClient
from resources_servers.grl_tetris.app import (
    GrlTetrisResourcesServer,
    GrlTetrisResourcesServerConfig,
)


# Minimal request/response pair accepted by the ``/verify`` endpoint.
_VERIFY_CREATE_PARAMS = NeMoGymResponseCreateParamsNonStreaming(
    input="placeholder",
)

_VERIFY_RESPONSE = NeMoGymResponse.model_construct(
    id="resp_test",
    object="response",
    created_at=0.0,
    status="completed",
    output=[],
    model="gpt-4.1",
    parallel_tool_calls=True,
    tool_choice="auto",
    tools=[],
)


def _verify_payload() -> dict:
    """Return the JSON body posted to ``/verify`` by the tests below."""
    body = {}
    body["responses_create_params"] = _VERIFY_CREATE_PARAMS.model_dump(mode="json")
    body["response"] = _VERIFY_RESPONSE.model_dump(mode="json")
    return body


def _build_server() -> GrlTetrisResourcesServer:
    """Create a resource server with a placeholder config and a mocked client."""
    cfg = GrlTetrisResourcesServerConfig(
        host="0.0.0.0",
        port=8080,
        entrypoint="",
        name="",
    )
    return GrlTetrisResourcesServer(config=cfg, server_client=MagicMock(spec=ServerClient))


class TestApp:
    """Endpoint tests that replace ``TetrisEnv`` with small in-test fakes."""

    def test_sanity(self) -> None:
        # Construction alone must not raise.
        _build_server()

    def test_seed_and_step_flow(self) -> None:
        server = _build_server()

        class FakeEnv:
            ACTION_LOOKUP = {0: "Left"}

            def __init__(self, *_args, **_kwargs) -> None:
                self._closed = False
                self.step_calls = 0

            def reset(self, seed=None):  # noqa: ARG002
                return "Initial observation"

            def step(self, action):
                self.step_calls += 1
                assert action == 0
                finished = self.step_calls >= 1
                return "Next observation", 1.0, finished, {"success": finished}

            def close(self):
                self._closed = True

        env = FakeEnv()
        with patch("resources_servers.grl_tetris.app.TetrisEnv", return_value=env):
            client = TestClient(server.setup_webserver())

            seeded = client.post("/seed_session", json={"seed": 123})
            assert seeded.status_code == 200
            assert seeded.json()["observation"] == "Initial observation"

            session_cookies = seeded.cookies
            stepped = client.post("/step", json={"actions": ["Left"]}, cookies=session_cookies)
            step_body = stepped.json()
            assert stepped.status_code == 200
            assert step_body["observation"] == "Next observation"
            assert step_body["reward"] == 1.0
            assert step_body["done"] is True
            assert step_body["steps"][0]["action_label"] == "Left"
            assert env.step_calls == 1

            verified = client.post("/verify", json=_verify_payload(), cookies=session_cookies)
            assert verified.status_code == 200
            verify_body = verified.json()
            assert verify_body["success"] is True
            assert verify_body["reward"] == 1.0
            assert env._closed is True

    def test_step_action_mapping_stops_after_done(self) -> None:
        server = _build_server()

        class FakeEnv:
            ACTION_LOOKUP = {0: "Left", 1: "Right"}

            def __init__(self, *_args, **_kwargs) -> None:
                self.calls = 0
                self.closed = False

            def reset(self, seed=None):  # noqa: ARG002
                return "Init"

            def step(self, action):
                self.calls += 1
                if self.calls != 1:
                    raise AssertionError("Env.step should not be called after done")
                assert action == 0
                return "Obs1", 0.5, True, {"success": True}

            def close(self):
                self.closed = True

        env = FakeEnv()
        with patch("resources_servers.grl_tetris.app.TetrisEnv", return_value=env):
            client = TestClient(server.setup_webserver())

            session_cookies = client.post("/seed_session", json={}).cookies
            stepped = client.post("/step", json={"actions": ["Left", "Right"]}, cookies=session_cookies)
            step_body = stepped.json()
            assert stepped.status_code == 200
            assert step_body["done"] is True
            assert step_body["steps"][0]["action_label"] == "Left"
            assert len(step_body["steps"]) == 1
            assert len(step_body["history"]) == 1
            assert env.calls == 1

    def test_step_invalid_action_raises(self) -> None:
        server = _build_server()

        class FakeEnv:
            ACTION_LOOKUP = {0: "Left"}

            def __init__(self, *_args, **_kwargs) -> None:
                pass

            def reset(self, seed=None):  # noqa: ARG002
                return "Init"

            def step(self, action):  # pragma: no cover - not reached
                raise AssertionError("Should not call step for invalid action")

            def close(self):
                pass

        with patch("resources_servers.grl_tetris.app.TetrisEnv", return_value=FakeEnv()):
            client = TestClient(server.setup_webserver())

            session_cookies = client.post("/seed_session", json={}).cookies
            rejected = client.post("/step", json={"actions": ["Rotate"]}, cookies=session_cookies)
            assert rejected.status_code == 400
            assert rejected.json()["detail"].startswith("Unable to parse action")

    def test_verify_failure_zero_reward_and_cleanup(self) -> None:
        server = _build_server()

        class FakeEnv:
            ACTION_LOOKUP = {0: "Left"}

            def __init__(self, *_args, **_kwargs) -> None:
                self.closed = False

            def reset(self, seed=None):  # noqa: ARG002
                return "Init"

            def step(self, action):
                return "Obs", 0.0, False, {"success": False}

            def close(self):
                self.closed = True

        env = FakeEnv()
        with patch("resources_servers.grl_tetris.app.TetrisEnv", return_value=env):
            client = TestClient(server.setup_webserver())

            session_cookies = client.post("/seed_session", json={}).cookies
            client.post("/step", json={"actions": [0]}, cookies=session_cookies)

            verified = client.post(
                "/verify",
                json=_verify_payload(),
                cookies=session_cookies,
            )
            assert verified.status_code == 200
            verify_body = verified.json()
            assert verify_body["success"] is False
            assert verify_body["reward"] == 0.0
            assert env.closed is True
            assert server.session_id_to_state == {}
+""" + +from __future__ import annotations + +import copy +import random +from contextlib import contextmanager +from typing import Any, Dict, Iterator, List, Tuple + +import gymnasium as gym +import numpy as np + + +# ─────────────────────────── utilities ──────────────────────────── + + +@contextmanager +def all_seed(seed: int | None) -> Iterator[None]: + """Temporarily set ``random`` and ``numpy`` seeds within a context.""" + random_state = random.getstate() + numpy_state = np.random.get_state() + + try: + if seed is not None: + random.seed(seed) + np.random.seed(seed) + yield + finally: + random.setstate(random_state) + np.random.set_state(numpy_state) + + +class BaseEnv: + """Minimal base class mirroring the GRL interface.""" + + def __init__(self, config: Dict[str, Any] | None = None, **_kwargs: Any) -> None: + self.config: Dict[str, Any] = config or {} + + def reset(self, seed: int | None = None, **_kwargs: Any) -> Any: + raise NotImplementedError + + def step(self, action: int) -> Tuple[Any, float, bool, Dict[str, Any]]: + raise NotImplementedError + + def render(self, mode: str = "text") -> Any: + raise NotImplementedError + + def close(self) -> None: + raise NotImplementedError + + +def is_occupied(shape: List[Tuple[int, int]], anchor: Tuple[int, int], board: np.ndarray) -> bool: + """Return True when ``shape`` anchored at ``anchor`` collides with the board.""" + for dx, dy in shape: + x, y = anchor[0] + dx, anchor[1] + dy + if y < 0: + continue + if x < 0 or x >= board.shape[0] or y >= board.shape[1] or board[x, y]: + return True + return False + + +class TetrisEnv(BaseEnv): + """Self-contained Tetris environment modelled after the GRL training environment.""" + + def __init__(self, config: Dict[str, Any] | None = None, **_kwargs: Any) -> None: + super().__init__(config=config or {}) + self.width = self.config.get("dim_x", 4) + self.height = self.config.get("dim_y", 4) + self.board = np.zeros((self.width, self.height), dtype=np.bool_) + self.GRID_LOOKUP 
= self.config.get("grid_lookup", {0: "_", 1: "#", 2: "X"}) + self.ACTION_LOOKUP = self.config.get( + "action_lookup", + {0: "Left", 1: "Right", 2: "Down"}, + ) + self.ACTION_SPACE = gym.spaces.Discrete(3, start=0) + self.render_mode = self.config.get("render_mode", "text") + + box_type = self.config.get("box_type", 1) + if box_type == 2: + self.shapes = { + "I": [(0, 0), (0, -1)], + "-": [(0, 0), (-1, 0)], + } + self.shape_names = ["I", "-"] + elif box_type == 3: + self.shapes = { + "I": [(0, 0), (0, -1)], + "-": [(0, 0), (-1, 0)], + "O": [(0, 0), (-1, 0), (0, -1), (-1, -1)], + } + self.shape_names = ["I", "-", "O"] + else: + self.shapes = {"O": [(0, 0)]} + self.shape_names = ["O"] + + self.actions = {0: self._left, 1: self._right, 2: self._soft_drop} + + self.time = 0 + self.score = 0 + self.anchor: Tuple[int, int] | None = None + self.shape: List[Tuple[int, int]] | None = None + self.n_deaths = 0 + self._shape_counts = [0] * len(self.shapes) + + self.pre_generated_pieces: List[Tuple[Tuple[int, int], List[Tuple[int, int]]]] = [] + self.current_piece_index = 0 + + self.reset() + + # ─────────────────────────── core helpers ──────────────────────────── + + def _choose_shape(self) -> List[Tuple[int, int]]: + max_count = max(self._shape_counts) + weights = [5 + max_count - count for count in self._shape_counts] + r = random.randint(1, sum(weights)) + for i, weight in enumerate(weights): + r -= weight + if r <= 0: + self._shape_counts[i] += 1 + return self.shapes[self.shape_names[i]] + return self.shapes[self.shape_names[0]] + + def _generate_piece(self) -> Tuple[Tuple[int, int], List[Tuple[int, int]]]: + shape = self._choose_shape() + if (-1, 0) in shape: + anchor = (random.randint(1, self.width - 1), 0) + else: + anchor = (random.randint(0, self.width - 1), 0) + return anchor, shape + + def _new_piece(self) -> None: + if self.current_piece_index < len(self.pre_generated_pieces): + self.anchor, self.shape = self.pre_generated_pieces[self.current_piece_index] + 
self.current_piece_index += 1 + else: + self.anchor, self.shape = self._generate_piece() + + def _has_dropped(self) -> bool: + assert self.shape is not None and self.anchor is not None + return is_occupied(self.shape, (self.anchor[0], self.anchor[1] + 1), self.board) + + def _clear_lines(self) -> int: + can_clear = [np.all(self.board[:, i]) for i in range(self.height)] + new_board = np.zeros_like(self.board) + write_idx = self.height - 1 + for i in range(self.height - 1, -1, -1): + if not can_clear[i]: + new_board[:, write_idx] = self.board[:, i] + write_idx -= 1 + lines_cleared = sum(can_clear) + self.score += lines_cleared + self.board = new_board + return lines_cleared + + def _set_piece(self, on: bool = False) -> None: + assert self.shape is not None and self.anchor is not None + for dx, dy in self.shape: + x, y = self.anchor[0] + dx, self.anchor[1] + dy + if 0 <= x < self.width and 0 <= y < self.height: + self.board[x, y] = on + + def _left(self) -> None: + assert self.shape is not None and self.anchor is not None + new_anchor = (self.anchor[0] - 1, self.anchor[1]) + if not is_occupied(self.shape, new_anchor, self.board): + self.anchor = new_anchor + + def _right(self) -> None: + assert self.shape is not None and self.anchor is not None + new_anchor = (self.anchor[0] + 1, self.anchor[1]) + if not is_occupied(self.shape, new_anchor, self.board): + self.anchor = new_anchor + + def _soft_drop(self) -> None: + assert self.shape is not None and self.anchor is not None + new_anchor = (self.anchor[0], self.anchor[1] + 1) + if not is_occupied(self.shape, new_anchor, self.board): + self.anchor = new_anchor + + def _idle(self) -> None: + pass + + # ───────────────────────────── API ────────────────────────────── + + def reset(self, seed: int | None = None, **_kwargs: Any) -> Any: + """Reset the environment to its initial state.""" + try: + with all_seed(seed): + self.time = 0 + self.score = 0 + self.board = np.zeros((self.width, self.height), dtype=np.bool_) + + 
self.pre_generated_pieces = [] + self.current_piece_index = 0 + num_pieces_to_generate = self.width * self.height + 1 + for _ in range(num_pieces_to_generate): + self.pre_generated_pieces.append(self._generate_piece()) + + self._new_piece() + return self.render() + except (RuntimeError, RuntimeWarning): + next_seed = abs(hash(str(seed))) % (2**32) if seed is not None else None + return self.reset(next_seed) + + def step(self, action: int) -> Tuple[Any, float, bool, Dict[str, Any]]: + if action not in self.actions: + return self.render(), 0.0, True, {"error": "Invalid action"} + + previous_pos = copy.deepcopy(self.anchor) + self.actions[action]() + + self.time += 1 + reward = -0.1 + done = False + dropped = False + info: Dict[str, Any] = {} + + lines_cleared = 0 + if self._has_dropped(): + dropped = True + self._set_piece(True) + lines_cleared = self._clear_lines() + reward += lines_cleared * 10 + + if np.any(self.board[:, 0]): + done = True + else: + self._new_piece() + + self._set_piece(True) + state = self.render() + self._set_piece(False) + + action_effective = previous_pos is not None and previous_pos != self.anchor + info["action_is_effective"] = action_effective + info["action_is_valid"] = True + info["success"] = lines_cleared > 0 + info["dropped"] = dropped + + if lines_cleared > 0: + done = True + + return state, reward, done, info + + def render(self, mode: str = "text") -> Any: + if mode != "text": + return self.board.copy() + + board_str = "\n".join("".join("#" if cell else "_" for cell in row) for row in self.board.T) + + self._set_piece(True) + assert self.shape is not None and self.anchor is not None + positions = [(self.anchor[0] + dx, self.anchor[1] + dy) for dx, dy in self.shape] + self._set_piece(False) + + lines = board_str.split("\n") + for x, y in positions: + if 0 <= y < len(lines) and 0 <= x < len(lines[0]): + line = list(lines[y]) + line[x] = "X" + lines[y] = "".join(line) + return "\n".join(lines) + + def get_all_actions(self) -> 
List[int]: + return list(self.actions.keys()) + + def close(self) -> None: + self.board = None + self.anchor = None + self.shape = None