diff --git a/resources_servers/grl_tetris/README.md b/resources_servers/grl_tetris/README.md
new file mode 100644
index 000000000..14321d429
--- /dev/null
+++ b/resources_servers/grl_tetris/README.md
@@ -0,0 +1,70 @@
+# GRL Tetris Resource Server
+
+FastAPI-based resource server that exposes the GRL Tetris environment through NeMo Gym conventions. The environment logic lives under `resources_servers/grl_tetris/tetris_env` and is a standalone adaptation of the upstream GRL implementation.
+
+## Why it exists
+- **Domain**: Classic falling-block Tetris on a configurable grid.
+- **Evaluation**: Agents must clear at least one line; `/verify` rewards the cumulative score only when the environment reports success.
+- **Independence**: No runtime dependency on the GRL repository—the environment is vendored and self-contained.
+
+## Setup
+
+Please follow the setup instructions as outlined in: https://github.com/NVIDIA-NeMo/Gym/blob/main/docs/tutorials/02-setup.md#step-1-clone-and-install.
+
+## Running
+Spin up the server alongside a compatible agent:
+```bash
+config_paths="responses_api_models/openai_model/configs/openai_model.yaml,\
+resources_servers/grl_tetris/configs/grl_tetris.yaml"
+ng_run "+config_paths=[$config_paths]"
+```
+
+Collect trajectories:
+```bash
+ng_collect_rollouts +agent_name=grl_tetris_simple_agent \
+ +input_jsonl_fpath=resources_servers/grl_tetris/data/example.jsonl \
+ +output_jsonl_fpath=resources_servers/grl_tetris/data/example_rollouts.jsonl \
+ +limit=5
+```
+
+Launch the rollout viewer:
+```bash
+ng_viewer +jsonl_fpath=resources_servers/grl_tetris/data/example_rollouts.jsonl
+```
+
+## Tests
+```bash
+pytest resources_servers/grl_tetris/tests
+```
+
+## Licensing
+- Code: Apache 2.0
+- Data: Apache 2.0
+
+---
+
+## Reward Profiling Results
+
+### Qwen3-4B
+
+**Dataset**: 3,200 rollouts (200 prompts × 16 repeats)
+
+**Performance Metrics**:
+- **Success Rate**: 5.09% (163/3,200 rollouts)
+- **Mean Reward**: -0.29 (range: -2.00 to 19.20)
+- **Median Reward**: -0.80
+
+**Key Findings**:
+- Most rollouts (21%) received reward of -0.90 (piece dropped without clearing lines)
+- Successful line clears achieved rewards of ~9.0-9.2
+- Average 7.48 tool calls per rollout
+- Weak negative correlation between tool calls and reward (-0.06)
+
+**Top Reward Distribution**:
+- `-0.9`: 672 rollouts (21.0%) - piece dropped, no line clear
+- `-0.8`: 603 rollouts (18.8%)
+- `-0.7`: 495 rollouts (15.5%)
+- `9.1`: 29 rollouts (0.9%) - successful line clear
+- `8.9`: 26 rollouts (0.8%)
+
+The relatively low success rate (5.09%) suggests that Tetris line-clearing is challenging for the model, requiring precise spatial reasoning and action sequencing. Most rollouts result in pieces dropping without clearing lines (negative rewards from -0.1 per action step).
diff --git a/resources_servers/grl_tetris/app.py b/resources_servers/grl_tetris/app.py
new file mode 100644
index 000000000..12d2bb01b
--- /dev/null
+++ b/resources_servers/grl_tetris/app.py
@@ -0,0 +1,226 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Union
+
+import numpy as np
+from fastapi import FastAPI, HTTPException, Request
+from pydantic import BaseModel, Field
+
+from nemo_gym.base_resources_server import (
+ BaseResourcesServerConfig,
+ BaseSeedSessionRequest,
+ BaseSeedSessionResponse,
+ BaseVerifyRequest,
+ BaseVerifyResponse,
+ SimpleResourcesServer,
+)
+from nemo_gym.server_utils import SESSION_ID_KEY, ServerClient
+from resources_servers.grl_tetris.tetris_env import TetrisEnv
+
+
+DEFAULT_GRID_LOOKUP = {0: "_", 1: "#", 2: "X"}
+DEFAULT_ACTION_LOOKUP = {0: "Left", 1: "Right", 2: "Down"}
+
+
+class GrlTetrisResourcesServerConfig(BaseResourcesServerConfig):
+ env_config: Dict[str, Any] = Field(
+ default_factory=lambda: {
+ "grid_lookup": DEFAULT_GRID_LOOKUP,
+ "action_lookup": DEFAULT_ACTION_LOOKUP,
+ "render_mode": "text",
+ "dim_x": 4,
+ "dim_y": 4,
+ "box_type": 3,
+ }
+ )
+
+
+class GrlTetrisSeedSessionRequest(BaseSeedSessionRequest):
+ seed: Optional[int] = None
+
+
+class GrlTetrisSeedSessionResponse(BaseSeedSessionResponse):
+ observation: str
+
+
+class GrlTetrisStepRequest(BaseModel):
+ actions: List[Union[str, int]] = Field(default_factory=list)
+
+
+class GrlTetrisStepTrace(BaseModel):
+ action_id: int
+ action_label: str
+ reward: float
+ done: bool
+ info: Dict[str, Any]
+
+
+class GrlTetrisStepResponse(BaseModel):
+ observation: str
+ reward: float
+ total_reward: float
+ done: bool
+ steps: List[GrlTetrisStepTrace]
+ history: List[GrlTetrisStepTrace] = Field(default_factory=list)
+
+
+class GrlTetrisVerifyResponse(BaseVerifyResponse):
+ success: bool
+
+
+@dataclass
+class TetrisSessionState:
+ env: Any
+ observation: str
+ total_reward: float = 0.0
+ done: bool = False
+ last_info: Dict[str, Any] = field(default_factory=dict)
+ history: List[GrlTetrisStepTrace] = field(default_factory=list)
+
+
+class GrlTetrisResourcesServer(SimpleResourcesServer):
+ config: GrlTetrisResourcesServerConfig
+ server_client: ServerClient
+ session_id_to_state: Dict[str, TetrisSessionState] = Field(default_factory=dict)
+
+ def setup_webserver(self) -> FastAPI:
+ app = super().setup_webserver()
+ app.post("/step")(self.step)
+ return app
+
+ def _create_env(self) -> TetrisEnv:
+ return TetrisEnv(self.config.env_config)
+
+ async def seed_session(self, request: Request, body: GrlTetrisSeedSessionRequest) -> GrlTetrisSeedSessionResponse:
+ session_id = request.session[SESSION_ID_KEY]
+ env = self._create_env()
+ observation = env.reset(seed=body.seed)
+
+ self.session_id_to_state[session_id] = TetrisSessionState(
+ env=env,
+ observation=observation,
+ )
+ return GrlTetrisSeedSessionResponse(observation=observation)
+
+ async def step(self, request: Request, body: GrlTetrisStepRequest) -> GrlTetrisStepResponse:
+ session_id = request.session.get(SESSION_ID_KEY)
+ if session_id is None or session_id not in self.session_id_to_state:
+ raise HTTPException(status_code=400, detail="Session not initialized. Call /seed_session first.")
+
+ session_state = self.session_id_to_state[session_id]
+ env = session_state.env
+
+ reverse_lookup = {label.lower(): idx for idx, label in env.ACTION_LOOKUP.items()}
+ total_step_reward = 0.0
+ steps: List[GrlTetrisStepTrace] = []
+
+ if session_state.done:
+ return GrlTetrisStepResponse(
+ observation=session_state.observation,
+ reward=0.0,
+ total_reward=session_state.total_reward,
+ done=True,
+ steps=[],
+ history=list(session_state.history),
+ )
+
+ for action in body.actions:
+ action_id = self._parse_action(action, reverse_lookup)
+ if action_id not in env.ACTION_LOOKUP:
+ raise HTTPException(status_code=400, detail=f"Invalid action identifier: {action}")
+
+ next_obs, reward, done, info = env.step(action_id)
+ info = self._to_python_types(info)
+ total_step_reward += reward
+ session_state.total_reward += reward
+ session_state.observation = next_obs
+ session_state.last_info = info
+ session_state.done = bool(done)
+
+ step = GrlTetrisStepTrace(
+ action_id=action_id,
+ action_label=env.ACTION_LOOKUP[action_id],
+ reward=reward,
+ done=session_state.done,
+ info=info,
+ )
+ session_state.history.append(step)
+ steps.append(step)
+
+ if session_state.done:
+ break
+
+ return GrlTetrisStepResponse(
+ observation=session_state.observation,
+ reward=total_step_reward,
+ total_reward=session_state.total_reward,
+ done=session_state.done,
+ steps=steps,
+ history=list(session_state.history),
+ )
+
+ async def verify(self, request: Request, body: BaseVerifyRequest) -> GrlTetrisVerifyResponse:
+ session_id = request.session.get(SESSION_ID_KEY)
+ session_state = self.session_id_to_state.get(session_id)
+
+ success = False
+ reward = 0.0
+ if session_state is not None:
+ success = bool(session_state.last_info.get("success"))
+ reward = session_state.total_reward
+
+ if session_id in self.session_id_to_state:
+ try:
+ session_state.env.close() # type: ignore[union-attr]
+ except Exception: # pragma: no cover - defensive cleanup
+ pass
+ del self.session_id_to_state[session_id]
+
+ return GrlTetrisVerifyResponse(
+ **body.model_dump(),
+ reward=reward,
+ success=success,
+ )
+
+ @staticmethod
+ def _parse_action(action: Union[str, int], reverse_lookup: Dict[str, int]) -> int:
+ if isinstance(action, int):
+ return action
+
+ candidate = action.strip()
+ lower_candidate = candidate.lower()
+ if lower_candidate in reverse_lookup:
+ return reverse_lookup[lower_candidate]
+
+ try:
+ return int(candidate)
+ except ValueError as exc:
+ raise HTTPException(status_code=400, detail=f"Unable to parse action: {action}") from exc
+
+ @staticmethod
+ def _to_python_types(obj: Any) -> Any:
+ if isinstance(obj, dict):
+ return {k: GrlTetrisResourcesServer._to_python_types(v) for k, v in obj.items()}
+ if isinstance(obj, list):
+ return [GrlTetrisResourcesServer._to_python_types(v) for v in obj]
+ if isinstance(obj, np.generic):
+ return obj.item()
+ return obj
+
+
+if __name__ == "__main__":
+ GrlTetrisResourcesServer.run_webserver()
diff --git a/resources_servers/grl_tetris/configs/grl_tetris.yaml b/resources_servers/grl_tetris/configs/grl_tetris.yaml
new file mode 100644
index 000000000..80e8bfe75
--- /dev/null
+++ b/resources_servers/grl_tetris/configs/grl_tetris.yaml
@@ -0,0 +1,27 @@
+grl_tetris_resources_server:
+ resources_servers:
+ grl_tetris:
+ entrypoint: app.py
+ domain: games
+ verified: false
+grl_tetris_simple_agent:
+ responses_api_agents:
+ simple_agent:
+ entrypoint: app.py
+ max_steps: 10
+ resources_server:
+ type: resources_servers
+ name: grl_tetris_resources_server
+ model_server:
+ type: responses_api_models
+ name: policy_model
+ datasets:
+ - name: example
+ type: example
+ jsonl_fpath: resources_servers/grl_tetris/data/example.jsonl
+ num_repeats: 1
+ gitlab_identifier:
+ dataset_name: grl_tetris
+ version: 0.0.1
+ artifact_fpath: example.jsonl
+ license: Apache 2.0
diff --git a/resources_servers/grl_tetris/data/example.jsonl b/resources_servers/grl_tetris/data/example.jsonl
new file mode 100644
index 000000000..2eb162fde
--- /dev/null
+++ b/resources_servers/grl_tetris/data/example.jsonl
@@ -0,0 +1,5 @@
+{"game_id": 1, "seed": 93810, "dim_board": [5, 5], "box_type": 0, "responses_create_params": {"input": [{"role": "developer", "content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... summarizing all moves you made."}, {"role": "user", "content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible."}], "tools": [{"name": "step", "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving.", "strict": true, "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}}]}}
+{"game_id": 2, "seed": 46185, "dim_board": [4, 6], "box_type": 1, "responses_create_params": {"input": [{"role": "developer", "content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... summarizing all moves you made."}, {"role": "user", "content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible."}], "tools": [{"name": "step", "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving.", "strict": true, "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}}]}}
+{"game_id": 3, "seed": 28563, "dim_board": [5, 5], "box_type": 0, "responses_create_params": {"input": [{"role": "developer", "content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... summarizing all moves you made."}, {"role": "user", "content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible."}], "tools": [{"name": "step", "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving.", "strict": true, "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}}]}}
+{"game_id": 4, "seed": 87808, "dim_board": [6, 5], "box_type": 0, "responses_create_params": {"input": [{"role": "developer", "content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... summarizing all moves you made."}, {"role": "user", "content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible."}], "tools": [{"name": "step", "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving.", "strict": true, "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}}]}}
+{"game_id": 5, "seed": 14453, "dim_board": [5, 5], "box_type": 1, "responses_create_params": {"input": [{"role": "developer", "content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... summarizing all moves you made."}, {"role": "user", "content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible."}], "tools": [{"name": "step", "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving.", "strict": true, "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}}]}}
diff --git a/resources_servers/grl_tetris/data/example_metrics.json b/resources_servers/grl_tetris/data/example_metrics.json
new file mode 100644
index 000000000..88030ce73
--- /dev/null
+++ b/resources_servers/grl_tetris/data/example_metrics.json
@@ -0,0 +1,8 @@
+{
+ "name": "example",
+ "type": "example",
+ "jsonl_fpath": "resources_servers/grl_tetris/data/example.jsonl",
+ "gitlab_identifier": null,
+ "license": "Apache 2.0",
+ "Number of examples": 5
+}
diff --git a/resources_servers/grl_tetris/data/example_rollouts.jsonl b/resources_servers/grl_tetris/data/example_rollouts.jsonl
new file mode 100644
index 000000000..3ee989b4c
--- /dev/null
+++ b/resources_servers/grl_tetris/data/example_rollouts.jsonl
@@ -0,0 +1,5 @@
+{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... summarizing all moves you made.", "role": "developer", "type": "message"}, {"content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible.", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving."}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_0652b8fcf1e2dce400696565e6b2e881959ed1de12f162997e", "created_at": 1768252902.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "gpt-4o-2024-08-06", "object": "response", "output": [{"arguments": "{\"actions\":[]}", "call_id": "call_KiPwWiCxtkOEN1RDK4wi52Sp", "name": "step", "type": "function_call", "id": "fc_0652b8fcf1e2dce400696565e47d2c8195bb1da44b843ec537", "status": "completed"}, {"call_id": "call_KiPwWiCxtkOEN1RDK4wi52Sp", "output": "{\"observation\":\"__XX\\n____\\n____\\n____\",\"reward\":0.0,\"total_reward\":0.0,\"done\":false,\"steps\":[],\"history\":[]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_g5iqpJJHzJnhmDa3btuotPF3", "name": "step", "type": "function_call", "id": "fc_0652b8fcf1e2dce400696565e6273881959577dc95640f8566", "status": "completed"}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_2q9Elms7fMTWkIhRWR5OldMv", "name": "step", "type": "function_call", "id": "fc_0652b8fcf1e2dce400696565e6401881958e2569ad4f42dcef", "status": "completed"}, {"call_id": "call_g5iqpJJHzJnhmDa3btuotPF3", "output": "{\"observation\":\"__XX\\n__XX\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.1,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"call_id": "call_2q9Elms7fMTWkIhRWR5OldMv", "output": "{\"observation\":\"____\\n__XX\\n__XX\\n____\",\"reward\":-0.1,\"total_reward\":-0.2,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"id": "msg_0652b8fcf1e2dce400696565e73d4c81958bf7c73d4d24fa8f", "content": [{"annotations": [], "text": "Down || Down", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": 1.0, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving."}], "top_p": 1.0, "background": false, "conversation": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "prompt_cache_key": null, "reasoning": {"effort": null, "generate_summary": null, "summary": null}, "safety_identifier": null, "service_tier": "default", "status": "completed", "text": {"format": {"type": "text"}, "verbosity": "medium"}, "top_logprobs": 0, "truncation": "disabled", "usage": {"input_tokens": 625, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 11, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 636}, "user": null, "billing": {"payer": "developer"}, "completed_at": 1768252903, "frequency_penalty": 0.0, "presence_penalty": 0.0, "prompt_cache_retention": null, "store": true}, "reward": -0.2, "success": false}
+{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... summarizing all moves you made.", "role": "developer", "type": "message"}, {"content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible.", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving."}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_0690794e9bb8139d00696565e9fc20819eb9128e7907a19bd3", "created_at": 1768252906.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "gpt-4o-2024-08-06", "object": "response", "output": [{"arguments": "{\"actions\":[]}", "call_id": "call_uL4WuAX2lvwJOdGVdqlezO8y", "name": "step", "type": "function_call", "id": "fc_0690794e9bb8139d00696565e4c938819eba8c692fc8aba62e", "status": "completed"}, {"call_id": "call_uL4WuAX2lvwJOdGVdqlezO8y", "output": "{\"observation\":\"_X__\\n____\\n____\\n____\",\"reward\":0.0,\"total_reward\":0.0,\"done\":false,\"steps\":[],\"history\":[]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_0Ny2wn272A6wZIy2SSExmBxS", "name": "step", "type": "function_call", "id": "fc_0690794e9bb8139d00696565e651ac819ea88648ddd7f7e4a0", "status": "completed"}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_ykybl3HByrFr34weNdftZxBr", "name": "step", "type": "function_call", "id": "fc_0690794e9bb8139d00696565e66ea8819e845eab68c6b7fab8", "status": "completed"}, {"call_id": "call_0Ny2wn272A6wZIy2SSExmBxS", "output": "{\"observation\":\"_X__\\n_X__\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.1,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"call_id": "call_ykybl3HByrFr34weNdftZxBr", "output": "{\"observation\":\"____\\n_X__\\n_X__\\n____\",\"reward\":-0.1,\"total_reward\":-0.2,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_r9hSUcSt8qx9OTLE8158e0Rt", "name": "step", "type": "function_call", "id": "fc_0690794e9bb8139d00696565e7aec4819eb416d637c8a04d4e", "status": "completed"}, {"call_id": "call_r9hSUcSt8qx9OTLE8158e0Rt", "output": "{\"observation\":\"__XX\\n____\\n_#__\\n_#__\",\"reward\":-0.1,\"total_reward\":-0.30000000000000004,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_oZwMj5SDCFo9oQEllqrmjBNk", "name": "step", "type": "function_call", "id": "fc_0690794e9bb8139d00696565e9b808819eb791aa22730c445f", "status": "completed"}, {"call_id": "call_oZwMj5SDCFo9oQEllqrmjBNk", "output": "{\"observation\":\"____\\n__XX\\n_#__\\n_#__\",\"reward\":-0.1,\"total_reward\":-0.4,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"id": "msg_0690794e9bb8139d00696565ea6138819eb58f850f635e9b0b", "content": [{"annotations": [], "text": "Down || Down || Down || Down || Down", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": 1.0, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving."}], "top_p": 1.0, "background": false, "conversation": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "prompt_cache_key": null, "reasoning": {"effort": null, "generate_summary": null, "summary": null}, "safety_identifier": null, "service_tier": "default", "status": "completed", "text": {"format": {"type": "text"}, "verbosity": "medium"}, "top_logprobs": 0, "truncation": "disabled", "usage": {"input_tokens": 1179, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 17, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 1196}, "user": null, "billing": {"payer": "developer"}, "completed_at": 1768252906, "frequency_penalty": 0.0, "presence_penalty": 0.0, "prompt_cache_retention": null, "store": true}, "reward": -0.4, "success": false}
+{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... summarizing all moves you made.", "role": "developer", "type": "message"}, {"content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible.", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving."}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_045dc8dead221e0500696565ea3c6c81a08481f9287fae7c5c", "created_at": 1768252906.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "gpt-4o-2024-08-06", "object": "response", "output": [{"arguments": "{\"actions\":[]}", "call_id": "call_AKpurldhUA7kkDmHHTk9bvq7", "name": "step", "type": "function_call", "id": "fc_045dc8dead221e0500696565e4912481a08f2bff033e7f7cc3", "status": "completed"}, {"call_id": "call_AKpurldhUA7kkDmHHTk9bvq7", "output": "{\"observation\":\"___X\\n____\\n____\\n____\",\"reward\":0.0,\"total_reward\":0.0,\"done\":false,\"steps\":[],\"history\":[]}", "type": "function_call_output", "id": null, "status": null}, {"id": "msg_045dc8dead221e0500696565e5d2fc81a092444caabf20fff3", "content": [{"annotations": [], "text": "The current board state is:\n\n```\n___X\n____\n____\n____\n```\n\nThe active piece ('X') is in the top-right corner. I'll move it down to try and clear a line.", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}, {"arguments": "{\"actions\":[\"Left\"]}", "call_id": "call_f9yuUc5kBntRpWGckYNy9NIR", "name": "step", "type": "function_call", "id": "fc_045dc8dead221e0500696565e7543481a0bd34b836a71328c5", "status": "completed"}, {"call_id": "call_f9yuUc5kBntRpWGckYNy9NIR", "output": "{\"observation\":\"__X_\\n____\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.1,\"done\":false,\"steps\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Left\"]}", "call_id": "call_0ZU1Ffo5TaYfQ7Rc4SdBYFHk", "name": "step", "type": "function_call", "id": "fc_045dc8dead221e0500696565e86dd481a09155bae8b20354f9", "status": "completed"}, {"call_id": "call_0ZU1Ffo5TaYfQ7Rc4SdBYFHk", "output": "{\"observation\":\"_X__\\n____\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.2,\"done\":false,\"steps\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Left\"]}", "call_id": "call_vOxbvT3jQz5cxquUrBbOScQi", "name": "step", "type": "function_call", "id": "fc_045dc8dead221e0500696565e91f6881a0a4e5d2323a1f998e", "status": "completed"}, {"call_id": "call_vOxbvT3jQz5cxquUrBbOScQi", "output": "{\"observation\":\"X___\\n____\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.30000000000000004,\"done\":false,\"steps\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\",\"Down\",\"Down\"]}", "call_id": "call_mqgGieUPMttGAlOVkRTJ7TM6", "name": "step", "type": "function_call", "id": "fc_045dc8dead221e0500696565e9d2b481a0934a7e539636b6cd", "status": "completed"}, {"call_id": "call_mqgGieUPMttGAlOVkRTJ7TM6", "output": "{\"observation\":\"__XX\\n____\\n#___\\n#___\",\"reward\":-0.30000000000000004,\"total_reward\":-0.6,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"id": "msg_045dc8dead221e0500696565eaafc881a0bb0db1f9c9c5815d", "content": [{"annotations": [], "text": "I moved the piece, but no line was cleared. Here's the sequence of actions taken:\n\nLeft || Left || Left || Down || Down || Down", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": 1.0, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving."}], "top_p": 1.0, "background": false, "conversation": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "prompt_cache_key": null, "reasoning": {"effort": null, "generate_summary": null, "summary": null}, "safety_identifier": null, "service_tier": "default", "status": "completed", "text": {"format": {"type": "text"}, "verbosity": "medium"}, "top_logprobs": 0, "truncation": "disabled", "usage": {"input_tokens": 1420, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 37, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 1457}, "user": null, "billing": {"payer": "developer"}, "completed_at": 1768252907, "frequency_penalty": 0.0, "presence_penalty": 0.0, "prompt_cache_retention": null, "store": true}, "reward": -0.6, "success": false}
+{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... summarizing all moves you made.", "role": "developer", "type": "message"}, {"content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible.", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving."}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_0d5a2c8365850b8e00696565eab4288192b2da66afb906d98e", "created_at": 1768252906.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "gpt-4o-2024-08-06", "object": "response", "output": [{"arguments": "{\"actions\":[]}", "call_id": "call_KgJRph2MXPBIZpi0YTtUUBGc", "name": "step", "type": "function_call", "id": "fc_0d5a2c8365850b8e00696565e48c00819282ccef6c4a83d6b9", "status": "completed"}, {"call_id": "call_KgJRph2MXPBIZpi0YTtUUBGc", "output": "{\"observation\":\"XX__\\n____\\n____\\n____\",\"reward\":0.0,\"total_reward\":0.0,\"done\":false,\"steps\":[],\"history\":[]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_erUkQxpj6GAIbwEwYPk6TRse", "name": "step", "type": "function_call", "id": "fc_0d5a2c8365850b8e00696565e55df08192988609846746dba4", "status": "completed"}, {"call_id": "call_erUkQxpj6GAIbwEwYPk6TRse", "output": "{\"observation\":\"____\\nXX__\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.1,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_0PIqBadc3XMyvboNUnDvschd", "name": "step", "type": "function_call", "id": "fc_0d5a2c8365850b8e00696565e645d081929b23c8104eb226d7", "status": "completed"}, {"call_id": "call_0PIqBadc3XMyvboNUnDvschd", "output": "{\"observation\":\"____\\n____\\nXX__\\n____\",\"reward\":-0.1,\"total_reward\":-0.2,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_hdtWjww3kRgedkTackhU9wBK", "name": "step", "type": "function_call", "id": "fc_0d5a2c8365850b8e00696565e721a8819291340dca9bafe5e9", "status": "completed"}, {"call_id": "call_hdtWjww3kRgedkTackhU9wBK", "output": "{\"observation\":\"XX__\\n____\\n____\\n##__\",\"reward\":-0.1,\"total_reward\":-0.30000000000000004,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_eiAPDwgSwieMKVJrNgCQO4sX", "name": "step", "type": "function_call", "id": "fc_0d5a2c8365850b8e00696565e83f208192a9d24d3ae0a5a550", "status": "completed"}, {"call_id": "call_eiAPDwgSwieMKVJrNgCQO4sX", "output": "{\"observation\":\"XX__\\nXX__\\n____\\n##__\",\"reward\":-0.1,\"total_reward\":-0.4,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_gUYouOo1KFFZfIOW2T31ooRi", "name": "step", "type": "function_call", "id": "fc_0d5a2c8365850b8e00696565e945888192837561cdb478f81a", "status": "completed"}, {"call_id": "call_gUYouOo1KFFZfIOW2T31ooRi", "output": "{\"observation\":\"__XX\\n##__\\n##__\\n##__\",\"reward\":-0.1,\"total_reward\":-0.5,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Left\"]}", "call_id": "call_GObmJdA0qMGI0cniT0VAv9mU", "name": "step", "type": "function_call", "id": "fc_0d5a2c8365850b8e00696565ea51148192bef30ae7c3576d74", "status": "completed"}, {"call_id": "call_GObmJdA0qMGI0cniT0VAv9mU", "output": "{\"observation\":\"_XX_\\n##__\\n##__\\n##__\",\"reward\":-0.1,\"total_reward\":-0.6,\"done\":true,\"steps\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":true,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":true,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"id": "msg_0d5a2c8365850b8e00696565eb619c8192a650c33c2aeacd37", "content": [{"annotations": [], "text": "Down || Down || Down || Down || Down || Left", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": 1.0, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving."}], "top_p": 1.0, "background": false, "conversation": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "prompt_cache_key": null, "reasoning": {"effort": null, "generate_summary": null, "summary": null}, "safety_identifier": null, "service_tier": "default", "status": "completed", "text": {"format": {"type": "text"}, "verbosity": "medium"}, "top_logprobs": 0, "truncation": "disabled", "usage": {"input_tokens": 1911, "input_tokens_details": {"cached_tokens": 1408}, "output_tokens": 19, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 1930}, "user": null, "billing": {"payer": "developer"}, "completed_at": 1768252907, "frequency_penalty": 0.0, "presence_penalty": 0.0, "prompt_cache_retention": null, "store": true}, "reward": -0.6, "success": false}
+{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... summarizing all moves you made.", "role": "developer", "type": "message"}, {"content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible.", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving."}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_0ab7221f9fb73bd200696565ec54088197b9f83d678139f026", "created_at": 1768252908.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "gpt-4o-2024-08-06", "object": "response", "output": [{"arguments": "{\"actions\":[]}", "call_id": "call_fobyRyvvP30QTkyCq2FOdcXr", "name": "step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565e4bfa88197aa8aa2203c6f21d1", "status": "completed"}, {"call_id": "call_fobyRyvvP30QTkyCq2FOdcXr", "output": "{\"observation\":\"__XX\\n____\\n____\\n____\",\"reward\":0.0,\"total_reward\":0.0,\"done\":false,\"steps\":[],\"history\":[]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_0Je4QPq9mXY39BORQxE7D8OR", "name": "step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565e6640881979fea8f9521117bcd", "status": "completed"}, {"call_id": "call_0Je4QPq9mXY39BORQxE7D8OR", "output": "{\"observation\":\"____\\n__XX\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.1,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_Gxat5KDZc8yUZwxnBoMWOhul", "name": "step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565e713cc8197865d18096bbb121f", "status": "completed"}, {"call_id": "call_Gxat5KDZc8yUZwxnBoMWOhul", "output": "{\"observation\":\"____\\n____\\n__XX\\n____\",\"reward\":-0.1,\"total_reward\":-0.2,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_P8BpK5nYCCcTwxF0TqeQ3AKi", "name": "step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565e80afc8197955bf941b8fe6c6e", "status": "completed"}, {"call_id": "call_P8BpK5nYCCcTwxF0TqeQ3AKi", "output": "{\"observation\":\"__X_\\n____\\n____\\n__##\",\"reward\":-0.1,\"total_reward\":-0.30000000000000004,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[]}", "call_id": "call_CANNEhbdjIMwbhkVMRU906i5", "name": "step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565e9c2548197916632847ec8a644", "status": "completed"}, {"call_id": "call_CANNEhbdjIMwbhkVMRU906i5", "output": "{\"observation\":\"__X_\\n____\\n____\\n__##\",\"reward\":0.0,\"total_reward\":-0.30000000000000004,\"done\":false,\"steps\":[],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_bnjsIqzspnvRyq8MtsEt3vxS", "name": "step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565eabcc08197be40e1b0fc1e021f", "status": "completed"}, {"call_id": "call_bnjsIqzspnvRyq8MtsEt3vxS", "output": "{\"observation\":\"__X_\\n__X_\\n____\\n__##\",\"reward\":-0.1,\"total_reward\":-0.4,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_V3is9Vdy5aJlTpknfBqWp1aN", "name": "step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565ebd490819790fc411b3af2fc6a", "status": "completed"}, {"call_id": "call_V3is9Vdy5aJlTpknfBqWp1aN", "output": "{\"observation\":\"__XX\\n__#_\\n__#_\\n__##\",\"reward\":-0.1,\"total_reward\":-0.5,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"id": "msg_0ab7221f9fb73bd200696565ecc7b481978ebd3a1369d5396b", "content": [{"annotations": [], "text": "Down || Down || Down || Down || Down", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": 1.0, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving."}], "top_p": 1.0, "background": false, "conversation": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "prompt_cache_key": null, "reasoning": {"effort": null, "generate_summary": null, "summary": null}, "safety_identifier": null, "service_tier": "default", "status": "completed", "text": {"format": {"type": "text"}, "verbosity": "medium"}, "top_logprobs": 0, "truncation": "disabled", "usage": {"input_tokens": 1724, "input_tokens_details": {"cached_tokens": 1280}, "output_tokens": 17, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 1741}, "user": null, "billing": {"payer": "developer"}, "completed_at": 1768252908, "frequency_penalty": 0.0, "presence_penalty": 0.0, "prompt_cache_retention": null, "store": true}, "reward": -0.5, "success": false}
diff --git a/resources_servers/grl_tetris/generate_test_examples.py b/resources_servers/grl_tetris/generate_test_examples.py
new file mode 100755
index 000000000..ac46d2d1f
--- /dev/null
+++ b/resources_servers/grl_tetris/generate_test_examples.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python3
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Generate test examples for GRL Tetris environment."""
+
+import json
+import random
+from pathlib import Path
+from typing import Any, Dict, List
+
+
+def generate_tetris_example(game_id: int, seed: int, dim_board: List[int], box_type: int) -> Dict[str, Any]:
+ """Generate a single Tetris test example in the expected JSONL format.
+
+ Args:
+ game_id: Unique identifier for the game
+ seed: Random seed for reproducible game generation
+ dim_board: Board dimensions as [width, height]
+ box_type: Type of Tetris pieces (0=single, 1=single, 2=I and -, 3=I, -, and O)
+
+ Returns:
+ Dictionary containing the game configuration and prompt
+ """
+ return {
+ "game_id": game_id,
+ "seed": seed,
+ "dim_board": dim_board,
+ "box_type": box_type,
+ "responses_create_params": {
+ "input": [
+ {
+ "role": "developer",
+ "content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with Action1 || Action2 || ... summarizing all moves you made.",
+ },
+ {
+ "role": "user",
+ "content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible.",
+ },
+ ],
+ "tools": [
+ {
+ "name": "step",
+ "type": "function",
+ "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving.",
+ "strict": True,
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "actions": {
+ "type": "array",
+ "items": {"type": "string"},
+ "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state.",
+ }
+ },
+ "required": ["actions"],
+ "additionalProperties": False,
+ },
+ }
+ ],
+ },
+ }
+
+
+def generate_test_examples(num_examples: int = 500, output_file: str = "data/test_examples.jsonl") -> None:
+ """Generate diverse test examples for Tetris environment.
+
+ Args:
+ num_examples: Number of examples to generate (default: 500)
+ output_file: Output JSONL file path
+ """
+ examples = []
+
+ # Define parameter ranges for diversity
+ board_sizes = [
+ [4, 4], # Small square
+ [5, 5], # Medium square
+ [6, 6], # Large square
+ [4, 6], # Narrow tall
+ [6, 4], # Wide short
+ [5, 6], # Medium tall
+ [6, 5], # Medium wide
+ [4, 5], # Small tall
+ [5, 4], # Small wide
+ ]
+
+ box_types = [0, 1, 2, 3] # All available piece types
+
+ # Generate diverse examples
+ for i in range(num_examples):
+ game_id = i + 1
+
+ # Use game_id as base for seed to ensure reproducibility but diversity
+ seed = random.randint(10000, 99999) + i * 137 # Prime offset for better distribution
+
+ # Cycle through board sizes with some randomness
+ dim_board = random.choice(board_sizes)
+
+ # Distribute box types evenly but with some randomness
+ box_type = random.choice(box_types)
+
+ example = generate_tetris_example(game_id, seed, dim_board, box_type)
+ examples.append(example)
+
+ # Write to JSONL file
+ output_path = Path(__file__).parent / output_file
+ output_path.parent.mkdir(parents=True, exist_ok=True)
+
+ with open(output_path, "w") as f:
+ for example in examples:
+ f.write(json.dumps(example) + "\n")
+
+ print(f"Generated {num_examples} test examples")
+ print(f"Output file: {output_path}")
+ print("\nParameter distribution:")
+ print(f" Board sizes: {set(tuple(e['dim_board']) for e in examples)}")
+ print(f" Box types: {set(e['box_type'] for e in examples)}")
+ print(f" Seed range: {min(e['seed'] for e in examples)} - {max(e['seed'] for e in examples)}")
+
+
+if __name__ == "__main__":
+ import argparse
+
+ parser = argparse.ArgumentParser(description="Generate Tetris test examples")
+ parser.add_argument("--num-examples", type=int, default=500, help="Number of examples to generate (default: 500)")
+ parser.add_argument(
+ "--output",
+ type=str,
+ default="data/test_examples.jsonl",
+ help="Output JSONL file path (default: data/test_examples.jsonl)",
+ )
+ parser.add_argument("--seed", type=int, default=42, help="Random seed for generation (default: 42)")
+
+ args = parser.parse_args()
+
+ # Set random seed for reproducibility
+ random.seed(args.seed)
+
+ generate_test_examples(args.num_examples, args.output)
diff --git a/resources_servers/grl_tetris/requirements.txt b/resources_servers/grl_tetris/requirements.txt
new file mode 100644
index 000000000..1c193ea4b
--- /dev/null
+++ b/resources_servers/grl_tetris/requirements.txt
@@ -0,0 +1,3 @@
+-e nemo-gym[dev] @ ../../
+gymnasium==1.1.1
+numpy>=1.26
diff --git a/resources_servers/grl_tetris/tests/test_app.py b/resources_servers/grl_tetris/tests/test_app.py
new file mode 100644
index 000000000..38150cae8
--- /dev/null
+++ b/resources_servers/grl_tetris/tests/test_app.py
@@ -0,0 +1,253 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from unittest.mock import MagicMock, patch
+
+from fastapi.testclient import TestClient
+
+from nemo_gym.openai_utils import (
+ NeMoGymResponse,
+ NeMoGymResponseCreateParamsNonStreaming,
+)
+from nemo_gym.server_utils import ServerClient
+from resources_servers.grl_tetris.app import (
+ GrlTetrisResourcesServer,
+ GrlTetrisResourcesServerConfig,
+)
+
+
+_VERIFY_CREATE_PARAMS = NeMoGymResponseCreateParamsNonStreaming(
+ input="placeholder",
+)
+
+_VERIFY_RESPONSE = NeMoGymResponse.model_construct(
+ id="resp_test",
+ object="response",
+ created_at=0.0,
+ status="completed",
+ output=[],
+ model="gpt-4.1",
+ parallel_tool_calls=True,
+ tool_choice="auto",
+ tools=[],
+)
+
+
+def _verify_payload() -> dict:
+ return {
+ "responses_create_params": _VERIFY_CREATE_PARAMS.model_dump(mode="json"),
+ "response": _VERIFY_RESPONSE.model_dump(mode="json"),
+ }
+
+
+class TestApp:
+ def test_sanity(self) -> None:
+ config = GrlTetrisResourcesServerConfig(
+ host="0.0.0.0",
+ port=8080,
+ entrypoint="",
+ name="",
+ )
+ GrlTetrisResourcesServer(config=config, server_client=MagicMock(spec=ServerClient))
+
+ def test_seed_and_step_flow(self) -> None:
+ config = GrlTetrisResourcesServerConfig(
+ host="0.0.0.0",
+ port=8080,
+ entrypoint="",
+ name="",
+ )
+ server = GrlTetrisResourcesServer(
+ config=config,
+ server_client=MagicMock(spec=ServerClient),
+ )
+
+ class FakeEnv:
+ ACTION_LOOKUP = {0: "Left"}
+
+ def __init__(self, *_args, **_kwargs) -> None:
+ self._closed = False
+ self.step_calls = 0
+
+ def reset(self, seed=None): # noqa: ARG002
+ return "Initial observation"
+
+ def step(self, action):
+ self.step_calls += 1
+ assert action == 0
+ reward = 1.0
+ done = self.step_calls >= 1
+ info = {"success": done}
+ return "Next observation", reward, done, info
+
+ def close(self):
+ self._closed = True
+
+ fake_env = FakeEnv()
+ with patch("resources_servers.grl_tetris.app.TetrisEnv", return_value=fake_env):
+ app = server.setup_webserver()
+ client = TestClient(app)
+
+ response = client.post("/seed_session", json={"seed": 123})
+ assert response.status_code == 200
+ assert response.json()["observation"] == "Initial observation"
+
+ cookies = response.cookies
+ response = client.post("/step", json={"actions": ["Left"]}, cookies=cookies)
+ payload = response.json()
+ assert response.status_code == 200
+ assert payload["observation"] == "Next observation"
+ assert payload["reward"] == 1.0
+ assert payload["done"] is True
+ assert payload["steps"][0]["action_label"] == "Left"
+ assert fake_env.step_calls == 1
+
+ response = client.post("/verify", json=_verify_payload(), cookies=cookies)
+ assert response.status_code == 200
+ payload = response.json()
+ assert payload["success"] is True
+ assert payload["reward"] == 1.0
+ assert fake_env._closed is True
+
+ def test_step_action_mapping_stops_after_done(self) -> None:
+ config = GrlTetrisResourcesServerConfig(
+ host="0.0.0.0",
+ port=8080,
+ entrypoint="",
+ name="",
+ )
+ server = GrlTetrisResourcesServer(
+ config=config,
+ server_client=MagicMock(spec=ServerClient),
+ )
+
+ class FakeEnv:
+ ACTION_LOOKUP = {0: "Left", 1: "Right"}
+
+ def __init__(self, *_args, **_kwargs) -> None:
+ self.calls = 0
+ self.closed = False
+
+ def reset(self, seed=None): # noqa: ARG002
+ return "Init"
+
+ def step(self, action):
+ self.calls += 1
+ if self.calls == 1:
+ assert action == 0
+ return "Obs1", 0.5, True, {"success": True}
+ raise AssertionError("Env.step should not be called after done")
+
+ def close(self):
+ self.closed = True
+
+ fake_env = FakeEnv()
+ with patch("resources_servers.grl_tetris.app.TetrisEnv", return_value=fake_env):
+ app = server.setup_webserver()
+ client = TestClient(app)
+
+ seed_resp = client.post("/seed_session", json={})
+ cookies = seed_resp.cookies
+ resp = client.post("/step", json={"actions": ["Left", "Right"]}, cookies=cookies)
+ payload = resp.json()
+ assert resp.status_code == 200
+ assert payload["done"] is True
+ assert payload["steps"][0]["action_label"] == "Left"
+ assert len(payload["steps"]) == 1
+ assert len(payload["history"]) == 1
+ assert fake_env.calls == 1
+
+ def test_step_invalid_action_raises(self) -> None:
+ config = GrlTetrisResourcesServerConfig(
+ host="0.0.0.0",
+ port=8080,
+ entrypoint="",
+ name="",
+ )
+ server = GrlTetrisResourcesServer(
+ config=config,
+ server_client=MagicMock(spec=ServerClient),
+ )
+
+ class FakeEnv:
+ ACTION_LOOKUP = {0: "Left"}
+
+ def __init__(self, *_args, **_kwargs) -> None:
+ pass
+
+ def reset(self, seed=None): # noqa: ARG002
+ return "Init"
+
+ def step(self, action): # pragma: no cover - not reached
+ raise AssertionError("Should not call step for invalid action")
+
+ def close(self):
+ pass
+
+ with patch("resources_servers.grl_tetris.app.TetrisEnv", return_value=FakeEnv()):
+ app = server.setup_webserver()
+ client = TestClient(app)
+
+ seed_resp = client.post("/seed_session", json={})
+ cookies = seed_resp.cookies
+ resp = client.post("/step", json={"actions": ["Rotate"]}, cookies=cookies)
+ assert resp.status_code == 400
+ assert resp.json()["detail"].startswith("Unable to parse action")
+
+ def test_verify_failure_zero_reward_and_cleanup(self) -> None:
+ config = GrlTetrisResourcesServerConfig(
+ host="0.0.0.0",
+ port=8080,
+ entrypoint="",
+ name="",
+ )
+ server = GrlTetrisResourcesServer(
+ config=config,
+ server_client=MagicMock(spec=ServerClient),
+ )
+
+ class FakeEnv:
+ ACTION_LOOKUP = {0: "Left"}
+
+ def __init__(self, *_args, **_kwargs) -> None:
+ self.closed = False
+
+ def reset(self, seed=None): # noqa: ARG002
+ return "Init"
+
+ def step(self, action):
+ return "Obs", 0.0, False, {"success": False}
+
+ def close(self):
+ self.closed = True
+
+ fake_env = FakeEnv()
+ with patch("resources_servers.grl_tetris.app.TetrisEnv", return_value=fake_env):
+ app = server.setup_webserver()
+ client = TestClient(app)
+
+ seed_resp = client.post("/seed_session", json={})
+ cookies = seed_resp.cookies
+ client.post("/step", json={"actions": [0]}, cookies=cookies)
+
+ verify_resp = client.post(
+ "/verify",
+ json=_verify_payload(),
+ cookies=cookies,
+ )
+ assert verify_resp.status_code == 200
+ payload = verify_resp.json()
+ assert payload["success"] is False
+ assert payload["reward"] == 0.0
+ assert fake_env.closed is True
+ assert server.session_id_to_state == {}
diff --git a/resources_servers/grl_tetris/tetris_env/__init__.py b/resources_servers/grl_tetris/tetris_env/__init__.py
new file mode 100644
index 000000000..271a95004
--- /dev/null
+++ b/resources_servers/grl_tetris/tetris_env/__init__.py
@@ -0,0 +1,6 @@
+"""Local Tetris environment implementation."""
+
+from .tetris_env import TetrisEnv
+
+
+__all__ = ["TetrisEnv"]
diff --git a/resources_servers/grl_tetris/tetris_env/tetris_env.py b/resources_servers/grl_tetris/tetris_env/tetris_env.py
new file mode 100644
index 000000000..d9ce8558b
--- /dev/null
+++ b/resources_servers/grl_tetris/tetris_env/tetris_env.py
@@ -0,0 +1,289 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Standalone Tetris environment implementation for the GRL Tetris resource server.
+
+This module adapts the environment logic from ``GRL/grl/agents/tetrisAgent/env.py``
+and removes dependencies on the upstream GRL repository so it can run entirely
+within the NeMo Gym project.
+"""
+
+from __future__ import annotations
+
+import copy
+import random
+from contextlib import contextmanager
+from typing import Any, Dict, Iterator, List, Tuple
+
+import gymnasium as gym
+import numpy as np
+
+
+# ─────────────────────────── utilities ────────────────────────────
+
+
+@contextmanager
+def all_seed(seed: int | None) -> Iterator[None]:
+ """Temporarily set ``random`` and ``numpy`` seeds within a context."""
+ random_state = random.getstate()
+ numpy_state = np.random.get_state()
+
+ try:
+ if seed is not None:
+ random.seed(seed)
+ np.random.seed(seed)
+ yield
+ finally:
+ random.setstate(random_state)
+ np.random.set_state(numpy_state)
+
+
+class BaseEnv:
+ """Minimal base class mirroring the GRL interface."""
+
+ def __init__(self, config: Dict[str, Any] | None = None, **_kwargs: Any) -> None:
+ self.config: Dict[str, Any] = config or {}
+
+ def reset(self, seed: int | None = None, **_kwargs: Any) -> Any:
+ raise NotImplementedError
+
+ def step(self, action: int) -> Tuple[Any, float, bool, Dict[str, Any]]:
+ raise NotImplementedError
+
+ def render(self, mode: str = "text") -> Any:
+ raise NotImplementedError
+
+ def close(self) -> None:
+ raise NotImplementedError
+
+
+def is_occupied(shape: List[Tuple[int, int]], anchor: Tuple[int, int], board: np.ndarray) -> bool:
+ """Return True when ``shape`` anchored at ``anchor`` collides with the board."""
+ for dx, dy in shape:
+ x, y = anchor[0] + dx, anchor[1] + dy
+ if y < 0:
+ continue
+ if x < 0 or x >= board.shape[0] or y >= board.shape[1] or board[x, y]:
+ return True
+ return False
+
+
+class TetrisEnv(BaseEnv):
+ """Self-contained Tetris environment modelled after the GRL training environment."""
+
+ def __init__(self, config: Dict[str, Any] | None = None, **_kwargs: Any) -> None:
+ super().__init__(config=config or {})
+ self.width = self.config.get("dim_x", 4)
+ self.height = self.config.get("dim_y", 4)
+ self.board = np.zeros((self.width, self.height), dtype=np.bool_)
+ self.GRID_LOOKUP = self.config.get("grid_lookup", {0: "_", 1: "#", 2: "X"})
+ self.ACTION_LOOKUP = self.config.get(
+ "action_lookup",
+ {0: "Left", 1: "Right", 2: "Down"},
+ )
+ self.ACTION_SPACE = gym.spaces.Discrete(3, start=0)
+ self.render_mode = self.config.get("render_mode", "text")
+
+ box_type = self.config.get("box_type", 1)
+ if box_type == 2:
+ self.shapes = {
+ "I": [(0, 0), (0, -1)],
+ "-": [(0, 0), (-1, 0)],
+ }
+ self.shape_names = ["I", "-"]
+ elif box_type == 3:
+ self.shapes = {
+ "I": [(0, 0), (0, -1)],
+ "-": [(0, 0), (-1, 0)],
+ "O": [(0, 0), (-1, 0), (0, -1), (-1, -1)],
+ }
+ self.shape_names = ["I", "-", "O"]
+ else:
+ self.shapes = {"O": [(0, 0)]}
+ self.shape_names = ["O"]
+
+ self.actions = {0: self._left, 1: self._right, 2: self._soft_drop}
+
+ self.time = 0
+ self.score = 0
+ self.anchor: Tuple[int, int] | None = None
+ self.shape: List[Tuple[int, int]] | None = None
+ self.n_deaths = 0
+ self._shape_counts = [0] * len(self.shapes)
+
+ self.pre_generated_pieces: List[Tuple[Tuple[int, int], List[Tuple[int, int]]]] = []
+ self.current_piece_index = 0
+
+ self.reset()
+
+ # ─────────────────────────── core helpers ────────────────────────────
+
+ def _choose_shape(self) -> List[Tuple[int, int]]:
+ max_count = max(self._shape_counts)
+ weights = [5 + max_count - count for count in self._shape_counts]
+ r = random.randint(1, sum(weights))
+ for i, weight in enumerate(weights):
+ r -= weight
+ if r <= 0:
+ self._shape_counts[i] += 1
+ return self.shapes[self.shape_names[i]]
+ return self.shapes[self.shape_names[0]]
+
+ def _generate_piece(self) -> Tuple[Tuple[int, int], List[Tuple[int, int]]]:
+ shape = self._choose_shape()
+ if (-1, 0) in shape:
+ anchor = (random.randint(1, self.width - 1), 0)
+ else:
+ anchor = (random.randint(0, self.width - 1), 0)
+ return anchor, shape
+
+ def _new_piece(self) -> None:
+ if self.current_piece_index < len(self.pre_generated_pieces):
+ self.anchor, self.shape = self.pre_generated_pieces[self.current_piece_index]
+ self.current_piece_index += 1
+ else:
+ self.anchor, self.shape = self._generate_piece()
+
+ def _has_dropped(self) -> bool:
+ assert self.shape is not None and self.anchor is not None
+ return is_occupied(self.shape, (self.anchor[0], self.anchor[1] + 1), self.board)
+
+ def _clear_lines(self) -> int:
+ can_clear = [np.all(self.board[:, i]) for i in range(self.height)]
+ new_board = np.zeros_like(self.board)
+ write_idx = self.height - 1
+ for i in range(self.height - 1, -1, -1):
+ if not can_clear[i]:
+ new_board[:, write_idx] = self.board[:, i]
+ write_idx -= 1
+ lines_cleared = sum(can_clear)
+ self.score += lines_cleared
+ self.board = new_board
+ return lines_cleared
+
+ def _set_piece(self, on: bool = False) -> None:
+ assert self.shape is not None and self.anchor is not None
+ for dx, dy in self.shape:
+ x, y = self.anchor[0] + dx, self.anchor[1] + dy
+ if 0 <= x < self.width and 0 <= y < self.height:
+ self.board[x, y] = on
+
+ def _left(self) -> None:
+ assert self.shape is not None and self.anchor is not None
+ new_anchor = (self.anchor[0] - 1, self.anchor[1])
+ if not is_occupied(self.shape, new_anchor, self.board):
+ self.anchor = new_anchor
+
+ def _right(self) -> None:
+ assert self.shape is not None and self.anchor is not None
+ new_anchor = (self.anchor[0] + 1, self.anchor[1])
+ if not is_occupied(self.shape, new_anchor, self.board):
+ self.anchor = new_anchor
+
+ def _soft_drop(self) -> None:
+ assert self.shape is not None and self.anchor is not None
+ new_anchor = (self.anchor[0], self.anchor[1] + 1)
+ if not is_occupied(self.shape, new_anchor, self.board):
+ self.anchor = new_anchor
+
+ def _idle(self) -> None:
+ pass
+
+ # ───────────────────────────── API ──────────────────────────────
+
+ def reset(self, seed: int | None = None, **_kwargs: Any) -> Any:
+ """Reset the environment to its initial state."""
+ try:
+ with all_seed(seed):
+ self.time = 0
+ self.score = 0
+ self.board = np.zeros((self.width, self.height), dtype=np.bool_)
+
+ self.pre_generated_pieces = []
+ self.current_piece_index = 0
+ num_pieces_to_generate = self.width * self.height + 1
+ for _ in range(num_pieces_to_generate):
+ self.pre_generated_pieces.append(self._generate_piece())
+
+ self._new_piece()
+ return self.render()
+ except (RuntimeError, RuntimeWarning):
+ next_seed = abs(hash(str(seed))) % (2**32) if seed is not None else None
+ return self.reset(next_seed)
+
+ def step(self, action: int) -> Tuple[Any, float, bool, Dict[str, Any]]:
+ if action not in self.actions:
+ return self.render(), 0.0, True, {"error": "Invalid action"}
+
+ previous_pos = copy.deepcopy(self.anchor)
+ self.actions[action]()
+
+ self.time += 1
+ reward = -0.1
+ done = False
+ dropped = False
+ info: Dict[str, Any] = {}
+
+ lines_cleared = 0
+ if self._has_dropped():
+ dropped = True
+ self._set_piece(True)
+ lines_cleared = self._clear_lines()
+ reward += lines_cleared * 10
+
+ if np.any(self.board[:, 0]):
+ done = True
+ else:
+ self._new_piece()
+
+ self._set_piece(True)
+ state = self.render()
+ self._set_piece(False)
+
+ action_effective = previous_pos is not None and previous_pos != self.anchor
+ info["action_is_effective"] = action_effective
+ info["action_is_valid"] = True
+ info["success"] = lines_cleared > 0
+ info["dropped"] = dropped
+
+ if lines_cleared > 0:
+ done = True
+
+ return state, reward, done, info
+
+ def render(self, mode: str = "text") -> Any:
+ if mode != "text":
+ return self.board.copy()
+
+ board_str = "\n".join("".join("#" if cell else "_" for cell in row) for row in self.board.T)
+
+ self._set_piece(True)
+ assert self.shape is not None and self.anchor is not None
+ positions = [(self.anchor[0] + dx, self.anchor[1] + dy) for dx, dy in self.shape]
+ self._set_piece(False)
+
+ lines = board_str.split("\n")
+ for x, y in positions:
+ if 0 <= y < len(lines) and 0 <= x < len(lines[0]):
+ line = list(lines[y])
+ line[x] = "X"
+ lines[y] = "".join(line)
+ return "\n".join(lines)
+
+ def get_all_actions(self) -> List[int]:
+ return list(self.actions.keys())
+
+ def close(self) -> None:
+ self.board = None
+ self.anchor = None
+ self.shape = None