diff --git a/resources_servers/grl_tetris/README.md b/resources_servers/grl_tetris/README.md
new file mode 100644
index 000000000..14321d429
--- /dev/null
+++ b/resources_servers/grl_tetris/README.md
@@ -0,0 +1,70 @@
+# GRL Tetris Resource Server
+
+FastAPI-based resource server that exposes the GRL Tetris environment through NeMo Gym conventions. The environment logic lives under `resources_servers/grl_tetris/tetris_env` and is a standalone adaptation of the upstream GRL implementation.
+
+## Why it exists
+- **Domain**: Classic falling-block Tetris on a configurable grid.
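+- **Evaluation**: Agents must clear at least one line. `/verify` returns the cumulative episode reward together with a `success` flag taken from the environment's last step info; the reward is returned whether or not the episode succeeded (unsuccessful rollouts therefore typically end with a negative reward).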
+- **Independence**: No runtime dependency on the GRL repository—the environment is vendored and self-contained.
+
+## Setup
+
+Please follow the setup instructions as outlined in: https://github.com/NVIDIA-NeMo/Gym/blob/main/docs/tutorials/02-setup.md#step-1-clone-and-install.
+
+## Running
+Spin up the server alongside a compatible agent:
+```bash
+config_paths="responses_api_models/openai_model/configs/openai_model.yaml,\
+resources_servers/grl_tetris/configs/grl_tetris.yaml"
+ng_run "+config_paths=[$config_paths]"
+```
+
+Collect trajectories:
+```bash
+ng_collect_rollouts +agent_name=grl_tetris_simple_agent \
+    +input_jsonl_fpath=resources_servers/grl_tetris/data/example.jsonl \
+    +output_jsonl_fpath=resources_servers/grl_tetris/data/example_rollouts.jsonl \
+    +limit=5
+```
+
+Launch the rollout viewer:
+```bash
+ng_viewer +jsonl_fpath=resources_servers/grl_tetris/data/example_rollouts.jsonl
+```
+
+## Tests
+```bash
+pytest resources_servers/grl_tetris/tests
+```
+
+## Licensing
+- Code: Apache 2.0
+- Data: Apache 2.0
+
+---
+
+## Reward Profiling Results
+
+### Qwen3-4B
+
+**Dataset**: 3,200 rollouts (200 prompts × 16 repeats)
+
+**Performance Metrics**:
+- **Success Rate**: 5.09% (163/3,200 rollouts)
+- **Mean Reward**: -0.29 (range: -2.00 to 19.20)
+- **Median Reward**: -0.80
+
+**Key Findings**:
+- The most common outcome (21% of rollouts) was a reward of -0.90 (piece dropped without clearing lines)
+- Successful line clears achieved rewards of ~9.0-9.2
+- Average 7.48 tool calls per rollout
+- Weak negative correlation between tool calls and reward (-0.06)
+
+**Top Reward Distribution**:
+- `-0.9`: 672 rollouts (21.0%) - piece dropped, no line clear
+- `-0.8`: 603 rollouts (18.8%)
+- `-0.7`: 495 rollouts (15.5%)
+- `9.1`: 29 rollouts (0.9%) - successful line clear
+- `8.9`: 26 rollouts (0.8%)
+
+The relatively low success rate (5.09%) suggests that Tetris line-clearing is challenging for the model, requiring precise spatial reasoning and action sequencing. Most rollouts end with pieces dropping without clearing any line, so their reward consists only of the accumulated -0.1 per-step penalties and is negative.
diff --git a/resources_servers/grl_tetris/app.py b/resources_servers/grl_tetris/app.py
new file mode 100644
index 000000000..12d2bb01b
--- /dev/null
+++ b/resources_servers/grl_tetris/app.py
@@ -0,0 +1,226 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Union
+
+import numpy as np
+from fastapi import FastAPI, HTTPException, Request
+from pydantic import BaseModel, Field
+
+from nemo_gym.base_resources_server import (
+    BaseResourcesServerConfig,
+    BaseSeedSessionRequest,
+    BaseSeedSessionResponse,
+    BaseVerifyRequest,
+    BaseVerifyResponse,
+    SimpleResourcesServer,
+)
+from nemo_gym.server_utils import SESSION_ID_KEY, ServerClient
+from resources_servers.grl_tetris.tetris_env import TetrisEnv
+
+
+# Symbols used when rendering the board as text. The example prompts describe
+# '_' as an empty cell, '#' as a locked block and 'X' as the active piece.
+DEFAULT_GRID_LOOKUP = {
+    0: "_",
+    1: "#",
+    2: "X",
+}
+# Human-readable label for each integer action id accepted by the server.
+DEFAULT_ACTION_LOOKUP = {
+    0: "Left",
+    1: "Right",
+    2: "Down",
+}
+
+
+class GrlTetrisResourcesServerConfig(BaseResourcesServerConfig):
+    """Server configuration carrying the settings handed to ``TetrisEnv``.
+
+    ``env_config`` is passed verbatim to the environment constructor for every
+    new session. When not supplied it defaults to a 4x4 text-rendered board.
+    """
+
+    env_config: Dict[str, Any] = Field(
+        default_factory=lambda: {
+            # Copy the lookup tables: handing out the module-level dicts
+            # themselves would let a mutation made through one config (or one
+            # environment) silently change the defaults of every other one.
+            "grid_lookup": dict(DEFAULT_GRID_LOOKUP),
+            "action_lookup": dict(DEFAULT_ACTION_LOOKUP),
+            "render_mode": "text",
+            "dim_x": 4,
+            "dim_y": 4,
+            "box_type": 3,
+        }
+    )
+
+
+class GrlTetrisSeedSessionRequest(BaseSeedSessionRequest):
+    """Request body for seeding a session."""
+
+    # Forwarded unchanged to ``env.reset(seed=...)``; ``None`` is passed through as-is.
+    seed: Optional[int] = None
+
+
+class GrlTetrisSeedSessionResponse(BaseSeedSessionResponse):
+    """Response returned after a session has been seeded."""
+
+    # The value returned by ``env.reset`` for the freshly created environment.
+    observation: str
+
+
+class GrlTetrisStepRequest(BaseModel):
+    """Request body for ``POST /step``."""
+
+    # Actions to apply in order. Each entry is an action label (matched
+    # case-insensitively after stripping whitespace), a numeric string, or an
+    # integer action id. An empty list steps nothing and just reports the
+    # current state.
+    actions: List[Union[str, int]] = Field(default_factory=list)
+
+
+class GrlTetrisStepTrace(BaseModel):
+    """Record of a single environment step taken on behalf of a session."""
+
+    # Integer id of the action that was executed.
+    action_id: int
+    # Label of that action as found in the environment's ``ACTION_LOOKUP``.
+    action_label: str
+    # Reward returned by the environment for this step alone.
+    reward: float
+    # Whether the environment reported the episode as finished after this step.
+    done: bool
+    # The environment's info dict for this step, with NumPy scalars converted
+    # to plain Python values.
+    info: Dict[str, Any]
+
+
+class GrlTetrisStepResponse(BaseModel):
+    """Response body for ``POST /step``."""
+
+    # Latest observation stored for the session.
+    observation: str
+    # Sum of the rewards of the steps executed by this request (0.0 if none ran).
+    reward: float
+    # Reward accumulated over the whole session so far.
+    total_reward: float
+    # Whether the session's episode has finished.
+    done: bool
+    # Traces of the steps executed by this request only.
+    steps: List[GrlTetrisStepTrace]
+    # Traces of every step executed in the session, including this request's.
+    history: List[GrlTetrisStepTrace] = Field(default_factory=list)
+
+
+class GrlTetrisVerifyResponse(BaseVerifyResponse):
+    """Verification result: the base verify response plus a success flag."""
+
+    # Truthiness of the ``"success"`` entry in the session's last step info;
+    # ``False`` when the session is unknown or never stepped.
+    success: bool
+
+
+@dataclass
+class TetrisSessionState:
+    """Mutable per-session bookkeeping kept by the resource server."""
+
+    # The environment instance owned by this session.
+    env: Any
+    # Most recent observation (initially the one returned by ``env.reset``).
+    observation: str
+    # Running sum of all step rewards in this session.
+    total_reward: float = 0.0
+    # Set once the environment reports the episode as finished.
+    done: bool = False
+    # Info dict of the most recent step; empty until the first step is taken.
+    last_info: Dict[str, Any] = field(default_factory=dict)
+    # Trace of every step executed in this session, in order.
+    history: List[GrlTetrisStepTrace] = field(default_factory=list)
+
+
+class GrlTetrisResourcesServer(SimpleResourcesServer):
+    """Resource server that hosts one Tetris environment per session.
+
+    A session is created by ``seed_session``, advanced by ``step`` (exposed as
+    ``POST /step``) and scored and released by ``verify``.
+    """
+
+    config: GrlTetrisResourcesServerConfig
+    server_client: ServerClient
+    # Live sessions keyed by the session id stored in ``request.session``.
+    session_id_to_state: Dict[str, TetrisSessionState] = Field(default_factory=dict)
+
+    def setup_webserver(self) -> FastAPI:
+        """Build the base application and register the extra ``POST /step`` route."""
+        app = super().setup_webserver()
+        app.post("/step")(self.step)
+        return app
+
+    def _create_env(self) -> TetrisEnv:
+        """Instantiate a new environment from the configured ``env_config``."""
+        return TetrisEnv(self.config.env_config)
+
+    async def seed_session(self, request: Request, body: GrlTetrisSeedSessionRequest) -> GrlTetrisSeedSessionResponse:
+        """Create and reset a fresh environment for the caller's session.
+
+        Seeding a session id that already has state replaces that state; the
+        previous environment is closed (best effort) once the new one is in
+        place, so it is not left dangling.
+        """
+        session_id = request.session[SESSION_ID_KEY]
+        env = self._create_env()
+        observation = env.reset(seed=body.seed)
+
+        # Only swap after the new environment was built and reset successfully,
+        # so a failure above leaves any existing session untouched.
+        previous = self.session_id_to_state.get(session_id)
+        self.session_id_to_state[session_id] = TetrisSessionState(
+            env=env,
+            observation=observation,
+        )
+        if previous is not None:
+            self._close_env(previous.env)
+        return GrlTetrisSeedSessionResponse(observation=observation)
+
+    async def step(self, request: Request, body: GrlTetrisStepRequest) -> GrlTetrisStepResponse:
+        """Apply the requested actions, in order, to the session's environment.
+
+        The whole batch is parsed and validated before the environment is
+        touched, so a request containing an unparseable or unknown action is
+        rejected without partially advancing the game. Execution stops early
+        once the environment reports the episode as done. An empty action list
+        returns the current state unchanged.
+
+        Raises:
+            HTTPException: 400 if the session was never seeded, or if any
+                action cannot be parsed or is not a known action id.
+        """
+        session_id = request.session.get(SESSION_ID_KEY)
+        session_state = None if session_id is None else self.session_id_to_state.get(session_id)
+        if session_state is None:
+            raise HTTPException(status_code=400, detail="Session not initialized. Call /seed_session first.")
+
+        # A finished episode ignores further actions and just reports its final state.
+        if session_state.done:
+            return GrlTetrisStepResponse(
+                observation=session_state.observation,
+                reward=0.0,
+                total_reward=session_state.total_reward,
+                done=True,
+                steps=[],
+                history=list(session_state.history),
+            )
+
+        env = session_state.env
+        # Lower-cased label -> action id, for case-insensitive label matching.
+        reverse_lookup = {label.lower(): idx for idx, label in env.ACTION_LOOKUP.items()}
+
+        # Validate everything up front: raising in the middle of the batch would
+        # return an error while earlier actions had already changed the session.
+        action_ids: List[int] = []
+        for action in body.actions:
+            action_id = self._parse_action(action, reverse_lookup)
+            if action_id not in env.ACTION_LOOKUP:
+                raise HTTPException(status_code=400, detail=f"Invalid action identifier: {action}")
+            action_ids.append(action_id)
+
+        total_step_reward = 0.0
+        steps: List[GrlTetrisStepTrace] = []
+        for action_id in action_ids:
+            next_obs, reward, done, info = env.step(action_id)
+            # Convert NumPy values so the info dict can be serialised in the response.
+            info = self._to_python_types(info)
+            total_step_reward += reward
+            session_state.total_reward += reward
+            session_state.observation = next_obs
+            session_state.last_info = info
+            session_state.done = bool(done)
+
+            step = GrlTetrisStepTrace(
+                action_id=action_id,
+                action_label=env.ACTION_LOOKUP[action_id],
+                reward=reward,
+                done=session_state.done,
+                info=info,
+            )
+            session_state.history.append(step)
+            steps.append(step)
+
+            if session_state.done:
+                break
+
+        return GrlTetrisStepResponse(
+            observation=session_state.observation,
+            reward=total_step_reward,
+            total_reward=session_state.total_reward,
+            done=session_state.done,
+            steps=steps,
+            history=list(session_state.history),
+        )
+
+    async def verify(self, request: Request, body: BaseVerifyRequest) -> GrlTetrisVerifyResponse:
+        """Report the session's cumulative reward and success flag, then release it.
+
+        The reward is the session's ``total_reward`` regardless of success;
+        ``success`` reflects the ``"success"`` entry of the last step's info.
+        An unknown session yields ``reward=0.0`` and ``success=False``. The
+        session state is removed, so a second call for the same session sees
+        it as unknown.
+        """
+        session_id = request.session.get(SESSION_ID_KEY)
+        # pop() fetches and forgets the session in one step; a missing or None id yields None.
+        session_state = self.session_id_to_state.pop(session_id, None)
+
+        success = False
+        reward = 0.0
+        if session_state is not None:
+            success = bool(session_state.last_info.get("success"))
+            reward = session_state.total_reward
+            self._close_env(session_state.env)
+
+        return GrlTetrisVerifyResponse(
+            **body.model_dump(),
+            reward=reward,
+            success=success,
+        )
+
+    @staticmethod
+    def _close_env(env: Any) -> None:
+        """Close an environment on a best-effort basis.
+
+        Cleanup problems are deliberately swallowed: they must not turn an
+        otherwise successful request into an error.
+        """
+        try:
+            env.close()
+        except Exception:  # pragma: no cover - defensive cleanup
+            pass
+
+    @staticmethod
+    def _parse_action(action: Union[str, int], reverse_lookup: Dict[str, int]) -> int:
+        """Translate a client-supplied action into an integer action id.
+
+        Integers are returned as-is. Strings are stripped and matched
+        case-insensitively against ``reverse_lookup``; otherwise they are
+        parsed as a base-10 integer. The result is not range-checked here.
+
+        Raises:
+            HTTPException: 400 if a string is neither a known label nor an integer.
+        """
+        if isinstance(action, int):
+            return action
+
+        candidate = action.strip()
+        lower_candidate = candidate.lower()
+        if lower_candidate in reverse_lookup:
+            return reverse_lookup[lower_candidate]
+
+        try:
+            return int(candidate)
+        except ValueError as exc:
+            raise HTTPException(status_code=400, detail=f"Unable to parse action: {action}") from exc
+
+    @staticmethod
+    def _to_python_types(obj: Any) -> Any:
+        """Recursively replace NumPy values with plain Python equivalents.
+
+        Dicts, lists and tuples are rebuilt with converted members, NumPy
+        arrays become nested lists, and NumPy scalars become Python scalars.
+        Anything else is returned unchanged.
+        """
+        convert = GrlTetrisResourcesServer._to_python_types
+        if isinstance(obj, dict):
+            return {k: convert(v) for k, v in obj.items()}
+        if isinstance(obj, list):
+            return [convert(v) for v in obj]
+        if isinstance(obj, tuple):
+            return tuple(convert(v) for v in obj)
+        if isinstance(obj, np.ndarray):
+            # tolist() converts the elements to Python scalars as well.
+            return obj.tolist()
+        if isinstance(obj, np.generic):
+            return obj.item()
+        return obj
+
+
+# Script entry point: launch the server through the class-level run_webserver() call.
+if __name__ == "__main__":
+    GrlTetrisResourcesServer.run_webserver()
diff --git a/resources_servers/grl_tetris/configs/grl_tetris.yaml b/resources_servers/grl_tetris/configs/grl_tetris.yaml
new file mode 100644
index 000000000..80e8bfe75
--- /dev/null
+++ b/resources_servers/grl_tetris/configs/grl_tetris.yaml
@@ -0,0 +1,27 @@
+grl_tetris_resources_server:
+  resources_servers:
+    grl_tetris:
+      entrypoint: app.py
+      domain: games
+      verified: false
+grl_tetris_simple_agent:
+  responses_api_agents:
+    simple_agent:
+      entrypoint: app.py
+      max_steps: 10
+      resources_server:
+        type: resources_servers
+        name: grl_tetris_resources_server
+      model_server:
+        type: responses_api_models
+        name: policy_model
+      datasets:
+      - name: example
+        type: example
+        jsonl_fpath: resources_servers/grl_tetris/data/example.jsonl
+        num_repeats: 1
+        gitlab_identifier:
+          dataset_name: grl_tetris
+          version: 0.0.1
+          artifact_fpath: example.jsonl
+        license: Apache 2.0
diff --git a/resources_servers/grl_tetris/data/example.jsonl b/resources_servers/grl_tetris/data/example.jsonl
new file mode 100644
index 000000000..2eb162fde
--- /dev/null
+++ b/resources_servers/grl_tetris/data/example.jsonl
@@ -0,0 +1,5 @@
+{"game_id": 1, "seed": 93810, "dim_board": [5, 5], "box_type": 0, "responses_create_params": {"input": [{"role": "developer", "content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with <answer>Action1 || Action2 || ...</answer> summarizing all moves you made."}, {"role": "user", "content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible."}], "tools": [{"name": "step", "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving.", "strict": true, "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}}]}}
+{"game_id": 2, "seed": 46185, "dim_board": [4, 6], "box_type": 1, "responses_create_params": {"input": [{"role": "developer", "content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with <answer>Action1 || Action2 || ...</answer> summarizing all moves you made."}, {"role": "user", "content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible."}], "tools": [{"name": "step", "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving.", "strict": true, "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}}]}}
+{"game_id": 3, "seed": 28563, "dim_board": [5, 5], "box_type": 0, "responses_create_params": {"input": [{"role": "developer", "content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with <answer>Action1 || Action2 || ...</answer> summarizing all moves you made."}, {"role": "user", "content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible."}], "tools": [{"name": "step", "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving.", "strict": true, "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}}]}}
+{"game_id": 4, "seed": 87808, "dim_board": [6, 5], "box_type": 0, "responses_create_params": {"input": [{"role": "developer", "content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with <answer>Action1 || Action2 || ...</answer> summarizing all moves you made."}, {"role": "user", "content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible."}], "tools": [{"name": "step", "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving.", "strict": true, "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}}]}}
+{"game_id": 5, "seed": 14453, "dim_board": [5, 5], "box_type": 1, "responses_create_params": {"input": [{"role": "developer", "content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with <answer>Action1 || Action2 || ...</answer> summarizing all moves you made."}, {"role": "user", "content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible."}], "tools": [{"name": "step", "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving.", "strict": true, "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}}]}}
diff --git a/resources_servers/grl_tetris/data/example_metrics.json b/resources_servers/grl_tetris/data/example_metrics.json
new file mode 100644
index 000000000..88030ce73
--- /dev/null
+++ b/resources_servers/grl_tetris/data/example_metrics.json
@@ -0,0 +1,8 @@
+{
+  "name": "example",
+  "type": "example",
+  "jsonl_fpath": "resources_servers/grl_tetris/data/example.jsonl",
+  "gitlab_identifier": null,
+  "license": "Apache 2.0",
+  "Number of examples": 5
+}
diff --git a/resources_servers/grl_tetris/data/example_rollouts.jsonl b/resources_servers/grl_tetris/data/example_rollouts.jsonl
new file mode 100644
index 000000000..3ee989b4c
--- /dev/null
+++ b/resources_servers/grl_tetris/data/example_rollouts.jsonl
@@ -0,0 +1,5 @@
+{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with <answer>Action1 || Action2 || ...</answer> summarizing all moves you made.", "role": "developer", "type": "message"}, {"content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible.", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. 
Call with empty array [] to see current board state without moving."}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_0652b8fcf1e2dce400696565e6b2e881959ed1de12f162997e", "created_at": 1768252902.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "gpt-4o-2024-08-06", "object": "response", "output": [{"arguments": "{\"actions\":[]}", "call_id": "call_KiPwWiCxtkOEN1RDK4wi52Sp", "name": "step", "type": "function_call", "id": "fc_0652b8fcf1e2dce400696565e47d2c8195bb1da44b843ec537", "status": "completed"}, {"call_id": "call_KiPwWiCxtkOEN1RDK4wi52Sp", "output": "{\"observation\":\"__XX\\n____\\n____\\n____\",\"reward\":0.0,\"total_reward\":0.0,\"done\":false,\"steps\":[],\"history\":[]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_g5iqpJJHzJnhmDa3btuotPF3", "name": "step", "type": "function_call", "id": "fc_0652b8fcf1e2dce400696565e6273881959577dc95640f8566", "status": "completed"}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_2q9Elms7fMTWkIhRWR5OldMv", "name": "step", "type": "function_call", "id": "fc_0652b8fcf1e2dce400696565e6401881958e2569ad4f42dcef", "status": "completed"}, {"call_id": "call_g5iqpJJHzJnhmDa3btuotPF3", "output": "{\"observation\":\"__XX\\n__XX\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.1,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"call_id": "call_2q9Elms7fMTWkIhRWR5OldMv", "output": 
"{\"observation\":\"____\\n__XX\\n__XX\\n____\",\"reward\":-0.1,\"total_reward\":-0.2,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"id": "msg_0652b8fcf1e2dce400696565e73d4c81958bf7c73d4d24fa8f", "content": [{"annotations": [], "text": "<answer>Down || Down</answer>", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": 1.0, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. 
Call with empty array [] to see current board state without moving."}], "top_p": 1.0, "background": false, "conversation": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "prompt_cache_key": null, "reasoning": {"effort": null, "generate_summary": null, "summary": null}, "safety_identifier": null, "service_tier": "default", "status": "completed", "text": {"format": {"type": "text"}, "verbosity": "medium"}, "top_logprobs": 0, "truncation": "disabled", "usage": {"input_tokens": 625, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 11, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 636}, "user": null, "billing": {"payer": "developer"}, "completed_at": 1768252903, "frequency_penalty": 0.0, "presence_penalty": 0.0, "prompt_cache_retention": null, "store": true}, "reward": -0.2, "success": false}
+{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with <answer>Action1 || Action2 || ...</answer> summarizing all moves you made.", "role": "developer", "type": "message"}, {"content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible.", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. 
Call with empty array [] to see current board state without moving."}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_0690794e9bb8139d00696565e9fc20819eb9128e7907a19bd3", "created_at": 1768252906.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "gpt-4o-2024-08-06", "object": "response", "output": [{"arguments": "{\"actions\":[]}", "call_id": "call_uL4WuAX2lvwJOdGVdqlezO8y", "name": "step", "type": "function_call", "id": "fc_0690794e9bb8139d00696565e4c938819eba8c692fc8aba62e", "status": "completed"}, {"call_id": "call_uL4WuAX2lvwJOdGVdqlezO8y", "output": "{\"observation\":\"_X__\\n____\\n____\\n____\",\"reward\":0.0,\"total_reward\":0.0,\"done\":false,\"steps\":[],\"history\":[]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_0Ny2wn272A6wZIy2SSExmBxS", "name": "step", "type": "function_call", "id": "fc_0690794e9bb8139d00696565e651ac819ea88648ddd7f7e4a0", "status": "completed"}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_ykybl3HByrFr34weNdftZxBr", "name": "step", "type": "function_call", "id": "fc_0690794e9bb8139d00696565e66ea8819e845eab68c6b7fab8", "status": "completed"}, {"call_id": "call_0Ny2wn272A6wZIy2SSExmBxS", "output": "{\"observation\":\"_X__\\n_X__\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.1,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"call_id": "call_ykybl3HByrFr34weNdftZxBr", "output": 
"{\"observation\":\"____\\n_X__\\n_X__\\n____\",\"reward\":-0.1,\"total_reward\":-0.2,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_r9hSUcSt8qx9OTLE8158e0Rt", "name": "step", "type": "function_call", "id": "fc_0690794e9bb8139d00696565e7aec4819eb416d637c8a04d4e", "status": "completed"}, {"call_id": "call_r9hSUcSt8qx9OTLE8158e0Rt", "output": "{\"observation\":\"__XX\\n____\\n_#__\\n_#__\",\"reward\":-0.1,\"total_reward\":-0.30000000000000004,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_oZwMj5SDCFo9oQEllqrmjBNk", "name": "step", 
"type": "function_call", "id": "fc_0690794e9bb8139d00696565e9b808819eb791aa22730c445f", "status": "completed"}, {"call_id": "call_oZwMj5SDCFo9oQEllqrmjBNk", "output": "{\"observation\":\"____\\n__XX\\n_#__\\n_#__\",\"reward\":-0.1,\"total_reward\":-0.4,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"id": "msg_0690794e9bb8139d00696565ea6138819eb58f850f635e9b0b", "content": [{"annotations": [], "text": "<answer>Down || Down || Down || Down || Down</answer>", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": 1.0, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. 
Call with empty array [] to see current board state without moving."}], "top_p": 1.0, "background": false, "conversation": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "prompt_cache_key": null, "reasoning": {"effort": null, "generate_summary": null, "summary": null}, "safety_identifier": null, "service_tier": "default", "status": "completed", "text": {"format": {"type": "text"}, "verbosity": "medium"}, "top_logprobs": 0, "truncation": "disabled", "usage": {"input_tokens": 1179, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 17, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 1196}, "user": null, "billing": {"payer": "developer"}, "completed_at": 1768252906, "frequency_penalty": 0.0, "presence_penalty": 0.0, "prompt_cache_retention": null, "store": true}, "reward": -0.4, "success": false}
+{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with <answer>Action1 || Action2 || ...</answer> summarizing all moves you made.", "role": "developer", "type": "message"}, {"content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible.", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. 
Call with empty array [] to see current board state without moving."}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_045dc8dead221e0500696565ea3c6c81a08481f9287fae7c5c", "created_at": 1768252906.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "gpt-4o-2024-08-06", "object": "response", "output": [{"arguments": "{\"actions\":[]}", "call_id": "call_AKpurldhUA7kkDmHHTk9bvq7", "name": "step", "type": "function_call", "id": "fc_045dc8dead221e0500696565e4912481a08f2bff033e7f7cc3", "status": "completed"}, {"call_id": "call_AKpurldhUA7kkDmHHTk9bvq7", "output": "{\"observation\":\"___X\\n____\\n____\\n____\",\"reward\":0.0,\"total_reward\":0.0,\"done\":false,\"steps\":[],\"history\":[]}", "type": "function_call_output", "id": null, "status": null}, {"id": "msg_045dc8dead221e0500696565e5d2fc81a092444caabf20fff3", "content": [{"annotations": [], "text": "The current board state is:\n\n```\n___X\n____\n____\n____\n```\n\nThe active piece ('X') is in the top-right corner. 
I'll move it down to try and clear a line.", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}, {"arguments": "{\"actions\":[\"Left\"]}", "call_id": "call_f9yuUc5kBntRpWGckYNy9NIR", "name": "step", "type": "function_call", "id": "fc_045dc8dead221e0500696565e7543481a0bd34b836a71328c5", "status": "completed"}, {"call_id": "call_f9yuUc5kBntRpWGckYNy9NIR", "output": "{\"observation\":\"__X_\\n____\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.1,\"done\":false,\"steps\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Left\"]}", "call_id": "call_0ZU1Ffo5TaYfQ7Rc4SdBYFHk", "name": "step", "type": "function_call", "id": "fc_045dc8dead221e0500696565e86dd481a09155bae8b20354f9", "status": "completed"}, {"call_id": "call_0ZU1Ffo5TaYfQ7Rc4SdBYFHk", "output": "{\"observation\":\"_X__\\n____\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.2,\"done\":false,\"steps\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Left\"]}", 
"call_id": "call_vOxbvT3jQz5cxquUrBbOScQi", "name": "step", "type": "function_call", "id": "fc_045dc8dead221e0500696565e91f6881a0a4e5d2323a1f998e", "status": "completed"}, {"call_id": "call_vOxbvT3jQz5cxquUrBbOScQi", "output": "{\"observation\":\"X___\\n____\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.30000000000000004,\"done\":false,\"steps\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\",\"Down\",\"Down\"]}", "call_id": "call_mqgGieUPMttGAlOVkRTJ7TM6", "name": "step", "type": "function_call", "id": "fc_045dc8dead221e0500696565e9d2b481a0934a7e539636b6cd", "status": "completed"}, {"call_id": "call_mqgGieUPMttGAlOVkRTJ7TM6", "output": 
"{\"observation\":\"__XX\\n____\\n#___\\n#___\",\"reward\":-0.30000000000000004,\"total_reward\":-0.6,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"id": "msg_045dc8dead221e0500696565eaafc881a0bb0db1f9c9c5815d", "content": [{"annotations": [], "text": "I moved the piece, but no line was cleared. 
Here's the sequence of actions taken:\n\n<answer>Left || Left || Left || Down || Down || Down</answer>", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": 1.0, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving."}], "top_p": 1.0, "background": false, "conversation": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "prompt_cache_key": null, "reasoning": {"effort": null, "generate_summary": null, "summary": null}, "safety_identifier": null, "service_tier": "default", "status": "completed", "text": {"format": {"type": "text"}, "verbosity": "medium"}, "top_logprobs": 0, "truncation": "disabled", "usage": {"input_tokens": 1420, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 37, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 1457}, "user": null, "billing": {"payer": "developer"}, "completed_at": 1768252907, "frequency_penalty": 0.0, "presence_penalty": 0.0, "prompt_cache_retention": null, "store": true}, "reward": -0.6, "success": false}
+{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with <answer>Action1 || Action2 || ...</answer> summarizing all moves you made.", "role": "developer", "type": "message"}, {"content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible.", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. 
Call with empty array [] to see current board state without moving."}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_0d5a2c8365850b8e00696565eab4288192b2da66afb906d98e", "created_at": 1768252906.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "gpt-4o-2024-08-06", "object": "response", "output": [{"arguments": "{\"actions\":[]}", "call_id": "call_KgJRph2MXPBIZpi0YTtUUBGc", "name": "step", "type": "function_call", "id": "fc_0d5a2c8365850b8e00696565e48c00819282ccef6c4a83d6b9", "status": "completed"}, {"call_id": "call_KgJRph2MXPBIZpi0YTtUUBGc", "output": "{\"observation\":\"XX__\\n____\\n____\\n____\",\"reward\":0.0,\"total_reward\":0.0,\"done\":false,\"steps\":[],\"history\":[]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_erUkQxpj6GAIbwEwYPk6TRse", "name": "step", "type": "function_call", "id": "fc_0d5a2c8365850b8e00696565e55df08192988609846746dba4", "status": "completed"}, {"call_id": "call_erUkQxpj6GAIbwEwYPk6TRse", "output": "{\"observation\":\"____\\nXX__\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.1,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_0PIqBadc3XMyvboNUnDvschd", "name": "step", "type": "function_call", "id": "fc_0d5a2c8365850b8e00696565e645d081929b23c8104eb226d7", "status": "completed"}, {"call_id": "call_0PIqBadc3XMyvboNUnDvschd", "output": 
"{\"observation\":\"____\\n____\\nXX__\\n____\",\"reward\":-0.1,\"total_reward\":-0.2,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_hdtWjww3kRgedkTackhU9wBK", "name": "step", "type": "function_call", "id": "fc_0d5a2c8365850b8e00696565e721a8819291340dca9bafe5e9", "status": "completed"}, {"call_id": "call_hdtWjww3kRgedkTackhU9wBK", "output": "{\"observation\":\"XX__\\n____\\n____\\n##__\",\"reward\":-0.1,\"total_reward\":-0.30000000000000004,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_eiAPDwgSwieMKVJrNgCQO4sX", "name": "step", 
"type": "function_call", "id": "fc_0d5a2c8365850b8e00696565e83f208192a9d24d3ae0a5a550", "status": "completed"}, {"call_id": "call_eiAPDwgSwieMKVJrNgCQO4sX", "output": "{\"observation\":\"XX__\\nXX__\\n____\\n##__\",\"reward\":-0.1,\"total_reward\":-0.4,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_gUYouOo1KFFZfIOW2T31ooRi", "name": "step", "type": "function_call", "id": "fc_0d5a2c8365850b8e00696565e945888192837561cdb478f81a", "status": "completed"}, {"call_id": "call_gUYouOo1KFFZfIOW2T31ooRi", "output": 
"{\"observation\":\"__XX\\n##__\\n##__\\n##__\",\"reward\":-0.1,\"total_reward\":-0.5,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Left\"]}", "call_id": "call_GObmJdA0qMGI0cniT0VAv9mU", "name": "step", "type": "function_call", "id": "fc_0d5a2c8365850b8e00696565ea51148192bef30ae7c3576d74", "status": "completed"}, {"call_id": "call_GObmJdA0qMGI0cniT0VAv9mU", "output": 
"{\"observation\":\"_XX_\\n##__\\n##__\\n##__\",\"reward\":-0.1,\"total_reward\":-0.6,\"done\":true,\"steps\":[{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":true,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":0,\"action_label\":\"Left\",\"reward\":-0.1,\"done\":true,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"id": "msg_0d5a2c8365850b8e00696565eb619c8192a650c33c2aeacd37", "content": [{"annotations": [], "text": "<answer>Down || Down || Down || Down || Down || Left</answer>", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": 1.0, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. 
Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving."}], "top_p": 1.0, "background": false, "conversation": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "prompt_cache_key": null, "reasoning": {"effort": null, "generate_summary": null, "summary": null}, "safety_identifier": null, "service_tier": "default", "status": "completed", "text": {"format": {"type": "text"}, "verbosity": "medium"}, "top_logprobs": 0, "truncation": "disabled", "usage": {"input_tokens": 1911, "input_tokens_details": {"cached_tokens": 1408}, "output_tokens": 19, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 1930}, "user": null, "billing": {"payer": "developer"}, "completed_at": 1768252907, "frequency_penalty": 0.0, "presence_penalty": 0.0, "prompt_cache_retention": null, "store": true}, "reward": -0.6, "success": false}
+{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with <answer>Action1 || Action2 || ...</answer> summarizing all moves you made.", "role": "developer", "type": "message"}, {"content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible.", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. 
Call with empty array [] to see current board state without moving."}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_0ab7221f9fb73bd200696565ec54088197b9f83d678139f026", "created_at": 1768252908.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "gpt-4o-2024-08-06", "object": "response", "output": [{"arguments": "{\"actions\":[]}", "call_id": "call_fobyRyvvP30QTkyCq2FOdcXr", "name": "step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565e4bfa88197aa8aa2203c6f21d1", "status": "completed"}, {"call_id": "call_fobyRyvvP30QTkyCq2FOdcXr", "output": "{\"observation\":\"__XX\\n____\\n____\\n____\",\"reward\":0.0,\"total_reward\":0.0,\"done\":false,\"steps\":[],\"history\":[]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_0Je4QPq9mXY39BORQxE7D8OR", "name": "step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565e6640881979fea8f9521117bcd", "status": "completed"}, {"call_id": "call_0Je4QPq9mXY39BORQxE7D8OR", "output": "{\"observation\":\"____\\n__XX\\n____\\n____\",\"reward\":-0.1,\"total_reward\":-0.1,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_Gxat5KDZc8yUZwxnBoMWOhul", "name": "step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565e713cc8197865d18096bbb121f", "status": "completed"}, {"call_id": "call_Gxat5KDZc8yUZwxnBoMWOhul", "output": 
"{\"observation\":\"____\\n____\\n__XX\\n____\",\"reward\":-0.1,\"total_reward\":-0.2,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_P8BpK5nYCCcTwxF0TqeQ3AKi", "name": "step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565e80afc8197955bf941b8fe6c6e", "status": "completed"}, {"call_id": "call_P8BpK5nYCCcTwxF0TqeQ3AKi", "output": "{\"observation\":\"__X_\\n____\\n____\\n__##\",\"reward\":-0.1,\"total_reward\":-0.30000000000000004,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[]}", "call_id": "call_CANNEhbdjIMwbhkVMRU906i5", "name": "step", "type": 
"function_call", "id": "fc_0ab7221f9fb73bd200696565e9c2548197916632847ec8a644", "status": "completed"}, {"call_id": "call_CANNEhbdjIMwbhkVMRU906i5", "output": "{\"observation\":\"__X_\\n____\\n____\\n__##\",\"reward\":0.0,\"total_reward\":-0.30000000000000004,\"done\":false,\"steps\":[],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_bnjsIqzspnvRyq8MtsEt3vxS", "name": "step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565eabcc08197be40e1b0fc1e021f", "status": "completed"}, {"call_id": "call_bnjsIqzspnvRyq8MtsEt3vxS", "output": 
"{\"observation\":\"__X_\\n__X_\\n____\\n__##\",\"reward\":-0.1,\"total_reward\":-0.4,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}}]}", "type": "function_call_output", "id": null, "status": null}, {"arguments": "{\"actions\":[\"Down\"]}", "call_id": "call_V3is9Vdy5aJlTpknfBqWp1aN", "name": "step", "type": "function_call", "id": "fc_0ab7221f9fb73bd200696565ebd490819790fc411b3af2fc6a", "status": "completed"}, {"call_id": "call_V3is9Vdy5aJlTpknfBqWp1aN", "output": 
"{\"observation\":\"__XX\\n__#_\\n__#_\\n__##\",\"reward\":-0.1,\"total_reward\":-0.5,\"done\":false,\"steps\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}],\"history\":[{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":false}},{\"action_id\":2,\"action_label\":\"Down\",\"reward\":-0.1,\"done\":false,\"info\":{\"action_is_effective\":true,\"action_is_valid\":true,\"success\":false,\"dropped\":true}}]}", "type": "function_call_output", "id": null, "status": null}, {"id": "msg_0ab7221f9fb73bd200696565ecc7b481978ebd3a1369d5396b", "content": [{"annotations": [], "text": "<answer>Down || Down || Down || Down || Down</answer>", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": 1.0, "tool_choice": "auto", "tools": [{"name": "step", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}, "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state."}}, "required": ["actions"], "additionalProperties": false}, "strict": true, "type": "function", "description": "Execute Tetris moves sequentially. 
Call with empty array [] to see current board state without moving."}], "top_p": 1.0, "background": false, "conversation": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "prompt_cache_key": null, "reasoning": {"effort": null, "generate_summary": null, "summary": null}, "safety_identifier": null, "service_tier": "default", "status": "completed", "text": {"format": {"type": "text"}, "verbosity": "medium"}, "top_logprobs": 0, "truncation": "disabled", "usage": {"input_tokens": 1724, "input_tokens_details": {"cached_tokens": 1280}, "output_tokens": 17, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 1741}, "user": null, "billing": {"payer": "developer"}, "completed_at": 1768252908, "frequency_penalty": 0.0, "presence_penalty": 0.0, "prompt_cache_retention": null, "store": true}, "reward": -0.5, "success": false}
diff --git a/resources_servers/grl_tetris/generate_test_examples.py b/resources_servers/grl_tetris/generate_test_examples.py
new file mode 100755
index 000000000..ac46d2d1f
--- /dev/null
+++ b/resources_servers/grl_tetris/generate_test_examples.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python3
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Generate test examples for GRL Tetris environment."""
+
+import json
+import random
+from pathlib import Path
+from typing import Any, Dict, List
+
+
+def generate_tetris_example(game_id: int, seed: int, dim_board: List[int], box_type: int) -> Dict[str, Any]:
+    """Generate a single Tetris test example in the expected JSONL format.
+
+    Args:
+        game_id: Unique identifier for the game
+        seed: Random seed for reproducible game generation
+        dim_board: Board dimensions as [width, height]
+        box_type: Type of Tetris pieces (0=single, 1=single, 2=I and -, 3=I, -, and O)
+
+    Returns:
+        Dictionary containing the game configuration and prompt
+    """
+    return {
+        "game_id": game_id,
+        "seed": seed,
+        "dim_board": dim_board,
+        "box_type": box_type,
+        "responses_create_params": {
+            "input": [
+                {
+                    "role": "developer",
+                    "content": "You are a Tetris-playing assistant. IMPORTANT: First call the `step` tool with an empty array [] to see the initial board state and active piece. Example: step({\"actions\": []}). The tool will return an ASCII board using '_' for empty cells, '#' for locked blocks, and 'X' for the active piece. Then continue calling `step` with valid actions (Left, Right, Down) until you clear a line or the board locks out. At the end, respond with <answer>Action1 || Action2 || ...</answer> summarizing all moves you made.",
+                },
+                {
+                    "role": "user",
+                    "content": "Call the step tool to see the board, then play Tetris to clear at least one line if possible.",
+                },
+            ],
+            "tools": [
+                {
+                    "name": "step",
+                    "type": "function",
+                    "description": "Execute Tetris moves sequentially. Call with empty array [] to see current board state without moving.",
+                    "strict": True,
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "actions": {
+                                "type": "array",
+                                "items": {"type": "string"},
+                                "description": "Sequence of actions, e.g. ['Left', 'Down']. Use empty array [] to view current state.",
+                            }
+                        },
+                        "required": ["actions"],
+                        "additionalProperties": False,
+                    },
+                }
+            ],
+        },
+    }
+
+
+def generate_test_examples(num_examples: int = 500, output_file: str = "data/test_examples.jsonl") -> None:
+    """Generate diverse test examples for Tetris environment.
+
+    Args:
+        num_examples: Number of examples to generate (default: 500)
+        output_file: Output JSONL file path
+    """
+    examples = []
+
+    # Define parameter ranges for diversity
+    board_sizes = [
+        [4, 4],  # Small square
+        [5, 5],  # Medium square
+        [6, 6],  # Large square
+        [4, 6],  # Narrow tall
+        [6, 4],  # Wide short
+        [5, 6],  # Medium tall
+        [6, 5],  # Medium wide
+        [4, 5],  # Small tall
+        [5, 4],  # Small wide
+    ]
+
+    box_types = [0, 1, 2, 3]  # All available piece types
+
+    # Generate diverse examples
+    for i in range(num_examples):
+        game_id = i + 1
+
+        # Use game_id as base for seed to ensure reproducibility but diversity
+        seed = random.randint(10000, 99999) + i * 137  # Prime offset for better distribution
+
+        # Cycle through board sizes with some randomness
+        dim_board = random.choice(board_sizes)
+
+        # Distribute box types evenly but with some randomness
+        box_type = random.choice(box_types)
+
+        example = generate_tetris_example(game_id, seed, dim_board, box_type)
+        examples.append(example)
+
+    # Write to JSONL file
+    output_path = Path(__file__).parent / output_file
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    with open(output_path, "w") as f:
+        for example in examples:
+            f.write(json.dumps(example) + "\n")
+
+    print(f"Generated {num_examples} test examples")
+    print(f"Output file: {output_path}")
+    print("\nParameter distribution:")
+    print(f"  Board sizes: {set(tuple(e['dim_board']) for e in examples)}")
+    print(f"  Box types: {set(e['box_type'] for e in examples)}")
+    print(f"  Seed range: {min(e['seed'] for e in examples)} - {max(e['seed'] for e in examples)}")
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Generate Tetris test examples")
+    parser.add_argument("--num-examples", type=int, default=500, help="Number of examples to generate (default: 500)")
+    parser.add_argument(
+        "--output",
+        type=str,
+        default="data/test_examples.jsonl",
+        help="Output JSONL file path (default: data/test_examples.jsonl)",
+    )
+    parser.add_argument("--seed", type=int, default=42, help="Random seed for generation (default: 42)")
+
+    args = parser.parse_args()
+
+    # Set random seed for reproducibility
+    random.seed(args.seed)
+
+    generate_test_examples(args.num_examples, args.output)
diff --git a/resources_servers/grl_tetris/requirements.txt b/resources_servers/grl_tetris/requirements.txt
new file mode 100644
index 000000000..1c193ea4b
--- /dev/null
+++ b/resources_servers/grl_tetris/requirements.txt
@@ -0,0 +1,3 @@
+-e nemo-gym[dev] @ ../../
+gymnasium==1.1.1
+numpy>=1.26
diff --git a/resources_servers/grl_tetris/tests/test_app.py b/resources_servers/grl_tetris/tests/test_app.py
new file mode 100644
index 000000000..38150cae8
--- /dev/null
+++ b/resources_servers/grl_tetris/tests/test_app.py
@@ -0,0 +1,253 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from unittest.mock import MagicMock, patch
+
+from fastapi.testclient import TestClient
+
+from nemo_gym.openai_utils import (
+    NeMoGymResponse,
+    NeMoGymResponseCreateParamsNonStreaming,
+)
+from nemo_gym.server_utils import ServerClient
+from resources_servers.grl_tetris.app import (
+    GrlTetrisResourcesServer,
+    GrlTetrisResourcesServerConfig,
+)
+
+
+# Request parameters used as the ``responses_create_params`` part of the
+# /verify request body built by ``_verify_payload``; the input is a placeholder.
+_VERIFY_CREATE_PARAMS = NeMoGymResponseCreateParamsNonStreaming(
+    input="placeholder",
+)
+
+# Stub response used as the ``response`` part of the /verify request body; its
+# output list is empty.
+# NOTE(review): ``model_construct`` is presumably pydantic's validation-free
+# constructor, so fields not listed here stay unset — confirm against NeMoGymResponse.
+_VERIFY_RESPONSE = NeMoGymResponse.model_construct(
+    id="resp_test",
+    object="response",
+    created_at=0.0,
+    status="completed",
+    output=[],
+    model="gpt-4.1",
+    parallel_tool_calls=True,
+    tool_choice="auto",
+    tools=[],
+)
+
+
+def _verify_payload() -> dict:
+    return {
+        "responses_create_params": _VERIFY_CREATE_PARAMS.model_dump(mode="json"),
+        "response": _VERIFY_RESPONSE.model_dump(mode="json"),
+    }
+
+
+class TestApp:
+    def test_sanity(self) -> None:
+        config = GrlTetrisResourcesServerConfig(
+            host="0.0.0.0",
+            port=8080,
+            entrypoint="",
+            name="",
+        )
+        GrlTetrisResourcesServer(config=config, server_client=MagicMock(spec=ServerClient))
+
+    def test_seed_and_step_flow(self) -> None:
+        config = GrlTetrisResourcesServerConfig(
+            host="0.0.0.0",
+            port=8080,
+            entrypoint="",
+            name="",
+        )
+        server = GrlTetrisResourcesServer(
+            config=config,
+            server_client=MagicMock(spec=ServerClient),
+        )
+
+        class FakeEnv:
+            ACTION_LOOKUP = {0: "Left"}
+
+            def __init__(self, *_args, **_kwargs) -> None:
+                self._closed = False
+                self.step_calls = 0
+
+            def reset(self, seed=None):  # noqa: ARG002
+                return "Initial observation"
+
+            def step(self, action):
+                self.step_calls += 1
+                assert action == 0
+                reward = 1.0
+                done = self.step_calls >= 1
+                info = {"success": done}
+                return "Next observation", reward, done, info
+
+            def close(self):
+                self._closed = True
+
+        fake_env = FakeEnv()
+        with patch("resources_servers.grl_tetris.app.TetrisEnv", return_value=fake_env):
+            app = server.setup_webserver()
+            client = TestClient(app)
+
+            response = client.post("/seed_session", json={"seed": 123})
+            assert response.status_code == 200
+            assert response.json()["observation"] == "Initial observation"
+
+            cookies = response.cookies
+            response = client.post("/step", json={"actions": ["Left"]}, cookies=cookies)
+            payload = response.json()
+            assert response.status_code == 200
+            assert payload["observation"] == "Next observation"
+            assert payload["reward"] == 1.0
+            assert payload["done"] is True
+            assert payload["steps"][0]["action_label"] == "Left"
+            assert fake_env.step_calls == 1
+
+            response = client.post("/verify", json=_verify_payload(), cookies=cookies)
+            assert response.status_code == 200
+            payload = response.json()
+            assert payload["success"] is True
+            assert payload["reward"] == 1.0
+            assert fake_env._closed is True
+
+    def test_step_action_mapping_stops_after_done(self) -> None:
+        config = GrlTetrisResourcesServerConfig(
+            host="0.0.0.0",
+            port=8080,
+            entrypoint="",
+            name="",
+        )
+        server = GrlTetrisResourcesServer(
+            config=config,
+            server_client=MagicMock(spec=ServerClient),
+        )
+
+        class FakeEnv:
+            ACTION_LOOKUP = {0: "Left", 1: "Right"}
+
+            def __init__(self, *_args, **_kwargs) -> None:
+                self.calls = 0
+                self.closed = False
+
+            def reset(self, seed=None):  # noqa: ARG002
+                return "Init"
+
+            def step(self, action):
+                self.calls += 1
+                if self.calls == 1:
+                    assert action == 0
+                    return "Obs1", 0.5, True, {"success": True}
+                raise AssertionError("Env.step should not be called after done")
+
+            def close(self):
+                self.closed = True
+
+        fake_env = FakeEnv()
+        with patch("resources_servers.grl_tetris.app.TetrisEnv", return_value=fake_env):
+            app = server.setup_webserver()
+            client = TestClient(app)
+
+            seed_resp = client.post("/seed_session", json={})
+            cookies = seed_resp.cookies
+            resp = client.post("/step", json={"actions": ["Left", "Right"]}, cookies=cookies)
+            payload = resp.json()
+            assert resp.status_code == 200
+            assert payload["done"] is True
+            assert payload["steps"][0]["action_label"] == "Left"
+            assert len(payload["steps"]) == 1
+            assert len(payload["history"]) == 1
+            assert fake_env.calls == 1
+
+    def test_step_invalid_action_raises(self) -> None:
+        config = GrlTetrisResourcesServerConfig(
+            host="0.0.0.0",
+            port=8080,
+            entrypoint="",
+            name="",
+        )
+        server = GrlTetrisResourcesServer(
+            config=config,
+            server_client=MagicMock(spec=ServerClient),
+        )
+
+        class FakeEnv:
+            ACTION_LOOKUP = {0: "Left"}
+
+            def __init__(self, *_args, **_kwargs) -> None:
+                pass
+
+            def reset(self, seed=None):  # noqa: ARG002
+                return "Init"
+
+            def step(self, action):  # pragma: no cover - not reached
+                raise AssertionError("Should not call step for invalid action")
+
+            def close(self):
+                pass
+
+        with patch("resources_servers.grl_tetris.app.TetrisEnv", return_value=FakeEnv()):
+            app = server.setup_webserver()
+            client = TestClient(app)
+
+            seed_resp = client.post("/seed_session", json={})
+            cookies = seed_resp.cookies
+            resp = client.post("/step", json={"actions": ["Rotate"]}, cookies=cookies)
+            assert resp.status_code == 400
+            assert resp.json()["detail"].startswith("Unable to parse action")
+
+    def test_verify_failure_zero_reward_and_cleanup(self) -> None:
+        config = GrlTetrisResourcesServerConfig(
+            host="0.0.0.0",
+            port=8080,
+            entrypoint="",
+            name="",
+        )
+        server = GrlTetrisResourcesServer(
+            config=config,
+            server_client=MagicMock(spec=ServerClient),
+        )
+
+        class FakeEnv:
+            ACTION_LOOKUP = {0: "Left"}
+
+            def __init__(self, *_args, **_kwargs) -> None:
+                self.closed = False
+
+            def reset(self, seed=None):  # noqa: ARG002
+                return "Init"
+
+            def step(self, action):
+                return "Obs", 0.0, False, {"success": False}
+
+            def close(self):
+                self.closed = True
+
+        fake_env = FakeEnv()
+        with patch("resources_servers.grl_tetris.app.TetrisEnv", return_value=fake_env):
+            app = server.setup_webserver()
+            client = TestClient(app)
+
+            seed_resp = client.post("/seed_session", json={})
+            cookies = seed_resp.cookies
+            client.post("/step", json={"actions": [0]}, cookies=cookies)
+
+            verify_resp = client.post(
+                "/verify",
+                json=_verify_payload(),
+                cookies=cookies,
+            )
+            assert verify_resp.status_code == 200
+            payload = verify_resp.json()
+            assert payload["success"] is False
+            assert payload["reward"] == 0.0
+            assert fake_env.closed is True
+            assert server.session_id_to_state == {}
diff --git a/resources_servers/grl_tetris/tetris_env/__init__.py b/resources_servers/grl_tetris/tetris_env/__init__.py
new file mode 100644
index 000000000..271a95004
--- /dev/null
+++ b/resources_servers/grl_tetris/tetris_env/__init__.py
@@ -0,0 +1,6 @@
+"""Local Tetris environment implementation."""
+
+from .tetris_env import TetrisEnv
+
+
+# Public API of the package: only ``TetrisEnv`` is re-exported.
+__all__ = ["TetrisEnv"]
diff --git a/resources_servers/grl_tetris/tetris_env/tetris_env.py b/resources_servers/grl_tetris/tetris_env/tetris_env.py
new file mode 100644
index 000000000..d9ce8558b
--- /dev/null
+++ b/resources_servers/grl_tetris/tetris_env/tetris_env.py
@@ -0,0 +1,289 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Standalone Tetris environment implementation for the GRL Tetris resource server.
+
+This module adapts the environment logic from ``GRL/grl/agents/tetrisAgent/env.py``
+and removes dependencies on the upstream GRL repository so it can run entirely
+within the NeMo Gym project.
+"""
+
+from __future__ import annotations
+
+import copy
+import random
+from contextlib import contextmanager
+from typing import Any, Dict, Iterator, List, Tuple
+
+import gymnasium as gym
+import numpy as np
+
+
+# ─────────────────────────── utilities ────────────────────────────
+
+
+@contextmanager
+def all_seed(seed: int | None) -> Iterator[None]:
+    """Temporarily set ``random`` and ``numpy`` seeds within a context."""
+    random_state = random.getstate()
+    numpy_state = np.random.get_state()
+
+    try:
+        if seed is not None:
+            random.seed(seed)
+            np.random.seed(seed)
+        yield
+    finally:
+        random.setstate(random_state)
+        np.random.set_state(numpy_state)
+
+
+class BaseEnv:
+    """Minimal base class mirroring the GRL interface."""
+
+    def __init__(self, config: Dict[str, Any] | None = None, **_kwargs: Any) -> None:
+        self.config: Dict[str, Any] = config or {}
+
+    def reset(self, seed: int | None = None, **_kwargs: Any) -> Any:
+        raise NotImplementedError
+
+    def step(self, action: int) -> Tuple[Any, float, bool, Dict[str, Any]]:
+        raise NotImplementedError
+
+    def render(self, mode: str = "text") -> Any:
+        raise NotImplementedError
+
+    def close(self) -> None:
+        raise NotImplementedError
+
+
+def is_occupied(shape: List[Tuple[int, int]], anchor: Tuple[int, int], board: np.ndarray) -> bool:
+    """Return True when ``shape`` anchored at ``anchor`` collides with the board."""
+    for dx, dy in shape:
+        x, y = anchor[0] + dx, anchor[1] + dy
+        if y < 0:
+            continue
+        if x < 0 or x >= board.shape[0] or y >= board.shape[1] or board[x, y]:
+            return True
+    return False
+
+
+class TetrisEnv(BaseEnv):
+    """Self-contained Tetris environment modelled after the GRL training environment."""
+
+    def __init__(self, config: Dict[str, Any] | None = None, **_kwargs: Any) -> None:
+        """Build the board, piece catalogue and action table, then start an episode.
+
+        Recognised ``config`` keys (all optional): ``dim_x`` / ``dim_y`` (board
+        width / height, default 4 each), ``grid_lookup``, ``action_lookup``,
+        ``render_mode`` (default ``"text"``) and ``box_type``. A ``box_type`` of
+        2 offers the "I" and "-" pieces, 3 offers "I", "-" and "O", and any other
+        value offers a single-cell "O". Extra keyword arguments are accepted and
+        ignored. Construction finishes by calling ``reset()``.
+        """
+        super().__init__(config=config or {})
+        cfg = self.config
+
+        # Board geometry: first axis is x (columns), second axis is y (rows).
+        self.width = cfg.get("dim_x", 4)
+        self.height = cfg.get("dim_y", 4)
+        self.board = np.zeros((self.width, self.height), dtype=np.bool_)
+
+        # Presentation and action metadata.
+        self.render_mode = cfg.get("render_mode", "text")
+        self.GRID_LOOKUP = cfg.get("grid_lookup", {0: "_", 1: "#", 2: "X"})
+        self.ACTION_LOOKUP = cfg.get("action_lookup", {0: "Left", 1: "Right", 2: "Down"})
+        self.ACTION_SPACE = gym.spaces.Discrete(3, start=0)
+        self.actions = {0: self._left, 1: self._right, 2: self._soft_drop}
+
+        # Piece catalogue; every offset is relative to the anchor cell.
+        box_type = cfg.get("box_type", 1)
+        if box_type == 2:
+            catalogue = {
+                "I": [(0, 0), (0, -1)],
+                "-": [(0, 0), (-1, 0)],
+            }
+        elif box_type == 3:
+            catalogue = {
+                "I": [(0, 0), (0, -1)],
+                "-": [(0, 0), (-1, 0)],
+                "O": [(0, 0), (-1, 0), (0, -1), (-1, -1)],
+            }
+        else:
+            catalogue = {"O": [(0, 0)]}
+        self.shapes = catalogue
+        self.shape_names = list(catalogue)
+
+        # Per-episode bookkeeping; reset() below fills in the real values.
+        self.time = 0
+        self.score = 0
+        self.n_deaths = 0
+        self.anchor: Tuple[int, int] | None = None
+        self.shape: List[Tuple[int, int]] | None = None
+        self._shape_counts = [0] * len(self.shapes)
+        self.pre_generated_pieces: List[Tuple[Tuple[int, int], List[Tuple[int, int]]]] = []
+        self.current_piece_index = 0
+
+        self.reset()
+
+    # ─────────────────────────── core helpers ────────────────────────────
+
+    def _choose_shape(self) -> List[Tuple[int, int]]:
+        """Draw the next shape, favouring shapes that have been used less often.
+
+        Each shape gets the weight ``5 + max_count - count``, where ``count`` is
+        how many times it has been drawn so far. The usage counter of the
+        selected shape is incremented before its offset list is returned.
+        """
+        ceiling = max(self._shape_counts)
+        weights = [5 + ceiling - seen for seen in self._shape_counts]
+        ticket = random.randint(1, sum(weights))
+
+        # Walk the cumulative weights until the ticket falls inside a bucket.
+        running_total = 0
+        for index, weight in enumerate(weights):
+            running_total += weight
+            if ticket <= running_total:
+                self._shape_counts[index] += 1
+                return self.shapes[self.shape_names[index]]
+
+        # Fallback when no bucket matched: first shape, counter left unchanged.
+        return self.shapes[self.shape_names[0]]
+
+    def _generate_piece(self) -> Tuple[Tuple[int, int], List[Tuple[int, int]]]:
+        """Create a random ``(anchor, shape)`` pair positioned on row 0.
+
+        The shape comes from ``_choose_shape``. The anchor column is uniform
+        over the board width, except that column 0 is excluded for shapes
+        containing the ``(-1, 0)`` offset, which places a cell left of the anchor.
+        """
+        shape = self._choose_shape()
+        leftmost_column = 1 if (-1, 0) in shape else 0
+        column = random.randint(leftmost_column, self.width - 1)
+        return (column, 0), shape
+
+    def _new_piece(self) -> None:
+        """Make the next piece active.
+
+        Pieces are taken from ``pre_generated_pieces`` in order, advancing
+        ``current_piece_index``; once that queue is used up a piece is generated
+        on the fly and the index is left alone.
+        """
+        queue = self.pre_generated_pieces
+        cursor = self.current_piece_index
+        if cursor >= len(queue):
+            self.anchor, self.shape = self._generate_piece()
+            return
+        self.anchor, self.shape = queue[cursor]
+        self.current_piece_index = cursor + 1
+
+    def _has_dropped(self) -> bool:
+        """Return True when the active piece cannot move one row further down."""
+        assert self.shape is not None and self.anchor is not None
+        column, row = self.anchor[0], self.anchor[1]
+        one_row_down = (column, row + 1)
+        return is_occupied(self.shape, one_row_down, self.board)
+
+    def _clear_lines(self) -> int:
+        """Remove every completely filled row and pack the rest downwards.
+
+        A row (fixed ``y``) is cleared when all of its cells are set. Surviving
+        rows keep their relative order and are pushed against the bottom of the
+        board (highest ``y``); the rows freed at the top become empty.
+        ``self.score`` grows by the number of cleared rows.
+
+        Returns:
+            The number of rows cleared, as a built-in ``int``.
+        """
+        # Reduce over the x axis: one flag per row telling whether it is full.
+        full_rows = np.all(self.board, axis=0)
+        survivors = self.board[:, ~full_rows]
+
+        compacted = np.zeros_like(self.board)
+        # With no survivors the slice is empty and the assignment is a no-op.
+        compacted[:, self.height - survivors.shape[1]:] = survivors
+
+        # Convert explicitly: summing NumPy booleans yields a NumPy scalar, which
+        # would otherwise leak into the score, the reward and the info flags.
+        lines_cleared = int(np.count_nonzero(full_rows))
+        self.score += lines_cleared
+        self.board = compacted
+        return lines_cleared
+
+    def _set_piece(self, on: bool = False) -> None:
+        """Write ``on`` into every board cell covered by the active piece.
+
+        Cells of the piece that fall outside the board on either axis are
+        silently skipped. With the default ``on=False`` the cells are cleared.
+        """
+        assert self.shape is not None and self.anchor is not None
+        anchor_x, anchor_y = self.anchor[0], self.anchor[1]
+        for offset_x, offset_y in self.shape:
+            x = anchor_x + offset_x
+            y = anchor_y + offset_y
+            if x < 0 or x >= self.width:
+                continue
+            if y < 0 or y >= self.height:
+                continue
+            self.board[x, y] = on
+
+    def _left(self) -> None:
+        """Shift the active piece one column to the left when that spot is free."""
+        assert self.shape is not None and self.anchor is not None
+        column, row = self.anchor[0], self.anchor[1]
+        candidate = (column - 1, row)
+        if is_occupied(self.shape, candidate, self.board):
+            return
+        self.anchor = candidate
+
+    def _right(self) -> None:
+        """Shift the active piece one column to the right when that spot is free."""
+        assert self.shape is not None and self.anchor is not None
+        column, row = self.anchor[0], self.anchor[1]
+        candidate = (column + 1, row)
+        if is_occupied(self.shape, candidate, self.board):
+            return
+        self.anchor = candidate
+
+    def _soft_drop(self) -> None:
+        """Lower the active piece by one row (``y + 1``) when that spot is free."""
+        assert self.shape is not None and self.anchor is not None
+        column, row = self.anchor[0], self.anchor[1]
+        candidate = (column, row + 1)
+        if is_occupied(self.shape, candidate, self.board):
+            return
+        self.anchor = candidate
+
+    def _idle(self) -> None:
+        """Do nothing; a no-op action handler that leaves the piece where it is."""
+
+    # ───────────────────────────── API ──────────────────────────────
+
+    def reset(self, seed: int | None = None, **_kwargs: Any) -> Any:
+        """Start a fresh episode and return its first observation.
+
+        Inside ``all_seed(seed)`` the clock, score, board and shape-usage
+        counters are cleared, ``width * height + 1`` pieces are pre-generated and
+        the first of them becomes the active piece.
+
+        Args:
+            seed: Handed to ``all_seed``. NOTE(review): presumably seeds the
+                random generators for the duration of the block; confirm
+                against ``all_seed``.
+            **_kwargs: Accepted and ignored.
+
+        Returns:
+            The text rendering produced by ``render()``.
+
+        If a ``RuntimeError`` or ``RuntimeWarning`` escapes the setup, the reset
+        is retried with a seed derived from the failed one (or with ``None``
+        again when no seed was given).
+        """
+        try:
+            with all_seed(seed):
+                self.time = 0
+                self.score = 0
+                self.board = np.zeros((self.width, self.height), dtype=np.bool_)
+
+                # The usage counters drive the weights in _choose_shape. Clear
+                # them so the same seed always yields the same piece sequence,
+                # regardless of how many episodes this instance has played.
+                self._shape_counts = [0] * len(self.shapes)
+
+                self.current_piece_index = 0
+                queue_length = self.width * self.height + 1
+                self.pre_generated_pieces = [self._generate_piece() for _ in range(queue_length)]
+
+                self._new_piece()
+                return self.render()
+        except (RuntimeError, RuntimeWarning):
+            if seed is None:
+                next_seed = None
+            else:
+                # hash() of a str is salted per process, which made the retry
+                # seed differ between runs; CRC32 is stable and already lies in
+                # the range [0, 2**32).
+                import zlib
+
+                next_seed = zlib.crc32(str(seed).encode("utf-8"))
+            return self.reset(next_seed)
+
+    def step(self, action: int) -> Tuple[Any, float, bool, Dict[str, Any]]:
+        """Apply one action and advance the game by one tick.
+
+        An action that is not a key of ``self.actions`` ends the episode
+        immediately: the current rendering, reward ``0.0``, ``done=True`` and
+        ``{"error": "Invalid action"}`` are returned and nothing else changes.
+
+        Otherwise the action handler runs and ``self.time`` is incremented. If
+        the piece can no longer move down it is locked into the board, full rows
+        are cleared, and either the game tops out (any cell set in row 0) or the
+        next piece becomes active.
+
+        Returns:
+            ``(state, reward, done, info)`` where ``state`` is the text
+            rendering including the active piece, ``reward`` is ``-0.1`` plus 10
+            per cleared row, ``done`` is True after a top-out or a cleared row,
+            and ``info`` carries ``action_is_effective``, ``action_is_valid``,
+            ``success`` and ``dropped``.
+        """
+        if action not in self.actions:
+            return self.render(), 0.0, True, {"error": "Invalid action"}
+
+        # Anchors are immutable tuples, so keeping the reference is enough.
+        previous_pos = self.anchor
+        self.actions[action]()
+        # Judge the move now: once a new piece spawns below, self.anchor belongs
+        # to that piece and no longer says anything about this action.
+        action_effective = previous_pos is not None and previous_pos != self.anchor
+
+        self.time += 1
+        reward = -0.1
+        dropped = False
+        topped_out = False
+        lines_cleared = 0
+
+        if self._has_dropped():
+            dropped = True
+            self._set_piece(True)
+            # int() keeps NumPy scalars out of the reward and the info flags.
+            lines_cleared = int(self._clear_lines())
+            reward += lines_cleared * 10
+
+            if np.any(self.board[:, 0]):
+                topped_out = True
+            else:
+                self._new_piece()
+
+        self._set_piece(True)
+        state = self.render()
+        # A live piece is only painted for the rendering and is lifted again.
+        # After a top-out the piece is locked, so its cells are re-stamped in
+        # case rendering cleared them. Row 0 is always empty after a row clear,
+        # so a top-out implies no rows moved and the cells are still in place.
+        self._set_piece(topped_out)
+
+        info: Dict[str, Any] = {
+            "action_is_effective": action_effective,
+            "action_is_valid": True,
+            "success": lines_cleared > 0,
+            "dropped": dropped,
+        }
+        done = topped_out or lines_cleared > 0
+        return state, reward, done, info
+
+    def render(self, mode: str = "text") -> Any:
+        """Return an observation of the board without modifying it.
+
+        Args:
+            mode: ``"text"`` for the string form; any other value returns a copy
+                of the boolean board array (the active piece is not drawn on it).
+
+        Returns:
+            For ``"text"``: one line per row (``y`` increasing downwards), with
+            ``#`` for set cells, ``_`` for empty cells and ``X`` for cells of
+            the active piece that lie on the board.
+        """
+        if mode != "text":
+            return self.board.copy()
+
+        assert self.shape is not None and self.anchor is not None
+        # board is indexed [x, y]; transposing yields one sequence per row.
+        rows = [["#" if cell else "_" for cell in row] for row in self.board.T]
+
+        # Overlay the piece from its coordinates alone. Stamping it onto the
+        # board and erasing it again would wipe locked cells it overlaps.
+        for offset_x, offset_y in self.shape:
+            x = self.anchor[0] + offset_x
+            y = self.anchor[1] + offset_y
+            if 0 <= y < len(rows) and 0 <= x < len(rows[0]):
+                rows[y][x] = "X"
+        return "\n".join("".join(row) for row in rows)
+
+    def get_all_actions(self) -> List[int]:
+        """Return the ids of every registered action, in registration order."""
+        return [action_id for action_id in self.actions]
+
+    def close(self) -> None:
+        """Drop the board and the active piece by setting them to ``None``."""
+        self.board = self.anchor = self.shape = None