diff --git a/config.ini b/config.ini
new file mode 100644
index 0000000..473b87d
--- /dev/null
+++ b/config.ini
@@ -0,0 +1,4 @@
+[OpenAI]
+API_MODE=responses
+
+
diff --git a/tests/unit/test_action_generator_structured_outputs.py b/tests/unit/test_action_generator_structured_outputs.py
new file mode 100644
index 0000000..1f9a022
--- /dev/null
+++ b/tests/unit/test_action_generator_structured_outputs.py
@@ -0,0 +1,57 @@
+import pytest
+
+from tinytroupe.agent.action_generator import ActionGenerator, ActionRefusedException
+from tinytroupe.agent import TinyPerson, CognitiveActionModel
+
+
+class FakeClient:
+    def __init__(self, message):
+        self._message = message
+
+    def send_message(self, *args, **kwargs):
+        return self._message
+
+
+def test_prefers_parsed_payload(monkeypatch):
+    TinyPerson.clear_agents()
+    # Build a parsed payload consistent with CognitiveActionModel
+    parsed = {
+        "action": {"type": "THINK", "content": "test content", "target": ""},
+        "cognitive_state": {
+            "goals": "g",
+            "context": ["c"],
+            "attention": "a",
+            "emotions": "e",
+        },
+    }
+
+    message = {"role": "assistant", "content": "{\"action\":{}}", "parsed": parsed}
+
+    # Patch client used by action generator to return our fake message
+    from tinytroupe import openai_utils
+
+    monkeypatch.setattr(openai_utils, "client", lambda: FakeClient(message))
+
+    agent = TinyPerson(name="Tester")
+    ag = ActionGenerator()
+
+    action, role, content = ag._generate_tentative_action(agent, agent.current_messages)[0:3]
+
+    assert content == parsed
+    assert action == parsed["action"]
+    assert role == "assistant"
+
+
+def test_refusal_raises(monkeypatch):
+    TinyPerson.clear_agents()
+    message = {"role": "assistant", "content": "{}", "refusal": "safety refusal"}
+
+    from tinytroupe import openai_utils
+
+    monkeypatch.setattr(openai_utils, "client", lambda: FakeClient(message))
+
+    agent = TinyPerson(name="Tester")
+    ag = ActionGenerator()
+
+    with pytest.raises(ActionRefusedException):
+        ag._generate_tentative_action(agent, agent.current_messages)
diff --git a/tests/unit/test_openai_utils_responses.py b/tests/unit/test_openai_utils_responses.py
new file mode 100644
index 0000000..edb1096
--- /dev/null
+++ b/tests/unit/test_openai_utils_responses.py
@@ -0,0 +1,72 @@
+import types
+from unittest.mock import patch
+
+import tinytroupe.openai_utils as openai_utils
+
+
+class _StubResponsesClient:
+    def __init__(self):
+        self.last_params = None
+
+    class _Responses:
+        def __init__(self, outer):
+            self._outer = outer
+
+        def create(self, **kwargs):
+            # Capture params for assertions
+            self._outer.last_params = kwargs
+
+            # Return minimal object with output_text like the SDK does
+            return types.SimpleNamespace(output_text="ok")
+
+    @property
+    def responses(self):
+        return _StubResponsesClient._Responses(self)
+
+
+def test_send_message_uses_responses_api_when_api_mode_is_responses():
+    stub = _StubResponsesClient()
+
+    # Patch setup to force responses mode and inject stub client
+    original_setup = openai_utils.OpenAIClient._setup_from_config
+
+    def _setup_with_responses(self):
+        self.client = stub
+        self.api_mode = "responses"
+
+    try:
+        openai_utils.OpenAIClient._setup_from_config = _setup_with_responses
+
+        client = openai_utils.OpenAIClient()
+
+        messages = [
+            {"role": "system", "content": "You are terse."},
+            {"role": "user", "content": "Say ok."},
+        ]
+
+        result = client.send_message(
+            current_messages=messages,
+            model="gpt-4.1-mini",
+            temperature=0.2,
+            max_tokens=128,
+        )
+
+        # Verify mapping to Responses API
+        assert stub.last_params is not None
+        assert stub.last_params.get("model") == "gpt-4.1-mini"
+        assert stub.last_params.get("temperature") == 0.2
+        assert stub.last_params.get("max_output_tokens") == 128
+
+        input_msgs = stub.last_params.get("input")
+        assert isinstance(input_msgs, list) and len(input_msgs) == 2
+        assert input_msgs[0]["role"] == "system"
+        assert input_msgs[1]["role"] == "user"
+        assert input_msgs[1]["content"] == "Say ok."
+
+        # Verify extractor returns assistant content
+        assert result["content"].lower().startswith("ok")
+
+    finally:
+        openai_utils.OpenAIClient._setup_from_config = original_setup
diff --git a/tinytroupe/agent/action_generator.py b/tinytroupe/agent/action_generator.py
index 08aca7f..c38ddf2 100644
--- a/tinytroupe/agent/action_generator.py
+++ b/tinytroupe/agent/action_generator.py
@@ -288,7 +288,19 @@ def _generate_tentative_action(self, agent, current_messages, feedback_from_prev
 
         if not self.enable_reasoning_step:
             logger.debug(f"[{agent.name}] Reasoning step disabled.")
-            next_message = openai_utils.client().send_message(current_messages_context, response_format=CognitiveActionModel)
+            # Prefer a strict JSON Schema envelope for structured output; fall back to the Pydantic class if schema generation fails
+            response_format = CognitiveActionModel
+            try:
+                # Build a JSON Schema envelope with strict mode from the Pydantic model
+                from pydantic import TypeAdapter
+                schema = TypeAdapter(CognitiveActionModel).json_schema()
+                response_format = {
+                    "type": "json_schema",
+                    "json_schema": {"name": "CognitiveActionModel", "schema": schema, "strict": True},
+                }
+            except Exception:
+                pass
+            next_message = openai_utils.client().send_message(current_messages_context, response_format=response_format)
 
         else:
             logger.debug(f"[{agent.name}] Reasoning step enabled.")
@@ -302,11 +314,31 @@ def _generate_tentative_action(self, agent, current_messages, feedback_from_prev
"}) - next_message = openai_utils.client().send_message(current_messages_context, response_format=CognitiveActionModelWithReasoning) + response_format = CognitiveActionModelWithReasoning + try: + from pydantic import TypeAdapter + schema = TypeAdapter(CognitiveActionModelWithReasoning).json_schema() + response_format = { + "type": "json_schema", + "json_schema": {"name": "CognitiveActionModelWithReasoning", "schema": schema, "strict": True}, + } + except Exception: + pass + next_message = openai_utils.client().send_message(current_messages_context, response_format=response_format) logger.debug(f"[{agent.name}] Received message: {next_message}") - role, content = next_message["role"], utils.extract_json(next_message["content"]) + # Prefer typed parsed payload when available; otherwise, fall back to JSON extraction + role = next_message.get("role", "assistant") + + # Handle explicit refusal from provider payloads when present + refusal = next_message.get("refusal") + if refusal: + # Log and raise a specialized exception to surface actionable errors + logger.warning(f"[{agent.name}] Model refusal received: {refusal}") + raise ActionRefusedException(refusal) + + content = next_message.get("parsed") or utils.extract_json(next_message["content"]) action = content['action'] logger.debug(f"{agent.name}'s action: {action}") @@ -530,3 +562,8 @@ class PoorQualityActionException(Exception): def __init__(self, message="The generated action is of poor quality"): self.message = message super().__init__(self.message) + + +class ActionRefusedException(Exception): + def __init__(self, refusal_message: str = "The model refused to generate an action"): + super().__init__(refusal_message) diff --git a/tinytroupe/config.ini b/tinytroupe/config.ini index 353bdb0..6c45ac5 100644 --- a/tinytroupe/config.ini +++ b/tinytroupe/config.ini @@ -1,4 +1,6 @@ [OpenAI] +# Enable Responses API path for local runs +API_MODE=responses # # OpenAI or Azure OpenAI Service # diff --git a/tinytroupe/openai_utils.py b/tinytroupe/openai_utils.py index c7a04cb..2c6b4d4 100644 --- a/tinytroupe/openai_utils.py +++ b/tinytroupe/openai_utils.py @@ -9,6 +9,7 @@ import tiktoken +from pydantic import BaseModel from tinytroupe import utils from tinytroupe.control import transactional from tinytroupe import default @@ -53,6 +54,11 @@ def _setup_from_config(self): Sets up the OpenAI API configurations for this client. """ self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + # API mode: 'responses' or 'legacy' (default to legacy to preserve behavior unless configured) + try: + self.api_mode = config["OpenAI"].get("API_MODE", "legacy").strip().lower() + except Exception: + self.api_mode = "legacy" @config_manager.config_defaults( model="model", @@ -227,57 +233,156 @@ def _raw_model_call(self, model, chat_api_params): """ Calls the OpenAI API with the given parameters. Subclasses should override this method to implement their own API calls. 
- """ + """ - # adjust parameters depending on the model + # Choose API mode (legacy chat vs responses) + api_mode = config["OpenAI"].get("API_MODE", "legacy").lower() + + # adjust parameters depending on the model (legacy path expectations) if self._is_reasoning_model(model): # Reasoning models have slightly different parameters - del chat_api_params["stream"] - del chat_api_params["temperature"] - del chat_api_params["top_p"] - del chat_api_params["frequency_penalty"] - del chat_api_params["presence_penalty"] - - chat_api_params["max_completion_tokens"] = chat_api_params["max_tokens"] - del chat_api_params["max_tokens"] + if api_mode == "legacy": + if "stream" in chat_api_params: del chat_api_params["stream"] + if "temperature" in chat_api_params: del chat_api_params["temperature"] + if "top_p" in chat_api_params: del chat_api_params["top_p"] + if "frequency_penalty" in chat_api_params: del chat_api_params["frequency_penalty"] + if "presence_penalty" in chat_api_params: del chat_api_params["presence_penalty"] - chat_api_params["reasoning_effort"] = default["reasoning_effort"] + chat_api_params["max_completion_tokens"] = chat_api_params["max_tokens"] + del chat_api_params["max_tokens"] + chat_api_params["reasoning_effort"] = default["reasoning_effort"] # To make the log cleaner, we remove the messages from the logged parameters - logged_params = {k: v for k, v in chat_api_params.items() if k != "messages"} + logged_params = {k: v for k, v in chat_api_params.items() if k != "messages"} - if "response_format" in chat_api_params: - # to enforce the response format via pydantic, we need to use a different method + if api_mode == "responses": + # Build Responses API params + responses_params = self._build_responses_params(model, chat_api_params) + + # Log sanitized params and full messages separately + rp_logged = {k: v for k, v in responses_params.items() if k != "input" and k != "messages"} + logger.debug(f"Calling LLM model (Responses API) with these parameters: {rp_logged}. Not showing 'messages'/'input' parameter.") + logger.debug(f" --> Complete messages sent to LLM: {responses_params.get('messages') or responses_params.get('input')}") + + # If using Pydantic model, prefer parse helper when available + if isinstance(chat_api_params.get("response_format"), type): + # Responses parse path with Pydantic model + return self.client.responses.parse(**responses_params) + else: + return self.client.responses.create(**responses_params) + # Legacy Chat Completions path + if "response_format" in chat_api_params: if "stream" in chat_api_params: del chat_api_params["stream"] logger.debug(f"Calling LLM model (using .parse too) with these parameters: {logged_params}. Not showing 'messages' parameter.") - # complete message logger.debug(f" --> Complete messages sent to LLM: {chat_api_params['messages']}") + return self.client.beta.chat.completions.parse(**chat_api_params) + else: + logger.debug(f"Calling LLM model with these parameters: {logged_params}. Not showing 'messages' parameter.") + return self.client.chat.completions.create(**chat_api_params) - result_message = self.client.beta.chat.completions.parse( - **chat_api_params - ) + def _build_responses_params(self, model, chat_api_params): + """ + Map legacy chat-style params to Responses API params. + - Prefer 'messages' as input if present; else use 'input'. + - Map max_tokens -> max_output_tokens + - For reasoning models add reasoning: { effort: ... } and drop sampling params. 
+        - If response_format is a Pydantic model class, pass it through directly (Responses parse supports Pydantic);
+          if it is a dict (JSON Schema), pass it as-is, with strict mode expected to be set by the caller.
+        """
+        params = {
+            "model": model,
+            # The Responses API takes the conversation under 'input' rather than 'messages'
+            "input": chat_api_params.get("messages"),
+            "max_output_tokens": chat_api_params.get("max_tokens"),
+            "timeout": chat_api_params.get("timeout"),
+        }
 
-            return result_message
-
+        # Include response_format (Pydantic class or JSON Schema dict)
+        if chat_api_params.get("response_format") is not None:
+            rf = chat_api_params["response_format"]
+            params["response_format"] = rf
+
+        # Reasoning models: remove sampling controls and set reasoning effort
+        if self._is_reasoning_model(model):
+            params["reasoning"] = {"effort": default["reasoning_effort"]}
         else:
-            logger.debug(f"Calling LLM model with these parameters: {logged_params}. Not showing 'messages' parameter.")
-            return self.client.chat.completions.create(
-                **chat_api_params
-            )
+            # Non-reasoning models: sampling controls are valid
+            for key in ("temperature", "top_p", "frequency_penalty", "presence_penalty"):
+                if chat_api_params.get(key) is not None:
+                    params[key] = chat_api_params[key]
+
+        return params
 
     def _is_reasoning_model(self, model):
-        return "o1" in model or "o3" in model
+        return ("o1" in model) or ("o3" in model) or ("gpt-5" in model)
 
     def _raw_model_response_extractor(self, response):
         """
-        Extracts the response from the API response. Subclasses should
-        override this method to implement their own response extraction.
+        Extract the response into a unified dict shape used by callers.
+        Supports both Chat Completions and Responses API return shapes.
""" - return response.choices[0].message.to_dict() + # Legacy chat path + if hasattr(response, "choices"): + return response.choices[0].message.to_dict() + + # Responses API path + try: + # Try to obtain a dict-like representation + resp_dict = None + if hasattr(response, "to_dict"): + resp_dict = response.to_dict() + elif hasattr(response, "model_dump"): + resp_dict = response.model_dump() + + # Fall back to attribute traversal if needed + output_items = None + if resp_dict is not None: + output_items = resp_dict.get("output") or resp_dict.get("outputs") + else: + output_items = getattr(response, "output", None) or getattr(response, "outputs", None) + + role = "assistant" + content_text = None + parsed = None + refusal = None + + if output_items: + # Expect the first item to be a message with content parts + first = output_items[0] + contents = first.get("content") if isinstance(first, dict) else getattr(first, "content", []) + for part in contents or []: + ptype = part.get("type") if isinstance(part, dict) else getattr(part, "type", None) + # Text output + if ptype in ("output_text", "text"): + content_text = part.get("text") if isinstance(part, dict) else getattr(part, "text", None) + # Structured parse + if (isinstance(part, dict) and "parsed" in part): + parsed = part.get("parsed") + elif hasattr(part, "parsed"): + parsed = getattr(part, "parsed") + # Refusal + if (isinstance(part, dict) and "refusal" in part): + refusal = part.get("refusal") + elif hasattr(part, "refusal"): + refusal = getattr(part, "refusal") + + # As a final fallback, try convenience property 'output_text' + if content_text is None and hasattr(response, "output_text"): + try: + content_text = response.output_text + except Exception: + pass + + return {"role": role, "content": content_text, "parsed": parsed, "refusal": refusal} + except Exception as e: + logger.error(f"Failed to extract Responses API payload: {e}") + # best-effort fallback + return {"role": "assistant", "content": None, "parsed": None, "refusal": None} def _count_tokens(self, messages: list, model: str): """