diff --git a/config.ini b/config.ini
new file mode 100644
index 0000000..473b87d
--- /dev/null
+++ b/config.ini
@@ -0,0 +1,4 @@
+[OpenAI]
+API_MODE=responses
+
+
diff --git a/tests/unit/test_action_generator_structured_outputs.py b/tests/unit/test_action_generator_structured_outputs.py
new file mode 100644
index 0000000..1f9a022
--- /dev/null
+++ b/tests/unit/test_action_generator_structured_outputs.py
@@ -0,0 +1,57 @@
+import pytest
+
+from tinytroupe.agent.action_generator import ActionGenerator, ActionRefusedException
+from tinytroupe.agent import TinyPerson, CognitiveActionModel
+
+
+class FakeClient:
+    def __init__(self, message):
+        self._message = message
+
+    def send_message(self, *args, **kwargs):
+        return self._message
+
+
+def test_prefers_parsed_payload(monkeypatch):
+    TinyPerson.clear_agents()
+    # Build a parsed payload consistent with CognitiveActionModel
+    parsed = {
+        "action": {"type": "THINK", "content": "test content", "target": ""},
+        "cognitive_state": {
+            "goals": "g",
+            "context": ["c"],
+            "attention": "a",
+            "emotions": "e",
+        },
+    }
+
+    message = {"role": "assistant", "content": "{\"action\":{}}", "parsed": parsed}
+
+    # Patch client used by action generator to return our fake message
+    from tinytroupe import openai_utils
+
+    monkeypatch.setattr(openai_utils, "client", lambda: FakeClient(message))
+
+    agent = TinyPerson(name="Tester")
+    ag = ActionGenerator()
+
+    action, role, content = ag._generate_tentative_action(agent, agent.current_messages)[0:3]
+
+    assert content == parsed
+    assert action == parsed["action"]
+    assert role == "assistant"
+
+
+def test_refusal_raises(monkeypatch):
+    TinyPerson.clear_agents()
+    message = {"role": "assistant", "content": "{}", "refusal": "safety refusal"}
+
+    from tinytroupe import openai_utils
+
+    monkeypatch.setattr(openai_utils, "client", lambda: FakeClient(message))
+
+    agent = TinyPerson(name="Tester")
+    ag = ActionGenerator()
+
+    with pytest.raises(ActionRefusedException):
+        ag._generate_tentative_action(agent, agent.current_messages)
diff --git a/tests/unit/test_openai_utils_responses.py b/tests/unit/test_openai_utils_responses.py
new file mode 100644
index 0000000..edb1096
--- /dev/null
+++ b/tests/unit/test_openai_utils_responses.py
@@ -0,0 +1,72 @@
+import types
+from unittest.mock import patch
+
+import tinytroupe.openai_utils as openai_utils
+
+
+class _StubResponsesClient:
+    def __init__(self):
+        self.last_params = None
+
+    class _Responses:
+        def __init__(self, outer):
+            self._outer = outer
+
+        def create(self, **kwargs):
+            # Capture params for assertions
+            self._outer.last_params = kwargs
+
+            # Return minimal object with output_text like the SDK does
+            return types.SimpleNamespace(output_text="ok")
+
+    @property
+    def responses(self):
+        return _StubResponsesClient._Responses(self)
+
+
+def test_send_message_uses_responses_api_when_api_mode_is_responses():
+    stub = _StubResponsesClient()
+
+    # Patch setup to force responses mode and inject stub client
+    original_setup = openai_utils.OpenAIClient._setup_from_config
+
+    def _setup_with_responses(self):
+        self.client = stub
+        self.api_mode = "responses"
+
+    try:
+        openai_utils.OpenAIClient._setup_from_config = _setup_with_responses
+
+        client = openai_utils.OpenAIClient()
+
+        messages = [
+            {"role": "system", "content": "You are terse."},
+            {"role": "user", "content": "Say ok."},
+        ]
+
+        result = client.send_message(
+            current_messages=messages,
+            model="gpt-4.1-mini",
+            temperature=0.2,
+            max_tokens=128,
+        )
+
+        # Verify mapping to Responses API
+        assert stub.last_params is not None
+        assert stub.last_params.get("model") == "gpt-4.1-mini"
+        assert stub.last_params.get("temperature") == 0.2
+        assert stub.last_params.get("max_output_tokens") == 128
+
+        input_msgs = stub.last_params.get("input")
+        assert isinstance(input_msgs, list) and len(input_msgs) == 2
+        assert input_msgs[0]["role"] == "system"
+        assert input_msgs[1]["role"] == "user"
+        assert input_msgs[1]["content"] == "Say ok."
+
+        # Verify extractor returns assistant content
+        assert result["content"].lower().startswith("ok")
+
+    finally:
+        openai_utils.OpenAIClient._setup_from_config = original_setup
diff --git a/tinytroupe/agent/action_generator.py b/tinytroupe/agent/action_generator.py
index 08aca7f..c38ddf2 100644
--- a/tinytroupe/agent/action_generator.py
+++ b/tinytroupe/agent/action_generator.py
@@ -288,7 +288,19 @@ def _generate_tentative_action(self, agent, current_messages, feedback_from_prev
 
         if not self.enable_reasoning_step:
             logger.debug(f"[{agent.name}] Reasoning step disabled.")
-            next_message = openai_utils.client().send_message(current_messages_context, response_format=CognitiveActionModel)
+            # Prefer a strict JSON Schema envelope for structured output; fall back to the Pydantic class if schema generation fails
+            response_format = CognitiveActionModel
+            try:
+                # Build a JSON Schema envelope with strict mode from the Pydantic model
+                from pydantic import TypeAdapter
+                schema = TypeAdapter(CognitiveActionModel).json_schema()
+                response_format = {
+                    "type": "json_schema",
+                    "json_schema": {"name": "CognitiveActionModel", "schema": schema, "strict": True},
+                }
+            except Exception:
+                pass
+            next_message = openai_utils.client().send_message(current_messages_context, response_format=response_format)
 
         else:
             logger.debug(f"[{agent.name}] Reasoning step enabled.")
@@ -302,11 +314,31 @@ def _generate_tentative_action(self, agent, current_messages, feedback_from_prev
"}) - next_message = openai_utils.client().send_message(current_messages_context, response_format=CognitiveActionModelWithReasoning) + response_format = CognitiveActionModelWithReasoning + try: + from pydantic import TypeAdapter + schema = TypeAdapter(CognitiveActionModelWithReasoning).json_schema() + response_format = { + "type": "json_schema", + "json_schema": {"name": "CognitiveActionModelWithReasoning", "schema": schema, "strict": True}, + } + except Exception: + pass + next_message = openai_utils.client().send_message(current_messages_context, response_format=response_format) logger.debug(f"[{agent.name}] Received message: {next_message}") - role, content = next_message["role"], utils.extract_json(next_message["content"]) + # Prefer typed parsed payload when available; otherwise, fall back to JSON extraction + role = next_message.get("role", "assistant") + + # Handle explicit refusal from provider payloads when present + refusal = next_message.get("refusal") + if refusal: + # Log and raise a specialized exception to surface actionable errors + logger.warning(f"[{agent.name}] Model refusal received: {refusal}") + raise ActionRefusedException(refusal) + + content = next_message.get("parsed") or utils.extract_json(next_message["content"]) action = content['action'] logger.debug(f"{agent.name}'s action: {action}") @@ -530,3 +562,8 @@ class PoorQualityActionException(Exception): def __init__(self, message="The generated action is of poor quality"): self.message = message super().__init__(self.message) + + +class ActionRefusedException(Exception): + def __init__(self, refusal_message: str = "The model refused to generate an action"): + super().__init__(refusal_message) diff --git a/tinytroupe/config.ini b/tinytroupe/config.ini index 353bdb0..6c45ac5 100644 --- a/tinytroupe/config.ini +++ b/tinytroupe/config.ini @@ -1,4 +1,6 @@ [OpenAI] +# Enable Responses API path for local runs +API_MODE=responses # # OpenAI or Azure OpenAI Service # diff --git a/tinytroupe/openai_utils.py b/tinytroupe/openai_utils.py index c7a04cb..2c6b4d4 100644 --- a/tinytroupe/openai_utils.py +++ b/tinytroupe/openai_utils.py @@ -9,6 +9,7 @@ import tiktoken +from pydantic import BaseModel from tinytroupe import utils from tinytroupe.control import transactional from tinytroupe import default @@ -53,6 +54,11 @@ def _setup_from_config(self): Sets up the OpenAI API configurations for this client. """ self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + # API mode: 'responses' or 'legacy' (default to legacy to preserve behavior unless configured) + try: + self.api_mode = config["OpenAI"].get("API_MODE", "legacy").strip().lower() + except Exception: + self.api_mode = "legacy" @config_manager.config_defaults( model="model", @@ -227,57 +233,156 @@ def _raw_model_call(self, model, chat_api_params): """ Calls the OpenAI API with the given parameters. Subclasses should override this method to implement their own API calls. 
- """ + """ - # adjust parameters depending on the model + # Choose API mode (legacy chat vs responses) + api_mode = config["OpenAI"].get("API_MODE", "legacy").lower() + + # adjust parameters depending on the model (legacy path expectations) if self._is_reasoning_model(model): # Reasoning models have slightly different parameters - del chat_api_params["stream"] - del chat_api_params["temperature"] - del chat_api_params["top_p"] - del chat_api_params["frequency_penalty"] - del chat_api_params["presence_penalty"] - - chat_api_params["max_completion_tokens"] = chat_api_params["max_tokens"] - del chat_api_params["max_tokens"] + if api_mode == "legacy": + if "stream" in chat_api_params: del chat_api_params["stream"] + if "temperature" in chat_api_params: del chat_api_params["temperature"] + if "top_p" in chat_api_params: del chat_api_params["top_p"] + if "frequency_penalty" in chat_api_params: del chat_api_params["frequency_penalty"] + if "presence_penalty" in chat_api_params: del chat_api_params["presence_penalty"] - chat_api_params["reasoning_effort"] = default["reasoning_effort"] + chat_api_params["max_completion_tokens"] = chat_api_params["max_tokens"] + del chat_api_params["max_tokens"] + chat_api_params["reasoning_effort"] = default["reasoning_effort"] # To make the log cleaner, we remove the messages from the logged parameters - logged_params = {k: v for k, v in chat_api_params.items() if k != "messages"} + logged_params = {k: v for k, v in chat_api_params.items() if k != "messages"} - if "response_format" in chat_api_params: - # to enforce the response format via pydantic, we need to use a different method + if api_mode == "responses": + # Build Responses API params + responses_params = self._build_responses_params(model, chat_api_params) + + # Log sanitized params and full messages separately + rp_logged = {k: v for k, v in responses_params.items() if k != "input" and k != "messages"} + logger.debug(f"Calling LLM model (Responses API) with these parameters: {rp_logged}. Not showing 'messages'/'input' parameter.") + logger.debug(f" --> Complete messages sent to LLM: {responses_params.get('messages') or responses_params.get('input')}") + + # If using Pydantic model, prefer parse helper when available + if isinstance(chat_api_params.get("response_format"), type): + # Responses parse path with Pydantic model + return self.client.responses.parse(**responses_params) + else: + return self.client.responses.create(**responses_params) + # Legacy Chat Completions path + if "response_format" in chat_api_params: if "stream" in chat_api_params: del chat_api_params["stream"] logger.debug(f"Calling LLM model (using .parse too) with these parameters: {logged_params}. Not showing 'messages' parameter.") - # complete message logger.debug(f" --> Complete messages sent to LLM: {chat_api_params['messages']}") + return self.client.beta.chat.completions.parse(**chat_api_params) + else: + logger.debug(f"Calling LLM model with these parameters: {logged_params}. Not showing 'messages' parameter.") + return self.client.chat.completions.create(**chat_api_params) - result_message = self.client.beta.chat.completions.parse( - **chat_api_params - ) + def _build_responses_params(self, model, chat_api_params): + """ + Map legacy chat-style params to Responses API params. + - Prefer 'messages' as input if present; else use 'input'. + - Map max_tokens -> max_output_tokens + - For reasoning models add reasoning: { effort: ... } and drop sampling params. 
+        - If response_format is a Pydantic model class, pass it through directly (Responses parse supports Pydantic);
+          if it is a dict (JSON Schema), pass it as-is, with strict mode expected to be set by the caller.
+        """
+        params = {
+            "model": model,
+            # The Responses API takes the conversation under 'input' rather than 'messages'
+            "input": chat_api_params.get("messages"),
+            "max_output_tokens": chat_api_params.get("max_tokens"),
+            "timeout": chat_api_params.get("timeout"),
+        }
 
-            return result_message
-
+        # Include response_format (Pydantic class or JSON Schema dict)
+        if chat_api_params.get("response_format") is not None:
+            rf = chat_api_params["response_format"]
+            params["response_format"] = rf
+
+        # Reasoning models: remove sampling controls and set reasoning effort
+        if self._is_reasoning_model(model):
+            params["reasoning"] = {"effort": default["reasoning_effort"]}
         else:
-            logger.debug(f"Calling LLM model with these parameters: {logged_params}. Not showing 'messages' parameter.")
-            return self.client.chat.completions.create(
-                **chat_api_params
-            )
+            # Non-reasoning models: sampling controls are valid
+            for key in ("temperature", "top_p", "frequency_penalty", "presence_penalty"):
+                if chat_api_params.get(key) is not None:
+                    params[key] = chat_api_params[key]
+
+        return params
 
     def _is_reasoning_model(self, model):
-        return "o1" in model or "o3" in model
+        return ("o1" in model) or ("o3" in model) or ("gpt-5" in model)
 
     def _raw_model_response_extractor(self, response):
         """
-        Extracts the response from the API response. Subclasses should
-        override this method to implement their own response extraction.
+        Extract the response into a unified dict shape used by callers.
+        Supports both Chat Completions and Responses API return shapes.
""" - return response.choices[0].message.to_dict() + # Legacy chat path + if hasattr(response, "choices"): + return response.choices[0].message.to_dict() + + # Responses API path + try: + # Try to obtain a dict-like representation + resp_dict = None + if hasattr(response, "to_dict"): + resp_dict = response.to_dict() + elif hasattr(response, "model_dump"): + resp_dict = response.model_dump() + + # Fall back to attribute traversal if needed + output_items = None + if resp_dict is not None: + output_items = resp_dict.get("output") or resp_dict.get("outputs") + else: + output_items = getattr(response, "output", None) or getattr(response, "outputs", None) + + role = "assistant" + content_text = None + parsed = None + refusal = None + + if output_items: + # Expect the first item to be a message with content parts + first = output_items[0] + contents = first.get("content") if isinstance(first, dict) else getattr(first, "content", []) + for part in contents or []: + ptype = part.get("type") if isinstance(part, dict) else getattr(part, "type", None) + # Text output + if ptype in ("output_text", "text"): + content_text = part.get("text") if isinstance(part, dict) else getattr(part, "text", None) + # Structured parse + if (isinstance(part, dict) and "parsed" in part): + parsed = part.get("parsed") + elif hasattr(part, "parsed"): + parsed = getattr(part, "parsed") + # Refusal + if (isinstance(part, dict) and "refusal" in part): + refusal = part.get("refusal") + elif hasattr(part, "refusal"): + refusal = getattr(part, "refusal") + + # As a final fallback, try convenience property 'output_text' + if content_text is None and hasattr(response, "output_text"): + try: + content_text = response.output_text + except Exception: + pass + + return {"role": role, "content": content_text, "parsed": parsed, "refusal": refusal} + except Exception as e: + logger.error(f"Failed to extract Responses API payload: {e}") + # best-effort fallback + return {"role": "assistant", "content": None, "parsed": None, "refusal": None} def _count_tokens(self, messages: list, model: str): """