diff --git a/docs/openapi.json b/docs/openapi.json index bff858bf2..80873199d 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -3795,19 +3795,19 @@ } } }, - "/v1/infer": { + "/v1/responses": { "post": { "tags": [ - "rlsapi-v1" + "responses" ], - "summary": "Infer Endpoint", - "description": "Handle rlsapi v1 /infer requests for stateless inference.\n\nThis endpoint serves requests from the RHEL Lightspeed Command Line Assistant (CLA).\n\nAccepts a question with optional context (stdin, attachments, terminal output,\nsystem info) and returns an LLM-generated response.\n\nArgs:\n infer_request: The inference request containing question and context.\n request: The FastAPI request object for accessing headers and state.\n background_tasks: FastAPI background tasks for async Splunk event sending.\n auth: Authentication tuple from the configured auth provider.\n\nReturns:\n RlsapiV1InferResponse containing the generated response text and request ID.\n\nRaises:\n HTTPException: 503 if the LLM service is unavailable.", - "operationId": "infer_endpoint_v1_infer_post", + "summary": "Responses Endpoint Handler", + "description": "Handle request to the /responses endpoint using Responses API (LCORE specification).\n\nProcesses a POST request to the responses endpoint, forwarding the\nuser's request to a selected Llama Stack LLM and returning the generated response\nfollowing the LCORE OpenAPI specification.\n\nReturns:\n ResponsesResponse: Contains the response following LCORE specification (non-streaming).\n StreamingResponse: SSE-formatted streaming response with enriched events (streaming).\n - response.created event includes conversation attribute\n - response.completed event includes available_quotas attribute\n\nRaises:\n HTTPException:\n - 401: Unauthorized - Missing or invalid credentials\n - 403: Forbidden - Insufficient permissions or model override not allowed\n - 404: Not Found - Conversation, model, or provider not found\n - 413: Prompt too long - Prompt 
exceeded model's context window size\n - 422: Unprocessable Entity - Request validation failed\n - 429: Quota limit exceeded - The token quota for model or user has been exceeded\n - 500: Internal Server Error - Configuration not loaded or other server errors\n - 503: Service Unavailable - Unable to connect to Llama Stack backend", + "operationId": "responses_endpoint_handler_v1_responses_post", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RlsapiV1InferRequest" + "$ref": "#/components/schemas/ResponsesRequest" } } }, @@ -3819,14 +3819,59 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RlsapiV1InferResponse" + "$ref": "#/components/schemas/ResponsesResponse" }, "example": { - "data": { - "request_id": "01JDKR8N7QW9ZMXVGK3PB5TQWZ", - "text": "To list files in Linux, use the `ls` command." + "available_quotas": { + "daily": 1000, + "monthly": 50000 + }, + "completed_at": 1704067250, + "conversation": "0d21ba731f21f798dc9680125d5d6f493e4a7ab79f25670e", + "created_at": 1704067200, + "id": "resp_abc123", + "instructions": "You are a helpful assistant", + "model": "openai/gpt-4-turbo", + "object": "response", + "output": [ + { + "content": [ + { + "text": "Kubernetes is an open-source container orchestration system...", + "type": "output_text" + } + ], + "role": "assistant", + "type": "message" + } + ], + "output_text": "Kubernetes is an open-source container orchestration system...", + "parallel_tool_calls": true, + "status": "completed", + "store": true, + "temperature": 0.7, + "text": { + "format": { + "type": "text" + } + }, + "usage": { + "input_tokens": 100, + "output_tokens": 50, + "total_tokens": 150 } } + }, + "text/event-stream": { + "schema": { + "type": "string" + }, + "examples": { + "stream": { + "value": "event: response.created\ndata: 
{\"type\":\"response.created\",\"sequence_number\":0,\"response\":{\"id\":\"resp_abc\",\"created_at\":1704067200,\"status\":\"in_progress\",\"output\":[],\"conversation\":\"0d21ba731f21f798dc9680125d5d6f49\",\"available_quotas\":{},\"output_text\":\"\"}}\n\nevent: response.output_item.added\ndata: {\"response_id\":\"resp_abc\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello! How can I help?\"}]},\"output_index\":0,\"sequence_number\":1}\n\nevent: response.output_item.done\ndata: {\"response_id\":\"resp_abc\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello! How can I help?\"}]},\"output_index\":0,\"sequence_number\":2}\n\nevent: response.completed\ndata: {\"type\":\"response.completed\",\"sequence_number\":3,\"response\":{\"id\":\"resp_abc\",\"created_at\":1704067200,\"completed_at\":1704067250,\"status\":\"completed\",\"output\":[{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello! How can I help?\"}]}],\"usage\":{\"input_tokens\":10,\"output_tokens\":6,\"total_tokens\":16},\"conversation\":\"0d21ba731f21f798dc9680125d5d6f49\",\"available_quotas\":{\"daily\":1000,\"monthly\":50000},\"output_text\":\"Hello! 
How can I help?\"}}\n\ndata: [DONE]\n\n" + } + }, + "description": "SSE stream of events" } } }, @@ -3866,6 +3911,14 @@ "$ref": "#/components/schemas/ForbiddenResponse" }, "examples": { + "conversation read": { + "value": { + "detail": { + "cause": "User 6789 does not have permission to read conversation with ID 123e4567-e89b-12d3-a456-426614174000", + "response": "User does not have permission to perform this action" + } + } + }, "endpoint": { "value": { "detail": { @@ -3873,6 +3926,50 @@ "response": "User does not have permission to access this endpoint" } } + }, + "model override": { + "value": { + "detail": { + "cause": "User lacks model_override permission required to override model/provider.", + "response": "This instance does not permit overriding model/provider in the query request (missing permission: MODEL_OVERRIDE). Please remove the model and provider fields from your request." + } + } + } + } + } + } + }, + "404": { + "description": "Resource not found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/NotFoundResponse" + }, + "examples": { + "conversation": { + "value": { + "detail": { + "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", + "response": "Conversation not found" + } + } + }, + "provider": { + "value": { + "detail": { + "cause": "Provider with ID openai does not exist", + "response": "Provider not found" + } + } + }, + "model": { + "value": { + "detail": { + "cause": "Model with ID gpt-4-turbo is not configured", + "response": "Model not found" + } + } } } } @@ -4008,6 +4105,16 @@ "application/json": { "schema": { "$ref": "#/components/schemas/InternalServerErrorResponse" + }, + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } } } } @@ -4035,26 +4142,37 @@ } } }, - "/readiness": { - "get": { + "/v1/infer": { + "post": { "tags": [ - 
"health" + "rlsapi-v1" ], - "summary": "Readiness Probe Get Method", - "description": "Handle the readiness probe endpoint, returning service readiness.\n\nIf any provider reports an error status, responds with HTTP 503\nand details of unhealthy providers; otherwise, indicates the\nservice is ready.\n\nReturns:\n ReadinessResponse: Object with `ready` indicating overall readiness,\n `reason` explaining the outcome, and `providers` containing the list of\n unhealthy ProviderHealthStatus entries (empty when ready).", - "operationId": "readiness_probe_get_method_readiness_get", + "summary": "Infer Endpoint", + "description": "Handle rlsapi v1 /infer requests for stateless inference.\n\nThis endpoint serves requests from the RHEL Lightspeed Command Line Assistant (CLA).\n\nAccepts a question with optional context (stdin, attachments, terminal output,\nsystem info) and returns an LLM-generated response.\n\nArgs:\n infer_request: The inference request containing question and context.\n request: The FastAPI request object for accessing headers and state.\n background_tasks: FastAPI background tasks for async Splunk event sending.\n auth: Authentication tuple from the configured auth provider.\n\nReturns:\n RlsapiV1InferResponse containing the generated response text and request ID.\n\nRaises:\n HTTPException: 503 if the LLM service is unavailable.", + "operationId": "infer_endpoint_v1_infer_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RlsapiV1InferRequest" + } + } + }, + "required": true + }, "responses": { "200": { "description": "Successful response", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ReadinessResponse" + "$ref": "#/components/schemas/RlsapiV1InferResponse" }, "example": { - "providers": [], - "ready": true, - "reason": "Service is ready" + "data": { + "request_id": "01JDKR8N7QW9ZMXVGK3PB5TQWZ", + "text": "To list files in Linux, use the `ls` command." 
+ } } } } @@ -4107,167 +4225,153 @@ } } }, - "503": { - "description": "Service unavailable", + "413": { + "description": "Prompt is too long", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ServiceUnavailableResponse" + "$ref": "#/components/schemas/PromptTooLongResponse" }, "examples": { - "llama stack": { + "prompt too long": { "value": { "detail": { - "cause": "Connection error while trying to reach backend service.", - "response": "Unable to connect to Llama Stack" + "cause": "The prompt exceeds the maximum allowed length.", + "response": "Prompt is too long" } } } } } } - } - } - } - }, - "/liveness": { - "get": { - "tags": [ - "health" - ], - "summary": "Liveness Probe Get Method", - "description": "Return the liveness status of the service.\n\nReturns:\n LivenessResponse: Indicates that the service is alive.", - "operationId": "liveness_probe_get_method_liveness_get", - "responses": { - "200": { - "description": "Successful response", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/LivenessResponse" - }, - "example": { - "alive": true - } - } - } }, - "401": { - "description": "Unauthorized", + "422": { + "description": "Request validation failed", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/UnauthorizedResponse" + "$ref": "#/components/schemas/UnprocessableEntityResponse" }, "examples": { - "missing header": { + "invalid format": { "value": { "detail": { - "cause": "No Authorization header found", - "response": "Missing or invalid credentials provided by client" + "cause": "Invalid request format. 
The request body could not be parsed.", + "response": "Invalid request format" } } }, - "missing token": { + "missing attributes": { "value": { "detail": { - "cause": "No token found in Authorization header", - "response": "Missing or invalid credentials provided by client" + "cause": "Missing required attributes: ['query', 'model', 'provider']", + "response": "Missing required attributes" } } - } - } - } - } - }, - "403": { - "description": "Permission denied", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ForbiddenResponse" - }, - "examples": { - "endpoint": { + }, + "invalid value": { "value": { "detail": { - "cause": "User 6789 is not authorized to access this endpoint.", - "response": "User does not have permission to access this endpoint" + "cause": "Invalid attachment type: must be one of ['text/plain', 'application/json', 'application/yaml', 'application/xml']", + "response": "Invalid attribute value" } } } } } } - } - } - } - }, - "/authorized": { - "post": { - "tags": [ - "authorized" - ], - "summary": "Authorized Endpoint Handler", - "description": "Handle request to the /authorized endpoint.\n\nProcess POST requests to the /authorized endpoint, returning\nthe authenticated user's ID and username.\n\nThe response intentionally omits any authentication token.\n\nReturns:\n AuthorizedResponse: Contains the user ID and username of the authenticated user.", - "operationId": "authorized_endpoint_handler_authorized_post", - "responses": { - "200": { - "description": "Successful response", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/AuthorizedResponse" - }, - "example": { - "skip_userid_check": false, - "user_id": "123e4567-e89b-12d3-a456-426614174000", - "username": "user1" - } - } - } }, - "401": { - "description": "Unauthorized", + "429": { + "description": "Quota limit exceeded", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/UnauthorizedResponse" 
+ "$ref": "#/components/schemas/QuotaExceededResponse" }, "examples": { - "missing header": { + "model": { "value": { "detail": { - "cause": "No Authorization header found", - "response": "Missing or invalid credentials provided by client" + "cause": "The token quota for model gpt-4-turbo has been exceeded.", + "response": "The model quota has been exceeded" } } }, - "missing token": { + "user none": { "value": { "detail": { - "cause": "No token found in Authorization header", - "response": "Missing or invalid credentials provided by client" + "cause": "User 123 has no available tokens.", + "response": "The quota has been exceeded" } } - } - } - } - } - }, - "403": { - "description": "Permission denied", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ForbiddenResponse" - }, - "examples": { - "endpoint": { + }, + "cluster none": { "value": { "detail": { - "cause": "User 6789 is not authorized to access this endpoint.", - "response": "User does not have permission to access this endpoint" + "cause": "Cluster has no available tokens.", + "response": "The quota has been exceeded" + } + } + }, + "subject none": { + "value": { + "detail": { + "cause": "Unknown subject 999 has no available tokens.", + "response": "The quota has been exceeded" + } + } + }, + "user insufficient": { + "value": { + "detail": { + "cause": "User 123 has 5 tokens, but 10 tokens are needed.", + "response": "The quota has been exceeded" + } + } + }, + "cluster insufficient": { + "value": { + "detail": { + "cause": "Cluster has 500 tokens, but 900 tokens are needed.", + "response": "The quota has been exceeded" + } + } + }, + "subject insufficient": { + "value": { + "detail": { + "cause": "Unknown subject 999 has 3 tokens, but 6 tokens are needed.", + "response": "The quota has been exceeded" + } + } + } + } + } + } + }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/InternalServerErrorResponse" + } + } + } + }, + "503": { + "description": "Service unavailable", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ServiceUnavailableResponse" + }, + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" } } } @@ -4278,21 +4382,26 @@ } } }, - "/metrics": { + "/readiness": { "get": { "tags": [ - "metrics" + "health" ], - "summary": "Metrics Endpoint Handler", - "description": "Handle request to the /metrics endpoint.\n\nProcess GET requests to the /metrics endpoint, returning the\nlatest Prometheus metrics in form of a plain text.\n\nInitializes model metrics on the first request if not already\nset up, then responds with the current metrics snapshot in\nPrometheus format.\n\nReturns:\n PlainTextResponse: Response body containing the Prometheus metrics text\n and the Prometheus content type.", - "operationId": "metrics_endpoint_handler_metrics_get", + "summary": "Readiness Probe Get Method", + "description": "Handle the readiness probe endpoint, returning service readiness.\n\nIf any provider reports an error status, responds with HTTP 503\nand details of unhealthy providers; otherwise, indicates the\nservice is ready.\n\nReturns:\n ReadinessResponse: Object with `ready` indicating overall readiness,\n `reason` explaining the outcome, and `providers` containing the list of\n unhealthy ProviderHealthStatus entries (empty when ready).", + "operationId": "readiness_probe_get_method_readiness_get", "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { - "text/plain": { + "application/json": { "schema": { - "type": "string" + "$ref": "#/components/schemas/ReadinessResponse" + }, + "example": { + "providers": [], + "ready": true, + "reason": "Service is ready" } } } @@ -4301,6 +4410,9 @@ 
"description": "Unauthorized", "content": { "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + }, "examples": { "missing header": { "value": { @@ -4319,11 +4431,6 @@ } } } - }, - "text/plain": { - "schema": { - "$ref": "#/components/schemas/UnauthorizedResponse" - } } } }, @@ -4331,6 +4438,9 @@ "description": "Permission denied", "content": { "application/json": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, "examples": { "endpoint": { "value": { @@ -4341,33 +4451,6 @@ } } } - }, - "text/plain": { - "schema": { - "$ref": "#/components/schemas/ForbiddenResponse" - } - } - } - }, - "500": { - "description": "Internal server error", - "content": { - "application/json": { - "examples": { - "configuration": { - "value": { - "detail": { - "cause": "Lightspeed Stack configuration has not been initialized.", - "response": "Configuration is not loaded" - } - } - } - } - }, - "text/plain": { - "schema": { - "$ref": "#/components/schemas/InternalServerErrorResponse" - } } } }, @@ -4375,6 +4458,9 @@ "description": "Service unavailable", "content": { "application/json": { + "schema": { + "$ref": "#/components/schemas/ServiceUnavailableResponse" + }, "examples": { "llama stack": { "value": { @@ -4385,162 +4471,2725 @@ } } } - }, - "text/plain": { - "schema": { - "$ref": "#/components/schemas/ServiceUnavailableResponse" - } } } } } } }, - "/.well-known/agent-card.json": { + "/liveness": { "get": { "tags": [ - "a2a" + "health" ], - "summary": "Get Agent Card", - "description": "Serve the A2A Agent Card at the well-known location.\n\nThis endpoint provides the agent card that describes Lightspeed's\ncapabilities according to the A2A protocol specification.\n\nReturns:\n AgentCard: The agent card describing this agent's capabilities.", - "operationId": "get_agent_card__well_known_agent_card_json_get", + "summary": "Liveness Probe Get Method", + "description": "Return the liveness status of the service.\n\nReturns:\n 
LivenessResponse: Indicates that the service is alive.", + "operationId": "liveness_probe_get_method_liveness_get", "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/AgentCard" + "$ref": "#/components/schemas/LivenessResponse" + }, + "example": { + "alive": true } } } - } - } - } - }, - "/.well-known/agent.json": { - "get": { - "tags": [ - "a2a" - ], - "summary": "Get Agent Card", - "description": "Serve the A2A Agent Card at the well-known location.\n\nThis endpoint provides the agent card that describes Lightspeed's\ncapabilities according to the A2A protocol specification.\n\nReturns:\n AgentCard: The agent card describing this agent's capabilities.", - "operationId": "get_agent_card__well_known_agent_json_get", - "responses": { - "200": { - "description": "Successful Response", + }, + "401": { + "description": "Unauthorized", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/AgentCard" + "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } } } } - } - } - } - }, - "/a2a": { - "get": { - "tags": [ - "a2a" - ], - "summary": "Handle A2A Jsonrpc", - "description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects 
streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response", - "operationId": "handle_a2a_jsonrpc_a2a_get", - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {} - } - } - } - } - }, - "post": { - "tags": [ - "a2a" - ], - "summary": "Handle A2A Jsonrpc", - "description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response", - "operationId": "handle_a2a_jsonrpc_a2a_get", - "responses": { - "200": { - "description": "Successful Response", + }, + "403": { + "description": "Permission denied", "content": { "application/json": { - "schema": {} + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + } } } } } } }, - "/a2a/health": { - "get": { + "/authorized": { + "post": { "tags": [ - "a2a" + "authorized" ], - "summary": "A2A Health Check", - "description": "Health check endpoint for A2A service.\n\nReturns:\n Dict with health 
status information.", - "operationId": "a2a_health_check_a2a_health_get", + "summary": "Authorized Endpoint Handler", + "description": "Handle request to the /authorized endpoint.\n\nProcess POST requests to the /authorized endpoint, returning\nthe authenticated user's ID and username.\n\nThe response intentionally omits any authentication token.\n\nReturns:\n AuthorizedResponse: Contains the user ID and username of the authenticated user.", + "operationId": "authorized_endpoint_handler_authorized_post", "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { "application/json": { "schema": { - "additionalProperties": { - "type": "string" + "$ref": "#/components/schemas/AuthorizedResponse" + }, + "example": { + "skip_userid_check": false, + "user_id": "123e4567-e89b-12d3-a456-426614174000", + "username": "user1" + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } }, - "type": "object", - "title": "Response A2A Health Check A2A Health Get" + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + } + } + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + } + } + } + } + } + } + }, + "/metrics": { + "get": { + "tags": [ + "metrics" + ], + "summary": "Metrics Endpoint 
Handler", + "description": "Handle request to the /metrics endpoint.\n\nProcess GET requests to the /metrics endpoint, returning the\nlatest Prometheus metrics in form of a plain text.\n\nInitializes model metrics on the first request if not already\nset up, then responds with the current metrics snapshot in\nPrometheus format.\n\nReturns:\n PlainTextResponse: Response body containing the Prometheus metrics text\n and the Prometheus content type.", + "operationId": "metrics_endpoint_handler_metrics_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "text/plain": { + "schema": { + "type": "string" + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + } + }, + "text/plain": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + } + } + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + } + }, + "text/plain": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + } + } + } + }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } + } + }, + "text/plain": { + "schema": { + "$ref": 
"#/components/schemas/InternalServerErrorResponse" + } + } + } + }, + "503": { + "description": "Service unavailable", + "content": { + "application/json": { + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } + } + }, + "text/plain": { + "schema": { + "$ref": "#/components/schemas/ServiceUnavailableResponse" + } + } + } + } + } + } + }, + "/.well-known/agent-card.json": { + "get": { + "tags": [ + "a2a" + ], + "summary": "Get Agent Card", + "description": "Serve the A2A Agent Card at the well-known location.\n\nThis endpoint provides the agent card that describes Lightspeed's\ncapabilities according to the A2A protocol specification.\n\nReturns:\n AgentCard: The agent card describing this agent's capabilities.", + "operationId": "get_agent_card__well_known_agent_card_json_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AgentCard" + } + } + } + } + } + } + }, + "/.well-known/agent.json": { + "get": { + "tags": [ + "a2a" + ], + "summary": "Get Agent Card", + "description": "Serve the A2A Agent Card at the well-known location.\n\nThis endpoint provides the agent card that describes Lightspeed's\ncapabilities according to the A2A protocol specification.\n\nReturns:\n AgentCard: The agent card describing this agent's capabilities.", + "operationId": "get_agent_card__well_known_agent_json_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AgentCard" } } } } - } - } - } - }, - "components": { - "schemas": { - "A2AStateConfiguration": { + } + } + }, + "/a2a": { + "get": { + "tags": [ + "a2a" + ], + "summary": "Handle A2A Jsonrpc", + "description": "Handle A2A JSON-RPC requests following the A2A protocol 
specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response", + "operationId": "handle_a2a_jsonrpc_a2a_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + } + } + }, + "post": { + "tags": [ + "a2a" + ], + "summary": "Handle A2A Jsonrpc", + "description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response", + "operationId": "handle_a2a_jsonrpc_a2a_post", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + } + } + } + }, + "/a2a/health": { + "get": { + "tags": [ + "a2a" + ], + "summary": "A2A Health Check", + "description": "Health check endpoint for A2A service.\n\nReturns:\n Dict with health status information.", + "operationId": 
"a2a_health_check_a2a_health_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Response A2A Health Check A2A Health Get" + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "A2AStateConfiguration": { + "properties": { + "sqlite": { + "anyOf": [ + { + "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" + }, + { + "type": "null" + } + ], + "title": "SQLite configuration", + "description": "SQLite database configuration for A2A state storage." + }, + "postgres": { + "anyOf": [ + { + "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + }, + { + "type": "null" + } + ], + "title": "PostgreSQL configuration", + "description": "PostgreSQL database configuration for A2A state storage." + } + }, + "additionalProperties": false, + "type": "object", + "title": "A2AStateConfiguration", + "description": "A2A protocol persistent state configuration.\n\nConfigures how A2A task state and context-to-conversation mappings are\nstored. For multi-worker deployments, use SQLite or PostgreSQL to ensure\nstate is shared across all workers.\n\nIf no configuration is provided, in-memory storage is used (default).\nThis is suitable for single-worker deployments but state will be lost\non restarts and not shared across workers.\n\nAttributes:\n sqlite: SQLite database configuration for A2A state storage.\n postgres: PostgreSQL database configuration for A2A state storage." 
+ }, + "APIKeySecurityScheme": { + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "in": { + "$ref": "#/components/schemas/In" + }, + "name": { + "type": "string", + "title": "Name" + }, + "type": { + "type": "string", + "const": "apiKey", + "title": "Type", + "default": "apiKey" + } + }, + "type": "object", + "required": [ + "in", + "name" + ], + "title": "APIKeySecurityScheme", + "description": "Defines a security scheme using an API key." + }, + "APIKeyTokenConfiguration": { + "properties": { + "api_key": { + "type": "string", + "minLength": 1, + "format": "password", + "title": "API key", + "writeOnly": true, + "examples": [ + "some-api-key" + ] + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "api_key" + ], + "title": "APIKeyTokenConfiguration", + "description": "API Key Token configuration." + }, + "AccessRule": { + "properties": { + "role": { + "type": "string", + "title": "Role name", + "description": "Name of the role" + }, + "actions": { + "items": { + "$ref": "#/components/schemas/Action" + }, + "type": "array", + "title": "Allowed actions", + "description": "Allowed actions for this role" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "role", + "actions" + ], + "title": "AccessRule", + "description": "Rule defining what actions a role can perform." 
+ }, + "Action": { + "type": "string", + "enum": [ + "admin", + "list_other_conversations", + "read_other_conversations", + "query_other_conversations", + "delete_other_conversations", + "query", + "streaming_query", + "get_conversation", + "list_conversations", + "delete_conversation", + "update_conversation", + "feedback", + "get_models", + "get_tools", + "get_shields", + "list_providers", + "get_provider", + "list_rags", + "get_rag", + "get_metrics", + "get_config", + "info", + "model_override", + "rlsapi_v1_infer", + "a2a_agent_card", + "a2a_task_execution", + "a2a_message", + "a2a_jsonrpc" + ], + "title": "Action", + "description": "Available actions in the system.\n\nNote: this is not a real model, just an enumeration of all action names." + }, + "AgentCapabilities": { + "properties": { + "extensions": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/AgentExtension" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Extensions" + }, + "pushNotifications": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Pushnotifications" + }, + "stateTransitionHistory": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Statetransitionhistory" + }, + "streaming": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Streaming" + } + }, + "type": "object", + "title": "AgentCapabilities", + "description": "Defines optional capabilities supported by an agent." 
+ }, + "AgentCard": { + "properties": { + "additionalInterfaces": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/AgentInterface" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Additionalinterfaces" + }, + "capabilities": { + "$ref": "#/components/schemas/AgentCapabilities" + }, + "defaultInputModes": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Defaultinputmodes" + }, + "defaultOutputModes": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Defaultoutputmodes" + }, + "description": { + "type": "string", + "title": "Description", + "examples": [ + "Agent that helps users with recipes and cooking." + ] + }, + "documentationUrl": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Documentationurl" + }, + "iconUrl": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Iconurl" + }, + "name": { + "type": "string", + "title": "Name", + "examples": [ + "Recipe Agent" + ] + }, + "preferredTransport": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Preferredtransport", + "default": "JSONRPC", + "examples": [ + "JSONRPC", + "GRPC", + "HTTP+JSON" + ] + }, + "protocolVersion": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Protocolversion", + "default": "0.3.0" + }, + "provider": { + "anyOf": [ + { + "$ref": "#/components/schemas/AgentProvider" + }, + { + "type": "null" + } + ] + }, + "security": { + "anyOf": [ + { + "items": { + "additionalProperties": { + "items": { + "type": "string" + }, + "type": "array" + }, + "type": "object" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Security", + "examples": [ + [ + { + "oauth": [ + "read" + ] + }, + { + "api-key": [], + "mtls": [] + } + ] + ] + }, + "securitySchemes": { + "anyOf": [ + { + "additionalProperties": { + "$ref": "#/components/schemas/SecurityScheme" + }, + "type": "object" + 
}, + { + "type": "null" + } + ], + "title": "Securityschemes" + }, + "signatures": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/AgentCardSignature" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Signatures" + }, + "skills": { + "items": { + "$ref": "#/components/schemas/AgentSkill" + }, + "type": "array", + "title": "Skills" + }, + "supportsAuthenticatedExtendedCard": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Supportsauthenticatedextendedcard" + }, + "url": { + "type": "string", + "title": "Url", + "examples": [ + "https://api.example.com/a2a/v1" + ] + }, + "version": { + "type": "string", + "title": "Version", + "examples": [ + "1.0.0" + ] + } + }, + "type": "object", + "required": [ + "capabilities", + "defaultInputModes", + "defaultOutputModes", + "description", + "name", + "skills", + "url", + "version" + ], + "title": "AgentCard", + "description": "The AgentCard is a self-describing manifest for an agent. It provides essential\nmetadata including the agent's identity, capabilities, skills, supported\ncommunication methods, and security requirements." + }, + "AgentCardSignature": { + "properties": { + "header": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Header" + }, + "protected": { + "type": "string", + "title": "Protected" + }, + "signature": { + "type": "string", + "title": "Signature" + } + }, + "type": "object", + "required": [ + "protected", + "signature" + ], + "title": "AgentCardSignature", + "description": "AgentCardSignature represents a JWS signature of an AgentCard.\nThis follows the JSON format of an RFC 7515 JSON Web Signature (JWS)." 
+ }, + "AgentExtension": { + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "params": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Params" + }, + "required": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Required" + }, + "uri": { + "type": "string", + "title": "Uri" + } + }, + "type": "object", + "required": [ + "uri" + ], + "title": "AgentExtension", + "description": "A declaration of a protocol extension supported by an Agent." + }, + "AgentInterface": { + "properties": { + "transport": { + "type": "string", + "title": "Transport", + "examples": [ + "JSONRPC", + "GRPC", + "HTTP+JSON" + ] + }, + "url": { + "type": "string", + "title": "Url", + "examples": [ + "https://api.example.com/a2a/v1", + "https://grpc.example.com/a2a", + "https://rest.example.com/v1" + ] + } + }, + "type": "object", + "required": [ + "transport", + "url" + ], + "title": "AgentInterface", + "description": "Declares a combination of a target URL and a transport protocol for interacting with the agent.\nThis allows agents to expose the same functionality over multiple transport mechanisms." + }, + "AgentProvider": { + "properties": { + "organization": { + "type": "string", + "title": "Organization" + }, + "url": { + "type": "string", + "title": "Url" + } + }, + "type": "object", + "required": [ + "organization", + "url" + ], + "title": "AgentProvider", + "description": "Represents the service provider of an agent." 
+ }, + "AgentSkill": { + "properties": { + "description": { + "type": "string", + "title": "Description" + }, + "examples": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Examples", + "examples": [ + [ + "I need a recipe for bread" + ] + ] + }, + "id": { + "type": "string", + "title": "Id" + }, + "inputModes": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Inputmodes" + }, + "name": { + "type": "string", + "title": "Name" + }, + "outputModes": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Outputmodes" + }, + "security": { + "anyOf": [ + { + "items": { + "additionalProperties": { + "items": { + "type": "string" + }, + "type": "array" + }, + "type": "object" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Security", + "examples": [ + [ + { + "google": [ + "oidc" + ] + } + ] + ] + }, + "tags": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Tags", + "examples": [ + [ + "cooking", + "customer support", + "billing" + ] + ] + } + }, + "type": "object", + "required": [ + "description", + "id", + "name", + "tags" + ], + "title": "AgentSkill", + "description": "Represents a distinct capability or function that an agent can perform." 
+ }, + "AllowedToolsFilter": { + "properties": { + "tool_names": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tool Names" + } + }, + "type": "object", + "title": "AllowedToolsFilter", + "description": "Filter configuration for restricting which MCP tools can be used.\n\n:param tool_names: (Optional) List of specific tool names that are allowed" + }, + "ApprovalFilter": { + "properties": { + "always": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Always" + }, + "never": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Never" + } + }, + "type": "object", + "title": "ApprovalFilter", + "description": "Filter configuration for MCP tool approval requirements.\n\n:param always: (Optional) List of tool names that always require approval\n:param never: (Optional) List of tool names that never require approval" + }, + "Attachment": { + "properties": { + "attachment_type": { + "type": "string", + "title": "Attachment Type", + "description": "The attachment type, like 'log', 'configuration' etc.", + "examples": [ + "log" + ] + }, + "content_type": { + "type": "string", + "title": "Content Type", + "description": "The content type as defined in MIME standard", + "examples": [ + "text/plain" + ] + }, + "content": { + "type": "string", + "title": "Content", + "description": "The actual attachment content", + "examples": [ + "warning: quota exceeded" + ] + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "attachment_type", + "content_type", + "content" + ], + "title": "Attachment", + "description": "Model representing an attachment that can be send from the UI as part of query.\n\nA list of attachments can be an optional part of 'query' request.\n\nAttributes:\n attachment_type: The attachment type, like \"log\", \"configuration\" 
etc.\n content_type: The content type as defined in MIME standard\n content: The actual attachment content\n\nYAML attachments with **kind** and **metadata/name** attributes will\nbe handled as resources with the specified name:\n```\nkind: Pod\nmetadata:\n name: private-reg\n```", + "examples": [ + { + "attachment_type": "log", + "content": "this is attachment", + "content_type": "text/plain" + }, + { + "attachment_type": "configuration", + "content": "kind: Pod\n metadata:\n name: private-reg", + "content_type": "application/yaml" + }, + { + "attachment_type": "configuration", + "content": "foo: bar", + "content_type": "application/yaml" + } + ] + }, + "AuthenticationConfiguration": { + "properties": { + "module": { + "type": "string", + "title": "Module", + "default": "noop" + }, + "skip_tls_verification": { + "type": "boolean", + "title": "Skip Tls Verification", + "default": false + }, + "skip_for_health_probes": { + "type": "boolean", + "title": "Skip authorization for probes", + "description": "Skip authorization for readiness and liveness probes", + "default": false + }, + "k8s_cluster_api": { + "anyOf": [ + { + "type": "string", + "minLength": 1, + "format": "uri" + }, + { + "type": "null" + } + ], + "title": "K8S Cluster Api" + }, + "k8s_ca_cert_path": { + "anyOf": [ + { + "type": "string", + "format": "file-path" + }, + { + "type": "null" + } + ], + "title": "K8S Ca Cert Path" + }, + "jwk_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/JwkConfiguration" + }, + { + "type": "null" + } + ] + }, + "api_key_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/APIKeyTokenConfiguration" + }, + { + "type": "null" + } + ] + }, + "rh_identity_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/RHIdentityConfiguration" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false, + "type": "object", + "title": "AuthenticationConfiguration", + "description": "Authentication configuration." 
+ }, + "AuthorizationCodeOAuthFlow": { + "properties": { + "authorizationUrl": { + "type": "string", + "title": "Authorizationurl" + }, + "refreshUrl": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Refreshurl" + }, + "scopes": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Scopes" + }, + "tokenUrl": { + "type": "string", + "title": "Tokenurl" + } + }, + "type": "object", + "required": [ + "authorizationUrl", + "scopes", + "tokenUrl" + ], + "title": "AuthorizationCodeOAuthFlow", + "description": "Defines configuration details for the OAuth 2.0 Authorization Code flow." + }, + "AuthorizationConfiguration": { + "properties": { + "access_rules": { + "items": { + "$ref": "#/components/schemas/AccessRule" + }, + "type": "array", + "title": "Access rules", + "description": "Rules for role-based access control" + } + }, + "additionalProperties": false, + "type": "object", + "title": "AuthorizationConfiguration", + "description": "Authorization configuration." 
+ }, + "AuthorizedResponse": { + "properties": { + "user_id": { + "type": "string", + "title": "User Id", + "description": "User ID, for example UUID", + "examples": [ + "c5260aec-4d82-4370-9fdf-05cf908b3f16" + ] + }, + "username": { + "type": "string", + "title": "Username", + "description": "User name", + "examples": [ + "John Doe", + "Adam Smith" + ] + }, + "skip_userid_check": { + "type": "boolean", + "title": "Skip Userid Check", + "description": "Whether to skip the user ID check", + "examples": [ + true, + false + ] + } + }, + "type": "object", + "required": [ + "user_id", + "username", + "skip_userid_check" + ], + "title": "AuthorizedResponse", + "description": "Model representing a response to an authorization request.\n\nAttributes:\n user_id: The ID of the logged in user.\n username: The name of the logged in user.\n skip_userid_check: Whether to skip the user ID check.", + "examples": [ + { + "skip_userid_check": false, + "user_id": "123e4567-e89b-12d3-a456-426614174000", + "username": "user1" + } + ] + }, + "AzureEntraIdConfiguration": { + "properties": { + "tenant_id": { + "type": "string", + "format": "password", + "title": "Tenant Id", + "writeOnly": true + }, + "client_id": { + "type": "string", + "format": "password", + "title": "Client Id", + "writeOnly": true + }, + "client_secret": { + "type": "string", + "format": "password", + "title": "Client Secret", + "writeOnly": true + }, + "scope": { + "type": "string", + "title": "Token scope", + "description": "Azure Cognitive Services scope for token requests. Override only if using a different Azure service.", + "default": "https://cognitiveservices.azure.com/.default" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "tenant_id", + "client_id", + "client_secret" + ], + "title": "AzureEntraIdConfiguration", + "description": "Microsoft Entra ID authentication attributes for Azure." 
+ }, + "BadRequestResponse": { + "properties": { + "status_code": { + "type": "integer", + "title": "Status Code" + }, + "detail": { + "$ref": "#/components/schemas/DetailModel" + } + }, + "type": "object", + "required": [ + "status_code", + "detail" + ], + "title": "BadRequestResponse", + "description": "400 Bad Request. Invalid resource identifier.", + "examples": [ + { + "detail": { + "cause": "The conversation ID 123e4567-e89b-12d3-a456-426614174000 has invalid format.", + "response": "Invalid conversation ID format" + }, + "label": "conversation_id" + } + ] + }, + "ByokRag": { + "properties": { + "rag_id": { + "type": "string", + "minLength": 1, + "title": "RAG ID", + "description": "Unique RAG ID" + }, + "rag_type": { + "type": "string", + "minLength": 1, + "title": "RAG type", + "description": "Type of RAG database.", + "default": "inline::faiss" + }, + "embedding_model": { + "type": "string", + "minLength": 1, + "title": "Embedding model", + "description": "Embedding model identification", + "default": "sentence-transformers/all-mpnet-base-v2" + }, + "embedding_dimension": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Embedding dimension", + "description": "Dimensionality of embedding vectors.", + "default": 768 + }, + "vector_db_id": { + "type": "string", + "minLength": 1, + "title": "Vector DB ID", + "description": "Vector database identification." + }, + "db_path": { + "type": "string", + "title": "DB path", + "description": "Path to RAG database." + }, + "score_multiplier": { + "type": "number", + "exclusiveMinimum": 0.0, + "title": "Score multiplier", + "description": "Multiplier applied to relevance scores from this vector store. Used to weight results when querying multiple knowledge sources. 
Values > 1 boost this store's results; values < 1 reduce them.", + "default": 1.0 + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "rag_id", + "vector_db_id", + "db_path" + ], + "title": "ByokRag", + "description": "BYOK (Bring Your Own Knowledge) RAG configuration." + }, + "CORSConfiguration": { + "properties": { + "allow_origins": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Allow origins", + "description": "A list of origins allowed for cross-origin requests. An origin is the combination of protocol (http, https), domain (myapp.com, localhost, localhost.tiangolo.com), and port (80, 443, 8080). Use ['*'] to allow all origins.", + "default": [ + "*" + ] + }, + "allow_credentials": { + "type": "boolean", + "title": "Allow credentials", + "description": "Indicate that cookies should be supported for cross-origin requests", + "default": false + }, + "allow_methods": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Allow methods", + "description": "A list of HTTP methods that should be allowed for cross-origin requests. You can use ['*'] to allow all standard methods.", + "default": [ + "*" + ] + }, + "allow_headers": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Allow headers", + "description": "A list of HTTP request headers that should be supported for cross-origin requests. You can use ['*'] to allow all headers. 
The Accept, Accept-Language, Content-Language and Content-Type headers are always allowed for simple CORS requests.", + "default": [ + "*" + ] + } + }, + "additionalProperties": false, + "type": "object", + "title": "CORSConfiguration", + "description": "CORS configuration.\n\nCORS or 'Cross-Origin Resource Sharing' refers to the situations when a\nfrontend running in a browser has JavaScript code that communicates with a\nbackend, and the backend is in a different 'origin' than the frontend.\n\nUseful resources:\n\n - [CORS in FastAPI](https://fastapi.tiangolo.com/tutorial/cors/)\n - [Wikipedia article](https://en.wikipedia.org/wiki/Cross-origin_resource_sharing)\n - [What is CORS?](https://dev.to/akshay_chauhan/what-is-cors-explained-8f1)" + }, + "ClientCredentialsOAuthFlow": { + "properties": { + "refreshUrl": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Refreshurl" + }, + "scopes": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Scopes" + }, + "tokenUrl": { + "type": "string", + "title": "Tokenurl" + } + }, + "type": "object", + "required": [ + "scopes", + "tokenUrl" + ], + "title": "ClientCredentialsOAuthFlow", + "description": "Defines configuration details for the OAuth 2.0 Client Credentials flow." + }, + "Configuration": { + "properties": { + "name": { + "type": "string", + "title": "Service name", + "description": "Name of the service. That value will be used in REST API endpoints." + }, + "service": { + "$ref": "#/components/schemas/ServiceConfiguration", + "title": "Service configuration", + "description": "This section contains Lightspeed Core Stack service configuration." + }, + "llama_stack": { + "$ref": "#/components/schemas/LlamaStackConfiguration", + "title": "Llama Stack configuration", + "description": "This section contains Llama Stack configuration. Lightspeed Core Stack service can call Llama Stack in library mode or in server mode." 
+ }, + "user_data_collection": { + "$ref": "#/components/schemas/UserDataCollection", + "title": "User data collection configuration", + "description": "This section contains configuration for subsystem that collects user data(transcription history and feedbacks)." + }, + "database": { + "$ref": "#/components/schemas/DatabaseConfiguration", + "title": "Database Configuration", + "description": "Configuration for database to store conversation IDs and other runtime data" + }, + "mcp_servers": { + "items": { + "$ref": "#/components/schemas/ModelContextProtocolServer" + }, + "type": "array", + "title": "Model Context Protocol Server and tools configuration", + "description": "MCP (Model Context Protocol) servers provide tools and capabilities to the AI agents. These are configured in this section. Only MCP servers defined in the lightspeed-stack.yaml configuration are available to the agents. Tools configured in the llama-stack run.yaml are not accessible to lightspeed-core agents." + }, + "authentication": { + "$ref": "#/components/schemas/AuthenticationConfiguration", + "title": "Authentication configuration", + "description": "Authentication configuration" + }, + "authorization": { + "anyOf": [ + { + "$ref": "#/components/schemas/AuthorizationConfiguration" + }, + { + "type": "null" + } + ], + "title": "Authorization configuration", + "description": "Lightspeed Core Stack implements a modular authentication and authorization system with multiple authentication methods. Authorization is configurable through role-based access control. Authentication is handled through selectable modules configured via the module field in the authentication configuration." + }, + "customization": { + "anyOf": [ + { + "$ref": "#/components/schemas/Customization" + }, + { + "type": "null" + } + ], + "title": "Custom profile configuration", + "description": "It is possible to customize Lightspeed Core Stack via this section. 
System prompt can be customized and also different parts of the service can be replaced by custom Python modules." + }, + "inference": { + "$ref": "#/components/schemas/InferenceConfiguration", + "title": "Inference configuration", + "description": "One LLM provider and one its model might be selected as default ones. When no provider+model pair is specified in REST API calls (query endpoints), the default provider and model are used." + }, + "conversation_cache": { + "$ref": "#/components/schemas/ConversationHistoryConfiguration", + "title": "Conversation history configuration" + }, + "byok_rag": { + "items": { + "$ref": "#/components/schemas/ByokRag" + }, + "type": "array", + "title": "BYOK RAG configuration", + "description": "BYOK RAG configuration. This configuration can be used to reconfigure Llama Stack through its run.yaml configuration file" + }, + "a2a_state": { + "$ref": "#/components/schemas/A2AStateConfiguration", + "title": "A2A state configuration", + "description": "Configuration for A2A protocol persistent state storage." + }, + "quota_handlers": { + "$ref": "#/components/schemas/QuotaHandlersConfiguration", + "title": "Quota handlers", + "description": "Quota handlers configuration" + }, + "azure_entra_id": { + "anyOf": [ + { + "$ref": "#/components/schemas/AzureEntraIdConfiguration" + }, + { + "type": "null" + } + ] + }, + "splunk": { + "anyOf": [ + { + "$ref": "#/components/schemas/SplunkConfiguration" + }, + { + "type": "null" + } + ], + "title": "Splunk configuration", + "description": "Splunk HEC configuration for sending telemetry events." + }, + "deployment_environment": { + "type": "string", + "title": "Deployment environment", + "description": "Deployment environment name (e.g., 'development', 'staging', 'production'). 
Used in telemetry events.", + "default": "development" + }, + "rag": { + "$ref": "#/components/schemas/RagConfiguration", + "title": "RAG configuration", + "description": "Configuration for all RAG strategies (inline and tool-based)." + }, + "okp": { + "$ref": "#/components/schemas/OkpConfiguration", + "title": "OKP configuration", + "description": "OKP provider settings. Only used when 'okp' is listed in rag.inline or rag.tool." + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "name", + "service", + "llama_stack", + "user_data_collection" + ], + "title": "Configuration", + "description": "Global service configuration." + }, + "ConfigurationResponse": { + "properties": { + "configuration": { + "$ref": "#/components/schemas/Configuration" + } + }, + "type": "object", + "required": [ + "configuration" + ], + "title": "ConfigurationResponse", + "description": "Success response model for the config endpoint.", + "examples": [ + { + "configuration": { + "authentication": { + "module": "noop", + "skip_tls_verification": false + }, + "authorization": { + "access_rules": [] + }, + "byok_rag": [], + "conversation_cache": {}, + "database": { + "sqlite": { + "db_path": "/tmp/lightspeed-stack.db" + } + }, + "inference": { + "default_model": "gpt-4-turbo", + "default_provider": "openai" + }, + "llama_stack": { + "api_key": "*****", + "url": "http://localhost:8321", + "use_as_library_client": false + }, + "mcp_servers": [ + { + "name": "server1", + "provider_id": "provider1", + "url": "http://url.com:1" + } + ], + "name": "lightspeed-stack", + "quota_handlers": { + "enable_token_history": false, + "limiters": [], + "scheduler": { + "period": 1 + } + }, + "service": { + "access_log": true, + "auth_enabled": false, + "color_log": true, + "cors": { + "allow_credentials": false, + "allow_headers": [ + "*" + ], + "allow_methods": [ + "*" + ], + "allow_origins": [ + "*" + ] + }, + "host": "localhost", + "port": 8080, + "tls_config": {}, + "workers": 1 + 
}, + "user_data_collection": { + "feedback_enabled": true, + "feedback_storage": "/tmp/data/feedback", + "transcripts_enabled": false, + "transcripts_storage": "/tmp/data/transcripts" + } + } + } + ] + }, + "ConversationData": { + "properties": { + "conversation_id": { + "type": "string", + "title": "Conversation Id" + }, + "topic_summary": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Topic Summary" + }, + "last_message_timestamp": { + "type": "number", + "title": "Last Message Timestamp" + } + }, + "type": "object", + "required": [ + "conversation_id", + "topic_summary", + "last_message_timestamp" + ], + "title": "ConversationData", + "description": "Model representing conversation data returned by cache list operations.\n\nAttributes:\n conversation_id: The conversation ID\n topic_summary: The topic summary for the conversation (can be None)\n last_message_timestamp: The timestamp of the last message in the conversation" + }, + "ConversationDeleteResponse": { + "properties": { + "conversation_id": { + "type": "string", + "title": "Conversation Id", + "description": "The conversation ID (UUID) that was deleted.", + "examples": [ + "123e4567-e89b-12d3-a456-426614174000" + ] + }, + "success": { + "type": "boolean", + "title": "Success", + "description": "Whether the deletion was successful.", + "examples": [ + true, + false + ] + }, + "response": { + "type": "string", + "title": "Response", + "description": "A message about the deletion result.", + "examples": [ + "Conversation deleted successfully", + "Conversation cannot be deleted" + ] + } + }, + "type": "object", + "required": [ + "conversation_id", + "success", + "response" + ], + "title": "ConversationDeleteResponse", + "description": "Model representing a response for deleting a conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID) that was deleted.\n success: Whether the deletion was successful.\n response: A message about the deletion result.", + 
"examples": [ + { + "label": "deleted", + "value": { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "response": "Conversation deleted successfully", + "success": true + } + }, + { + "label": "not found", + "value": { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "response": "Conversation can not be deleted", + "success": true + } + } + ] + }, + "ConversationDetails": { + "properties": { + "conversation_id": { + "type": "string", + "title": "Conversation Id", + "description": "Conversation ID (UUID)", + "examples": [ + "c5260aec-4d82-4370-9fdf-05cf908b3f16" + ] + }, + "created_at": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created At", + "description": "When the conversation was created", + "examples": [ + "2024-01-01T01:00:00Z" + ] + }, + "last_message_at": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Message At", + "description": "When the last message was sent", + "examples": [ + "2024-01-01T01:00:00Z" + ] + }, + "message_count": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Message Count", + "description": "Number of user messages in the conversation", + "examples": [ + 42 + ] + }, + "last_used_model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Used Model", + "description": "Identification of the last model used for the conversation", + "examples": [ + "gpt-4-turbo", + "gpt-3.5-turbo-0125" + ] + }, + "last_used_provider": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Used Provider", + "description": "Identification of the last provider used for the conversation", + "examples": [ + "openai", + "gemini" + ] + }, + "topic_summary": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Topic Summary", + "description": "Topic summary for the conversation", + "examples": [ + "Openshift 
Microservices Deployment Strategies" + ] + } + }, + "type": "object", + "required": [ + "conversation_id" + ], + "title": "ConversationDetails", + "description": "Model representing the details of a user conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID).\n created_at: When the conversation was created.\n last_message_at: When the last message was sent.\n message_count: Number of user messages in the conversation.\n last_used_model: The last model used for the conversation.\n last_used_provider: The provider of the last used model.\n topic_summary: The topic summary for the conversation.\n\nExample:\n ```python\n conversation = ConversationDetails(\n conversation_id=\"123e4567-e89b-12d3-a456-426614174000\",\n created_at=\"2024-01-01T00:00:00Z\",\n last_message_at=\"2024-01-01T00:05:00Z\",\n message_count=5,\n last_used_model=\"gemini/gemini-2.0-flash\",\n last_used_provider=\"gemini\",\n topic_summary=\"Openshift Microservices Deployment Strategies\",\n )\n ```" + }, + "ConversationHistoryConfiguration": { + "properties": { + "type": { + "anyOf": [ + { + "type": "string", + "enum": [ + "noop", + "memory", + "sqlite", + "postgres" + ] + }, + { + "type": "null" + } + ], + "title": "Conversation history database type", + "description": "Type of database where the conversation history is to be stored." 
+ }, + "memory": { + "anyOf": [ + { + "$ref": "#/components/schemas/InMemoryCacheConfig" + }, + { + "type": "null" + } + ], + "title": "In-memory cache configuration", + "description": "In-memory cache configuration" + }, + "sqlite": { + "anyOf": [ + { + "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" + }, + { + "type": "null" + } + ], + "title": "SQLite configuration", + "description": "SQLite database configuration" + }, + "postgres": { + "anyOf": [ + { + "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + }, + { + "type": "null" + } + ], + "title": "PostgreSQL configuration", + "description": "PostgreSQL database configuration" + } + }, + "additionalProperties": false, + "type": "object", + "title": "ConversationHistoryConfiguration", + "description": "Conversation history configuration." + }, + "ConversationResponse": { + "properties": { + "conversation_id": { + "type": "string", + "title": "Conversation Id", + "description": "Conversation ID (UUID)", + "examples": [ + "c5260aec-4d82-4370-9fdf-05cf908b3f16" + ] + }, + "chat_history": { + "items": { + "$ref": "#/components/schemas/ConversationTurn" + }, + "type": "array", + "title": "Chat History", + "description": "The simplified chat history as a list of conversation turns", + "examples": [ + { + "completed_at": "2024-01-01T00:01:05Z", + "messages": [ + { + "content": "Hello", + "type": "user" + }, + { + "content": "Hi there!", + "type": "assistant" + } + ], + "model": "gpt-4o-mini", + "provider": "openai", + "started_at": "2024-01-01T00:01:00Z", + "tool_calls": [], + "tool_results": [] + } + ] + } + }, + "type": "object", + "required": [ + "conversation_id", + "chat_history" + ], + "title": "ConversationResponse", + "description": "Model representing a response for retrieving a conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID).\n chat_history: The chat history as a list of conversation turns.", + "examples": [ + { + "chat_history": [ + { + "completed_at": 
"2024-01-01T00:01:05Z", + "messages": [ + { + "content": "Hello", + "type": "user" + }, + { + "content": "Hi there!", + "type": "assistant" + } + ], + "model": "gpt-4o-mini", + "provider": "openai", + "started_at": "2024-01-01T00:01:00Z", + "tool_calls": [], + "tool_results": [] + } + ], + "conversation_id": "123e4567-e89b-12d3-a456-426614174000" + } + ] + }, + "ConversationTurn": { + "properties": { + "messages": { + "items": { + "$ref": "#/components/schemas/Message" + }, + "type": "array", + "title": "Messages", + "description": "List of messages in this turn" + }, + "tool_calls": { + "items": { + "$ref": "#/components/schemas/ToolCallSummary" + }, + "type": "array", + "title": "Tool Calls", + "description": "List of tool calls made in this turn" + }, + "tool_results": { + "items": { + "$ref": "#/components/schemas/ToolResultSummary" + }, + "type": "array", + "title": "Tool Results", + "description": "List of tool results from this turn" + }, + "provider": { + "type": "string", + "title": "Provider", + "description": "Provider identifier used for this turn", + "examples": [ + "openai" + ] + }, + "model": { + "type": "string", + "title": "Model", + "description": "Model identifier used for this turn", + "examples": [ + "gpt-4o-mini" + ] + }, + "started_at": { + "type": "string", + "title": "Started At", + "description": "ISO 8601 timestamp when the turn started", + "examples": [ + "2024-01-01T00:01:00Z" + ] + }, + "completed_at": { + "type": "string", + "title": "Completed At", + "description": "ISO 8601 timestamp when the turn completed", + "examples": [ + "2024-01-01T00:01:05Z" + ] + } + }, + "type": "object", + "required": [ + "provider", + "model", + "started_at", + "completed_at" + ], + "title": "ConversationTurn", + "description": "Model representing a single conversation turn.\n\nAttributes:\n messages: List of messages in this turn.\n tool_calls: List of tool calls made in this turn.\n tool_results: List of tool results from this turn.\n provider: 
Provider identifier used for this turn.\n model: Model identifier used for this turn.\n started_at: ISO 8601 timestamp when the turn started.\n completed_at: ISO 8601 timestamp when the turn completed." + }, + "ConversationUpdateRequest": { + "properties": { + "topic_summary": { + "type": "string", + "maxLength": 1000, + "minLength": 1, + "title": "Topic Summary", + "description": "The new topic summary for the conversation", + "examples": [ + "Discussion about machine learning algorithms" + ] + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "topic_summary" + ], + "title": "ConversationUpdateRequest", + "description": "Model representing a request to update a conversation topic summary.\n\nAttributes:\n topic_summary: The new topic summary for the conversation.\n\nExample:\n ```python\n update_request = ConversationUpdateRequest(\n topic_summary=\"Discussion about machine learning algorithms\"\n )\n ```" + }, + "ConversationUpdateResponse": { + "properties": { + "conversation_id": { + "type": "string", + "title": "Conversation Id", + "description": "The conversation ID (UUID) that was updated", + "examples": [ + "123e4567-e89b-12d3-a456-426614174000" + ] + }, + "success": { + "type": "boolean", + "title": "Success", + "description": "Whether the update was successful", + "examples": [ + true + ] + }, + "message": { + "type": "string", + "title": "Message", + "description": "A message about the update result", + "examples": [ + "Topic summary updated successfully" + ] + } + }, + "type": "object", + "required": [ + "conversation_id", + "success", + "message" + ], + "title": "ConversationUpdateResponse", + "description": "Model representing a response for updating a conversation topic summary.\n\nAttributes:\n conversation_id: The conversation ID (UUID) that was updated.\n success: Whether the update was successful.\n message: A message about the update result.\n\nExample:\n ```python\n update_response = ConversationUpdateResponse(\n 
conversation_id=\"123e4567-e89b-12d3-a456-426614174000\",\n success=True,\n message=\"Topic summary updated successfully\",\n )\n ```", + "examples": [ + { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "message": "Topic summary updated successfully", + "success": true + } + ] + }, + "ConversationsListResponse": { + "properties": { + "conversations": { + "items": { + "$ref": "#/components/schemas/ConversationDetails" + }, + "type": "array", + "title": "Conversations" + } + }, + "type": "object", + "required": [ + "conversations" + ], + "title": "ConversationsListResponse", + "description": "Model representing a response for listing conversations of a user.\n\nAttributes:\n conversations: List of conversation details associated with the user.", + "examples": [ + { + "conversations": [ + { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "created_at": "2024-01-01T00:00:00Z", + "last_message_at": "2024-01-01T00:05:00Z", + "last_used_model": "gemini/gemini-2.0-flash", + "last_used_provider": "gemini", + "message_count": 5, + "topic_summary": "Openshift Microservices Deployment Strategies" + }, + { + "conversation_id": "456e7890-e12b-34d5-a678-901234567890", + "created_at": "2024-01-01T01:00:00Z", + "last_used_model": "gemini/gemini-2.5-flash", + "last_used_provider": "gemini", + "message_count": 2, + "topic_summary": "RHDH Purpose Summary" + } + ] + } + ] + }, + "ConversationsListResponseV2": { + "properties": { + "conversations": { + "items": { + "$ref": "#/components/schemas/ConversationData" + }, + "type": "array", + "title": "Conversations" + } + }, + "type": "object", + "required": [ + "conversations" + ], + "title": "ConversationsListResponseV2", + "description": "Model representing a response for listing conversations of a user.\n\nAttributes:\n conversations: List of conversation data associated with the user.", + "examples": [ + { + "conversations": [ + { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + 
"last_message_timestamp": 1704067200.0, + "topic_summary": "Openshift Microservices Deployment Strategies" + } + ] + } + ] + }, + "CustomProfile": { + "properties": { + "path": { + "type": "string", + "title": "Path to custom profile", + "description": "Path to Python modules containing custom profile." + }, + "prompts": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "System prompts", + "description": "Dictionary containing map of system prompts", + "default": {} + } + }, + "type": "object", + "required": [ + "path" + ], + "title": "CustomProfile", + "description": "Custom profile customization for prompts and validation." + }, + "Customization": { + "properties": { + "profile_path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Profile Path" + }, + "disable_query_system_prompt": { + "type": "boolean", + "title": "Disable Query System Prompt", + "default": false + }, + "disable_shield_ids_override": { + "type": "boolean", + "title": "Disable Shield Ids Override", + "default": false + }, + "system_prompt_path": { + "anyOf": [ + { + "type": "string", + "format": "file-path" + }, + { + "type": "null" + } + ], + "title": "System Prompt Path" + }, + "system_prompt": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "System Prompt" + }, + "agent_card_path": { + "anyOf": [ + { + "type": "string", + "format": "file-path" + }, + { + "type": "null" + } + ], + "title": "Agent Card Path" + }, + "agent_card_config": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Agent Card Config" + }, + "custom_profile": { + "anyOf": [ + { + "$ref": "#/components/schemas/CustomProfile" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false, + "type": "object", + "title": "Customization", + "description": "Service customization." 
+ }, + "DatabaseConfiguration": { + "properties": { + "sqlite": { + "anyOf": [ + { + "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" + }, + { + "type": "null" + } + ], + "title": "SQLite configuration", + "description": "SQLite database configuration" + }, + "postgres": { + "anyOf": [ + { + "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + }, + { + "type": "null" + } + ], + "title": "PostgreSQL configuration", + "description": "PostgreSQL database configuration" + } + }, + "additionalProperties": false, + "type": "object", + "title": "DatabaseConfiguration", + "description": "Database configuration." + }, + "DetailModel": { + "properties": { + "response": { + "type": "string", + "title": "Response", + "description": "Short summary of the error" + }, + "cause": { + "type": "string", + "title": "Cause", + "description": "Detailed explanation of what caused the error" + } + }, + "type": "object", + "required": [ + "response", + "cause" + ], + "title": "DetailModel", + "description": "Nested detail model for error responses." + }, + "FeedbackCategory": { + "type": "string", + "enum": [ + "incorrect", + "not_relevant", + "incomplete", + "outdated_information", + "unsafe", + "other" + ], + "title": "FeedbackCategory", + "description": "Enum representing predefined feedback categories for AI responses.\n\nThese categories help provide structured feedback about AI inference quality\nwhen users provide negative feedback (thumbs down). Multiple categories can\nbe selected to provide comprehensive feedback about response issues." + }, + "FeedbackRequest": { + "properties": { + "conversation_id": { + "type": "string", + "title": "Conversation Id", + "description": "The required conversation ID (UUID)", + "examples": [ + "c5260aec-4d82-4370-9fdf-05cf908b3f16" + ] + }, + "user_question": { + "type": "string", + "title": "User Question", + "description": "User question (the query string)", + "examples": [ + "What is Kubernetes?" 
+ ] + }, + "llm_response": { + "type": "string", + "title": "Llm Response", + "description": "Response from LLM", + "examples": [ + "Kubernetes is an open-source container orchestration system for automating ..." + ] + }, + "sentiment": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Sentiment", + "description": "User sentiment, if provided must be -1 or 1", + "examples": [ + -1, + 1 + ] + }, + "user_feedback": { + "anyOf": [ + { + "type": "string", + "maxLength": 4096 + }, + { + "type": "null" + } + ], + "title": "User Feedback", + "description": "Feedback on the LLM response.", + "examples": [ + "I'm not satisfied with the response because it is too vague." + ] + }, + "categories": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/FeedbackCategory" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Categories", + "description": "List of feedback categories that describe issues with the LLM response (for negative feedback).", + "examples": [ + [ + "incorrect", + "incomplete" + ] + ] + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "conversation_id", + "user_question", + "llm_response" + ], + "title": "FeedbackRequest", + "description": "Model representing a feedback request.\n\nAttributes:\n conversation_id: The required conversation ID (UUID).\n user_question: The required user question.\n llm_response: The required LLM response.\n sentiment: The optional sentiment.\n user_feedback: The optional user feedback.\n categories: The optional list of feedback categories (multi-select for negative feedback).\n\nExample:\n ```python\n feedback_request = FeedbackRequest(\n conversation_id=\"12345678-abcd-0000-0123-456789abcdef\",\n user_question=\"what are you doing?\",\n user_feedback=\"This response is not helpful\",\n llm_response=\"I don't know\",\n sentiment=-1,\n categories=[FeedbackCategory.INCORRECT, FeedbackCategory.INCOMPLETE]\n )\n ```", + "examples": [ + { + 
"conversation_id": "12345678-abcd-0000-0123-456789abcdef", + "llm_response": "bar", + "sentiment": -1, + "user_feedback": "Not satisfied with the response quality.", + "user_question": "foo" + }, + { + "categories": [ + "incorrect" + ], + "conversation_id": "12345678-abcd-0000-0123-456789abcdef", + "llm_response": "The capital of France is Berlin.", + "sentiment": -1, + "user_question": "What is the capital of France?" + }, + { + "categories": [ + "incomplete", + "not_relevant" + ], + "conversation_id": "12345678-abcd-0000-0123-456789abcdef", + "llm_response": "Use Docker.", + "sentiment": -1, + "user_feedback": "This response is too general and doesn't provide specific steps.", + "user_question": "How do I deploy a web app?" + } + ] + }, + "FeedbackResponse": { + "properties": { + "response": { + "type": "string", + "title": "Response", + "description": "The response of the feedback request.", + "examples": [ + "feedback received" + ] + } + }, + "type": "object", + "required": [ + "response" + ], + "title": "FeedbackResponse", + "description": "Model representing a response to a feedback request.\n\nAttributes:\n response: The response of the feedback request.\n\nExample:\n ```python\n feedback_response = FeedbackResponse(response=\"feedback received\")\n ```", + "examples": [ + { + "response": "feedback received" + } + ] + }, + "FeedbackStatusUpdateRequest": { + "properties": { + "status": { + "type": "boolean", + "title": "Status", + "description": "Desired state of feedback enablement, must be False or True", + "default": false, + "examples": [ + true, + false + ] + } + }, + "additionalProperties": false, + "type": "object", + "title": "FeedbackStatusUpdateRequest", + "description": "Model representing a feedback status update request.\n\nAttributes:\n status: Value of the desired feedback enabled state.\n\nExample:\n ```python\n feedback_status_update_request = FeedbackStatusUpdateRequest(\n status=False\n )\n ```" + }, + "FeedbackStatusUpdateResponse": { + "properties": { + "status": 
{ + "additionalProperties": true, + "type": "object", + "title": "Status" + } + }, + "type": "object", + "required": [ + "status" + ], + "title": "FeedbackStatusUpdateResponse", + "description": "Model representing a response to a feedback status update request.\n\nAttributes:\n status: The previous and current status of the service and who updated it.\n\nExample:\n ```python\n status_response = FeedbackStatusUpdateResponse(\n status={\n \"previous_status\": True,\n \"updated_status\": False,\n \"updated_by\": \"user/test\",\n \"timestamp\": \"2023-03-15 12:34:56\"\n },\n )\n ```", + "examples": [ + { + "status": { + "previous_status": true, + "timestamp": "2023-03-15 12:34:56", + "updated_by": "user/test", + "updated_status": false + } + } + ] + }, + "ForbiddenResponse": { "properties": { - "sqlite": { - "anyOf": [ - { - "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" - }, - { - "type": "null" - } - ], - "title": "SQLite configuration", - "description": "SQLite database configuration for A2A state storage." + "status_code": { + "type": "integer", + "title": "Status Code" }, - "postgres": { + "detail": { + "$ref": "#/components/schemas/DetailModel" + } + }, + "type": "object", + "required": [ + "status_code", + "detail" + ], + "title": "ForbiddenResponse", + "description": "403 Forbidden. 
Access denied.", + "examples": [ + { + "detail": { + "cause": "User 6789 does not have permission to read conversation with ID 123e4567-e89b-12d3-a456-426614174000", + "response": "User does not have permission to perform this action" + }, + "label": "conversation read" + }, + { + "detail": { + "cause": "User 6789 does not have permission to delete conversation with ID 123e4567-e89b-12d3-a456-426614174000", + "response": "User does not have permission to perform this action" + }, + "label": "conversation delete" + }, + { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + }, + "label": "endpoint" + }, + { + "detail": { + "cause": "Storing feedback is disabled.", + "response": "Storing feedback is disabled" + }, + "label": "feedback" + }, + { + "detail": { + "cause": "User lacks model_override permission required to override model/provider.", + "response": "This instance does not permit overriding model/provider in the query request (missing permission: MODEL_OVERRIDE). Please remove the model and provider fields from your request." + }, + "label": "model override" + } + ] + }, + "HTTPAuthSecurityScheme": { + "properties": { + "bearerFormat": { "anyOf": [ { - "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + "type": "string" }, { "type": "null" } ], - "title": "PostgreSQL configuration", - "description": "PostgreSQL database configuration for A2A state storage." - } - }, - "additionalProperties": false, - "type": "object", - "title": "A2AStateConfiguration", - "description": "A2A protocol persistent state configuration.\n\nConfigures how A2A task state and context-to-conversation mappings are\nstored. 
For multi-worker deployments, use SQLite or PostgreSQL to ensure\nstate is shared across all workers.\n\nIf no configuration is provided, in-memory storage is used (default).\nThis is suitable for single-worker deployments but state will be lost\non restarts and not shared across workers.\n\nAttributes:\n sqlite: SQLite database configuration for A2A state storage.\n postgres: PostgreSQL database configuration for A2A state storage." - }, - "APIKeySecurityScheme": { - "properties": { + "title": "Bearerformat" + }, "description": { "anyOf": [ { @@ -4552,340 +7201,411 @@ ], "title": "Description" }, - "in": { - "$ref": "#/components/schemas/In" - }, - "name": { + "scheme": { "type": "string", - "title": "Name" + "title": "Scheme" }, "type": { "type": "string", - "const": "apiKey", + "const": "http", "title": "Type", - "default": "apiKey" + "default": "http" } }, "type": "object", "required": [ - "in", - "name" + "scheme" ], - "title": "APIKeySecurityScheme", - "description": "Defines a security scheme using an API key." + "title": "HTTPAuthSecurityScheme", + "description": "Defines a security scheme using HTTP authentication." }, - "APIKeyTokenConfiguration": { + "HTTPValidationError": { "properties": { - "api_key": { - "type": "string", - "minLength": 1, - "format": "password", - "title": "API key", - "writeOnly": true, - "examples": [ - "some-api-key" - ] + "detail": { + "items": { + "$ref": "#/components/schemas/ValidationError" + }, + "type": "array", + "title": "Detail" } }, - "additionalProperties": false, "type": "object", - "required": [ - "api_key" - ], - "title": "APIKeyTokenConfiguration", - "description": "API Key Token configuration." 
+ "title": "HTTPValidationError" }, - "AccessRule": { + "ImplicitOAuthFlow": { "properties": { - "role": { + "authorizationUrl": { "type": "string", - "title": "Role name", - "description": "Name of the role" + "title": "Authorizationurl" }, - "actions": { - "items": { - "$ref": "#/components/schemas/Action" + "refreshUrl": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Refreshurl" + }, + "scopes": { + "additionalProperties": { + "type": "string" }, - "type": "array", - "title": "Allowed actions", - "description": "Allowed actions for this role" + "type": "object", + "title": "Scopes" } }, - "additionalProperties": false, "type": "object", "required": [ - "role", - "actions" + "authorizationUrl", + "scopes" ], - "title": "AccessRule", - "description": "Rule defining what actions a role can perform." + "title": "ImplicitOAuthFlow", + "description": "Defines configuration details for the OAuth 2.0 Implicit flow." }, - "Action": { + "In": { "type": "string", "enum": [ - "admin", - "list_other_conversations", - "read_other_conversations", - "query_other_conversations", - "delete_other_conversations", - "query", - "streaming_query", - "get_conversation", - "list_conversations", - "delete_conversation", - "update_conversation", - "feedback", - "get_models", - "get_tools", - "get_shields", - "list_providers", - "get_provider", - "list_rags", - "get_rag", - "get_metrics", - "get_config", - "info", - "model_override", - "rlsapi_v1_infer", - "a2a_agent_card", - "a2a_task_execution", - "a2a_message", - "a2a_jsonrpc" + "cookie", + "header", + "query" ], - "title": "Action", - "description": "Available actions in the system.\n\nNote: this is not a real model, just an enumeration of all action names." + "title": "In", + "description": "The location of the API key." 
}, - "AgentCapabilities": { + "InMemoryCacheConfig": { "properties": { - "extensions": { - "anyOf": [ - { - "items": { - "$ref": "#/components/schemas/AgentExtension" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "title": "Extensions" - }, - "pushNotifications": { + "max_entries": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Max entries", + "description": "Maximum number of entries stored in the in-memory cache" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "max_entries" + ], + "title": "InMemoryCacheConfig", + "description": "In-memory cache configuration." + }, + "IncludeParameter": { + "type": "string", + "enum": [ + "web_search_call.action.sources", + "code_interpreter_call.outputs", + "computer_call_output.output.image_url", + "file_search_call.results", + "message.input_image.image_url", + "message.output_text.logprobs", + "reasoning.encrypted_content" + ] + }, + "InferenceConfiguration": { + "properties": { + "default_model": { "anyOf": [ { - "type": "boolean" + "type": "string" }, { "type": "null" } ], - "title": "Pushnotifications" + "title": "Default model", + "description": "Identification of default model used when no other model is specified." }, - "stateTransitionHistory": { + "default_provider": { "anyOf": [ { - "type": "boolean" + "type": "string" }, { "type": "null" } ], - "title": "Statetransitionhistory" + "title": "Default provider", + "description": "Identification of default provider used when no other model is specified." + } + }, + "additionalProperties": false, + "type": "object", + "title": "InferenceConfiguration", + "description": "Inference configuration." 
+ }, + "InfoResponse": { + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "Service name", + "examples": [ + "Lightspeed Stack" + ] }, - "streaming": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "title": "Streaming" + "service_version": { + "type": "string", + "title": "Service Version", + "description": "Service version", + "examples": [ + "0.1.0", + "0.2.0", + "1.0.0" + ] + }, + "llama_stack_version": { + "type": "string", + "title": "Llama Stack Version", + "description": "Llama Stack version", + "examples": [ + "0.2.1", + "0.2.2", + "0.2.18", + "0.2.21", + "0.2.22" + ] } }, "type": "object", - "title": "AgentCapabilities", - "description": "Defines optional capabilities supported by an agent." + "required": [ + "name", + "service_version", + "llama_stack_version" + ], + "title": "InfoResponse", + "description": "Model representing a response to an info request.\n\nAttributes:\n name: Service name.\n service_version: Service version.\n llama_stack_version: Llama Stack version.\n\nExample:\n ```python\n info_response = InfoResponse(\n name=\"Lightspeed Stack\",\n service_version=\"1.0.0\",\n llama_stack_version=\"0.2.22\",\n )\n ```", + "examples": [ + { + "llama_stack_version": "1.0.0", + "name": "Lightspeed Stack", + "service_version": "1.0.0" + } + ] }, - "AgentCard": { + "InternalServerErrorResponse": { "properties": { - "additionalInterfaces": { - "anyOf": [ - { - "items": { - "$ref": "#/components/schemas/AgentInterface" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "title": "Additionalinterfaces" + "status_code": { + "type": "integer", + "title": "Status Code" }, - "capabilities": { - "$ref": "#/components/schemas/AgentCapabilities" + "detail": { + "$ref": "#/components/schemas/DetailModel" + } + }, + "type": "object", + "required": [ + "status_code", + "detail" + ], + "title": "InternalServerErrorResponse", + "description": "500 Internal Server Error.", + "examples": [ + { + 
"detail": { + "cause": "An unexpected error occurred while processing the request.", + "response": "Internal server error" + }, + "label": "internal" }, - "defaultInputModes": { - "items": { - "type": "string" + { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" }, - "type": "array", - "title": "Defaultinputmodes" + "label": "configuration" }, - "defaultOutputModes": { - "items": { - "type": "string" + { + "detail": { + "cause": "Failed to store feedback at directory: /path/example", + "response": "Failed to store feedback" }, - "type": "array", - "title": "Defaultoutputmodes" + "label": "feedback storage" }, - "description": { + { + "detail": { + "cause": "Failed to call backend API", + "response": "Error while processing query" + }, + "label": "query" + }, + { + "detail": { + "cause": "Conversation cache is not configured or unavailable.", + "response": "Conversation cache not configured" + }, + "label": "conversation cache" + }, + { + "detail": { + "cause": "Failed to query the database", + "response": "Database query failed" + }, + "label": "database" + } + ] + }, + "JsonPathOperator": { + "type": "string", + "enum": [ + "equals", + "contains", + "in", + "match" + ], + "title": "JsonPathOperator", + "description": "Supported operators for JSONPath evaluation.\n\nNote: this is not a real model, just an enumeration of all supported JSONPath operators." + }, + "JwkConfiguration": { + "properties": { + "url": { "type": "string", - "title": "Description", - "examples": [ - "Agent that helps users with recipes and cooking." - ] + "minLength": 1, + "format": "uri", + "title": "URL", + "description": "HTTPS URL of the JWK (JSON Web Key) set used to validate JWTs." 
}, - "documentationUrl": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Documentationurl" + "jwt_configuration": { + "$ref": "#/components/schemas/JwtConfiguration", + "title": "JWT configuration", + "description": "JWT (JSON Web Token) configuration" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "url" + ], + "title": "JwkConfiguration", + "description": "JWK (JSON Web Key) configuration.\n\nA JSON Web Key (JWK) is a JavaScript Object Notation (JSON) data structure\nthat represents a cryptographic key.\n\nUseful resources:\n\n - [JSON Web Key](https://openid.net/specs/draft-jones-json-web-key-03.html)\n - [RFC 7517](https://www.rfc-editor.org/rfc/rfc7517)" + }, + "JwtConfiguration": { + "properties": { + "user_id_claim": { + "type": "string", + "title": "User ID claim", + "description": "JWT claim name that uniquely identifies the user (subject ID).", + "default": "user_id" }, - "iconUrl": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Iconurl" + "username_claim": { + "type": "string", + "title": "Username claim", + "description": "JWT claim name that provides the human-readable username.", + "default": "username" }, - "name": { + "role_rules": { + "items": { + "$ref": "#/components/schemas/JwtRoleRule" + }, + "type": "array", + "title": "Role rules", + "description": "Rules for extracting roles from JWT claims" + } + }, + "additionalProperties": false, + "type": "object", + "title": "JwtConfiguration", + "description": "JWT (JSON Web Token) configuration.\n\nJSON Web Token (JWT) is a compact, URL-safe means of representing\nclaims to be transferred between two parties. 
The claims in a JWT\nare encoded as a JSON object that is used as the payload of a JSON\nWeb Signature (JWS) structure or as the plaintext of a JSON Web\nEncryption (JWE) structure, enabling the claims to be digitally\nsigned or integrity protected with a Message Authentication Code\n(MAC) and/or encrypted.\n\nUseful resources:\n\n - [JSON Web Token](https://en.wikipedia.org/wiki/JSON_Web_Token)\n - [RFC 7519](https://datatracker.ietf.org/doc/html/rfc7519)\n - [JSON Web Tokens](https://auth0.com/docs/secure/tokens/json-web-tokens)" + }, + "JwtRoleRule": { + "properties": { + "jsonpath": { "type": "string", - "title": "Name", - "examples": [ - "Recipe Agent" - ] + "title": "JSON path", + "description": "JSONPath expression to evaluate against the JWT payload" }, - "preferredTransport": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Preferredtransport", - "default": "JSONRPC", - "examples": [ - "JSONRPC", - "GRPC", - "HTTP+JSON" - ] + "operator": { + "$ref": "#/components/schemas/JsonPathOperator", + "title": "Operator", + "description": "JSON path comparison operator" }, - "protocolVersion": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Protocolversion", - "default": "0.3.0" + "negate": { + "type": "boolean", + "title": "Negate rule", + "description": "If set to true, the meaning of the rule is negated", + "default": false }, - "provider": { - "anyOf": [ - { - "$ref": "#/components/schemas/AgentProvider" - }, - { - "type": "null" - } - ] + "value": { + "title": "Value", + "description": "Value to compare against" }, - "security": { - "anyOf": [ - { - "items": { - "additionalProperties": { - "items": { - "type": "string" - }, - "type": "array" - }, - "type": "object" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "title": "Security", + "roles": { + "items": { + "type": "string" + }, + "type": "array", + "title": "List of roles", + "description": "Roles to be assigned if the rule 
matches" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "jsonpath", + "operator", + "value", + "roles" + ], + "title": "JwtRoleRule", + "description": "Rule for extracting roles from JWT claims." + }, + "LivenessResponse": { + "properties": { + "alive": { + "type": "boolean", + "title": "Alive", + "description": "Flag indicating that the app is alive", "examples": [ - [ - { - "oauth": [ - "read" - ] - }, - { - "api-key": [], - "mtls": [] - } - ] + true, + false ] - }, - "securitySchemes": { + } + }, + "type": "object", + "required": [ + "alive" + ], + "title": "LivenessResponse", + "description": "Model representing a response to a liveness request.\n\nAttributes:\n alive: If app is alive.\n\nExample:\n ```python\n liveness_response = LivenessResponse(alive=True)\n ```", + "examples": [ + { + "alive": true + } + ] + }, + "LlamaStackConfiguration": { + "properties": { + "url": { "anyOf": [ { - "additionalProperties": { - "$ref": "#/components/schemas/SecurityScheme" - }, - "type": "object" + "type": "string", + "minLength": 1, + "format": "uri" }, { "type": "null" } ], - "title": "Securityschemes" + "title": "Llama Stack URL", + "description": "URL to Llama Stack service; used when library mode is disabled. Must be a valid HTTP or HTTPS URL." 
}, - "signatures": { + "api_key": { "anyOf": [ { - "items": { - "$ref": "#/components/schemas/AgentCardSignature" - }, - "type": "array" + "type": "string", + "format": "password", + "writeOnly": true }, { "type": "null" } ], - "title": "Signatures" - }, - "skills": { - "items": { - "$ref": "#/components/schemas/AgentSkill" - }, - "type": "array", - "title": "Skills" + "title": "API key", + "description": "API key to access Llama Stack service" }, - "supportsAuthenticatedExtendedCard": { + "use_as_library_client": { "anyOf": [ { "type": "boolean" @@ -4894,198 +7614,155 @@ "type": "null" } ], - "title": "Supportsauthenticatedextendedcard" - }, - "url": { - "type": "string", - "title": "Url", - "examples": [ - "https://api.example.com/a2a/v1" - ] + "title": "Use as library", + "description": "When set to true Llama Stack will be used in library mode, not in server mode (default)" }, - "version": { - "type": "string", - "title": "Version", - "examples": [ - "1.0.0" - ] - } - }, - "type": "object", - "required": [ - "capabilities", - "defaultInputModes", - "defaultOutputModes", - "description", - "name", - "skills", - "url", - "version" - ], - "title": "AgentCard", - "description": "The AgentCard is a self-describing manifest for an agent. It provides essential\nmetadata including the agent's identity, capabilities, skills, supported\ncommunication methods, and security requirements." 
- }, - "AgentCardSignature": { - "properties": { - "header": { + "library_client_config_path": { "anyOf": [ { - "additionalProperties": true, - "type": "object" + "type": "string" }, { "type": "null" } ], - "title": "Header" - }, - "protected": { - "type": "string", - "title": "Protected" + "title": "Llama Stack configuration path", + "description": "Path to configuration file used when Llama Stack is run in library mode" }, - "signature": { - "type": "string", - "title": "Signature" + "timeout": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Request timeout", + "description": "Timeout in seconds for requests to Llama Stack service. Default is 180 seconds (3 minutes) to accommodate long-running RAG queries.", + "default": 180 } }, + "additionalProperties": false, "type": "object", - "required": [ - "protected", - "signature" - ], - "title": "AgentCardSignature", - "description": "AgentCardSignature represents a JWS signature of an AgentCard.\nThis follows the JSON format of an RFC 7515 JSON Web Signature (JWS)." 
+ "title": "LlamaStackConfiguration", + "description": "Llama stack configuration.\n\nLlama Stack is a comprehensive system that provides a uniform set of tools\nfor building, scaling, and deploying generative AI applications, enabling\ndevelopers to create, integrate, and orchestrate multiple AI services and\ncapabilities into an adaptable setup.\n\nUseful resources:\n\n - [Llama Stack](https://www.llama.com/products/llama-stack/)\n - [Python Llama Stack client](https://github.com/llamastack/llama-stack-client-python)\n - [Build AI Applications with Llama Stack](https://llamastack.github.io/)" }, - "AgentExtension": { + "MCPClientAuthOptionsResponse": { "properties": { - "description": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Description" - }, - "params": { - "anyOf": [ + "servers": { + "items": { + "$ref": "#/components/schemas/MCPServerAuthInfo" + }, + "type": "array", + "title": "Servers", + "description": "List of MCP servers that accept client-provided authorization" + } + }, + "type": "object", + "title": "MCPClientAuthOptionsResponse", + "description": "Response containing MCP servers that accept client-provided authorization.", + "examples": [ + { + "servers": [ { - "additionalProperties": true, - "type": "object" + "client_auth_headers": [ + "Authorization" + ], + "name": "github" }, { - "type": "null" + "client_auth_headers": [ + "Authorization", + "X-API-Key" + ], + "name": "gitlab" } - ], - "title": "Params" + ] + } + ] + }, + "MCPListToolsTool": { + "properties": { + "input_schema": { + "additionalProperties": true, + "type": "object", + "title": "Input Schema" }, - "required": { + "name": { + "type": "string", + "title": "Name" + }, + "description": { "anyOf": [ { - "type": "boolean" + "type": "string" }, { "type": "null" } ], - "title": "Required" - }, - "uri": { - "type": "string", - "title": "Uri" + "title": "Description" } }, "type": "object", "required": [ - "uri" + "input_schema", + "name" ], - 
"title": "AgentExtension", - "description": "A declaration of a protocol extension supported by an Agent." + "title": "MCPListToolsTool", + "description": "Tool definition returned by MCP list tools operation.\n\n:param input_schema: JSON schema defining the tool's input parameters\n:param name: Name of the tool\n:param description: (Optional) Description of what the tool does" }, - "AgentInterface": { + "MCPServerAuthInfo": { "properties": { - "transport": { + "name": { "type": "string", - "title": "Transport", - "examples": [ - "JSONRPC", - "GRPC", - "HTTP+JSON" - ] + "title": "Name", + "description": "MCP server name" }, - "url": { - "type": "string", - "title": "Url", - "examples": [ - "https://api.example.com/a2a/v1", - "https://grpc.example.com/a2a", - "https://rest.example.com/v1" - ] + "client_auth_headers": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Client Auth Headers", + "description": "List of authentication header names for client-provided tokens" } }, "type": "object", "required": [ - "transport", - "url" + "name", + "client_auth_headers" ], - "title": "AgentInterface", - "description": "Declares a combination of a target URL and a transport protocol for interacting with the agent.\nThis allows agents to expose the same functionality over multiple transport mechanisms." + "title": "MCPServerAuthInfo", + "description": "Information about MCP server client authentication options." }, - "AgentProvider": { + "Message": { "properties": { - "organization": { + "content": { "type": "string", - "title": "Organization" + "title": "Content", + "description": "The message content", + "examples": [ + "Hello, how can I help you?" + ] }, - "url": { - "type": "string", - "title": "Url" - } - }, - "type": "object", - "required": [ - "organization", - "url" - ], - "title": "AgentProvider", - "description": "Represents the service provider of an agent." 
- }, - "AgentSkill": { - "properties": { - "description": { + "type": { "type": "string", - "title": "Description" - }, - "examples": { - "anyOf": [ - { - "items": { - "type": "string" - }, - "type": "array" - }, - { - "type": "null" - } + "enum": [ + "user", + "assistant", + "system", + "developer" ], - "title": "Examples", + "title": "Type", + "description": "The type of message", "examples": [ - [ - "I need a recipe for bread" - ] + "user", + "assistant", + "system", + "developer" ] }, - "id": { - "type": "string", - "title": "Id" - }, - "inputModes": { + "referenced_documents": { "anyOf": [ { "items": { - "type": "string" + "$ref": "#/components/schemas/ReferencedDocument" }, "type": "array" }, @@ -5093,838 +7770,924 @@ "type": "null" } ], - "title": "Inputmodes" - }, + "title": "Referenced Documents", + "description": "List of documents referenced in the response (assistant messages only)" + } + }, + "type": "object", + "required": [ + "content", + "type" + ], + "title": "Message", + "description": "Model representing a message in a conversation turn.\n\nAttributes:\n content: The message content.\n type: The type of message.\n referenced_documents: Optional list of documents referenced in an assistant response." + }, + "ModelContextProtocolServer": { + "properties": { "name": { "type": "string", - "title": "Name" + "title": "MCP name", + "description": "MCP server name that must be unique" }, - "outputModes": { + "provider_id": { + "type": "string", + "title": "Provider ID", + "description": "MCP provider identification", + "default": "model-context-protocol" + }, + "url": { + "type": "string", + "title": "MCP server URL", + "description": "URL of the MCP server" + }, + "authorization_headers": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Authorization headers", + "description": "Headers to send to the MCP server. The map contains the header name and the path to a file containing the header value (secret). 
There are 3 special cases: 1. Usage of the kubernetes token in the header. To specify this use a string 'kubernetes' instead of the file path. 2. Usage of the client-provided token in the header. To specify this use a string 'client' instead of the file path. 3. Usage of the oauth token in the header. To specify this use a string 'oauth' instead of the file path. " + }, + "headers": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Propagated headers", + "description": "List of HTTP header names to automatically forward from the incoming request to this MCP server. Headers listed here are extracted from the original client request and included when calling the MCP server. This is useful when infrastructure components (e.g. API gateways) inject headers that MCP servers need, such as x-rh-identity in HCC. Header matching is case-insensitive. These headers are additive with authorization_headers and MCP-HEADERS." + }, + "timeout": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "type": "integer", + "exclusiveMinimum": 0.0 }, { "type": "null" } ], - "title": "Outputmodes" - }, - "security": { + "title": "Request timeout", + "description": "Timeout in seconds for requests to the MCP server. If not specified, the default timeout from Llama Stack will be used. Note: This field is reserved for future use when Llama Stack adds timeout support." + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "name", + "url" + ], + "title": "ModelContextProtocolServer", + "description": "Model context protocol server configuration.\n\nMCP (Model Context Protocol) servers provide tools and capabilities to the\nAI agents. These are configured by this structure. Only MCP servers\ndefined in the lightspeed-stack.yaml configuration are available to the\nagents. 
Tools configured in the llama-stack run.yaml are not accessible to\nlightspeed-core agents.\n\nUseful resources:\n\n- [Model Context Protocol](https://modelcontextprotocol.io/docs/getting-started/intro)\n- [MCP FAQs](https://modelcontextprotocol.io/faqs)\n- [Wikipedia article](https://en.wikipedia.org/wiki/Model_Context_Protocol)" + }, + "ModelsResponse": { + "properties": { + "models": { + "items": { + "additionalProperties": true, + "type": "object" + }, + "type": "array", + "title": "Models", + "description": "List of models available" + } + }, + "type": "object", + "required": [ + "models" + ], + "title": "ModelsResponse", + "description": "Model representing a response to models request.", + "examples": [ + { + "models": [ + { + "api_model_type": "llm", + "identifier": "openai/gpt-4-turbo", + "metadata": {}, + "model_type": "llm", + "provider_id": "openai", + "provider_resource_id": "gpt-4-turbo", + "type": "model" + } + ] + } + ] + }, + "MutualTLSSecurityScheme": { + "properties": { + "description": { "anyOf": [ { - "items": { - "additionalProperties": { - "items": { - "type": "string" - }, - "type": "array" - }, - "type": "object" - }, - "type": "array" + "type": "string" }, { "type": "null" } ], - "title": "Security", - "examples": [ - [ - { - "google": [ - "oidc" - ] - } - ] - ] + "title": "Description" }, - "tags": { - "items": { - "type": "string" - }, - "type": "array", - "title": "Tags", - "examples": [ - [ - "cooking", - "customer support", - "billing" - ] - ] + "type": { + "type": "string", + "const": "mutualTLS", + "title": "Type", + "default": "mutualTLS" } }, "type": "object", - "required": [ - "description", - "id", - "name", - "tags" - ], - "title": "AgentSkill", - "description": "Represents a distinct capability or function that an agent can perform." + "title": "MutualTLSSecurityScheme", + "description": "Defines a security scheme using mTLS authentication." 
}, - "Attachment": { + "NotFoundResponse": { "properties": { - "attachment_type": { - "type": "string", - "title": "Attachment Type", - "description": "The attachment type, like 'log', 'configuration' etc.", - "examples": [ - "log" - ] - }, - "content_type": { - "type": "string", - "title": "Content Type", - "description": "The content type as defined in MIME standard", - "examples": [ - "text/plain" - ] + "status_code": { + "type": "integer", + "title": "Status Code" }, - "content": { - "type": "string", - "title": "Content", - "description": "The actual attachment content", - "examples": [ - "warning: quota exceeded" - ] + "detail": { + "$ref": "#/components/schemas/DetailModel" } }, - "additionalProperties": false, "type": "object", "required": [ - "attachment_type", - "content_type", - "content" + "status_code", + "detail" ], - "title": "Attachment", - "description": "Model representing an attachment that can be send from the UI as part of query.\n\nA list of attachments can be an optional part of 'query' request.\n\nAttributes:\n attachment_type: The attachment type, like \"log\", \"configuration\" etc.\n content_type: The content type as defined in MIME standard\n content: The actual attachment content\n\nYAML attachments with **kind** and **metadata/name** attributes will\nbe handled as resources with the specified name:\n```\nkind: Pod\nmetadata:\n name: private-reg\n```", + "title": "NotFoundResponse", + "description": "404 Not Found - Resource does not exist.", "examples": [ { - "attachment_type": "log", - "content": "this is attachment", - "content_type": "text/plain" + "detail": { + "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", + "response": "Conversation not found" + }, + "label": "conversation" }, { - "attachment_type": "configuration", - "content": "kind: Pod\n metadata:\n name: private-reg", - "content_type": "application/yaml" + "detail": { + "cause": "Provider with ID openai does not exist", + "response": 
"Provider not found" + }, + "label": "provider" }, { - "attachment_type": "configuration", - "content": "foo: bar", - "content_type": "application/yaml" + "detail": { + "cause": "Model with ID gpt-4-turbo is not configured", + "response": "Model not found" + }, + "label": "model" + }, + { + "detail": { + "cause": "Rag with ID vs_7b52a8cf-0fa3-489c-beab-27e061d102f3 does not exist", + "response": "Rag not found" + }, + "label": "rag" + }, + { + "detail": { + "cause": "Streaming Request with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", + "response": "Streaming Request not found" + }, + "label": "streaming request" } ] }, - "AuthenticationConfiguration": { + "OAuth2SecurityScheme": { "properties": { - "module": { - "type": "string", - "title": "Module", - "default": "noop" - }, - "skip_tls_verification": { - "type": "boolean", - "title": "Skip Tls Verification", - "default": false - }, - "skip_for_health_probes": { - "type": "boolean", - "title": "Skip authorization for probes", - "description": "Skip authorization for readiness and liveness probes", - "default": false - }, - "k8s_cluster_api": { + "description": { "anyOf": [ { - "type": "string", - "minLength": 1, - "format": "uri" + "type": "string" }, { "type": "null" } ], - "title": "K8S Cluster Api" + "title": "Description" }, - "k8s_ca_cert_path": { + "flows": { + "$ref": "#/components/schemas/OAuthFlows" + }, + "oauth2MetadataUrl": { "anyOf": [ { - "type": "string", - "format": "file-path" + "type": "string" }, { "type": "null" } ], - "title": "K8S Ca Cert Path" + "title": "Oauth2Metadataurl" }, - "jwk_config": { + "type": { + "type": "string", + "const": "oauth2", + "title": "Type", + "default": "oauth2" + } + }, + "type": "object", + "required": [ + "flows" + ], + "title": "OAuth2SecurityScheme", + "description": "Defines a security scheme using OAuth 2.0." 
+ }, + "OAuthFlows": { + "properties": { + "authorizationCode": { "anyOf": [ { - "$ref": "#/components/schemas/JwkConfiguration" + "$ref": "#/components/schemas/AuthorizationCodeOAuthFlow" }, { "type": "null" } ] }, - "api_key_config": { + "clientCredentials": { "anyOf": [ { - "$ref": "#/components/schemas/APIKeyTokenConfiguration" + "$ref": "#/components/schemas/ClientCredentialsOAuthFlow" }, { "type": "null" } ] }, - "rh_identity_config": { + "implicit": { "anyOf": [ { - "$ref": "#/components/schemas/RHIdentityConfiguration" + "$ref": "#/components/schemas/ImplicitOAuthFlow" }, { "type": "null" } ] - } - }, - "additionalProperties": false, - "type": "object", - "title": "AuthenticationConfiguration", - "description": "Authentication configuration." - }, - "AuthorizationCodeOAuthFlow": { - "properties": { - "authorizationUrl": { - "type": "string", - "title": "Authorizationurl" }, - "refreshUrl": { + "password": { "anyOf": [ { - "type": "string" + "$ref": "#/components/schemas/PasswordOAuthFlow" }, { "type": "null" } - ], - "title": "Refreshurl" - }, - "scopes": { - "additionalProperties": { - "type": "string" - }, - "type": "object", - "title": "Scopes" - }, - "tokenUrl": { - "type": "string", - "title": "Tokenurl" - } - }, - "type": "object", - "required": [ - "authorizationUrl", - "scopes", - "tokenUrl" - ], - "title": "AuthorizationCodeOAuthFlow", - "description": "Defines configuration details for the OAuth 2.0 Authorization Code flow." - }, - "AuthorizationConfiguration": { - "properties": { - "access_rules": { - "items": { - "$ref": "#/components/schemas/AccessRule" - }, - "type": "array", - "title": "Access rules", - "description": "Rules for role-based access control" + ] } }, - "additionalProperties": false, "type": "object", - "title": "AuthorizationConfiguration", - "description": "Authorization configuration." + "title": "OAuthFlows", + "description": "Defines the configuration for the supported OAuth 2.0 flows." 
}, - "AuthorizedResponse": { + "OkpConfiguration": { "properties": { - "user_id": { - "type": "string", - "title": "User Id", - "description": "User ID, for example UUID", - "examples": [ - "c5260aec-4d82-4370-9fdf-05cf908b3f16" - ] + "offline": { + "type": "boolean", + "title": "OKP offline mode", + "description": "When True, use parent_id for OKP chunk source URLs. When False, use reference_url for chunk source URLs.", + "default": true }, - "username": { + "chunk_filter_query": { "type": "string", - "title": "Username", - "description": "User name", - "examples": [ - "John Doe", - "Adam Smith" - ] - }, - "skip_userid_check": { - "type": "boolean", - "title": "Skip Userid Check", - "description": "Whether to skip the user ID check", - "examples": [ - true, - false - ] + "title": "OKP chunk filter query", + "description": "OKP filter query applied to every OKP search request. Defaults to 'is_chunk:true' to restrict results to chunk documents. To add extra constraints, extend the expression using boolean syntax, e.g. 'is_chunk:true AND product:*openshift*'.", + "default": "is_chunk:true" } }, + "additionalProperties": false, "type": "object", - "required": [ - "user_id", - "username", - "skip_userid_check" - ], - "title": "AuthorizedResponse", - "description": "Model representing a response to an authorization request.\n\nAttributes:\n user_id: The ID of the logged in user.\n username: The name of the logged in user.\n skip_userid_check: Whether to skip the user ID check.", - "examples": [ - { - "skip_userid_check": false, - "user_id": "123e4567-e89b-12d3-a456-426614174000", - "username": "user1" - } - ] + "title": "OkpConfiguration", + "description": "OKP (Offline Knowledge Portal) provider configuration.\n\nControls provider-specific behaviour for the OKP vector store.\nOnly relevant when ``\"okp\"`` is listed in ``rag.inline`` or ``rag.tool``." 
}, - "AzureEntraIdConfiguration": { + "OpenAIResponseAnnotationCitation": { "properties": { - "tenant_id": { + "type": { "type": "string", - "format": "password", - "title": "Tenant Id", - "writeOnly": true + "const": "url_citation", + "title": "Type", + "default": "url_citation" }, - "client_id": { - "type": "string", - "format": "password", - "title": "Client Id", - "writeOnly": true + "end_index": { + "type": "integer", + "title": "End Index" }, - "client_secret": { + "start_index": { + "type": "integer", + "title": "Start Index" + }, + "title": { "type": "string", - "format": "password", - "title": "Client Secret", - "writeOnly": true + "title": "Title" }, - "scope": { + "url": { "type": "string", - "title": "Token scope", - "description": "Azure Cognitive Services scope for token requests. Override only if using a different Azure service.", - "default": "https://cognitiveservices.azure.com/.default" + "title": "Url" } }, - "additionalProperties": false, "type": "object", "required": [ - "tenant_id", - "client_id", - "client_secret" + "end_index", + "start_index", + "title", + "url" ], - "title": "AzureEntraIdConfiguration", - "description": "Microsoft Entra ID authentication attributes for Azure." 
+ "title": "OpenAIResponseAnnotationCitation", + "description": "URL citation annotation for referencing external web resources.\n\n:param type: Annotation type identifier, always \"url_citation\"\n:param end_index: End position of the citation span in the content\n:param start_index: Start position of the citation span in the content\n:param title: Title of the referenced web resource\n:param url: URL of the referenced web resource" }, - "BadRequestResponse": { + "OpenAIResponseAnnotationContainerFileCitation": { "properties": { - "status_code": { + "type": { + "type": "string", + "const": "container_file_citation", + "title": "Type", + "default": "container_file_citation" + }, + "container_id": { + "type": "string", + "title": "Container Id" + }, + "end_index": { "type": "integer", - "title": "Status Code" + "title": "End Index" }, - "detail": { - "$ref": "#/components/schemas/DetailModel" + "file_id": { + "type": "string", + "title": "File Id" + }, + "filename": { + "type": "string", + "title": "Filename" + }, + "start_index": { + "type": "integer", + "title": "Start Index" } }, "type": "object", "required": [ - "status_code", - "detail" + "container_id", + "end_index", + "file_id", + "filename", + "start_index" ], - "title": "BadRequestResponse", - "description": "400 Bad Request. 
Invalid resource identifier.", - "examples": [ - { - "detail": { - "cause": "The conversation ID 123e4567-e89b-12d3-a456-426614174000 has invalid format.", - "response": "Invalid conversation ID format" - }, - "label": "conversation_id" - } - ] + "title": "OpenAIResponseAnnotationContainerFileCitation" }, - "ByokRag": { + "OpenAIResponseAnnotationFileCitation": { "properties": { - "rag_id": { + "type": { "type": "string", - "minLength": 1, - "title": "RAG ID", - "description": "Unique RAG ID" + "const": "file_citation", + "title": "Type", + "default": "file_citation" }, - "rag_type": { + "file_id": { "type": "string", - "minLength": 1, - "title": "RAG type", - "description": "Type of RAG database.", - "default": "inline::faiss" + "title": "File Id" }, - "embedding_model": { + "filename": { "type": "string", - "minLength": 1, - "title": "Embedding model", - "description": "Embedding model identification", - "default": "sentence-transformers/all-mpnet-base-v2" + "title": "Filename" }, - "embedding_dimension": { + "index": { "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Embedding dimension", - "description": "Dimensionality of embedding vectors.", - "default": 768 - }, - "vector_db_id": { + "title": "Index" + } + }, + "type": "object", + "required": [ + "file_id", + "filename", + "index" + ], + "title": "OpenAIResponseAnnotationFileCitation", + "description": "File citation annotation for referencing specific files in response content.\n\n:param type: Annotation type identifier, always \"file_citation\"\n:param file_id: Unique identifier of the referenced file\n:param filename: Name of the referenced file\n:param index: Position index of the citation within the content" + }, + "OpenAIResponseAnnotationFilePath": { + "properties": { + "type": { "type": "string", - "minLength": 1, - "title": "Vector DB ID", - "description": "Vector database identification." 
+ "const": "file_path", + "title": "Type", + "default": "file_path" }, - "db_path": { + "file_id": { "type": "string", - "format": "file-path", - "title": "DB path", - "description": "Path to RAG database." + "title": "File Id" }, - "score_multiplier": { - "type": "number", - "exclusiveMinimum": 0.0, - "title": "Score multiplier", - "description": "Multiplier applied to relevance scores from this vector store. Used to weight results when querying multiple knowledge sources. Values > 1 boost this store's results; values < 1 reduce them.", - "default": 1.0 + "index": { + "type": "integer", + "title": "Index" } }, - "additionalProperties": false, "type": "object", "required": [ - "rag_id", - "vector_db_id", - "db_path" + "file_id", + "index" ], - "title": "ByokRag", - "description": "BYOK (Bring Your Own Knowledge) RAG configuration." + "title": "OpenAIResponseAnnotationFilePath" }, - "CORSConfiguration": { + "OpenAIResponseContentPartRefusal": { "properties": { - "allow_origins": { - "items": { - "type": "string" - }, - "type": "array", - "title": "Allow origins", - "description": "A list of origins allowed for cross-origin requests. An origin is the combination of protocol (http, https), domain (myapp.com, localhost, localhost.tiangolo.com), and port (80, 443, 8080). Use ['*'] to allow all origins.", - "default": [ - "*" - ] - }, - "allow_credentials": { - "type": "boolean", - "title": "Allow credentials", - "description": "Indicate that cookies should be supported for cross-origin requests", - "default": false - }, - "allow_methods": { - "items": { - "type": "string" - }, - "type": "array", - "title": "Allow methods", - "description": "A list of HTTP methods that should be allowed for cross-origin requests. 
You can use ['*'] to allow all standard methods.", - "default": [ - "*" - ] + "type": { + "type": "string", + "const": "refusal", + "title": "Type", + "default": "refusal" }, - "allow_headers": { - "items": { - "type": "string" - }, - "type": "array", - "title": "Allow headers", - "description": "A list of HTTP request headers that should be supported for cross-origin requests. You can use ['*'] to allow all headers. The Accept, Accept-Language, Content-Language and Content-Type headers are always allowed for simple CORS requests.", - "default": [ - "*" - ] + "refusal": { + "type": "string", + "title": "Refusal" } }, - "additionalProperties": false, "type": "object", - "title": "CORSConfiguration", - "description": "CORS configuration.\n\nCORS or 'Cross-Origin Resource Sharing' refers to the situations when a\nfrontend running in a browser has JavaScript code that communicates with a\nbackend, and the backend is in a different 'origin' than the frontend.\n\nUseful resources:\n\n - [CORS in FastAPI](https://fastapi.tiangolo.com/tutorial/cors/)\n - [Wikipedia article](https://en.wikipedia.org/wiki/Cross-origin_resource_sharing)\n - [What is CORS?](https://dev.to/akshay_chauhan/what-is-cors-explained-8f1)" + "required": [ + "refusal" + ], + "title": "OpenAIResponseContentPartRefusal", + "description": "Refusal content within a streamed response part.\n\n:param type: Content part type identifier, always \"refusal\"\n:param refusal: Refusal text supplied by the model" }, - "ClientCredentialsOAuthFlow": { + "OpenAIResponseError": { "properties": { - "refreshUrl": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Refreshurl" - }, - "scopes": { - "additionalProperties": { - "type": "string" - }, - "type": "object", - "title": "Scopes" + "code": { + "type": "string", + "title": "Code" }, - "tokenUrl": { + "message": { "type": "string", - "title": "Tokenurl" + "title": "Message" } }, "type": "object", "required": [ - "scopes", - "tokenUrl" 
+ "code", + "message" ], - "title": "ClientCredentialsOAuthFlow", - "description": "Defines configuration details for the OAuth 2.0 Client Credentials flow." + "title": "OpenAIResponseError", + "description": "Error details for failed OpenAI response requests.\n\n:param code: Error code identifying the type of failure\n:param message: Human-readable error message describing the failure" }, - "Configuration": { + "OpenAIResponseInputFunctionToolCallOutput": { "properties": { - "name": { + "call_id": { "type": "string", - "title": "Service name", - "description": "Name of the service. That value will be used in REST API endpoints." - }, - "service": { - "$ref": "#/components/schemas/ServiceConfiguration", - "title": "Service configuration", - "description": "This section contains Lightspeed Core Stack service configuration." - }, - "llama_stack": { - "$ref": "#/components/schemas/LlamaStackConfiguration", - "title": "Llama Stack configuration", - "description": "This section contains Llama Stack configuration. Lightspeed Core Stack service can call Llama Stack in library mode or in server mode." + "title": "Call Id" }, - "user_data_collection": { - "$ref": "#/components/schemas/UserDataCollection", - "title": "User data collection configuration", - "description": "This section contains configuration for subsystem that collects user data(transcription history and feedbacks)." - }, - "database": { - "$ref": "#/components/schemas/DatabaseConfiguration", - "title": "Database Configuration", - "description": "Configuration for database to store conversation IDs and other runtime data" + "output": { + "type": "string", + "title": "Output" }, - "mcp_servers": { - "items": { - "$ref": "#/components/schemas/ModelContextProtocolServer" - }, - "type": "array", - "title": "Model Context Protocol Server and tools configuration", - "description": "MCP (Model Context Protocol) servers provide tools and capabilities to the AI agents. These are configured in this section. 
Only MCP servers defined in the lightspeed-stack.yaml configuration are available to the agents. Tools configured in the llama-stack run.yaml are not accessible to lightspeed-core agents." + "type": { + "type": "string", + "const": "function_call_output", + "title": "Type", + "default": "function_call_output" }, - "authentication": { - "$ref": "#/components/schemas/AuthenticationConfiguration", - "title": "Authentication configuration", - "description": "Authentication configuration" + "id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Id" }, - "authorization": { + "status": { "anyOf": [ { - "$ref": "#/components/schemas/AuthorizationConfiguration" + "type": "string" }, { "type": "null" } ], - "title": "Authorization configuration", - "description": "Lightspeed Core Stack implements a modular authentication and authorization system with multiple authentication methods. Authorization is configurable through role-based access control. Authentication is handled through selectable modules configured via the module field in the authentication configuration." + "title": "Status" + } + }, + "type": "object", + "required": [ + "call_id", + "output" + ], + "title": "OpenAIResponseInputFunctionToolCallOutput", + "description": "This represents the output of a function call that gets passed back to the model." + }, + "OpenAIResponseInputMessageContentFile": { + "properties": { + "type": { + "type": "string", + "const": "input_file", + "title": "Type", + "default": "input_file" }, - "customization": { + "file_data": { "anyOf": [ { - "$ref": "#/components/schemas/Customization" + "type": "string" }, { "type": "null" } ], - "title": "Custom profile configuration", - "description": "It is possible to customize Lightspeed Core Stack via this section. System prompt can be customized and also different parts of the service can be replaced by custom Python modules." 
- }, - "inference": { - "$ref": "#/components/schemas/InferenceConfiguration", - "title": "Inference configuration", - "description": "One LLM provider and one its model might be selected as default ones. When no provider+model pair is specified in REST API calls (query endpoints), the default provider and model are used." + "title": "File Data" }, - "conversation_cache": { - "$ref": "#/components/schemas/ConversationHistoryConfiguration", - "title": "Conversation history configuration" + "file_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Id" }, - "byok_rag": { - "items": { - "$ref": "#/components/schemas/ByokRag" - }, - "type": "array", - "title": "BYOK RAG configuration", - "description": "BYOK RAG configuration. This configuration can be used to reconfigure Llama Stack through its run.yaml configuration file" + "file_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Url" }, - "a2a_state": { - "$ref": "#/components/schemas/A2AStateConfiguration", - "title": "A2A state configuration", - "description": "Configuration for A2A protocol persistent state storage." + "filename": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Filename" + } + }, + "type": "object", + "title": "OpenAIResponseInputMessageContentFile", + "description": "File content for input messages in OpenAI response format.\n\n:param type: The type of the input item. Always `input_file`.\n:param file_data: The data of the file to be sent to the model.\n:param file_id: (Optional) The ID of the file to be sent to the model.\n:param file_url: The URL of the file to be sent to the model.\n:param filename: The name of the file to be sent to the model." 
+ }, + "OpenAIResponseInputMessageContentImage": { + "properties": { + "detail": { + "anyOf": [ + { + "type": "string", + "const": "low" + }, + { + "type": "string", + "const": "high" + }, + { + "type": "string", + "const": "auto" + } + ], + "title": "Detail", + "default": "auto" }, - "quota_handlers": { - "$ref": "#/components/schemas/QuotaHandlersConfiguration", - "title": "Quota handlers", - "description": "Quota handlers configuration" + "type": { + "type": "string", + "const": "input_image", + "title": "Type", + "default": "input_image" }, - "azure_entra_id": { + "file_id": { "anyOf": [ { - "$ref": "#/components/schemas/AzureEntraIdConfiguration" + "type": "string" }, { "type": "null" } - ] + ], + "title": "File Id" }, - "splunk": { + "image_url": { "anyOf": [ { - "$ref": "#/components/schemas/SplunkConfiguration" + "type": "string" }, { "type": "null" } ], - "title": "Splunk configuration", - "description": "Splunk HEC configuration for sending telemetry events." + "title": "Image Url" + } + }, + "type": "object", + "title": "OpenAIResponseInputMessageContentImage", + "description": "Image content for input messages in OpenAI response format.\n\n:param detail: Level of detail for image processing, can be \"low\", \"high\", or \"auto\"\n:param type: Content type identifier, always \"input_image\"\n:param file_id: (Optional) The ID of the file to be sent to the model.\n:param image_url: (Optional) URL of the image content" + }, + "OpenAIResponseInputMessageContentText": { + "properties": { + "text": { + "type": "string", + "title": "Text" }, - "deployment_environment": { + "type": { "type": "string", - "title": "Deployment environment", - "description": "Deployment environment name (e.g., 'development', 'staging', 'production'). 
Used in telemetry events.", - "default": "development" + "const": "input_text", + "title": "Type", + "default": "input_text" + } + }, + "type": "object", + "required": [ + "text" + ], + "title": "OpenAIResponseInputMessageContentText", + "description": "Text content for input messages in OpenAI response format.\n\n:param text: The text content of the input message\n:param type: Content type identifier, always \"input_text\"" + }, + "OpenAIResponseInputToolChoiceAllowedTools": { + "properties": { + "mode": { + "type": "string", + "enum": [ + "auto", + "required" + ], + "title": "Mode", + "default": "auto" }, - "rag": { - "$ref": "#/components/schemas/RagConfiguration", - "title": "RAG configuration", - "description": "Configuration for all RAG strategies (inline and tool-based)." + "tools": { + "items": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "type": "array", + "title": "Tools" }, - "okp": { - "$ref": "#/components/schemas/OkpConfiguration", - "title": "OKP configuration", - "description": "OKP provider settings. Only used when 'okp' is listed in rag.inline or rag.tool." + "type": { + "type": "string", + "const": "allowed_tools", + "title": "Type", + "default": "allowed_tools" } }, - "additionalProperties": false, "type": "object", "required": [ - "name", - "service", - "llama_stack", - "user_data_collection" + "tools" ], - "title": "Configuration", - "description": "Global service configuration." 
+ "title": "OpenAIResponseInputToolChoiceAllowedTools", + "description": "Constrains the tools available to the model to a pre-defined set.\n\n:param mode: Constrains the tools available to the model to a pre-defined set\n:param tools: A list of tool definitions that the model should be allowed to call\n:param type: Tool choice type identifier, always \"allowed_tools\"" }, - "ConfigurationResponse": { + "OpenAIResponseInputToolChoiceCustomTool": { "properties": { - "configuration": { - "$ref": "#/components/schemas/Configuration" + "type": { + "type": "string", + "const": "custom", + "title": "Type", + "default": "custom" + }, + "name": { + "type": "string", + "title": "Name" } }, "type": "object", "required": [ - "configuration" + "name" ], - "title": "ConfigurationResponse", - "description": "Success response model for the config endpoint.", - "examples": [ - { - "configuration": { - "authentication": { - "module": "noop", - "skip_tls_verification": false - }, - "authorization": { - "access_rules": [] - }, - "byok_rag": [], - "conversation_cache": {}, - "database": { - "sqlite": { - "db_path": "/tmp/lightspeed-stack.db" - } - }, - "inference": { - "default_model": "gpt-4-turbo", - "default_provider": "openai" - }, - "llama_stack": { - "api_key": "*****", - "url": "http://localhost:8321", - "use_as_library_client": false + "title": "OpenAIResponseInputToolChoiceCustomTool", + "description": "Forces the model to call a custom tool.\n\n:param type: Tool choice type identifier, always \"custom\"\n:param name: The name of the custom tool to call." 
+ }, + "OpenAIResponseInputToolChoiceFileSearch": { + "properties": { + "type": { + "type": "string", + "const": "file_search", + "title": "Type", + "default": "file_search" + } + }, + "type": "object", + "title": "OpenAIResponseInputToolChoiceFileSearch", + "description": "Indicates that the model should use file search to generate a response.\n\n:param type: Tool choice type identifier, always \"file_search\"" + }, + "OpenAIResponseInputToolChoiceFunctionTool": { + "properties": { + "name": { + "type": "string", + "title": "Name" + }, + "type": { + "type": "string", + "const": "function", + "title": "Type", + "default": "function" + } + }, + "type": "object", + "required": [ + "name" + ], + "title": "OpenAIResponseInputToolChoiceFunctionTool", + "description": "Forces the model to call a specific function.\n\n:param name: The name of the function to call\n:param type: Tool choice type identifier, always \"function\"" + }, + "OpenAIResponseInputToolChoiceMCPTool": { + "properties": { + "server_label": { + "type": "string", + "title": "Server Label" + }, + "type": { + "type": "string", + "const": "mcp", + "title": "Type", + "default": "mcp" + }, + "name": { + "anyOf": [ + { + "type": "string" }, - "mcp_servers": [ - { - "name": "server1", - "provider_id": "provider1", - "url": "http://url.com:1" - } - ], - "name": "lightspeed-stack", - "quota_handlers": { - "enable_token_history": false, - "limiters": [], - "scheduler": { - "period": 1 - } + { + "type": "null" + } + ], + "title": "Name" + } + }, + "type": "object", + "required": [ + "server_label" + ], + "title": "OpenAIResponseInputToolChoiceMCPTool", + "description": "Forces the model to call a specific tool on a remote MCP server\n\n:param server_label: The label of the MCP server to use.\n:param type: Tool choice type identifier, always \"mcp\"\n:param name: (Optional) The name of the tool to call on the server." 
+ }, + "OpenAIResponseInputToolChoiceMode": { + "type": "string", + "enum": [ + "auto", + "required", + "none" + ], + "title": "OpenAIResponseInputToolChoiceMode" + }, + "OpenAIResponseInputToolChoiceWebSearch": { + "properties": { + "type": { + "anyOf": [ + { + "type": "string", + "const": "web_search" }, - "service": { - "access_log": true, - "auth_enabled": false, - "color_log": true, - "cors": { - "allow_credentials": false, - "allow_headers": [ - "*" - ], - "allow_methods": [ - "*" - ], - "allow_origins": [ - "*" - ] - }, - "host": "localhost", - "port": 8080, - "tls_config": {}, - "workers": 1 + { + "type": "string", + "const": "web_search_preview" }, - "user_data_collection": { - "feedback_enabled": true, - "feedback_storage": "/tmp/data/feedback", - "transcripts_enabled": false, - "transcripts_storage": "/tmp/data/transcripts" + { + "type": "string", + "const": "web_search_preview_2025_03_11" + }, + { + "type": "string", + "const": "web_search_2025_08_26" } - } + ], + "title": "Type", + "default": "web_search" } - ] + }, + "type": "object", + "title": "OpenAIResponseInputToolChoiceWebSearch", + "description": "Indicates that the model should use web search to generate a response\n\n:param type: Web search tool type variant to use" }, - "ConversationData": { + "OpenAIResponseInputToolFileSearch": { "properties": { - "conversation_id": { + "type": { "type": "string", - "title": "Conversation Id" + "const": "file_search", + "title": "Type", + "default": "file_search" }, - "topic_summary": { + "vector_store_ids": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Vector Store Ids" + }, + "filters": { "anyOf": [ { - "type": "string" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], - "title": "Topic Summary" + "title": "Filters" }, - "last_message_timestamp": { - "type": "number", - "title": "Last Message Timestamp" + "max_num_results": { + "anyOf": [ + { + "type": "integer", + "maximum": 50.0, + "minimum": 1.0 + }, + 
{ + "type": "null" + } + ], + "title": "Max Num Results", + "default": 10 + }, + "ranking_options": { + "anyOf": [ + { + "$ref": "#/components/schemas/SearchRankingOptions" + }, + { + "type": "null" + } + ] } }, "type": "object", "required": [ - "conversation_id", - "topic_summary", - "last_message_timestamp" + "vector_store_ids" ], - "title": "ConversationData", - "description": "Model representing conversation data returned by cache list operations.\n\nAttributes:\n conversation_id: The conversation ID\n topic_summary: The topic summary for the conversation (can be None)\n last_message_timestamp: The timestamp of the last message in the conversation" + "title": "OpenAIResponseInputToolFileSearch", + "description": "File search tool configuration for OpenAI response inputs.\n\n:param type: Tool type identifier, always \"file_search\"\n:param vector_store_ids: List of vector store identifiers to search within\n:param filters: (Optional) Additional filters to apply to the search\n:param max_num_results: (Optional) Maximum number of search results to return (1-50)\n:param ranking_options: (Optional) Options for ranking and scoring search results" }, - "ConversationDeleteResponse": { + "OpenAIResponseInputToolFunction": { "properties": { - "conversation_id": { + "type": { "type": "string", - "title": "Conversation Id", - "description": "The conversation ID (UUID) that was deleted.", - "examples": [ - "123e4567-e89b-12d3-a456-426614174000" - ] - }, - "success": { - "type": "boolean", - "title": "Success", - "description": "Whether the deletion was successful.", - "examples": [ - true, - false - ] + "const": "function", + "title": "Type", + "default": "function" }, - "response": { + "name": { "type": "string", - "title": "Response", - "description": "A message about the deletion result.", - "examples": [ - "Conversation deleted successfully", - "Conversation cannot be deleted" - ] + "title": "Name" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + 
"type": "null" + } + ], + "title": "Description" + }, + "parameters": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Parameters" + }, + "strict": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Strict" } }, "type": "object", "required": [ - "conversation_id", - "success", - "response" + "name", + "parameters" ], - "title": "ConversationDeleteResponse", - "description": "Model representing a response for deleting a conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID) that was deleted.\n success: Whether the deletion was successful.\n response: A message about the deletion result.", - "examples": [ - { - "label": "deleted", - "value": { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "response": "Conversation deleted successfully", - "success": true - } - }, - { - "label": "not found", - "value": { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "response": "Conversation can not be deleted", - "success": true - } - } - ] + "title": "OpenAIResponseInputToolFunction", + "description": "Function tool configuration for OpenAI response inputs.\n\n:param type: Tool type identifier, always \"function\"\n:param name: Name of the function that can be called\n:param description: (Optional) Description of what the function does\n:param parameters: (Optional) JSON schema defining the function's parameters\n:param strict: (Optional) Whether to enforce strict parameter validation" }, - "ConversationDetails": { + "OpenAIResponseInputToolMCP": { "properties": { - "conversation_id": { + "type": { "type": "string", - "title": "Conversation Id", - "description": "Conversation ID (UUID)", - "examples": [ - "c5260aec-4d82-4370-9fdf-05cf908b3f16" - ] + "const": "mcp", + "title": "Type", + "default": "mcp" }, - "created_at": { + "server_label": { + "type": "string", + "title": "Server Label" + }, + "connector_id": { "anyOf": [ { 
"type": "string" @@ -5933,13 +8696,9 @@ "type": "null" } ], - "title": "Created At", - "description": "When the conversation was created", - "examples": [ - "2024-01-01T01:00:00Z" - ] + "title": "Connector Id" }, - "last_message_at": { + "server_url": { "anyOf": [ { "type": "string" @@ -5948,44 +8707,167 @@ "type": "null" } ], - "title": "Last Message At", - "description": "When the last message was sent", - "examples": [ - "2024-01-01T01:00:00Z" - ] + "title": "Server Url" }, - "message_count": { + "headers": { "anyOf": [ { - "type": "integer" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], - "title": "Message Count", - "description": "Number of user messages in the conversation", - "examples": [ - 42 - ] + "title": "Headers" }, - "last_used_model": { + "authorization": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Authorization" + }, + "require_approval": { + "anyOf": [ + { + "type": "string", + "const": "always" + }, + { + "type": "string", + "const": "never" + }, + { + "$ref": "#/components/schemas/ApprovalFilter" + } + ], + "title": "Require Approval", + "default": "never" + }, + "allowed_tools": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "$ref": "#/components/schemas/AllowedToolsFilter" + }, + { + "type": "null" + } + ], + "title": "Allowed Tools" + } + }, + "type": "object", + "required": [ + "server_label" + ], + "title": "OpenAIResponseInputToolMCP", + "description": "Model Context Protocol (MCP) tool configuration for OpenAI response inputs.\n\n:param type: Tool type identifier, always \"mcp\"\n:param server_label: Label to identify this MCP server\n:param connector_id: (Optional) ID of the connector to use for this MCP server\n:param server_url: (Optional) URL endpoint of the MCP server\n:param headers: (Optional) HTTP headers to include when connecting to the server\n:param authorization: (Optional) OAuth access token for authenticating with the 
MCP server\n:param require_approval: Approval requirement for tool calls (\"always\", \"never\", or filter)\n:param allowed_tools: (Optional) Restriction on which tools can be used from this server" + }, + "OpenAIResponseInputToolWebSearch": { + "properties": { + "type": { + "anyOf": [ + { + "type": "string", + "const": "web_search" + }, + { + "type": "string", + "const": "web_search_preview" + }, + { + "type": "string", + "const": "web_search_preview_2025_03_11" + }, + { + "type": "string", + "const": "web_search_2025_08_26" + } + ], + "title": "Type", + "default": "web_search" + }, + "search_context_size": { "anyOf": [ { - "type": "string" + "type": "string", + "pattern": "^low|medium|high$" }, { "type": "null" } ], - "title": "Last Used Model", - "description": "Identification of the last model used for the conversation", - "examples": [ - "gpt-4-turbo", - "gpt-3.5-turbo-0125" - ] + "title": "Search Context Size", + "default": "medium" + } + }, + "type": "object", + "title": "OpenAIResponseInputToolWebSearch", + "description": "Web search tool configuration for OpenAI response inputs.\n\n:param type: Web search tool type variant to use\n:param search_context_size: (Optional) Size of search context, must be \"low\", \"medium\", or \"high\"" + }, + "OpenAIResponseMCPApprovalRequest": { + "properties": { + "arguments": { + "type": "string", + "title": "Arguments" }, - "last_used_provider": { + "id": { + "type": "string", + "title": "Id" + }, + "name": { + "type": "string", + "title": "Name" + }, + "server_label": { + "type": "string", + "title": "Server Label" + }, + "type": { + "type": "string", + "const": "mcp_approval_request", + "title": "Type", + "default": "mcp_approval_request" + } + }, + "type": "object", + "required": [ + "arguments", + "id", + "name", + "server_label" + ], + "title": "OpenAIResponseMCPApprovalRequest", + "description": "A request for human approval of a tool invocation." 
+ }, + "OpenAIResponseMCPApprovalResponse": { + "properties": { + "approval_request_id": { + "type": "string", + "title": "Approval Request Id" + }, + "approve": { + "type": "boolean", + "title": "Approve" + }, + "type": { + "type": "string", + "const": "mcp_approval_response", + "title": "Type", + "default": "mcp_approval_response" + }, + "id": { "anyOf": [ { "type": "string" @@ -5994,14 +8876,9 @@ "type": "null" } ], - "title": "Last Used Provider", - "description": "Identification of the last provider used for the conversation", - "examples": [ - "openai", - "gemini" - ] + "title": "Id" }, - "topic_summary": { + "reason": { "anyOf": [ { "type": "string" @@ -6010,382 +8887,468 @@ "type": "null" } ], - "title": "Topic Summary", - "description": "Topic summary for the conversation", - "examples": [ - "Openshift Microservices Deployment Strategies" - ] + "title": "Reason" } }, "type": "object", "required": [ - "conversation_id" + "approval_request_id", + "approve" ], - "title": "ConversationDetails", - "description": "Model representing the details of a user conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID).\n created_at: When the conversation was created.\n last_message_at: When the last message was sent.\n message_count: Number of user messages in the conversation.\n last_used_model: The last model used for the conversation.\n last_used_provider: The provider of the last used model.\n topic_summary: The topic summary for the conversation.\n\nExample:\n ```python\n conversation = ConversationDetails(\n conversation_id=\"123e4567-e89b-12d3-a456-426614174000\",\n created_at=\"2024-01-01T00:00:00Z\",\n last_message_at=\"2024-01-01T00:05:00Z\",\n message_count=5,\n last_used_model=\"gemini/gemini-2.0-flash\",\n last_used_provider=\"gemini\",\n topic_summary=\"Openshift Microservices Deployment Strategies\",\n )\n ```" + "title": "OpenAIResponseMCPApprovalResponse", + "description": "A response to an MCP approval request." 
}, - "ConversationHistoryConfiguration": { + "OpenAIResponseMessage-Input": { "properties": { - "type": { + "content": { "anyOf": [ { - "type": "string", - "enum": [ - "noop", - "memory", - "sqlite", - "postgres" - ] + "type": "string" }, { - "type": "null" + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentText" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile", + "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage", + "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText" + } + } + }, + "type": "array" + }, + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageContentOutputText-Input" + }, + { + "$ref": "#/components/schemas/OpenAIResponseContentPartRefusal" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "output_text": "#/components/schemas/OpenAIResponseOutputMessageContentOutputText-Input", + "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal" + } + } + }, + "type": "array" } ], - "title": "Conversation history database type", - "description": "Type of database where the conversation history is to be stored." 
+ "title": "Content" }, - "memory": { + "role": { "anyOf": [ { - "$ref": "#/components/schemas/InMemoryCacheConfig" + "type": "string", + "const": "system" }, { - "type": "null" + "type": "string", + "const": "developer" + }, + { + "type": "string", + "const": "user" + }, + { + "type": "string", + "const": "assistant" } ], - "title": "In-memory cache configuration", - "description": "In-memory cache configuration" + "title": "Role" }, - "sqlite": { + "type": { + "type": "string", + "const": "message", + "title": "Type", + "default": "message" + }, + "id": { "anyOf": [ { - "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" + "type": "string" }, { "type": "null" } ], - "title": "SQLite configuration", - "description": "SQLite database configuration" + "title": "Id" }, - "postgres": { + "status": { "anyOf": [ { - "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + "type": "string" }, { "type": "null" } ], - "title": "PostgreSQL configuration", - "description": "PostgreSQL database configuration" + "title": "Status" } }, - "additionalProperties": false, "type": "object", - "title": "ConversationHistoryConfiguration", - "description": "Conversation history configuration." + "required": [ + "content", + "role" + ], + "title": "OpenAIResponseMessage", + "description": "Corresponds to the various Message types in the Responses API.\nThey are all under one type because the Responses API gives them all\nthe same \"type\" value, and there is no way to tell them apart in certain\nscenarios." 
}, - "ConversationResponse": { + "OpenAIResponseMessage-Output": { "properties": { - "conversation_id": { - "type": "string", - "title": "Conversation Id", - "description": "Conversation ID (UUID)", - "examples": [ - "c5260aec-4d82-4370-9fdf-05cf908b3f16" - ] - }, - "chat_history": { - "items": { - "$ref": "#/components/schemas/ConversationTurn" - }, - "type": "array", - "title": "Chat History", - "description": "The simplified chat history as a list of conversation turns", - "examples": [ + "content": { + "anyOf": [ { - "completed_at": "2024-01-01T00:01:05Z", - "messages": [ - { - "content": "Hello", - "type": "user" - }, - { - "content": "Hi there!", - "type": "assistant" + "type": "string" + }, + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentText" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile", + "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage", + "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText" + } } - ], - "model": "gpt-4o-mini", - "provider": "openai", - "started_at": "2024-01-01T00:01:00Z", - "tool_calls": [], - "tool_results": [] - } - ] - } - }, - "type": "object", - "required": [ - "conversation_id", - "chat_history" - ], - "title": "ConversationResponse", - "description": "Model representing a response for retrieving a conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID).\n chat_history: The chat history as a list of conversation turns.", - "examples": [ - { - "chat_history": [ + }, + "type": "array" + }, { - "completed_at": "2024-01-01T00:01:05Z", - "messages": [ - { - "content": "Hello", - "type": "user" - }, - { - "content": "Hi there!", - "type": "assistant" + "items": 
{ + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageContentOutputText-Output" + }, + { + "$ref": "#/components/schemas/OpenAIResponseContentPartRefusal" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "output_text": "#/components/schemas/OpenAIResponseOutputMessageContentOutputText-Output", + "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal" + } } - ], - "model": "gpt-4o-mini", - "provider": "openai", - "started_at": "2024-01-01T00:01:00Z", - "tool_calls": [], - "tool_results": [] + }, + "type": "array" } - ], - "conversation_id": "123e4567-e89b-12d3-a456-426614174000" - } - ] - }, - "ConversationTurn": { - "properties": { - "messages": { - "items": { - "$ref": "#/components/schemas/Message" - }, - "type": "array", - "title": "Messages", - "description": "List of messages in this turn" - }, - "tool_calls": { - "items": { - "$ref": "#/components/schemas/ToolCallSummary" - }, - "type": "array", - "title": "Tool Calls", - "description": "List of tool calls made in this turn" - }, - "tool_results": { - "items": { - "$ref": "#/components/schemas/ToolResultSummary" - }, - "type": "array", - "title": "Tool Results", - "description": "List of tool results from this turn" + ], + "title": "Content" }, - "provider": { - "type": "string", - "title": "Provider", - "description": "Provider identifier used for this turn", - "examples": [ - "openai" - ] + "role": { + "anyOf": [ + { + "type": "string", + "const": "system" + }, + { + "type": "string", + "const": "developer" + }, + { + "type": "string", + "const": "user" + }, + { + "type": "string", + "const": "assistant" + } + ], + "title": "Role" }, - "model": { + "type": { "type": "string", - "title": "Model", - "description": "Model identifier used for this turn", - "examples": [ - "gpt-4o-mini" - ] + "const": "message", + "title": "Type", + "default": "message" }, - "started_at": { - "type": "string", - "title": "Started At", - "description": "ISO 8601 timestamp 
when the turn started", - "examples": [ - "2024-01-01T00:01:00Z" - ] + "id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Id" }, - "completed_at": { - "type": "string", - "title": "Completed At", - "description": "ISO 8601 timestamp when the turn completed", - "examples": [ - "2024-01-01T00:01:05Z" - ] + "status": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Status" } }, "type": "object", "required": [ - "provider", - "model", - "started_at", - "completed_at" + "content", + "role" ], - "title": "ConversationTurn", - "description": "Model representing a single conversation turn.\n\nAttributes:\n messages: List of messages in this turn.\n tool_calls: List of tool calls made in this turn.\n tool_results: List of tool results from this turn.\n provider: Provider identifier used for this turn.\n model: Model identifier used for this turn.\n started_at: ISO 8601 timestamp when the turn started.\n completed_at: ISO 8601 timestamp when the turn completed." + "title": "OpenAIResponseMessage", + "description": "Corresponds to the various Message types in the Responses API.\nThey are all under one type because the Responses API gives them all\nthe same \"type\" value, and there is no way to tell them apart in certain\nscenarios." 
}, - "ConversationUpdateRequest": { + "OpenAIResponseOutputMessageContentOutputText-Input": { "properties": { - "topic_summary": { + "text": { "type": "string", - "maxLength": 1000, - "minLength": 1, - "title": "Topic Summary", - "description": "The new topic summary for the conversation", - "examples": [ - "Discussion about machine learning algorithms" - ] + "title": "Text" + }, + "type": { + "type": "string", + "const": "output_text", + "title": "Type", + "default": "output_text" + }, + "annotations": { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation", + "file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation", + "file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath", + "url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation" + } + } + }, + "type": "array", + "title": "Annotations" + }, + "logprobs": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/OpenAITokenLogProb" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Logprobs" } }, - "additionalProperties": false, "type": "object", "required": [ - "topic_summary" + "text" ], - "title": "ConversationUpdateRequest", - "description": "Model representing a request to update a conversation topic summary.\n\nAttributes:\n topic_summary: The new topic summary for the conversation.\n\nExample:\n ```python\n update_request = ConversationUpdateRequest(\n topic_summary=\"Discussion about machine learning algorithms\"\n )\n ```" + "title": "OpenAIResponseOutputMessageContentOutputText" }, - 
"ConversationUpdateResponse": { + "OpenAIResponseOutputMessageContentOutputText-Output": { "properties": { - "conversation_id": { + "text": { "type": "string", - "title": "Conversation Id", - "description": "The conversation ID (UUID) that was updated", - "examples": [ - "123e4567-e89b-12d3-a456-426614174000" - ] - }, - "success": { - "type": "boolean", - "title": "Success", - "description": "Whether the update was successful", - "examples": [ - true - ] + "title": "Text" }, - "message": { + "type": { "type": "string", - "title": "Message", - "description": "A message about the update result", - "examples": [ - "Topic summary updated successfully" - ] + "const": "output_text", + "title": "Type", + "default": "output_text" + }, + "annotations": { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation", + "file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation", + "file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath", + "url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation" + } + } + }, + "type": "array", + "title": "Annotations" + }, + "logprobs": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/OpenAITokenLogProb" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Logprobs" } }, "type": "object", "required": [ - "conversation_id", - "success", - "message" + "text" ], - "title": "ConversationUpdateResponse", - "description": "Model representing a response for updating a conversation topic summary.\n\nAttributes:\n conversation_id: The 
conversation ID (UUID) that was updated.\n success: Whether the update was successful.\n message: A message about the update result.\n\nExample:\n ```python\n update_response = ConversationUpdateResponse(\n conversation_id=\"123e4567-e89b-12d3-a456-426614174000\",\n success=True,\n message=\"Topic summary updated successfully\",\n )\n ```", - "examples": [ - { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "message": "Topic summary updated successfully", - "success": true - } - ] + "title": "OpenAIResponseOutputMessageContentOutputText" }, - "ConversationsListResponse": { + "OpenAIResponseOutputMessageFileSearchToolCall": { "properties": { - "conversations": { + "id": { + "type": "string", + "title": "Id" + }, + "queries": { "items": { - "$ref": "#/components/schemas/ConversationDetails" + "type": "string" }, "type": "array", - "title": "Conversations" - } - }, - "type": "object", - "required": [ - "conversations" - ], - "title": "ConversationsListResponse", - "description": "Model representing a response for listing conversations of a user.\n\nAttributes:\n conversations: List of conversation details associated with the user.", - "examples": [ - { - "conversations": [ + "title": "Queries" + }, + "status": { + "type": "string", + "title": "Status" + }, + "type": { + "type": "string", + "const": "file_search_call", + "title": "Type", + "default": "file_search_call" + }, + "results": { + "anyOf": [ { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "created_at": "2024-01-01T00:00:00Z", - "last_message_at": "2024-01-01T00:05:00Z", - "last_used_model": "gemini/gemini-2.0-flash", - "last_used_provider": "gemini", - "message_count": 5, - "topic_summary": "Openshift Microservices Deployment Strategies" + "items": { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCallResults" + }, + "type": "array" }, { - "conversation_id": "456e7890-e12b-34d5-a678-901234567890", - "created_at": "2024-01-01T01:00:00Z", - 
"last_used_model": "gemini/gemini-2.5-flash", - "last_used_provider": "gemini", - "message_count": 2, - "topic_summary": "RHDH Purpose Summary" + "type": "null" } - ] - } - ] - }, - "ConversationsListResponseV2": { - "properties": { - "conversations": { - "items": { - "$ref": "#/components/schemas/ConversationData" - }, - "type": "array", - "title": "Conversations" + ], + "title": "Results" } }, "type": "object", - "required": [ - "conversations" - ], - "title": "ConversationsListResponseV2", - "description": "Model representing a response for listing conversations of a user.\n\nAttributes:\n conversations: List of conversation data associated with the user.", - "examples": [ - { - "conversations": [ - { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "last_message_timestamp": 1704067200.0, - "topic_summary": "Openshift Microservices Deployment Strategies" - } - ] - } - ] + "required": [ + "id", + "queries", + "status" + ], + "title": "OpenAIResponseOutputMessageFileSearchToolCall", + "description": "File search tool call output message for OpenAI responses.\n\n:param id: Unique identifier for this tool call\n:param queries: List of search queries executed\n:param status: Current status of the file search operation\n:param type: Tool call type identifier, always \"file_search_call\"\n:param results: (Optional) Search results returned by the file search operation" }, - "CustomProfile": { + "OpenAIResponseOutputMessageFileSearchToolCallResults": { "properties": { - "path": { + "attributes": { + "additionalProperties": true, + "type": "object", + "title": "Attributes" + }, + "file_id": { "type": "string", - "title": "Path to custom profile", - "description": "Path to Python modules containing custom profile." 
+ "title": "File Id" }, - "prompts": { - "additionalProperties": { - "type": "string" - }, - "type": "object", - "title": "System prompts", - "description": "Dictionary containing map of system prompts", - "default": {} + "filename": { + "type": "string", + "title": "Filename" + }, + "score": { + "type": "number", + "title": "Score" + }, + "text": { + "type": "string", + "title": "Text" } }, "type": "object", "required": [ - "path" + "attributes", + "file_id", + "filename", + "score", + "text" ], - "title": "CustomProfile", - "description": "Custom profile customization for prompts and validation." + "title": "OpenAIResponseOutputMessageFileSearchToolCallResults", + "description": "Search results returned by the file search operation.\n\n:param attributes: (Optional) Key-value attributes associated with the file\n:param file_id: Unique identifier of the file containing the result\n:param filename: Name of the file containing the result\n:param score: Relevance score for this search result (between 0 and 1)\n:param text: Text content of the search result" }, - "Customization": { + "OpenAIResponseOutputMessageFunctionToolCall": { "properties": { - "profile_path": { + "call_id": { + "type": "string", + "title": "Call Id" + }, + "name": { + "type": "string", + "title": "Name" + }, + "arguments": { + "type": "string", + "title": "Arguments" + }, + "type": { + "type": "string", + "const": "function_call", + "title": "Type", + "default": "function_call" + }, + "id": { "anyOf": [ { "type": "string" @@ -6394,31 +9357,54 @@ "type": "null" } ], - "title": "Profile Path" - }, - "disable_query_system_prompt": { - "type": "boolean", - "title": "Disable Query System Prompt", - "default": false - }, - "disable_shield_ids_override": { - "type": "boolean", - "title": "Disable Shield Ids Override", - "default": false + "title": "Id" }, - "system_prompt_path": { + "status": { "anyOf": [ { - "type": "string", - "format": "file-path" + "type": "string" }, { "type": "null" } ], - 
"title": "System Prompt Path" + "title": "Status" + } + }, + "type": "object", + "required": [ + "call_id", + "name", + "arguments" + ], + "title": "OpenAIResponseOutputMessageFunctionToolCall", + "description": "Function tool call output message for OpenAI responses.\n\n:param call_id: Unique identifier for the function call\n:param name: Name of the function being called\n:param arguments: JSON string containing the function arguments\n:param type: Tool call type identifier, always \"function_call\"\n:param id: (Optional) Additional identifier for the tool call\n:param status: (Optional) Current status of the function call execution" + }, + "OpenAIResponseOutputMessageMCPCall": { + "properties": { + "id": { + "type": "string", + "title": "Id" }, - "system_prompt": { + "type": { + "type": "string", + "const": "mcp_call", + "title": "Type", + "default": "mcp_call" + }, + "arguments": { + "type": "string", + "title": "Arguments" + }, + "name": { + "type": "string", + "title": "Name" + }, + "server_label": { + "type": "string", + "title": "Server Label" + }, + "error": { "anyOf": [ { "type": "string" @@ -6427,367 +9413,443 @@ "type": "null" } ], - "title": "System Prompt" + "title": "Error" }, - "agent_card_path": { + "output": { "anyOf": [ { - "type": "string", - "format": "file-path" + "type": "string" }, { "type": "null" } ], - "title": "Agent Card Path" + "title": "Output" + } + }, + "type": "object", + "required": [ + "id", + "arguments", + "name", + "server_label" + ], + "title": "OpenAIResponseOutputMessageMCPCall", + "description": "Model Context Protocol (MCP) call output message for OpenAI responses.\n\n:param id: Unique identifier for this MCP call\n:param type: Tool call type identifier, always \"mcp_call\"\n:param arguments: JSON string containing the MCP call arguments\n:param name: Name of the MCP method being called\n:param server_label: Label identifying the MCP server handling the call\n:param error: (Optional) Error message if the MCP call 
failed\n:param output: (Optional) Output result from the successful MCP call" + }, + "OpenAIResponseOutputMessageMCPListTools": { + "properties": { + "id": { + "type": "string", + "title": "Id" }, - "agent_card_config": { + "type": { + "type": "string", + "const": "mcp_list_tools", + "title": "Type", + "default": "mcp_list_tools" + }, + "server_label": { + "type": "string", + "title": "Server Label" + }, + "tools": { + "items": { + "$ref": "#/components/schemas/MCPListToolsTool" + }, + "type": "array", + "title": "Tools" + } + }, + "type": "object", + "required": [ + "id", + "server_label", + "tools" + ], + "title": "OpenAIResponseOutputMessageMCPListTools", + "description": "MCP list tools output message containing available tools from an MCP server.\n\n:param id: Unique identifier for this MCP list tools operation\n:param type: Tool call type identifier, always \"mcp_list_tools\"\n:param server_label: Label identifying the MCP server providing the tools\n:param tools: List of available tools provided by the MCP server" + }, + "OpenAIResponseOutputMessageWebSearchToolCall": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "status": { + "type": "string", + "title": "Status" + }, + "type": { + "type": "string", + "const": "web_search_call", + "title": "Type", + "default": "web_search_call" + } + }, + "type": "object", + "required": [ + "id", + "status" + ], + "title": "OpenAIResponseOutputMessageWebSearchToolCall", + "description": "Web search tool call output message for OpenAI responses.\n\n:param id: Unique identifier for this tool call\n:param status: Current status of the web search operation\n:param type: Tool call type identifier, always \"web_search_call\"" + }, + "OpenAIResponsePrompt": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "variables": { "anyOf": [ { - "additionalProperties": true, + "additionalProperties": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentText" + 
}, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile", + "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage", + "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText" + } + } + }, "type": "object" }, { "type": "null" } ], - "title": "Agent Card Config" + "title": "Variables" }, - "custom_profile": { + "version": { "anyOf": [ { - "$ref": "#/components/schemas/CustomProfile" + "type": "string" }, { "type": "null" } - ] + ], + "title": "Version" } }, - "additionalProperties": false, "type": "object", - "title": "Customization", - "description": "Service customization." + "required": [ + "id" + ], + "title": "OpenAIResponsePrompt", + "description": "OpenAI compatible Prompt object that is used in OpenAI responses.\n\n:param id: Unique identifier of the prompt template\n:param variables: Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. 
The substitution values can either be strings, or other Response input types\nlike images or files.\n:param version: Version number of the prompt to use (defaults to latest if not specified)" }, - "DatabaseConfiguration": { + "OpenAIResponseReasoning": { "properties": { - "sqlite": { + "effort": { "anyOf": [ { - "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" + "type": "string", + "enum": [ + "none", + "minimal", + "low", + "medium", + "high", + "xhigh" + ] }, { "type": "null" } ], - "title": "SQLite configuration", - "description": "SQLite database configuration" - }, - "postgres": { + "title": "Effort" + } + }, + "type": "object", + "title": "OpenAIResponseReasoning", + "description": "Configuration for reasoning effort in OpenAI responses.\n\nControls how much reasoning the model performs before generating a response.\n\n:param effort: The effort level for reasoning. \"low\" favors speed and economical token usage,\n \"high\" favors more complete reasoning, \"medium\" is a balance between the two." + }, + "OpenAIResponseText": { + "properties": { + "format": { "anyOf": [ { - "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + "$ref": "#/components/schemas/OpenAIResponseTextFormat" }, { "type": "null" } - ], - "title": "PostgreSQL configuration", - "description": "PostgreSQL database configuration" - } - }, - "additionalProperties": false, - "type": "object", - "title": "DatabaseConfiguration", - "description": "Database configuration." - }, - "DetailModel": { - "properties": { - "response": { - "type": "string", - "title": "Response", - "description": "Short summary of the error" - }, - "cause": { - "type": "string", - "title": "Cause", - "description": "Detailed explanation of what caused the error" + ] } }, "type": "object", - "required": [ - "response", - "cause" - ], - "title": "DetailModel", - "description": "Nested detail model for error responses." 
- }, - "FeedbackCategory": { - "type": "string", - "enum": [ - "incorrect", - "not_relevant", - "incomplete", - "outdated_information", - "unsafe", - "other" - ], - "title": "FeedbackCategory", - "description": "Enum representing predefined feedback categories for AI responses.\n\nThese categories help provide structured feedback about AI inference quality\nwhen users provide negative feedback (thumbs down). Multiple categories can\nbe selected to provide comprehensive feedback about response issues." + "title": "OpenAIResponseText", + "description": "Text response configuration for OpenAI responses.\n\n:param format: (Optional) Text format configuration specifying output format requirements" }, - "FeedbackRequest": { + "OpenAIResponseTextFormat": { "properties": { - "conversation_id": { - "type": "string", - "title": "Conversation Id", - "description": "The required conversation ID (UUID)", - "examples": [ - "c5260aec-4d82-4370-9fdf-05cf908b3f16" - ] - }, - "user_question": { - "type": "string", - "title": "User Question", - "description": "User question (the query string)", - "examples": [ - "What is Kubernetes?" - ] + "type": { + "anyOf": [ + { + "type": "string", + "const": "text" + }, + { + "type": "string", + "const": "json_schema" + }, + { + "type": "string", + "const": "json_object" + } + ], + "title": "Type" }, - "llm_response": { - "type": "string", - "title": "Llm Response", - "description": "Response from LLM", - "examples": [ - "Kubernetes is an open-source container orchestration system for automating ..." 
- ] + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" }, - "sentiment": { + "schema": { "anyOf": [ { - "type": "integer" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], - "title": "Sentiment", - "description": "User sentiment, if provided must be -1 or 1", - "examples": [ - -1, - 1 - ] + "title": "Schema" }, - "user_feedback": { + "description": { "anyOf": [ { - "type": "string", - "maxLength": 4096 + "type": "string" }, { "type": "null" } ], - "title": "User Feedback", - "description": "Feedback on the LLM response.", - "examples": [ - "I'm not satisfied with the response because it is too vague." - ] + "title": "Description" }, - "categories": { + "strict": { "anyOf": [ { - "items": { - "$ref": "#/components/schemas/FeedbackCategory" - }, - "type": "array" + "type": "boolean" }, { "type": "null" } ], - "title": "Categories", - "description": "List of feedback categories that describe issues with the LLM response (for negative feedback).", - "examples": [ - [ - "incorrect", - "incomplete" - ] - ] + "title": "Strict" } }, - "additionalProperties": false, "type": "object", - "required": [ - "conversation_id", - "user_question", - "llm_response" - ], - "title": "FeedbackRequest", - "description": "Model representing a feedback request.\n\nAttributes:\n conversation_id: The required conversation ID (UUID).\n user_question: The required user question.\n llm_response: The required LLM response.\n sentiment: The optional sentiment.\n user_feedback: The optional user feedback.\n categories: The optional list of feedback categories (multi-select for negative feedback).\n\nExample:\n ```python\n feedback_request = FeedbackRequest(\n conversation_id=\"12345678-abcd-0000-0123-456789abcdef\",\n user_question=\"what are you doing?\",\n user_feedback=\"This response is not helpful\",\n llm_response=\"I don't know\",\n sentiment=-1,\n categories=[FeedbackCategory.INCORRECT, 
FeedbackCategory.INCOMPLETE]\n )\n ```", - "examples": [ - { - "conversation_id": "12345678-abcd-0000-0123-456789abcdef", - "llm_response": "bar", - "sentiment": -1, - "user_feedback": "Not satisfied with the response quality.", - "user_question": "foo" - }, - { - "categories": [ - "incorrect" - ], - "conversation_id": "12345678-abcd-0000-0123-456789abcdef", - "llm_response": "The capital of France is Berlin.", - "sentiment": -1, - "user_question": "What is the capital of France?" - }, - { - "categories": [ - "incomplete", - "not_relevant" - ], - "conversation_id": "12345678-abcd-0000-0123-456789abcdef", - "llm_response": "Use Docker.", - "sentiment": -1, - "user_feedback": "This response is too general and doesn't provide specific steps.", - "user_question": "How do I deploy a web app?" - } - ] + "title": "OpenAIResponseTextFormat", + "description": "Configuration for Responses API text format.\n\n:param type: Must be \"text\", \"json_schema\", or \"json_object\" to identify the format type\n:param name: The name of the response format. Only used for json_schema.\n:param schema: The JSON schema the response should conform to. In a Python SDK, this is often a `pydantic` model. Only used for json_schema.\n:param description: (Optional) A description of the response format. Only used for json_schema.\n:param strict: (Optional) Whether to strictly enforce the JSON schema. If true, the response must match the schema exactly. Only used for json_schema." 
}, - "FeedbackResponse": { + "OpenAIResponseToolMCP": { "properties": { - "response": { + "type": { "type": "string", - "title": "Response", - "description": "The response of the feedback request.", - "examples": [ - "feedback received" - ] + "const": "mcp", + "title": "Type", + "default": "mcp" + }, + "server_label": { + "type": "string", + "title": "Server Label" + }, + "allowed_tools": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "$ref": "#/components/schemas/AllowedToolsFilter" + }, + { + "type": "null" + } + ], + "title": "Allowed Tools" } }, "type": "object", "required": [ - "response" + "server_label" ], - "title": "FeedbackResponse", - "description": "Model representing a response to a feedback request.\n\nAttributes:\n response: The response of the feedback request.\n\nExample:\n ```python\n feedback_response = FeedbackResponse(response=\"feedback received\")\n ```", - "examples": [ - { - "response": "feedback received" - } - ] + "title": "OpenAIResponseToolMCP", + "description": "Model Context Protocol (MCP) tool configuration for OpenAI response object.\n\n:param type: Tool type identifier, always \"mcp\"\n:param server_label: Label to identify this MCP server\n:param allowed_tools: (Optional) Restriction on which tools can be used from this server" }, - "FeedbackStatusUpdateRequest": { + "OpenAIResponseUsage": { "properties": { - "status": { - "type": "boolean", - "title": "Status", - "description": "Desired state of feedback enablement, must be False or True", - "default": false, - "examples": [ - true, - false - ] + "input_tokens": { + "type": "integer", + "title": "Input Tokens" + }, + "output_tokens": { + "type": "integer", + "title": "Output Tokens" + }, + "total_tokens": { + "type": "integer", + "title": "Total Tokens" + }, + "input_tokens_details": { + "$ref": "#/components/schemas/OpenAIResponseUsageInputTokensDetails" + }, + "output_tokens_details": { + "$ref": 
"#/components/schemas/OpenAIResponseUsageOutputTokensDetails" } }, - "additionalProperties": false, "type": "object", - "title": "FeedbackStatusUpdateRequest", - "description": "Model representing a feedback status update request.\n\nAttributes:\n status: Value of the desired feedback enabled state.\n\nExample:\n ```python\n feedback_request = FeedbackRequest(\n status=false\n )\n ```" + "required": [ + "input_tokens", + "output_tokens", + "total_tokens", + "input_tokens_details", + "output_tokens_details" + ], + "title": "OpenAIResponseUsage", + "description": "Usage information for OpenAI response.\n\n:param input_tokens: Number of tokens in the input\n:param output_tokens: Number of tokens in the output\n:param total_tokens: Total tokens used (input + output)\n:param input_tokens_details: Detailed breakdown of input token usage\n:param output_tokens_details: Detailed breakdown of output token usage" }, - "FeedbackStatusUpdateResponse": { + "OpenAIResponseUsageInputTokensDetails": { "properties": { - "status": { - "additionalProperties": true, - "type": "object", - "title": "Status" + "cached_tokens": { + "type": "integer", + "title": "Cached Tokens" } }, "type": "object", "required": [ - "status" + "cached_tokens" ], - "title": "FeedbackStatusUpdateResponse", - "description": "Model representing a response to a feedback status update request.\n\nAttributes:\n status: The previous and current status of the service and who updated it.\n\nExample:\n ```python\n status_response = StatusResponse(\n status={\n \"previous_status\": true,\n \"updated_status\": false,\n \"updated_by\": \"user/test\",\n \"timestamp\": \"2023-03-15 12:34:56\"\n },\n )\n ```", - "examples": [ - { - "status": { - "previous_status": true, - "timestamp": "2023-03-15 12:34:56", - "updated_by": "user/test", - "updated_status": false - } - } - ] + "title": "OpenAIResponseUsageInputTokensDetails", + "description": "Token details for input tokens in OpenAI response usage.\n\n:param cached_tokens: 
Number of tokens retrieved from cache" }, - "ForbiddenResponse": { + "OpenAIResponseUsageOutputTokensDetails": { "properties": { - "status_code": { + "reasoning_tokens": { "type": "integer", - "title": "Status Code" - }, - "detail": { - "$ref": "#/components/schemas/DetailModel" + "title": "Reasoning Tokens" } }, "type": "object", "required": [ - "status_code", - "detail" + "reasoning_tokens" ], - "title": "ForbiddenResponse", - "description": "403 Forbidden. Access denied.", - "examples": [ - { - "detail": { - "cause": "User 6789 does not have permission to read conversation with ID 123e4567-e89b-12d3-a456-426614174000", - "response": "User does not have permission to perform this action" - }, - "label": "conversation read" - }, - { - "detail": { - "cause": "User 6789 does not have permission to delete conversation with ID 123e4567-e89b-12d3-a456-426614174000", - "response": "User does not have permission to perform this action" - }, - "label": "conversation delete" + "title": "OpenAIResponseUsageOutputTokensDetails", + "description": "Token details for output tokens in OpenAI response usage.\n\n:param reasoning_tokens: Number of tokens used for reasoning (o1/o3 models)" + }, + "OpenAITokenLogProb": { + "properties": { + "token": { + "type": "string", + "title": "Token", + "description": "The token." }, - { - "detail": { - "cause": "User 6789 is not authorized to access this endpoint.", - "response": "User does not have permission to access this endpoint" - }, - "label": "endpoint" + "bytes": { + "anyOf": [ + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Bytes", + "description": "The bytes for the token." }, - { - "detail": { - "cause": "Storing feedback is disabled.", - "response": "Storing feedback is disabled" - }, - "label": "feedback" + "logprob": { + "type": "number", + "title": "Logprob", + "description": "The log probability of the token." 
}, - { - "detail": { - "cause": "User lacks model_override permission required to override model/provider.", - "response": "This instance does not permit overriding model/provider in the query request (missing permission: MODEL_OVERRIDE). Please remove the model and provider fields from your request." - }, - "label": "model override" + "top_logprobs": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/OpenAITopLogProb" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Top Logprobs", + "description": "The top log probabilities for the token." } - ] + }, + "type": "object", + "required": [ + "token", + "logprob" + ], + "title": "OpenAITokenLogProb", + "description": "The log probability for a token from an OpenAI-compatible chat completion response." }, - "HTTPAuthSecurityScheme": { + "OpenAITopLogProb": { "properties": { - "bearerFormat": { + "token": { + "type": "string", + "title": "Token", + "description": "The token." + }, + "bytes": { "anyOf": [ { - "type": "string" + "items": { + "type": "integer" + }, + "type": "array" }, { "type": "null" } ], - "title": "Bearerformat" + "title": "Bytes", + "description": "The bytes for the token." }, + "logprob": { + "type": "number", + "title": "Logprob", + "description": "The log probability of the token." + } + }, + "type": "object", + "required": [ + "token", + "logprob" + ], + "title": "OpenAITopLogProb", + "description": "The top log probability for a token from an OpenAI-compatible chat completion response." 
+ }, + "OpenIdConnectSecurityScheme": { + "properties": { "description": { "anyOf": [ { @@ -6799,43 +9861,26 @@ ], "title": "Description" }, - "scheme": { + "openIdConnectUrl": { "type": "string", - "title": "Scheme" + "title": "Openidconnecturl" }, "type": { "type": "string", - "const": "http", + "const": "openIdConnect", "title": "Type", - "default": "http" + "default": "openIdConnect" } }, "type": "object", "required": [ - "scheme" + "openIdConnectUrl" ], - "title": "HTTPAuthSecurityScheme", - "description": "Defines a security scheme using HTTP authentication." - }, - "HTTPValidationError": { - "properties": { - "detail": { - "items": { - "$ref": "#/components/schemas/ValidationError" - }, - "type": "array", - "title": "Detail" - } - }, - "type": "object", - "title": "HTTPValidationError" + "title": "OpenIdConnectSecurityScheme", + "description": "Defines a security scheme using OpenID Connect." }, - "ImplicitOAuthFlow": { + "PasswordOAuthFlow": { "properties": { - "authorizationUrl": { - "type": "string", - "title": "Authorizationurl" - }, "refreshUrl": { "anyOf": [ { @@ -6853,46 +9898,53 @@ }, "type": "object", "title": "Scopes" + }, + "tokenUrl": { + "type": "string", + "title": "Tokenurl" } }, "type": "object", "required": [ - "authorizationUrl", - "scopes" - ], - "title": "ImplicitOAuthFlow", - "description": "Defines configuration details for the OAuth 2.0 Implicit flow." - }, - "In": { - "type": "string", - "enum": [ - "cookie", - "header", - "query" + "scopes", + "tokenUrl" ], - "title": "In", - "description": "The location of the API key." + "title": "PasswordOAuthFlow", + "description": "Defines configuration details for the OAuth 2.0 Resource Owner Password flow." 
}, - "InMemoryCacheConfig": { + "PostgreSQLDatabaseConfiguration": { "properties": { - "max_entries": { + "host": { + "type": "string", + "title": "Hostname", + "description": "Database server host or socket directory", + "default": "localhost" + }, + "port": { "type": "integer", "exclusiveMinimum": 0.0, - "title": "Max entries", - "description": "Maximum number of entries stored in the in-memory cache" - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "max_entries" - ], - "title": "InMemoryCacheConfig", - "description": "In-memory cache configuration." - }, - "InferenceConfiguration": { - "properties": { - "default_model": { + "title": "Port", + "description": "Database server port", + "default": 5432 + }, + "db": { + "type": "string", + "title": "Database name", + "description": "Database name to connect to" + }, + "user": { + "type": "string", + "title": "User name", + "description": "Database user name used to authenticate" + }, + "password": { + "type": "string", + "format": "password", + "title": "Password", + "description": "Password used to authenticate", + "writeOnly": true + }, + "namespace": { "anyOf": [ { "type": "string" @@ -6901,543 +9953,665 @@ "type": "null" } ], - "title": "Default model", - "description": "Identification of default model used when no other model is specified." 
+ "title": "Name space", + "description": "Database namespace", + "default": "public" }, - "default_provider": { + "ssl_mode": { + "type": "string", + "title": "SSL mode", + "description": "SSL mode", + "default": "prefer" + }, + "gss_encmode": { + "type": "string", + "title": "GSS encmode", + "description": "This option determines whether or with what priority a secure GSS TCP/IP connection will be negotiated with the server.", + "default": "prefer" + }, + "ca_cert_path": { "anyOf": [ { - "type": "string" + "type": "string", + "format": "file-path" }, { "type": "null" } ], - "title": "Default provider", - "description": "Identification of default provider used when no other model is specified." + "title": "CA certificate path", + "description": "Path to CA certificate" } }, "additionalProperties": false, "type": "object", - "title": "InferenceConfiguration", - "description": "Inference configuration." + "required": [ + "db", + "user", + "password" + ], + "title": "PostgreSQLDatabaseConfiguration", + "description": "PostgreSQL database configuration.\n\nPostgreSQL database is used by Lightspeed Core Stack service for storing\ninformation about conversation IDs. 
It can also be leveraged to store\nconversation history and information about quota usage.\n\nUseful resources:\n\n- [Psycopg: connection classes](https://www.psycopg.org/psycopg3/docs/api/connections.html)\n- [PostgreSQL connection strings](https://www.connectionstrings.com/postgresql/)\n- [How to Use PostgreSQL in Python](https://www.freecodecamp.org/news/postgresql-in-python/)" }, - "InfoResponse": { + "PromptTooLongResponse": { "properties": { - "name": { + "status_code": { + "type": "integer", + "title": "Status Code" + }, + "detail": { + "$ref": "#/components/schemas/DetailModel" + } + }, + "type": "object", + "required": [ + "status_code", + "detail" + ], + "title": "PromptTooLongResponse", + "description": "413 Payload Too Large - Prompt is too long.", + "examples": [ + { + "detail": { + "cause": "The prompt exceeds the maximum allowed length.", + "response": "Prompt is too long" + }, + "label": "prompt too long" + } + ] + }, + "ProviderHealthStatus": { + "properties": { + "provider_id": { "type": "string", - "title": "Name", - "description": "Service name", + "title": "Provider Id", + "description": "The ID of the provider" + }, + "status": { + "type": "string", + "title": "Status", + "description": "The health status", "examples": [ - "Lightspeed Stack" + "ok", + "unhealthy", + "not_implemented" + ] + }, + "message": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Message", + "description": "Optional message about the health status", + "examples": [ + "All systems operational", + "Llama Stack is unavailable" ] + } + }, + "type": "object", + "required": [ + "provider_id", + "status" + ], + "title": "ProviderHealthStatus", + "description": "Model representing the health status of a provider.\n\nAttributes:\n provider_id: The ID of the provider.\n status: The health status ('ok', 'unhealthy', 'not_implemented').\n message: Optional message about the health status." 
+ }, + "ProviderResponse": { + "properties": { + "api": { + "type": "string", + "title": "Api", + "description": "The API this provider implements" + }, + "config": { + "additionalProperties": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "items": {}, + "type": "array" + }, + {}, + { + "type": "null" + } + ] + }, + "type": "object", + "title": "Config", + "description": "Provider configuration parameters" + }, + "health": { + "additionalProperties": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "items": {}, + "type": "array" + }, + {}, + { + "type": "null" + } + ] + }, + "type": "object", + "title": "Health", + "description": "Current health status of the provider" }, - "service_version": { + "provider_id": { "type": "string", - "title": "Service Version", - "description": "Service version", - "examples": [ - "0.1.0", - "0.2.0", - "1.0.0" - ] + "title": "Provider Id", + "description": "Unique provider identifier" }, - "llama_stack_version": { + "provider_type": { "type": "string", - "title": "Llama Stack Version", - "description": "Llama Stack version", - "examples": [ - "0.2.1", - "0.2.2", - "0.2.18", - "0.2.21", - "0.2.22" - ] + "title": "Provider Type", + "description": "Provider implementation type" } }, "type": "object", "required": [ - "name", - "service_version", - "llama_stack_version" + "api", + "config", + "health", + "provider_id", + "provider_type" ], - "title": "InfoResponse", - "description": "Model representing a response to an info request.\n\nAttributes:\n name: Service name.\n service_version: Service version.\n llama_stack_version: Llama Stack version.\n\nExample:\n ```python\n info_response = InfoResponse(\n name=\"Lightspeed Stack\",\n service_version=\"1.0.0\",\n llama_stack_version=\"0.2.22\",\n )\n ```", + "title": "ProviderResponse", + "description": "Model representing a response to get specific provider request.", 
"examples": [ { - "llama_stack_version": "1.0.0", - "name": "Lightspeed Stack", - "service_version": "1.0.0" + "api": "inference", + "config": { + "api_key": "********" + }, + "health": { + "message": "Healthy", + "status": "OK" + }, + "provider_id": "openai", + "provider_type": "remote::openai" } ] }, - "InternalServerErrorResponse": { + "ProvidersListResponse": { "properties": { - "status_code": { - "type": "integer", - "title": "Status Code" - }, - "detail": { - "$ref": "#/components/schemas/DetailModel" + "providers": { + "additionalProperties": { + "items": { + "additionalProperties": true, + "type": "object" + }, + "type": "array" + }, + "type": "object", + "title": "Providers", + "description": "List of available API types and their corresponding providers" } }, "type": "object", "required": [ - "status_code", - "detail" + "providers" ], - "title": "InternalServerErrorResponse", - "description": "500 Internal Server Error.", + "title": "ProvidersListResponse", + "description": "Model representing a response to providers request.", "examples": [ { - "detail": { - "cause": "An unexpected error occurred while processing the request.", - "response": "Internal server error" - }, - "label": "internal" - }, - { - "detail": { - "cause": "Lightspeed Stack configuration has not been initialized.", - "response": "Configuration is not loaded" - }, - "label": "configuration" - }, - { - "detail": { - "cause": "Failed to store feedback at directory: /path/example", - "response": "Failed to store feedback" - }, - "label": "feedback storage" - }, - { - "detail": { - "cause": "Failed to call backend API", - "response": "Error while processing query" - }, - "label": "query" - }, - { - "detail": { - "cause": "Conversation cache is not configured or unavailable.", - "response": "Conversation cache not configured" - }, - "label": "conversation cache" - }, - { - "detail": { - "cause": "Failed to query the database", - "response": "Database query failed" - }, - "label": "database" 
+ "providers": { + "agents": [ + { + "provider_id": "meta-reference", + "provider_type": "inline::meta-reference" + } + ], + "inference": [ + { + "provider_id": "sentence-transformers", + "provider_type": "inline::sentence-transformers" + }, + { + "provider_id": "openai", + "provider_type": "remote::openai" + } + ] + } } ] }, - "JsonPathOperator": { - "type": "string", - "enum": [ - "equals", - "contains", - "in", - "match" - ], - "title": "JsonPathOperator", - "description": "Supported operators for JSONPath evaluation.\n\nNote: this is not a real model, just an enumeration of all supported JSONPath operators." - }, - "JwkConfiguration": { - "properties": { - "url": { - "type": "string", - "minLength": 1, - "format": "uri", - "title": "URL", - "description": "HTTPS URL of the JWK (JSON Web Key) set used to validate JWTs." - }, - "jwt_configuration": { - "$ref": "#/components/schemas/JwtConfiguration", - "title": "JWT configuration", - "description": "JWT (JSON Web Token) configuration" - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "url" - ], - "title": "JwkConfiguration", - "description": "JWK (JSON Web Key) configuration.\n\nA JSON Web Key (JWK) is a JavaScript Object Notation (JSON) data structure\nthat represents a cryptographic key.\n\nUseful resources:\n\n - [JSON Web Key](https://openid.net/specs/draft-jones-json-web-key-03.html)\n - [RFC 7517](https://www.rfc-editor.org/rfc/rfc7517)" - }, - "JwtConfiguration": { + "QueryRequest": { "properties": { - "user_id_claim": { + "query": { "type": "string", - "title": "User ID claim", - "description": "JWT claim name that uniquely identifies the user (subject ID).", - "default": "user_id" + "title": "Query", + "description": "The query string", + "examples": [ + "What is Kubernetes?" 
+ ] }, - "username_claim": { - "type": "string", - "title": "Username claim", - "description": "JWT claim name that provides the human-readable username.", - "default": "username" + "conversation_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Conversation Id", + "description": "The optional conversation ID (UUID)", + "examples": [ + "c5260aec-4d82-4370-9fdf-05cf908b3f16" + ] }, - "role_rules": { - "items": { - "$ref": "#/components/schemas/JwtRoleRule" - }, - "type": "array", - "title": "Role rules", - "description": "Rules for extracting roles from JWT claims" - } - }, - "additionalProperties": false, - "type": "object", - "title": "JwtConfiguration", - "description": "JWT (JSON Web Token) configuration.\n\nJSON Web Token (JWT) is a compact, URL-safe means of representing\nclaims to be transferred between two parties. The claims in a JWT\nare encoded as a JSON object that is used as the payload of a JSON\nWeb Signature (JWS) structure or as the plaintext of a JSON Web\nEncryption (JWE) structure, enabling the claims to be digitally\nsigned or integrity protected with a Message Authentication Code\n(MAC) and/or encrypted.\n\nUseful resources:\n\n - [JSON Web Token](https://en.wikipedia.org/wiki/JSON_Web_Token)\n - [RFC 7519](https://datatracker.ietf.org/doc/html/rfc7519)\n - [JSON Web Tokens](https://auth0.com/docs/secure/tokens/json-web-tokens)" - }, - "JwtRoleRule": { - "properties": { - "jsonpath": { - "type": "string", - "title": "JSON path", - "description": "JSONPath expression to evaluate against the JWT payload" + "provider": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Provider", + "description": "The optional provider", + "examples": [ + "openai", + "watsonx" + ] }, - "operator": { - "$ref": "#/components/schemas/JsonPathOperator", - "title": "Operator", - "description": "JSON path comparison operator" + "model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": 
"null" + } + ], + "title": "Model", + "description": "The optional model", + "examples": [ + "gpt4mini" + ] }, - "negate": { - "type": "boolean", - "title": "Negate rule", - "description": "If set to true, the meaning of the rule is negated", - "default": false + "system_prompt": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "System Prompt", + "description": "The optional system prompt.", + "examples": [ + "You are OpenShift assistant.", + "You are Ansible assistant." + ] }, - "value": { - "title": "Value", - "description": "Value to compare against" + "attachments": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/Attachment" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Attachments", + "description": "The optional list of attachments.", + "examples": [ + { + "attachment_type": "log", + "content": "this is attachment", + "content_type": "text/plain" + }, + { + "attachment_type": "configuration", + "content": "kind: Pod\n metadata:\n name: private-reg", + "content_type": "application/yaml" + }, + { + "attachment_type": "configuration", + "content": "foo: bar", + "content_type": "application/yaml" + } + ] }, - "roles": { - "items": { - "type": "string" - }, - "type": "array", - "title": "List of roles", - "description": "Roles to be assigned if the rule matches" - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "jsonpath", - "operator", - "value", - "roles" - ], - "title": "JwtRoleRule", - "description": "Rule for extracting roles from JWT claims." 
- }, - "LivenessResponse": { - "properties": { - "alive": { - "type": "boolean", - "title": "Alive", - "description": "Flag indicating that the app is alive", + "no_tools": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "No Tools", + "description": "Whether to bypass all tools and MCP servers", + "default": false, "examples": [ true, false ] - } - }, - "type": "object", - "required": [ - "alive" - ], - "title": "LivenessResponse", - "description": "Model representing a response to a liveness request.\n\nAttributes:\n alive: If app is alive.\n\nExample:\n ```python\n liveness_response = LivenessResponse(alive=True)\n ```", - "examples": [ - { - "alive": true - } - ] - }, - "LlamaStackConfiguration": { - "properties": { - "url": { + }, + "generate_topic_summary": { "anyOf": [ { - "type": "string", - "minLength": 1, - "format": "uri" + "type": "boolean" }, { "type": "null" } ], - "title": "Llama Stack URL", - "description": "URL to Llama Stack service; used when library mode is disabled. Must be a valid HTTP or HTTPS URL." 
+ "title": "Generate Topic Summary", + "description": "Whether to generate topic summary for new conversations", + "default": true, + "examples": [ + true, + false + ] }, - "api_key": { + "media_type": { "anyOf": [ { - "type": "string", - "format": "password", - "writeOnly": true + "type": "string" }, { "type": "null" } ], - "title": "API key", - "description": "API key to access Llama Stack service" + "title": "Media Type", + "description": "Media type for the response format", + "examples": [ + "application/json", + "text/plain" + ] }, - "use_as_library_client": { + "vector_store_ids": { "anyOf": [ { - "type": "boolean" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], - "title": "Use as library", - "description": "When set to true Llama Stack will be used in library mode, not in server mode (default)" + "title": "Vector Store Ids", + "description": "Optional list of specific vector store IDs to query for RAG. If not provided, all available vector stores will be queried.", + "examples": [ + "ocp_docs", + "knowledge_base", + "vector_db_1" + ] }, - "library_client_config_path": { + "shield_ids": { "anyOf": [ { - "type": "string" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], - "title": "Llama Stack configuration path", - "description": "Path to configuration file used when Llama Stack is run in library mode" + "title": "Shield Ids", + "description": "Optional list of safety shield IDs to apply. If None, all configured shields are used. ", + "examples": [ + "llama-guard", + "custom-shield" + ] }, - "timeout": { - "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Request timeout", - "description": "Timeout in seconds for requests to Llama Stack service. 
Default is 180 seconds (3 minutes) to accommodate long-running RAG queries.", - "default": 180 + "solr": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Solr", + "description": "Solr-specific query parameters including filter queries", + "examples": [ + { + "fq": [ + "product:*openshift*", + "product_version:*4.16*" + ] + } + ] } }, "additionalProperties": false, "type": "object", - "title": "LlamaStackConfiguration", - "description": "Llama stack configuration.\n\nLlama Stack is a comprehensive system that provides a uniform set of tools\nfor building, scaling, and deploying generative AI applications, enabling\ndevelopers to create, integrate, and orchestrate multiple AI services and\ncapabilities into an adaptable setup.\n\nUseful resources:\n\n - [Llama Stack](https://www.llama.com/products/llama-stack/)\n - [Python Llama Stack client](https://github.com/llamastack/llama-stack-client-python)\n - [Build AI Applications with Llama Stack](https://llamastack.github.io/)" - }, - "MCPClientAuthOptionsResponse": { - "properties": { - "servers": { - "items": { - "$ref": "#/components/schemas/MCPServerAuthInfo" - }, - "type": "array", - "title": "Servers", - "description": "List of MCP servers that accept client-provided authorization" - } - }, - "type": "object", - "title": "MCPClientAuthOptionsResponse", - "description": "Response containing MCP servers that accept client-provided authorization.", + "required": [ + "query" + ], + "title": "QueryRequest", + "description": "Model representing a request for the LLM (Language Model).\n\nAttributes:\n query: The query string.\n conversation_id: The optional conversation ID (UUID).\n provider: The optional provider.\n model: The optional model.\n system_prompt: The optional system prompt.\n attachments: The optional attachments.\n no_tools: Whether to bypass all tools and MCP servers (default: False).\n generate_topic_summary: Whether to generate topic 
summary for new conversations.\n media_type: The optional media type for response format (application/json or text/plain).\n vector_store_ids: The optional list of specific vector store IDs to query for RAG.\n shield_ids: The optional list of safety shield IDs to apply.\n\nExample:\n ```python\n query_request = QueryRequest(query=\"Tell me about Kubernetes\")\n ```", "examples": [ { - "servers": [ + "attachments": [ { - "client_auth_headers": [ - "Authorization" - ], - "name": "github" + "attachment_type": "log", + "content": "this is attachment", + "content_type": "text/plain" }, { - "client_auth_headers": [ - "Authorization", - "X-API-Key" - ], - "name": "gitlab" + "attachment_type": "configuration", + "content": "kind: Pod\n metadata:\n name: private-reg", + "content_type": "application/yaml" + }, + { + "attachment_type": "configuration", + "content": "foo: bar", + "content_type": "application/yaml" } + ], + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "generate_topic_summary": true, + "model": "model-name", + "no_tools": false, + "provider": "openai", + "query": "write a deployment yaml for the mongodb image", + "system_prompt": "You are a helpful assistant", + "vector_store_ids": [ + "ocp_docs", + "knowledge_base" ] } ] }, - "MCPServerAuthInfo": { + "QueryResponse": { "properties": { - "name": { + "conversation_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Conversation Id", + "description": "The optional conversation ID (UUID)", + "examples": [ + "c5260aec-4d82-4370-9fdf-05cf908b3f16" + ] + }, + "response": { "type": "string", - "title": "Name", - "description": "MCP server name" + "title": "Response", + "description": "Response from LLM", + "examples": [ + "Kubernetes is an open-source container orchestration system for automating ..." 
+ ] }, - "client_auth_headers": { + "rag_chunks": { "items": { - "type": "string" + "$ref": "#/components/schemas/RAGChunk" }, "type": "array", - "title": "Client Auth Headers", - "description": "List of authentication header names for client-provided tokens" - } - }, - "type": "object", - "required": [ - "name", - "client_auth_headers" - ], - "title": "MCPServerAuthInfo", - "description": "Information about MCP server client authentication options." - }, - "Message": { - "properties": { - "content": { - "type": "string", - "title": "Content", - "description": "The message content", + "title": "Rag Chunks", + "description": "Deprecated: List of RAG chunks used to generate the response." + }, + "referenced_documents": { + "items": { + "$ref": "#/components/schemas/ReferencedDocument" + }, + "type": "array", + "title": "Referenced Documents", + "description": "List of documents referenced in generating the response", "examples": [ - "Hello, how can I help you?" + [ + { + "doc_title": "Operator Lifecycle Manager (OLM)", + "doc_url": "https://docs.openshift.com/container-platform/4.15/operators/olm/index.html" + } + ] ] }, - "type": { - "type": "string", - "enum": [ - "user", - "assistant", - "system", - "developer" - ], - "title": "Type", - "description": "The type of message", + "truncated": { + "type": "boolean", + "title": "Truncated", + "description": "Deprecated:Whether conversation history was truncated", + "default": false, "examples": [ - "user", - "assistant", - "system", - "developer" + false, + true ] - } - }, - "type": "object", - "required": [ - "content", - "type" - ], - "title": "Message", - "description": "Model representing a message in a conversation turn.\n\nAttributes:\n content: The message content.\n type: The type of message." 
- }, - "ModelContextProtocolServer": { - "properties": { - "name": { - "type": "string", - "title": "MCP name", - "description": "MCP server name that must be unique" }, - "provider_id": { - "type": "string", - "title": "Provider ID", - "description": "MCP provider identification", - "default": "model-context-protocol" + "input_tokens": { + "type": "integer", + "title": "Input Tokens", + "description": "Number of tokens sent to LLM", + "default": 0, + "examples": [ + 150, + 250, + 500 + ] }, - "url": { - "type": "string", - "title": "MCP server URL", - "description": "URL of the MCP server" + "output_tokens": { + "type": "integer", + "title": "Output Tokens", + "description": "Number of tokens received from LLM", + "default": 0, + "examples": [ + 50, + 100, + 200 + ] }, - "authorization_headers": { + "available_quotas": { "additionalProperties": { - "type": "string" + "type": "integer" }, "type": "object", - "title": "Authorization headers", - "description": "Headers to send to the MCP server. The map contains the header name and the path to a file containing the header value (secret). There are 3 special cases: 1. Usage of the kubernetes token in the header. To specify this use a string 'kubernetes' instead of the file path. 2. Usage of the client-provided token in the header. To specify this use a string 'client' instead of the file path. 3. Usage of the oauth token in the header. To specify this use a string 'oauth' instead of the file path. " + "title": "Available Quotas", + "description": "Quota available as measured by all configured quota limiters", + "examples": [ + { + "daily": 1000, + "monthly": 50000 + } + ] }, - "headers": { + "tool_calls": { "items": { - "type": "string" + "$ref": "#/components/schemas/ToolCallSummary" }, "type": "array", - "title": "Propagated headers", - "description": "List of HTTP header names to automatically forward from the incoming request to this MCP server. 
Headers listed here are extracted from the original client request and included when calling the MCP server. This is useful when infrastructure components (e.g. API gateways) inject headers that MCP servers need, such as x-rh-identity in HCC. Header matching is case-insensitive. These headers are additive with authorization_headers and MCP-HEADERS." + "title": "Tool Calls", + "description": "List of tool calls made during response generation" }, - "timeout": { - "anyOf": [ - { - "type": "integer", - "exclusiveMinimum": 0.0 - }, - { - "type": "null" - } - ], - "title": "Request timeout", - "description": "Timeout in seconds for requests to the MCP server. If not specified, the default timeout from Llama Stack will be used. Note: This field is reserved for future use when Llama Stack adds timeout support." - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "name", - "url" - ], - "title": "ModelContextProtocolServer", - "description": "Model context protocol server configuration.\n\nMCP (Model Context Protocol) servers provide tools and capabilities to the\nAI agents. These are configured by this structure. Only MCP servers\ndefined in the lightspeed-stack.yaml configuration are available to the\nagents. 
Tools configured in the llama-stack run.yaml are not accessible to\nlightspeed-core agents.\n\nUseful resources:\n\n- [Model Context Protocol](https://modelcontextprotocol.io/docs/getting-started/intro)\n- [MCP FAQs](https://modelcontextprotocol.io/faqs)\n- [Wikipedia article](https://en.wikipedia.org/wiki/Model_Context_Protocol)" - }, - "ModelsResponse": { - "properties": { - "models": { + "tool_results": { "items": { - "additionalProperties": true, - "type": "object" + "$ref": "#/components/schemas/ToolResultSummary" }, "type": "array", - "title": "Models", - "description": "List of models available" + "title": "Tool Results", + "description": "List of tool results" } }, "type": "object", "required": [ - "models" + "response" ], - "title": "ModelsResponse", - "description": "Model representing a response to models request.", + "title": "QueryResponse", + "description": "Model representing LLM response to a query.\n\nAttributes:\n conversation_id: The optional conversation ID (UUID).\n response: The response.\n rag_chunks: Deprecated. 
List of RAG chunks used to generate the response.\n This information is now available in tool_results under file_search_call type.\n referenced_documents: The URLs and titles for the documents used to generate the response.\n tool_calls: List of tool calls made during response generation.\n tool_results: List of tool results.\n truncated: Whether conversation history was truncated.\n input_tokens: Number of tokens sent to LLM.\n output_tokens: Number of tokens received from LLM.\n available_quotas: Quota available as measured by all configured quota limiters.", "examples": [ { - "models": [ + "available_quotas": { + "ClusterQuotaLimiter": 998911, + "UserQuotaLimiter": 998911 + }, + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "input_tokens": 123, + "output_tokens": 456, + "referenced_documents": [ { - "api_model_type": "llm", - "identifier": "openai/gpt-4-turbo", - "metadata": {}, - "model_type": "llm", - "provider_id": "openai", - "provider_resource_id": "gpt-4-turbo", - "type": "model" + "doc_title": "Operator Lifecycle Manager concepts and resources", + "doc_url": "https://docs.openshift.com/container-platform/4.15/operators/understanding/olm/olm-understanding-olm.html" } - ] - } - ] - }, - "MutualTLSSecurityScheme": { - "properties": { - "description": { - "anyOf": [ + ], + "response": "Operator Lifecycle Manager (OLM) helps users install...", + "tool_calls": [ { - "type": "string" - }, + "args": {}, + "id": "1", + "name": "tool1", + "type": "tool_call" + } + ], + "tool_results": [ { - "type": "null" + "content": "bla", + "id": "1", + "round": 1, + "status": "success", + "type": "tool_result" } ], - "title": "Description" - }, - "type": { - "type": "string", - "const": "mutualTLS", - "title": "Type", - "default": "mutualTLS" + "truncated": false } - }, - "type": "object", - "title": "MutualTLSSecurityScheme", - "description": "Defines a security scheme using mTLS authentication." 
+ ] }, - "NotFoundResponse": { + "QuotaExceededResponse": { "properties": { "status_code": { "type": "integer", @@ -7451,158 +10625,195 @@ "required": [ "status_code", "detail" - ], - "title": "NotFoundResponse", - "description": "404 Not Found - Resource does not exist.", + ], + "title": "QuotaExceededResponse", + "description": "429 Too Many Requests - Quota limit exceeded.", "examples": [ { "detail": { - "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", - "response": "Conversation not found" + "cause": "The token quota for model gpt-4-turbo has been exceeded.", + "response": "The model quota has been exceeded" }, - "label": "conversation" + "label": "model" }, { "detail": { - "cause": "Provider with ID openai does not exist", - "response": "Provider not found" + "cause": "User 123 has no available tokens.", + "response": "The quota has been exceeded" }, - "label": "provider" + "label": "user none" }, { "detail": { - "cause": "Model with ID gpt-4-turbo is not configured", - "response": "Model not found" + "cause": "Cluster has no available tokens.", + "response": "The quota has been exceeded" }, - "label": "model" + "label": "cluster none" }, { "detail": { - "cause": "Rag with ID vs_7b52a8cf-0fa3-489c-beab-27e061d102f3 does not exist", - "response": "Rag not found" + "cause": "Unknown subject 999 has no available tokens.", + "response": "The quota has been exceeded" }, - "label": "rag" + "label": "subject none" }, { "detail": { - "cause": "Streaming Request with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", - "response": "Streaming Request not found" + "cause": "User 123 has 5 tokens, but 10 tokens are needed.", + "response": "The quota has been exceeded" }, - "label": "streaming request" + "label": "user insufficient" + }, + { + "detail": { + "cause": "Cluster has 500 tokens, but 900 tokens are needed.", + "response": "The quota has been exceeded" + }, + "label": "cluster insufficient" + }, + { + "detail": { + 
"cause": "Unknown subject 999 has 3 tokens, but 6 tokens are needed.", + "response": "The quota has been exceeded" + }, + "label": "subject insufficient" } ] }, - "OAuth2SecurityScheme": { + "QuotaHandlersConfiguration": { "properties": { - "description": { + "sqlite": { "anyOf": [ { - "type": "string" + "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" }, { "type": "null" } ], - "title": "Description" - }, - "flows": { - "$ref": "#/components/schemas/OAuthFlows" + "title": "SQLite configuration", + "description": "SQLite database configuration" }, - "oauth2MetadataUrl": { + "postgres": { "anyOf": [ { - "type": "string" + "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" }, { "type": "null" } ], - "title": "Oauth2Metadataurl" + "title": "PostgreSQL configuration", + "description": "PostgreSQL database configuration" }, - "type": { - "type": "string", - "const": "oauth2", - "title": "Type", - "default": "oauth2" + "limiters": { + "items": { + "$ref": "#/components/schemas/QuotaLimiterConfiguration" + }, + "type": "array", + "title": "Quota limiters", + "description": "Quota limiters configuration" + }, + "scheduler": { + "$ref": "#/components/schemas/QuotaSchedulerConfiguration", + "title": "Quota scheduler", + "description": "Quota scheduler configuration" + }, + "enable_token_history": { + "type": "boolean", + "title": "Enable token history", + "description": "Enables storing information about token usage history", + "default": false } }, + "additionalProperties": false, "type": "object", - "required": [ - "flows" - ], - "title": "OAuth2SecurityScheme", - "description": "Defines a security scheme using OAuth 2.0." + "title": "QuotaHandlersConfiguration", + "description": "Quota limiter configuration.\n\nIt is possible to limit quota usage per user or per service or services\n(that typically run in one cluster). Each limit is configured as a separate\n_quota limiter_. 
It can be of type `user_limiter` or `cluster_limiter`\n(which is name that makes sense in OpenShift deployment)." }, - "OAuthFlows": { + "QuotaLimiterConfiguration": { "properties": { - "authorizationCode": { - "anyOf": [ - { - "$ref": "#/components/schemas/AuthorizationCodeOAuthFlow" - }, - { - "type": "null" - } - ] + "type": { + "type": "string", + "enum": [ + "user_limiter", + "cluster_limiter" + ], + "title": "Quota limiter type", + "description": "Quota limiter type, either user_limiter or cluster_limiter" }, - "clientCredentials": { - "anyOf": [ - { - "$ref": "#/components/schemas/ClientCredentialsOAuthFlow" - }, - { - "type": "null" - } - ] + "name": { + "type": "string", + "title": "Quota limiter name", + "description": "Human readable quota limiter name" }, - "implicit": { - "anyOf": [ - { - "$ref": "#/components/schemas/ImplicitOAuthFlow" - }, - { - "type": "null" - } - ] + "initial_quota": { + "type": "integer", + "minimum": 0.0, + "title": "Initial quota", + "description": "Quota set at beginning of the period" }, - "password": { - "anyOf": [ - { - "$ref": "#/components/schemas/PasswordOAuthFlow" - }, - { - "type": "null" - } - ] + "quota_increase": { + "type": "integer", + "minimum": 0.0, + "title": "Quota increase", + "description": "Delta value used to increase quota when period is reached" + }, + "period": { + "type": "string", + "title": "Period", + "description": "Period specified in human readable form" } }, + "additionalProperties": false, "type": "object", - "title": "OAuthFlows", - "description": "Defines the configuration for the supported OAuth 2.0 flows." + "required": [ + "type", + "name", + "initial_quota", + "quota_increase", + "period" + ], + "title": "QuotaLimiterConfiguration", + "description": "Configuration for one quota limiter.\n\nThere are three configuration options for each limiter:\n\n1. 
``period`` is specified in a human-readable form, see\n https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT\n for all possible options. When the end of the period is reached, the\n quota is reset or increased.\n2. ``initial_quota`` is the value set at the beginning of the period.\n3. ``quota_increase`` is the value (if specified) used to increase the\n quota when the period is reached.\n\nThere are two basic use cases:\n\n1. When the quota needs to be reset to a specific value periodically (for\n example on a weekly or monthly basis), set ``initial_quota`` to the\n required value.\n2. When the quota needs to be increased by a specific value periodically\n (for example on a daily basis), set ``quota_increase``." }, - "OkpConfiguration": { + "QuotaSchedulerConfiguration": { "properties": { - "offline": { - "type": "boolean", - "title": "OKP offline mode", - "description": "When True, use parent_id for OKP chunk source URLs. When False, use reference_url for chunk source URLs.", - "default": true + "period": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Period", + "description": "Quota scheduler period specified in seconds", + "default": 1 }, - "chunk_filter_query": { - "type": "string", - "title": "OKP chunk filter query", - "description": "OKP filter query applied to every OKP search request. Defaults to 'is_chunk:true' to restrict results to chunk documents. To add extra constraints, extend the expression using boolean syntax, e.g. 'is_chunk:true AND product:*openshift*'.", - "default": "is_chunk:true" + "database_reconnection_count": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Database reconnection count on startup", + "description": "Database reconnection count on startup. 
When database for quota is not available on startup, the service tries to reconnect N times with specified delay.", + "default": 10 + }, + "database_reconnection_delay": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Database reconnection delay", + "description": "Database reconnection delay specified in seconds. When database for quota is not available on startup, the service tries to reconnect N times with specified delay.", + "default": 1 } }, "additionalProperties": false, "type": "object", - "title": "OkpConfiguration", - "description": "OKP (Offline Knowledge Portal) provider configuration.\n\nControls provider-specific behaviour for the OKP vector store.\nOnly relevant when ``\"okp\"`` is listed in ``rag.inline`` or ``rag.tool``." + "title": "QuotaSchedulerConfiguration", + "description": "Quota scheduler configuration." }, - "OpenIdConnectSecurityScheme": { + "RAGChunk": { "properties": { - "description": { + "content": { + "type": "string", + "title": "Content", + "description": "The content of the chunk" + }, + "source": { "anyOf": [ { "type": "string" @@ -7611,92 +10822,53 @@ "type": "null" } ], - "title": "Description" + "title": "Source", + "description": "Index name identifying the knowledge source from configuration" }, - "openIdConnectUrl": { - "type": "string", - "title": "Openidconnecturl" + "score": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Score", + "description": "Relevance score" }, - "type": { - "type": "string", - "const": "openIdConnect", - "title": "Type", - "default": "openIdConnect" - } - }, - "type": "object", - "required": [ - "openIdConnectUrl" - ], - "title": "OpenIdConnectSecurityScheme", - "description": "Defines a security scheme using OpenID Connect." 
- }, - "PasswordOAuthFlow": { - "properties": { - "refreshUrl": { + "attributes": { "anyOf": [ { - "type": "string" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], - "title": "Refreshurl" - }, - "scopes": { - "additionalProperties": { - "type": "string" - }, - "type": "object", - "title": "Scopes" - }, - "tokenUrl": { - "type": "string", - "title": "Tokenurl" + "title": "Attributes", + "description": "Document metadata from the RAG provider (e.g., url, title, author)" } }, "type": "object", "required": [ - "scopes", - "tokenUrl" + "content" ], - "title": "PasswordOAuthFlow", - "description": "Defines configuration details for the OAuth 2.0 Resource Owner Password flow." + "title": "RAGChunk", + "description": "Model representing a RAG chunk used in the response." }, - "PostgreSQLDatabaseConfiguration": { + "RAGInfoResponse": { "properties": { - "host": { - "type": "string", - "title": "Hostname", - "description": "Database server host or socket directory", - "default": "localhost" - }, - "port": { - "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Port", - "description": "Database server port", - "default": 5432 - }, - "db": { - "type": "string", - "title": "Database name", - "description": "Database name to connect to" - }, - "user": { - "type": "string", - "title": "User name", - "description": "Database user name used to authenticate" - }, - "password": { + "id": { "type": "string", - "format": "password", - "title": "Password", - "description": "Password used to authenticate", - "writeOnly": true + "title": "Id", + "description": "Vector DB unique ID", + "examples": [ + "vs_00000000_0000_0000" + ] }, - "namespace": { + "name": { "anyOf": [ { "type": "string" @@ -7705,296 +10877,244 @@ "type": "null" } ], - "title": "Name space", - "description": "Database namespace", - "default": "public" - }, - "ssl_mode": { - "type": "string", - "title": "SSL mode", - "description": "SSL mode", - "default": "prefer" + "title": "Name", + 
"description": "Human readable vector DB name", + "examples": [ + "Faiss Store with Knowledge base" + ] }, - "gss_encmode": { - "type": "string", - "title": "GSS encmode", - "description": "This option determines whether or with what priority a secure GSS TCP/IP connection will be negotiated with the server.", - "default": "prefer" + "created_at": { + "type": "integer", + "title": "Created At", + "description": "When the vector store was created, represented as Unix time", + "examples": [ + 1763391371 + ] }, - "ca_cert_path": { + "last_active_at": { "anyOf": [ { - "type": "string", - "format": "file-path" + "type": "integer" }, { "type": "null" } ], - "title": "CA certificate path", - "description": "Path to CA certificate" - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "db", - "user", - "password" - ], - "title": "PostgreSQLDatabaseConfiguration", - "description": "PostgreSQL database configuration.\n\nPostgreSQL database is used by Lightspeed Core Stack service for storing\ninformation about conversation IDs. 
It can also be leveraged to store\nconversation history and information about quota usage.\n\nUseful resources:\n\n- [Psycopg: connection classes](https://www.psycopg.org/psycopg3/docs/api/connections.html)\n- [PostgreSQL connection strings](https://www.connectionstrings.com/postgresql/)\n- [How to Use PostgreSQL in Python](https://www.freecodecamp.org/news/postgresql-in-python/)" - }, - "PromptTooLongResponse": { - "properties": { - "status_code": { - "type": "integer", - "title": "Status Code" - }, - "detail": { - "$ref": "#/components/schemas/DetailModel" - } - }, - "type": "object", - "required": [ - "status_code", - "detail" - ], - "title": "PromptTooLongResponse", - "description": "413 Payload Too Large - Prompt is too long.", - "examples": [ - { - "detail": { - "cause": "The prompt exceeds the maximum allowed length.", - "response": "Prompt is too long" - }, - "label": "prompt too long" - } - ] - }, - "ProviderHealthStatus": { - "properties": { - "provider_id": { - "type": "string", - "title": "Provider Id", - "description": "The ID of the provider" + "title": "Last Active At", + "description": "When the vector store was last active, represented as Unix time", + "examples": [ + 1763391371 + ] }, - "status": { - "type": "string", - "title": "Status", - "description": "The health status", + "usage_bytes": { + "type": "integer", + "title": "Usage Bytes", + "description": "Storage byte(s) used by this vector DB", "examples": [ - "ok", - "unhealthy", - "not_implemented" + 0 ] }, - "message": { + "expires_at": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], - "title": "Message", - "description": "Optional message about the health status", + "title": "Expires At", + "description": "When the vector store expires, represented as Unix time", "examples": [ - "All systems operational", - "Llama Stack is unavailable" + 1763391371 ] - } - }, - "type": "object", - "required": [ - "provider_id", - "status" - ], - "title": 
"ProviderHealthStatus", - "description": "Model representing the health status of a provider.\n\nAttributes:\n provider_id: The ID of the provider.\n status: The health status ('ok', 'unhealthy', 'not_implemented').\n message: Optional message about the health status." - }, - "ProviderResponse": { - "properties": { - "api": { - "type": "string", - "title": "Api", - "description": "The API this provider implements" - }, - "config": { - "additionalProperties": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "items": {}, - "type": "array" - }, - {}, - { - "type": "null" - } - ] - }, - "type": "object", - "title": "Config", - "description": "Provider configuration parameters" - }, - "health": { - "additionalProperties": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "items": {}, - "type": "array" - }, - {}, - { - "type": "null" - } - ] - }, - "type": "object", - "title": "Health", - "description": "Current health status of the provider" }, - "provider_id": { + "object": { "type": "string", - "title": "Provider Id", - "description": "Unique provider identifier" + "title": "Object", + "description": "Object type", + "examples": [ + "vector_store" + ] }, - "provider_type": { + "status": { "type": "string", - "title": "Provider Type", - "description": "Provider implementation type" + "title": "Status", + "description": "Vector DB status", + "examples": [ + "completed" + ] } }, "type": "object", "required": [ - "api", - "config", - "health", - "provider_id", - "provider_type" + "id", + "created_at", + "usage_bytes", + "object", + "status" ], - "title": "ProviderResponse", - "description": "Model representing a response to get specific provider request.", + "title": "RAGInfoResponse", + "description": "Model representing a response with information about RAG DB.", "examples": [ { - "api": "inference", - "config": { - "api_key": "********" - }, - "health": { - 
"message": "Healthy", - "status": "OK" - }, - "provider_id": "openai", - "provider_type": "remote::openai" + "created_at": 1763391371, + "id": "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", + "last_active_at": 1763391371, + "name": "Faiss Store with Knowledge base", + "object": "vector_store", + "status": "completed", + "usage_bytes": 1024000 } ] }, - "ProvidersListResponse": { + "RAGListResponse": { "properties": { - "providers": { - "additionalProperties": { - "items": { - "additionalProperties": true, - "type": "object" - }, - "type": "array" + "rags": { + "items": { + "type": "string" }, - "type": "object", - "title": "Providers", - "description": "List of available API types and their corresponding providers" + "type": "array", + "title": "RAG list response", + "description": "List of RAG identifiers", + "examples": [ + "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", + "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3" + ] } }, "type": "object", "required": [ - "providers" + "rags" ], - "title": "ProvidersListResponse", - "description": "Model representing a response to providers request.", + "title": "RAGListResponse", + "description": "Model representing a response to list RAGs request.", "examples": [ { - "providers": { - "agents": [ - { - "provider_id": "meta-reference", - "provider_type": "inline::meta-reference" - } - ], - "inference": [ - { - "provider_id": "sentence-transformers", - "provider_type": "inline::sentence-transformers" - }, - { - "provider_id": "openai", - "provider_type": "remote::openai" - } - ] - } + "rags": [ + "vs_00000000-cafe-babe-0000-000000000000", + "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", + "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3" + ] } ] }, - "QueryRequest": { + "RHIdentityConfiguration": { "properties": { - "query": { - "type": "string", - "title": "Query", - "description": "The query string", - "examples": [ - "What is Kubernetes?" 
- ] + "required_entitlements": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Required entitlements", + "description": "List of all required entitlements." + } + }, + "additionalProperties": false, + "type": "object", + "title": "RHIdentityConfiguration", + "description": "Red Hat Identity authentication configuration." + }, + "RagConfiguration": { + "properties": { + "inline": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Inline RAG IDs", + "description": "RAG IDs whose sources are injected as context before the LLM call. Use 'okp' to enable OKP inline RAG. Empty by default (no inline RAG)." }, - "conversation_id": { + "tool": { "anyOf": [ { - "type": "string" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], - "title": "Conversation Id", - "description": "The optional conversation ID (UUID)", + "title": "Tool RAG IDs", + "description": "RAG IDs made available to the LLM as a file_search tool. Use 'okp' to include the OKP vector store. When omitted, all registered BYOK vector stores are used (backward compatibility)." + } + }, + "additionalProperties": false, + "type": "object", + "title": "RagConfiguration", + "description": "RAG strategy configuration.\n\nControls which RAG sources are used for inline and tool-based retrieval.\n\nEach strategy lists RAG IDs to include. The special ID ``\"okp\"`` defined in constants,\nactivates the OKP provider; all other IDs refer to entries in ``byok_rag``.\n\nBackward compatibility:\n - ``inline`` defaults to ``[]`` (no inline RAG).\n - ``tool`` defaults to ``None`` which means all registered vector stores\n are used (identical to the previous ``tool.byok.enabled = True`` default)." 
+ }, + "ReadinessResponse": { + "properties": { + "ready": { + "type": "boolean", + "title": "Ready", + "description": "Flag indicating if service is ready", "examples": [ - "c5260aec-4d82-4370-9fdf-05cf908b3f16" + true, + false ] }, - "provider": { + "reason": { + "type": "string", + "title": "Reason", + "description": "The reason for the readiness", + "examples": [ + "Service is ready" + ] + }, + "providers": { + "items": { + "$ref": "#/components/schemas/ProviderHealthStatus" + }, + "type": "array", + "title": "Providers", + "description": "List of unhealthy providers in case of readiness failure.", + "examples": [] + } + }, + "type": "object", + "required": [ + "ready", + "reason", + "providers" + ], + "title": "ReadinessResponse", + "description": "Model representing response to a readiness request.\n\nAttributes:\n ready: If service is ready.\n reason: The reason for the readiness.\n providers: List of unhealthy providers in case of readiness failure.\n\nExample:\n ```python\n readiness_response = ReadinessResponse(\n ready=False,\n reason=\"Service is not ready\",\n providers=[\n ProviderHealthStatus(\n provider_id=\"ollama\",\n status=\"unhealthy\",\n message=\"Server is unavailable\"\n )\n ]\n )\n ```", + "examples": [ + { + "providers": [], + "ready": true, + "reason": "Service is ready" + } + ] + }, + "ReferencedDocument": { + "properties": { + "doc_url": { "anyOf": [ { - "type": "string" + "type": "string", + "minLength": 1, + "format": "uri" }, { "type": "null" } ], - "title": "Provider", - "description": "The optional provider", - "examples": [ - "openai", - "watsonx" - ] + "title": "Doc Url", + "description": "URL of the referenced document" }, - "model": { + "doc_title": { "anyOf": [ { "type": "string" @@ -8003,13 +11123,10 @@ "type": "null" } ], - "title": "Model", - "description": "The optional model", - "examples": [ - "gpt4mini" - ] + "title": "Doc Title", + "description": "Title of the referenced document" }, - "system_prompt": { + "source": { 
"anyOf": [ { "type": "string" @@ -8018,80 +11135,100 @@ "type": "null" } ], - "title": "System Prompt", - "description": "The optional system prompt.", - "examples": [ - "You are OpenShift assistant.", - "You are Ansible assistant." - ] + "title": "Source", + "description": "Index name identifying the knowledge source from configuration" + } + }, + "type": "object", + "title": "ReferencedDocument", + "description": "Model representing a document referenced in generating a response.\n\nAttributes:\n doc_url: Url to the referenced doc.\n doc_title: Title of the referenced doc." + }, + "ResponseInput": { + "anyOf": [ + { + "type": "string" }, - "attachments": { + { + "items": { + "$ref": "#/components/schemas/ResponseItem" + }, + "type": "array" + } + ] + }, + "ResponseItem": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseMessage-Input" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools" + }, + { + "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse" + } + ] + }, + "ResponsesRequest": { + "properties": { + "input": { + "$ref": "#/components/schemas/ResponseInput" + }, + "model": { "anyOf": [ { - "items": { - "$ref": "#/components/schemas/Attachment" - }, - "type": "array" + "type": "string" }, { "type": "null" } ], - "title": "Attachments", - "description": "The optional list of attachments.", - "examples": [ - { - "attachment_type": "log", - "content": "this is attachment", - "content_type": "text/plain" - }, - { - 
"attachment_type": "configuration", - "content": "kind: Pod\n metadata:\n name: private-reg", - "content_type": "application/yaml" - }, - { - "attachment_type": "configuration", - "content": "foo: bar", - "content_type": "application/yaml" - } - ] + "title": "Model" }, - "no_tools": { + "conversation": { "anyOf": [ { - "type": "boolean" + "type": "string" }, { "type": "null" } ], - "title": "No Tools", - "description": "Whether to bypass all tools and MCP servers", - "default": false, - "examples": [ - true, - false - ] + "title": "Conversation" }, - "generate_topic_summary": { + "include": { "anyOf": [ { - "type": "boolean" + "items": { + "$ref": "#/components/schemas/IncludeParameter" + }, + "type": "array" }, { "type": "null" } ], - "title": "Generate Topic Summary", - "description": "Whether to generate topic summary for new conversations", - "default": true, - "examples": [ - true, - false - ] + "title": "Include" }, - "media_type": { + "instructions": { "anyOf": [ { "type": "string" @@ -8100,496 +11237,252 @@ "type": "null" } ], - "title": "Media Type", - "description": "Media type for the response format", - "examples": [ - "application/json", - "text/plain" - ] + "title": "Instructions" }, - "vector_store_ids": { + "max_infer_iters": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "type": "integer" }, { "type": "null" } ], - "title": "Vector Store Ids", - "description": "Optional list of specific vector store IDs to query for RAG. If not provided, all available vector stores will be queried.", - "examples": [ - "ocp_docs", - "knowledge_base", - "vector_db_1" - ] + "title": "Max Infer Iters" }, - "shield_ids": { + "max_output_tokens": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "type": "integer" }, { "type": "null" } ], - "title": "Shield Ids", - "description": "Optional list of safety shield IDs to apply. If None, all configured shields are used. 
If provided, must contain at least one valid shield ID (empty list raises 422 error).", - "examples": [ - "llama-guard", - "custom-shield" - ] + "title": "Max Output Tokens" }, - "solr": { + "max_tool_calls": { "anyOf": [ { - "additionalProperties": true, - "type": "object" + "type": "integer" }, { "type": "null" } ], - "title": "Solr", - "description": "Solr-specific query parameters including filter queries", - "examples": [ - { - "fq": [ - "product:*openshift*", - "product_version:*4.16*" - ] - } - ] - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "query" - ], - "title": "QueryRequest", - "description": "Model representing a request for the LLM (Language Model).\n\nAttributes:\n query: The query string.\n conversation_id: The optional conversation ID (UUID).\n provider: The optional provider.\n model: The optional model.\n system_prompt: The optional system prompt.\n attachments: The optional attachments.\n no_tools: Whether to bypass all tools and MCP servers (default: False).\n generate_topic_summary: Whether to generate topic summary for new conversations.\n media_type: The optional media type for response format (application/json or text/plain).\n vector_store_ids: The optional list of specific vector store IDs to query for RAG.\n shield_ids: The optional list of safety shield IDs to apply.\n\nExample:\n ```python\n query_request = QueryRequest(query=\"Tell me about Kubernetes\")\n ```", - "examples": [ - { - "attachments": [ - { - "attachment_type": "log", - "content": "this is attachment", - "content_type": "text/plain" - }, - { - "attachment_type": "configuration", - "content": "kind: Pod\n metadata:\n name: private-reg", - "content_type": "application/yaml" - }, - { - "attachment_type": "configuration", - "content": "foo: bar", - "content_type": "application/yaml" - } - ], - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "generate_topic_summary": true, - "model": "model-name", - "no_tools": false, - 
"provider": "openai", - "query": "write a deployment yaml for the mongodb image", - "system_prompt": "You are a helpful assistant", - "vector_store_ids": [ - "ocp_docs", - "knowledge_base" - ] - } - ] - }, - "QueryResponse": { - "properties": { - "conversation_id": { + "title": "Max Tool Calls" + }, + "metadata": { "anyOf": [ { - "type": "string" + "additionalProperties": { + "type": "string" + }, + "type": "object" }, { "type": "null" } ], - "title": "Conversation Id", - "description": "The optional conversation ID (UUID)", - "examples": [ - "c5260aec-4d82-4370-9fdf-05cf908b3f16" - ] - }, - "response": { - "type": "string", - "title": "Response", - "description": "Response from LLM", - "examples": [ - "Kubernetes is an open-source container orchestration system for automating ..." - ] - }, - "rag_chunks": { - "items": { - "$ref": "#/components/schemas/RAGChunk" - }, - "type": "array", - "title": "Rag Chunks", - "description": "Deprecated: List of RAG chunks used to generate the response." 
- }, - "referenced_documents": { - "items": { - "$ref": "#/components/schemas/ReferencedDocument" - }, - "type": "array", - "title": "Referenced Documents", - "description": "List of documents referenced in generating the response", - "examples": [ - [ - { - "doc_title": "Operator Lifecycle Manager (OLM)", - "doc_url": "https://docs.openshift.com/container-platform/4.15/operators/olm/index.html" - } - ] - ] - }, - "truncated": { - "type": "boolean", - "title": "Truncated", - "description": "Deprecated:Whether conversation history was truncated", - "default": false, - "examples": [ - false, - true - ] - }, - "input_tokens": { - "type": "integer", - "title": "Input Tokens", - "description": "Number of tokens sent to LLM", - "default": 0, - "examples": [ - 150, - 250, - 500 - ] - }, - "output_tokens": { - "type": "integer", - "title": "Output Tokens", - "description": "Number of tokens received from LLM", - "default": 0, - "examples": [ - 50, - 100, - 200 - ] + "title": "Metadata" }, - "available_quotas": { - "additionalProperties": { - "type": "integer" - }, - "type": "object", - "title": "Available Quotas", - "description": "Quota available as measured by all configured quota limiters", - "examples": [ + "parallel_tool_calls": { + "anyOf": [ { - "daily": 1000, - "monthly": 50000 - } - ] - }, - "tool_calls": { - "items": { - "$ref": "#/components/schemas/ToolCallSummary" - }, - "type": "array", - "title": "Tool Calls", - "description": "List of tool calls made during response generation" - }, - "tool_results": { - "items": { - "$ref": "#/components/schemas/ToolResultSummary" - }, - "type": "array", - "title": "Tool Results", - "description": "List of tool results" - } - }, - "type": "object", - "required": [ - "response" - ], - "title": "QueryResponse", - "description": "Model representing LLM response to a query.\n\nAttributes:\n conversation_id: The optional conversation ID (UUID).\n response: The response.\n rag_chunks: Deprecated. 
List of RAG chunks used to generate the response.\n This information is now available in tool_results under file_search_call type.\n referenced_documents: The URLs and titles for the documents used to generate the response.\n tool_calls: List of tool calls made during response generation.\n tool_results: List of tool results.\n truncated: Whether conversation history was truncated.\n input_tokens: Number of tokens sent to LLM.\n output_tokens: Number of tokens received from LLM.\n available_quotas: Quota available as measured by all configured quota limiters.", - "examples": [ - { - "available_quotas": { - "ClusterQuotaLimiter": 998911, - "UserQuotaLimiter": 998911 - }, - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "input_tokens": 123, - "output_tokens": 456, - "referenced_documents": [ + "type": "boolean" + }, { - "doc_title": "Operator Lifecycle Manager concepts and resources", - "doc_url": "https://docs.openshift.com/container-platform/4.15/operators/understanding/olm/olm-understanding-olm.html" + "type": "null" } ], - "response": "Operator Lifecycle Manager (OLM) helps users install...", - "tool_calls": [ + "title": "Parallel Tool Calls" + }, + "previous_response_id": { + "anyOf": [ { - "args": {}, - "id": "1", - "name": "tool1", - "type": "tool_call" - } - ], - "tool_results": [ + "type": "string" + }, { - "content": "bla", - "id": "1", - "round": 1, - "status": "success", - "type": "tool_result" + "type": "null" } ], - "truncated": false - } - ] - }, - "QuotaExceededResponse": { - "properties": { - "status_code": { - "type": "integer", - "title": "Status Code" - }, - "detail": { - "$ref": "#/components/schemas/DetailModel" - } - }, - "type": "object", - "required": [ - "status_code", - "detail" - ], - "title": "QuotaExceededResponse", - "description": "429 Too Many Requests - Quota limit exceeded.", - "examples": [ - { - "detail": { - "cause": "The token quota for model gpt-4-turbo has been exceeded.", - "response": "The model quota has been 
exceeded" - }, - "label": "model" - }, - { - "detail": { - "cause": "User 123 has no available tokens.", - "response": "The quota has been exceeded" - }, - "label": "user none" - }, - { - "detail": { - "cause": "Cluster has no available tokens.", - "response": "The quota has been exceeded" - }, - "label": "cluster none" + "title": "Previous Response Id" }, - { - "detail": { - "cause": "Unknown subject 999 has no available tokens.", - "response": "The quota has been exceeded" - }, - "label": "subject none" - }, - { - "detail": { - "cause": "User 123 has 5 tokens, but 10 tokens are needed.", - "response": "The quota has been exceeded" - }, - "label": "user insufficient" - }, - { - "detail": { - "cause": "Cluster has 500 tokens, but 900 tokens are needed.", - "response": "The quota has been exceeded" - }, - "label": "cluster insufficient" + "prompt": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponsePrompt" + }, + { + "type": "null" + } + ] }, - { - "detail": { - "cause": "Unknown subject 999 has 3 tokens, but 6 tokens are needed.", - "response": "The quota has been exceeded" - }, - "label": "subject insufficient" - } - ] - }, - "QuotaHandlersConfiguration": { - "properties": { - "sqlite": { + "reasoning": { "anyOf": [ { - "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" + "$ref": "#/components/schemas/OpenAIResponseReasoning" }, { "type": "null" } - ], - "title": "SQLite configuration", - "description": "SQLite database configuration" + ] }, - "postgres": { + "safety_identifier": { "anyOf": [ { - "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + "type": "string" }, { "type": "null" } ], - "title": "PostgreSQL configuration", - "description": "PostgreSQL database configuration" - }, - "limiters": { - "items": { - "$ref": "#/components/schemas/QuotaLimiterConfiguration" - }, - "type": "array", - "title": "Quota limiters", - "description": "Quota limiters configuration" + "title": "Safety Identifier" }, - "scheduler": { - "$ref": 
"#/components/schemas/QuotaSchedulerConfiguration", - "title": "Quota scheduler", - "description": "Quota scheduler configuration" + "store": { + "type": "boolean", + "title": "Store", + "default": true }, - "enable_token_history": { + "stream": { "type": "boolean", - "title": "Enable token history", - "description": "Enables storing information about token usage history", + "title": "Stream", "default": false - } - }, - "additionalProperties": false, - "type": "object", - "title": "QuotaHandlersConfiguration", - "description": "Quota limiter configuration.\n\nIt is possible to limit quota usage per user or per service or services\n(that typically run in one cluster). Each limit is configured as a separate\n_quota limiter_. It can be of type `user_limiter` or `cluster_limiter`\n(which is name that makes sense in OpenShift deployment)." - }, - "QuotaLimiterConfiguration": { - "properties": { - "type": { - "type": "string", - "enum": [ - "user_limiter", - "cluster_limiter" - ], - "title": "Quota limiter type", - "description": "Quota limiter type, either user_limiter or cluster_limiter" - }, - "name": { - "type": "string", - "title": "Quota limiter name", - "description": "Human readable quota limiter name" }, - "initial_quota": { - "type": "integer", - "minimum": 0.0, - "title": "Initial quota", - "description": "Quota set at beginning of the period" - }, - "quota_increase": { - "type": "integer", - "minimum": 0.0, - "title": "Quota increase", - "description": "Delta value used to increase quota when period is reached" + "temperature": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Temperature" }, - "period": { - "type": "string", - "title": "Period", - "description": "Period specified in human readable form" - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "type", - "name", - "initial_quota", - "quota_increase", - "period" - ], - "title": "QuotaLimiterConfiguration", - "description": 
"Configuration for one quota limiter.\n\nThere are three configuration options for each limiter:\n\n1. ``period`` is specified in a human-readable form, see\n https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT\n for all possible options. When the end of the period is reached, the\n quota is reset or increased.\n2. ``initial_quota`` is the value set at the beginning of the period.\n3. ``quota_increase`` is the value (if specified) used to increase the\n quota when the period is reached.\n\nThere are two basic use cases:\n\n1. When the quota needs to be reset to a specific value periodically (for\n example on a weekly or monthly basis), set ``initial_quota`` to the\n required value.\n2. When the quota needs to be increased by a specific value periodically\n (for example on a daily basis), set ``quota_increase``." - }, - "QuotaSchedulerConfiguration": { - "properties": { - "period": { - "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Period", - "description": "Quota scheduler period specified in seconds", - "default": 1 + "text": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseText" + }, + { + "type": "null" + } + ] }, - "database_reconnection_count": { - "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Database reconnection count on startup", - "description": "Database reconnection count on startup. 
When database for quota is not available on startup, the service tries to reconnect N times with specified delay.", - "default": 10 + "tool_choice": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceMode" + }, + { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceAllowedTools" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceFileSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceFunctionTool" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceMCPTool" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceCustomTool" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "allowed_tools": "#/components/schemas/OpenAIResponseInputToolChoiceAllowedTools", + "custom": "#/components/schemas/OpenAIResponseInputToolChoiceCustomTool", + "file_search": "#/components/schemas/OpenAIResponseInputToolChoiceFileSearch", + "function": "#/components/schemas/OpenAIResponseInputToolChoiceFunctionTool", + "mcp": "#/components/schemas/OpenAIResponseInputToolChoiceMCPTool", + "web_search": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch", + "web_search_2025_08_26": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch", + "web_search_preview": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch", + "web_search_preview_2025_03_11": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch" + } + } + }, + { + "type": "null" + } + ], + "title": "Tool Choice" }, - "database_reconnection_delay": { - "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Database reconnection delay", - "description": "Database reconnection delay specified in seconds. 
When database for quota is not available on startup, the service tries to reconnect N times with specified delay.", - "default": 1 - } - }, - "additionalProperties": false, - "type": "object", - "title": "QuotaSchedulerConfiguration", - "description": "Quota scheduler configuration." - }, - "RAGChunk": { - "properties": { - "content": { - "type": "string", - "title": "Content", - "description": "The content of the chunk" + "tools": { + "anyOf": [ + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputToolWebSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolFileSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolFunction" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolMCP" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "file_search": "#/components/schemas/OpenAIResponseInputToolFileSearch", + "function": "#/components/schemas/OpenAIResponseInputToolFunction", + "mcp": "#/components/schemas/OpenAIResponseInputToolMCP", + "web_search": "#/components/schemas/OpenAIResponseInputToolWebSearch", + "web_search_2025_08_26": "#/components/schemas/OpenAIResponseInputToolWebSearch", + "web_search_preview": "#/components/schemas/OpenAIResponseInputToolWebSearch", + "web_search_preview_2025_03_11": "#/components/schemas/OpenAIResponseInputToolWebSearch" + } + } + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tools" }, - "source": { + "generate_topic_summary": { "anyOf": [ { - "type": "string" + "type": "boolean" }, { "type": "null" } ], - "title": "Source", - "description": "Index name identifying the knowledge source from configuration" + "title": "Generate Topic Summary", + "default": true }, - "score": { + "shield_ids": { "anyOf": [ { - "type": "number" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], - "title": "Score", - "description": "Relevance score" + "title": "Shield Ids" }, - "attributes": { + "solr": { 
"anyOf": [ { "additionalProperties": true, @@ -8599,28 +11492,115 @@ "type": "null" } ], - "title": "Attributes", - "description": "Document metadata from the RAG provider (e.g., url, title, author)" + "title": "Solr" } }, + "additionalProperties": false, "type": "object", "required": [ - "content" + "input" ], - "title": "RAGChunk", - "description": "Model representing a RAG chunk used in the response." + "title": "ResponsesRequest", + "description": "Model representing a request for the Responses API following LCORE specification.\n\nAttributes:\n input: Input text or structured input items containing the query.\n model: Model identifier in format \"provider/model\". Auto-selected if not provided.\n conversation: Conversation ID linking to an existing conversation. Accepts both\n OpenAI and LCORE formats. Mutually exclusive with previous_response_id.\n include: Explicitly specify output item types that are excluded by default but\n should be included in the response.\n instructions: System instructions or guidelines provided to the model (acts as\n the system prompt).\n max_infer_iters: Maximum number of inference iterations the model can perform.\n max_output_tokens: Maximum number of tokens allowed in the response.\n max_tool_calls: Maximum number of tool calls allowed in a single response.\n metadata: Custom metadata dictionary with key-value pairs for tracking or logging.\n parallel_tool_calls: Whether the model can make multiple tool calls in parallel.\n previous_response_id: Identifier of the previous response in a multi-turn\n conversation. Mutually exclusive with conversation.\n prompt: Prompt object containing a template with variables for dynamic\n substitution.\n reasoning: Reasoning configuration for the response.\n safety_identifier: Safety identifier for the response.\n store: Whether to store the response in conversation history. Defaults to True.\n stream: Whether to stream the response as it is generated. 
Defaults to False.\n temperature: Sampling temperature controlling randomness (typically 0.0\u20132.0).\n text: Text response configuration specifying output format constraints (JSON\n schema, JSON object, or plain text).\n tool_choice: Tool selection strategy (\"auto\", \"required\", \"none\", or specific\n tool configuration).\n tools: List of tools available to the model (file search, web search, function\n calls, MCP tools). Defaults to all tools available to the model.\n generate_topic_summary: LCORE-specific flag indicating whether to generate a\n topic summary for new conversations. Defaults to True.\n shield_ids: LCORE-specific list of safety shield IDs to apply. If None, all\n configured shields are used.\n solr: LCORE-specific Solr vector_io provider query parameters (e.g. filter\n queries). Optional.", + "examples": [ + { + "generate_topic_summary": true, + "input": "Hello World!", + "instructions": "You are a helpful assistant", + "model": "openai/gpt-4o-mini", + "store": true, + "stream": false + } + ] }, - "RAGInfoResponse": { + "ResponsesResponse": { "properties": { + "created_at": { + "type": "integer", + "title": "Created At" + }, + "completed_at": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Completed At" + }, + "error": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseError" + }, + { + "type": "null" + } + ] + }, "id": { "type": "string", - "title": "Id", - "description": "Vector DB unique ID", - "examples": [ - "vs_00000000_0000_0000" - ] + "title": "Id" }, - "name": { + "model": { + "type": "string", + "title": "Model" + }, + "object": { + "type": "string", + "const": "response", + "title": "Object", + "default": "response" + }, + "output": { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseMessage-Output" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" + }, + { + "$ref": 
"#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools" + }, + { + "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "file_search_call": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall", + "function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall", + "mcp_approval_request": "#/components/schemas/OpenAIResponseMCPApprovalRequest", + "mcp_call": "#/components/schemas/OpenAIResponseOutputMessageMCPCall", + "mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools", + "message": "#/components/schemas/OpenAIResponseMessage-Output", + "web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" + } + } + }, + "type": "array", + "title": "Output" + }, + "parallel_tool_calls": { + "type": "boolean", + "title": "Parallel Tool Calls", + "default": true + }, + "previous_response_id": { "anyOf": [ { "type": "string" @@ -8629,244 +11609,205 @@ "type": "null" } ], - "title": "Name", - "description": "Human readable vector DB name", - "examples": [ - "Faiss Store with Knowledge base" - ] + "title": "Previous Response Id" }, - "created_at": { - "type": "integer", - "title": "Created At", - "description": "When the vector store was created, represented as Unix time", - "examples": [ - 1763391371 + "prompt": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponsePrompt" + }, + { + "type": "null" + } ] }, - "last_active_at": { + "status": { + "type": "string", + "title": "Status" + }, + "temperature": { "anyOf": [ { - "type": "integer" + "type": "number" }, { "type": "null" } ], - "title": "Last Active At", - "description": "When the vector 
store was last active, represented as Unix time", - "examples": [ - 1763391371 - ] + "title": "Temperature" }, - "usage_bytes": { - "type": "integer", - "title": "Usage Bytes", - "description": "Storage byte(s) used by this vector DB", - "examples": [ - 0 + "text": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseText" + }, + { + "type": "null" + } ] }, - "expires_at": { + "top_p": { "anyOf": [ { - "type": "integer" + "type": "number" }, { "type": "null" } ], - "title": "Expires At", - "description": "When the vector store expires, represented as Unix time", - "examples": [ - 1763391371 - ] - }, - "object": { - "type": "string", - "title": "Object", - "description": "Object type", - "examples": [ - "vector_store" - ] + "title": "Top P" }, - "status": { - "type": "string", - "title": "Status", - "description": "Vector DB status", - "examples": [ - "completed" - ] - } - }, - "type": "object", - "required": [ - "id", - "created_at", - "usage_bytes", - "object", - "status" - ], - "title": "RAGInfoResponse", - "description": "Model representing a response with information about RAG DB.", - "examples": [ - { - "created_at": 1763391371, - "id": "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", - "last_active_at": 1763391371, - "name": "Faiss Store with Knowledge base", - "object": "vector_store", - "status": "completed", - "usage_bytes": 1024000 - } - ] - }, - "RAGListResponse": { - "properties": { - "rags": { - "items": { - "type": "string" - }, - "type": "array", - "title": "RAG list response", - "description": "List of RAG identifiers", - "examples": [ - "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", - "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3" - ] - } - }, - "type": "object", - "required": [ - "rags" - ], - "title": "RAGListResponse", - "description": "Model representing a response to list RAGs request.", - "examples": [ - { - "rags": [ - "vs_00000000-cafe-babe-0000-000000000000", - "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", - 
"vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3" - ] - } - ] - }, - "RHIdentityConfiguration": { - "properties": { - "required_entitlements": { + "tools": { + "anyOf": [ + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputToolWebSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolFileSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolFunction" + }, + { + "$ref": "#/components/schemas/OpenAIResponseToolMCP" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "file_search": "#/components/schemas/OpenAIResponseInputToolFileSearch", + "function": "#/components/schemas/OpenAIResponseInputToolFunction", + "mcp": "#/components/schemas/OpenAIResponseToolMCP", + "web_search": "#/components/schemas/OpenAIResponseInputToolWebSearch", + "web_search_2025_08_26": "#/components/schemas/OpenAIResponseInputToolWebSearch", + "web_search_preview": "#/components/schemas/OpenAIResponseInputToolWebSearch", + "web_search_preview_2025_03_11": "#/components/schemas/OpenAIResponseInputToolWebSearch" + } + } + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tools" + }, + "tool_choice": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceMode" + }, + { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceAllowedTools" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceFileSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceFunctionTool" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceMCPTool" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceCustomTool" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "allowed_tools": "#/components/schemas/OpenAIResponseInputToolChoiceAllowedTools", + "custom": 
"#/components/schemas/OpenAIResponseInputToolChoiceCustomTool", + "file_search": "#/components/schemas/OpenAIResponseInputToolChoiceFileSearch", + "function": "#/components/schemas/OpenAIResponseInputToolChoiceFunctionTool", + "mcp": "#/components/schemas/OpenAIResponseInputToolChoiceMCPTool", + "web_search": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch", + "web_search_2025_08_26": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch", + "web_search_preview": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch", + "web_search_preview_2025_03_11": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch" + } + } }, { "type": "null" } ], - "title": "Required entitlements", - "description": "List of all required entitlements." - } - }, - "additionalProperties": false, - "type": "object", - "title": "RHIdentityConfiguration", - "description": "Red Hat Identity authentication configuration." - }, - "RagConfiguration": { - "properties": { - "inline": { - "items": { - "type": "string" - }, - "type": "array", - "title": "Inline RAG IDs", - "description": "RAG IDs whose sources are injected as context before the LLM call. Use 'okp' to enable OKP inline RAG. Empty by default (no inline RAG)." + "title": "Tool Choice" }, - "tool": { + "truncation": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "type": "string" }, { "type": "null" } ], - "title": "Tool RAG IDs", - "description": "RAG IDs made available to the LLM as a file_search tool. Use 'okp' to include the OKP vector store. When omitted, all registered BYOK vector stores are used (backward compatibility)." - } - }, - "additionalProperties": false, - "type": "object", - "title": "RagConfiguration", - "description": "RAG strategy configuration.\n\nControls which RAG sources are used for inline and tool-based retrieval.\n\nEach strategy lists RAG IDs to include. 
The special ID ``\"okp\"`` defined in constants,\nactivates the OKP provider; all other IDs refer to entries in ``byok_rag``.\n\nBackward compatibility:\n - ``inline`` defaults to ``[]`` (no inline RAG).\n - ``tool`` defaults to ``None`` which means all registered vector stores\n are used (identical to the previous ``tool.byok.enabled = True`` default)." - }, - "ReadinessResponse": { - "properties": { - "ready": { - "type": "boolean", - "title": "Ready", - "description": "Flag indicating if service is ready", - "examples": [ - true, - false + "title": "Truncation" + }, + "usage": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseUsage" + }, + { + "type": "null" + } ] }, - "reason": { - "type": "string", - "title": "Reason", - "description": "The reason for the readiness", - "examples": [ - "Service is ready" + "instructions": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Instructions" + }, + "max_tool_calls": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Tool Calls" + }, + "reasoning": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseReasoning" + }, + { + "type": "null" + } ] }, - "providers": { - "items": { - "$ref": "#/components/schemas/ProviderHealthStatus" - }, - "type": "array", - "title": "Providers", - "description": "List of unhealthy providers in case of readiness failure.", - "examples": [] - } - }, - "type": "object", - "required": [ - "ready", - "reason", - "providers" - ], - "title": "ReadinessResponse", - "description": "Model representing response to a readiness request.\n\nAttributes:\n ready: If service is ready.\n reason: The reason for the readiness.\n providers: List of unhealthy providers in case of readiness failure.\n\nExample:\n ```python\n readiness_response = ReadinessResponse(\n ready=False,\n reason=\"Service is not ready\",\n providers=[\n ProviderHealthStatus(\n provider_id=\"ollama\",\n status=\"unhealthy\",\n 
message=\"Server is unavailable\"\n )\n ]\n )\n ```", - "examples": [ - { - "providers": [], - "ready": true, - "reason": "Service is ready" - } - ] - }, - "ReferencedDocument": { - "properties": { - "doc_url": { + "max_output_tokens": { "anyOf": [ { - "type": "string", - "minLength": 1, - "format": "uri" + "type": "integer" }, { "type": "null" } ], - "title": "Doc Url", - "description": "URL of the referenced document" + "title": "Max Output Tokens" }, - "doc_title": { + "safety_identifier": { "anyOf": [ { "type": "string" @@ -8875,10 +11816,34 @@ "type": "null" } ], - "title": "Doc Title", - "description": "Title of the referenced document" + "title": "Safety Identifier" }, - "source": { + "metadata": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata" + }, + "store": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Store" + }, + "conversation": { "anyOf": [ { "type": "string" @@ -8887,13 +11852,75 @@ "type": "null" } ], - "title": "Source", - "description": "Index name identifying the knowledge source from configuration" + "title": "Conversation" + }, + "available_quotas": { + "additionalProperties": { + "type": "integer" + }, + "type": "object", + "title": "Available Quotas" + }, + "output_text": { + "type": "string", + "title": "Output Text" } }, "type": "object", - "title": "ReferencedDocument", - "description": "Model representing a document referenced in generating a response.\n\nAttributes:\n doc_url: Url to the referenced doc.\n doc_title: Title of the referenced doc." 
+ "required": [ + "created_at", + "id", + "model", + "output", + "status", + "available_quotas", + "output_text" + ], + "title": "ResponsesResponse", + "description": "Model representing a response from the Responses API following LCORE specification.\n\nAttributes:\n created_at: Unix timestamp when the response was created.\n completed_at: Unix timestamp when the response was completed, if applicable.\n error: Error details if the response failed or was blocked.\n id: Unique identifier for this response.\n model: Model identifier in \"provider/model\" format used for generation.\n object: Object type identifier, always \"response\".\n output: List of structured output items containing messages, tool calls, and\n other content. This is the primary response content.\n parallel_tool_calls: Whether the model can make multiple tool calls in parallel.\n previous_response_id: Identifier of the previous response in a multi-turn\n conversation.\n prompt: The input prompt object that was sent to the model.\n status: Current status of the response (e.g., \"completed\", \"blocked\",\n \"in_progress\").\n temperature: Temperature parameter used for generation (controls randomness).\n text: Text response configuration object used for OpenAI responses.\n top_p: Top-p sampling parameter used for generation.\n tools: List of tools available to the model during generation.\n tool_choice: Tool selection strategy used (e.g., \"auto\", \"required\", \"none\").\n truncation: Strategy used for handling content that exceeds context limits.\n usage: Token usage statistics including input_tokens, output_tokens, and\n total_tokens.\n instructions: System instructions or guidelines provided to the model.\n max_tool_calls: Maximum number of tool calls allowed in a single response.\n reasoning: Reasoning configuration (effort level) used for the response.\n max_output_tokens: Upper bound for tokens generated in the response.\n safety_identifier: Safety/guardrail identifier applied to the 
request.\n metadata: Additional metadata dictionary with custom key-value pairs.\n store: Whether the response was stored.\n conversation: Conversation ID linking this response to a conversation thread\n (LCORE-specific).\n available_quotas: Remaining token quotas for the user (LCORE-specific).\n output_text: Aggregated text output from all output_text items in the\n output array.", + "examples": [ + { + "available_quotas": { + "daily": 1000, + "monthly": 50000 + }, + "completed_at": 1704067250, + "conversation": "0d21ba731f21f798dc9680125d5d6f493e4a7ab79f25670e", + "created_at": 1704067200, + "id": "resp_abc123", + "instructions": "You are a helpful assistant", + "model": "openai/gpt-4-turbo", + "object": "response", + "output": [ + { + "content": [ + { + "text": "Kubernetes is an open-source container orchestration system...", + "type": "output_text" + } + ], + "role": "assistant", + "type": "message" + } + ], + "output_text": "Kubernetes is an open-source container orchestration system...", + "parallel_tool_calls": true, + "status": "completed", + "store": true, + "temperature": 0.7, + "text": { + "format": { + "type": "text" + } + }, + "usage": { + "input_tokens": 100, + "output_tokens": 50, + "total_tokens": 150 + } + } + ], + "sse_example": "event: response.created\ndata: {\"type\":\"response.created\",\"sequence_number\":0,\"response\":{\"id\":\"resp_abc\",\"created_at\":1704067200,\"status\":\"in_progress\",\"output\":[],\"conversation\":\"0d21ba731f21f798dc9680125d5d6f49\",\"available_quotas\":{},\"output_text\":\"\"}}\n\nevent: response.output_item.added\ndata: {\"response_id\":\"resp_abc\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello! How can I help?\"}]},\"output_index\":0,\"sequence_number\":1}\n\nevent: response.output_item.done\ndata: {\"response_id\":\"resp_abc\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello! 
How can I help?\"}]},\"output_index\":0,\"sequence_number\":2}\n\nevent: response.completed\ndata: {\"type\":\"response.completed\",\"sequence_number\":3,\"response\":{\"id\":\"resp_abc\",\"created_at\":1704067200,\"completed_at\":1704067250,\"status\":\"completed\",\"output\":[{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello! How can I help?\"}]}],\"usage\":{\"input_tokens\":10,\"output_tokens\":6,\"total_tokens\":16},\"conversation\":\"0d21ba731f21f798dc9680125d5d6f49\",\"available_quotas\":{\"daily\":1000,\"monthly\":50000},\"output_text\":\"Hello! How can I help?\"}}\n\ndata: [DONE]\n\n" }, "RlsapiV1Attachment": { "properties": { @@ -9163,6 +12190,90 @@ "title": "SQLiteDatabaseConfiguration", "description": "SQLite database configuration." }, + "SearchRankingOptions": { + "properties": { + "ranker": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Ranker" + }, + "score_threshold": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Score Threshold", + "default": 0.0 + }, + "alpha": { + "anyOf": [ + { + "type": "number", + "maximum": 1.0, + "minimum": 0.0 + }, + { + "type": "null" + } + ], + "title": "Alpha", + "description": "Weight factor for weighted ranker" + }, + "impact_factor": { + "anyOf": [ + { + "type": "number", + "exclusiveMinimum": 0.0 + }, + { + "type": "null" + } + ], + "title": "Impact Factor", + "description": "Impact factor for RRF algorithm" + }, + "weights": { + "anyOf": [ + { + "additionalProperties": { + "type": "number" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Weights", + "description": "Weights for combining vector, keyword, and neural scores. 
Keys: 'vector', 'keyword', 'neural'" + }, + "model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Model", + "description": "Model identifier for neural reranker" + } + }, + "type": "object", + "title": "SearchRankingOptions", + "description": "Options for ranking and filtering search results.\n\nThis class configures how search results are ranked and filtered. You can use algorithm-based\nrerankers (weighted, RRF) or neural rerankers. Defaults from VectorStoresConfig are\nused when parameters are not provided.\n\nExamples:\n # Weighted ranker with custom alpha\n SearchRankingOptions(ranker=\"weighted\", alpha=0.7)\n\n # RRF ranker with custom impact factor\n SearchRankingOptions(ranker=\"rrf\", impact_factor=50.0)\n\n # Use config defaults (just specify ranker type)\n SearchRankingOptions(ranker=\"weighted\") # Uses alpha from VectorStoresConfig\n\n # Score threshold filtering\n SearchRankingOptions(ranker=\"weighted\", score_threshold=0.5)\n\n:param ranker: (Optional) Name of the ranking algorithm to use. Supported values:\n - \"weighted\": Weighted combination of vector and keyword scores\n - \"rrf\": Reciprocal Rank Fusion algorithm\n - \"neural\": Neural reranking model (requires model parameter, Part II)\n Note: For OpenAI API compatibility, any string value is accepted, but only the above values are supported.\n:param score_threshold: (Optional) Minimum relevance score threshold for results. Default: 0.0\n:param alpha: (Optional) Weight factor for weighted ranker (0-1).\n - 0.0 = keyword only\n - 0.5 = equal weight (default)\n - 1.0 = vector only\n Only used when ranker=\"weighted\" and weights is not provided.\n Falls back to VectorStoresConfig.chunk_retrieval_params.weighted_search_alpha if not provided.\n:param impact_factor: (Optional) Impact factor (k) for RRF algorithm.\n Lower values emphasize higher-ranked results. 
Default: 60.0 (optimal from research).\n Only used when ranker=\"rrf\".\n Falls back to VectorStoresConfig.chunk_retrieval_params.rrf_impact_factor if not provided.\n:param weights: (Optional) Dictionary of weights for combining different signal types.\n Keys can be \"vector\", \"keyword\", \"neural\". Values should sum to 1.0.\n Used when combining algorithm-based reranking with neural reranking (Part II).\n Example: {\"vector\": 0.3, \"keyword\": 0.3, \"neural\": 0.4}\n:param model: (Optional) Model identifier for neural reranker (e.g., \"vllm/Qwen3-Reranker-0.6B\").\n Required when ranker=\"neural\" or when weights contains \"neural\" (Part II)." + }, "SecurityScheme": { "anyOf": [ { @@ -9854,4 +12965,4 @@ } } } -} +} \ No newline at end of file diff --git a/docs/responses.md b/docs/responses.md index 80131048b..1eafb22ab 100644 --- a/docs/responses.md +++ b/docs/responses.md @@ -1,6 +1,6 @@ # LCORE OpenResponses API Specification -This document describes the LCORE implementation of the OpenResponses API, exposed via the `POST /v1/responses` endpoint. This endpoint follows the OpenResponses specification and is built on top of the Llama Stack Responses API. Since the underlying Llama Stack Responses API is still evolving, the LCORE endpoint provides a standards-aligned interface while documenting a supported subset of OpenResponses fields. In addition, it introduces LCORE-specific extensions to preserve feature parity and defines explicit field mappings to reproduce the functionality of existing `/v1/query` and `/v1/streaming_query` endpoints. +This document describes the LCORE implementation of the OpenResponses API, exposed via the `POST /v1/responses` endpoint. This endpoint follows the OpenResponses specification and is built on top of the Llama Stack Responses API. 
In addition, it introduces LCORE-specific extensions to preserve feature parity and defines explicit field mappings to reproduce the functionality of existing `/v1/query` and `/v1/streaming_query` endpoints. --- @@ -9,7 +9,7 @@ This document describes the LCORE implementation of the OpenResponses API, expos * [Introduction](#introduction) * [Endpoint Overview](#endpoint-overview) * [Request Specification](#request-specification) - * [Inherited LLS OpenAPI Fields](#inherited-lls-openapi-fields) + * [Inherited LLS OpenAPI Fields](#inherited-lls-openapi-attributes) * [LCORE-Specific Extensions](#lcore-specific-extensions) * [Field Mappings](#field-mappings) * [Structured request attributes: variants and usage](#structured-request-attributes-variants-and-usage) @@ -19,7 +19,7 @@ This document describes the LCORE implementation of the OpenResponses API, expos * [LCORE-Specific Extensions](#lcore-specific-extensions-1) * [Field Mappings](#field-mappings-1) * [Streaming Support](#streaming-support) -* [Known Limitations and Behavioral Differences](#known-limitations-and-behavioral-differences) +* [Behavioral Differences](#behavioral-differences) * [Conversation Handling](#conversation-handling) * [Output Representation](#output-representation) * [Tool Configuration Differences](#tool-configuration-differences) @@ -47,9 +47,9 @@ This document describes the LCORE implementation of the OpenResponses API, expos ## Introduction -The LCORE OpenResponses API provides a standards-aligned interface for AI response generation while preserving feature compatibility with existing LCORE workflows. In particular, the endpoint enriches requests and responses with LCORE-specific attributes, adjusts the semantics of some fields for compatibility, and enriches streaming events. +The LCORE OpenResponses API provides a standards-aligned interface for AI response generation while preserving feature compatibility with existing LCORE workflows. 
In particular, the endpoint enriches requests and responses with LCORE-specific attributes, adjusts the semantics of some fields for compatibility, and enriches content of some streaming events. -The endpoint is designed to provide feature parity with existing streaming endpoints while offering a more direct interface to the underlying Responses API. +The endpoint is designed to provide feature parity with existing query endpoints while offering a more direct interface to the underlying Responses API. --- @@ -69,7 +69,7 @@ The endpoint is designed to provide feature parity with existing streaming endpo ## Request Specification -### Inherited LLS OpenAPI Fields +### Inherited LLS OpenAPI Attributes The following request attributes are supported as defined by the underlying Llama Stack Responses API and retain their original OpenResponses semantics unless otherwise stated: @@ -80,12 +80,15 @@ The following request attributes are supported as defined by the underlying Llam | `conversation` | string | Conversation ID (OpenAI or LCORE format). Mutually exclusive with `previous_response_id` | No | | `include` | array[string] | Extra output item types to include | No | | `instructions` | string | System prompt | No | -| `max_infer_iters` | integer | Max inference iterations | No | -| `max_tool_calls` | integer | Max tool calls per response | No | +| `max_infer_iters` | integer | Maximum of inference iterations | No | +| `max_output_tokens` | integer | Maximum of output tokens | No | +| `max_tool_calls` | integer | Maximum of tool calls per response | No | | `metadata` | dictionary | Custom metadata (tracking/logging) | No | | `parallel_tool_calls` | boolean | Allow parallel tool calls | No | | `previous_response_id` | string | Previous response ID for context. 
Mutually exclusive with `conversation` | No | | `prompt` | object | Prompt substitution template | No | +| `reasoning` | object | Reasoning configuration (effort level) used for the response | No | +| `safety_identifier` | string | Safety/guardrail identifier applied to the request | No | | `store` | boolean | Store in conversation history (default: true) | No | | `stream` | boolean | Stream response (default: false) | No | | `temperature` | float | Sampling temperature (0.0–2.0) | No | @@ -93,15 +96,14 @@ The following request attributes are supported as defined by the underlying Llam | `tool_choice` | string or object | Tool selection strategy (auto, required, none, or specific rules). Default: auto | No | | `tools` | array[object] | Tools available for request (file search, web search, functions, MCP). Default: all | No | -**Note:** Only the fields listed above are currently supported. Additional OpenResponses fields may not yet be available due to LLS API incompleteness. - ### LCORE-Specific Extensions The following fields are LCORE-specific request extensions and are not part of the standard LLS OpenAPI specification: | Field | Type | Description | Required | |-------|------|-------------|----------| -| `generate_topic_summary` | boolean | Generate topic summary for new conversations | No | +| `generate_topic_summary` | boolean | Generate topic summary for new conversations. Default: true | No | +| `shield_ids` | array[string] | Shield IDs to apply. 
If omitted, all configured shields in LCORE are used | No | | `solr` | dictionary | Solr vector_io provider query parameters | No | @@ -114,11 +116,12 @@ The following table maps LCORE query request fields to the OpenResponses request | `query` | `input` | The attribute allows to pass string-like input and also structured input of list of input items | | `conversation_id` | `conversation` | Supports OpenAI `conv_*` format or LCORE hex UUID | | `provider` + `model` | `model` | Concatenated as `provider/model` | -| `system_prompt` | `instructions` | Only change in attribute's name | +| `system_prompt` | `instructions` | Same meaning. Only change in attribute's name | | `attachments` | `input` items | Attachments can be passed as input messages with content of type `input_file` | | `no_tools` | `tool_choice` | `no_tools=true` mapped to `tool_choice="none"` | | `vector_store_ids` | `tools` + `tool_choice` | Vector stores can be explicitly specified and restricted by `file_search` tool type's `vector_store_ids` attribute | | `generate_topic_summary` | N/A | Exposed directly (LCORE-specific) | +| `shield_ids` | N/A | Exposed directly (LCORE-specific) | | `solr` | N/A | Exposed directly (LCORE-specific) | **Note:** The `media_type` attribute is not present in the LCORE specification, as downstream logic determines which format to process (structured `output` or textual `output_text` response attributes). @@ -141,7 +144,7 @@ Required. Either a **string** or a list of input items. Each **item** is one of: - [mcp_approval_request](#mcp_approval_request) — request for human approval of an MCP call - [mcp_approval_response](#mcp_approval_response) — human approval or denial -All input item objects have a common `type` attribute that determines their structure. See [Available OpenResponses items](#available-openresponses-items) for detailed descriptions and examples of each item type. 
+All input item objects have a common `type` discriminator that determines the subsequent structure. See [Available OpenResponses items](#available-openresponses-items) for detailed descriptions and examples of each item type. #### `include` @@ -190,6 +193,28 @@ Template with multiple variable types (text, image, file): Here the template `report_template` (version `2.0`) might define placeholders such as `{{title}}`, `{{chart}}`, and `{{data}}`; the backend substitutes them with the provided text, image, and file respectively. +#### `reasoning` + +Optional. **Reasoning effort configuration** that controls how much “thinking” the model does before producing its answer. Supported on models that expose reasoning (e.g. o1/o3-style). Lower effort favors speed and fewer tokens; higher effort favors more thorough reasoning. + +When provided, the object has a single key: + +`effort`: One of `"none"`, `"minimal"`, `"low"`, `"medium"`, `"high"`, or `"xhigh"`. `None` leaves the default behavior to the backend. + +**Examples:** + +```json +{ "reasoning": { "effort": "low" } } +``` + +```json +{ "reasoning": { "effort": "high" } } +``` + +```json +{ "reasoning": { "effort": "medium" } } +``` + #### `text` Optional. Text response configuration that tells the model how to format its main text output. @@ -357,27 +382,31 @@ The following response attributes are inherited directly from the LLS OpenAPI sp | Field | Type | Description | |-------|------|-------------| -| `id` | string | Unique response ID | -| `object` | string | Always `"response"` | | `created_at` | integer | Creation time (Unix) | -| `status` | string | Status (e.g. 
completed, blocked, in_progress) | | `completed_at` | integer | Completion time (Unix), if set | +| `error` | object | Error details if failed or incompleted | +| `id` | string | Unique response ID or moderation ID | | `model` | string | Model ID (provider/model) used | +| `object` | string | Always `"response"` | | `output` | array[object] | Structured output (messages, tool calls, etc.) | -| `error` | object | Error details if failed or blocked | -| `instructions` | string | System instructions used | -| `max_tool_calls` | integer | Max tool calls allowed | -| `metadata` | dictionary | Custom metadata | | `parallel_tool_calls` | boolean | Parallel tool calls allowed | | `previous_response_id` | string | Previous response ID (multi-turn) | -| `prompt` | object | Prompt echoed (id, variables, version) | -| `temperature` | float | Temperature used | -| `text` | object | Text config (format key) | -| `tool_choice` | string or object | Tool selection used | -| `tools` | array[object] | Tools available during generation | +| `prompt` | object | The input prompt object that was sent to the model | +| `status` | string | Status (e.g. 
completed, blocked, in_progress) | +| `temperature` | float | Temperature parameter used for generation | +| `text` | object | Text response configuration object used | | `top_p` | float | Top-p sampling used | +| `tools` | array[object] | Tools available during generation | +| `tool_choice` | string or object | Tool selection used | | `truncation` | string | Truncation strategy applied (`"auto"` or `"disabled"`) | | `usage` | object | Token usage (input_tokens, output_tokens, total_tokens) | +| `instructions` | string | System instructions used | +| `max_tool_calls` | integer | Max tool calls allowed | +| `reasoning` | object | Reasoning configuration applied | +| `max_output_tokens` | integer | Maximum output tokens allowed, if set | +| `safety_identifier` | string | Safety model or identifier used, if set | +| `metadata` | dictionary | Custom metadata specified in request | +| `store` | boolean | Whether the response was stored | | `output_text` | string | Aggregated text from output items | ### Structured response output: object types and examples @@ -394,7 +423,7 @@ The `output` array contains structured items. Each item has a `type`. Each list **Note:** No `mcp_approval_response` nor `function_call_output` here as they can serve only as input items. -All response item objects have a common `type` attribute that determines their structure. See [Available OpenResponses items](#available-openresponses-items) for detailed descriptions and examples of each item type. +All response item objects have a common `type` discriminator that determines subsequent structure. See [Available OpenResponses items](#available-openresponses-items) for detailed descriptions and examples of each item type. 
### LCORE-Specific Extensions @@ -421,7 +450,7 @@ The following mappings are applied when converting from LLS OpenAPI format to LC **Deprecated Fields:** The following fields are not exposed in the LCORE OpenResponses specification: * `rag_chunks` - Part of `output` items of `file_search_call` type * `referenced_documents` - Part of `output` items -* `truncated` - Deprecated; `truncation` field indicates used strategy, not whether the truncation was applied. +* `truncated` - Deprecated; the `truncation` field indicates the strategy used, not whether the truncation was actually applied. --- @@ -447,15 +476,15 @@ Each streaming event follows the Server-Sent Events (SSE) format: --- -## Known Limitations and Behavioral Differences +## Behavioral Differences -The `/v1/responses` endpoint follows the OpenResponses structure but is currently constrained by the capabilities of the underlying Llama Stack Responses API. As a result, only the documented subset of request and response fields is supported. +The `/v1/responses` endpoint follows the OpenResponses structure but also incorporates LCORE-specific features to maintain full feature compatibility with query endpoints. Several behavioral differences and implementation details should be noted: ### Conversation Handling -The `conversation` field in responses is a LCORE-managed extension. While not natively defined by the Llama Stack specification, it is internally resolved and linked to the request conversation to preserve multi-turn behavior. +The `conversation` field in responses is a LCORE-managed extension. While not natively defined by the Llama Stack specification, it is internally resolved and **always** present in the response to preserve the LCORE conversation-based model. 
The endpoint accepts two conversation ID formats: @@ -484,13 +513,14 @@ Fields such as `media_type`, `tool_calls`, `tool_results`, `rag_chunks`, and `re ### Tool Configuration Differences -Vector store IDs are configured within the `tools` array (e.g., as `file_search` tools) rather than through separate parameters. By default all tools that are configured in LCORE are used to support the response. The set of available tools can be maintained per-request by `tool_choice` or `tools` attributes. +Vector store IDs are configured within the `tools` array as `file_search` tools rather than through separate parameters. MCP tools are configurable under the `mcp` tool type. By default **all** tools that are configured in LCORE are used to support the response. The set of available tools can be maintained per-request by `tool_choice` or `tools` attributes. ### LCORE-Specific Extensions The API introduces extensions that are not part of the OpenResponses specification: - `generate_topic_summary` (request) — When set to `true` and a new conversation is created, a topic summary is automatically generated and stored in conversation metadata. +- `shield_ids` (request) — Optional list of safety shield IDs to apply. If omitted, all configured shields are used. - `solr` (request) — Solr vector_io provider query parameters (e.g. filter queries). - `available_quotas` (response) — Provides real-time quota information from all configured quota limiters. @@ -498,10 +528,33 @@ The API introduces extensions that are not part of the OpenResponses specificati Streaming responses use Server-Sent Events (SSE) and are enriched with LCORE-specific metadata: -- The `conversation` attribute is included in streamed response payloads. +- The `conversation` attribute is included in all streamed payloads that contain a `response` attribute. 
- The `available_quotas` attribute is added to final completion events (`response.completed`, `response.incomplete`, or `response.failed`) and also to the intermediate `response.in_progress` with empty object. -This enrichment may differ slightly from standard OpenAI streaming behavior but preserves compatibility with existing LCORE streaming workflows. + +## Implicit Conversation Management + +This implementation introduces **implicit conversation management**, ensuring that every response is associated with a conversation and can be inspected through the Conversations API. + +Users can provide context to the LLM using one of the following **mutually exclusive** strategies: + +- `conversation` — reference an existing conversation by ID +- `previous_response_id` — reference a previous response (for multi-turn continuation or branching) +- **no context** — neither a conversation nor a previous response is provided + +In **LCORE**, a conversation is modeled as a **linear chain of user turns** (request + response), where every turn belongs to exactly one conversation. Supporting `previous_response_id` as a context mechanism introduces **branching semantics**, which would break this linear structure if handled naively. To preserve a consistent conversation model, implicit conversation management applies the following rules: + +- **Context via `conversation`** — All items from the referenced conversation are provided as context for the new response. The new turn is automatically appended to that conversation, provided the conversation exists and the user has permission to access it. + +- **No context provided** — LCORE creates a new, empty conversation and assigns the new turn to it. 
+ +- **Context via `previous_response_id`** — LCORE determines whether the referenced response is the **latest response in its conversation**: + - **If it is the latest successful response** — The request is treated as a normal continuation of that conversation, preserving the linear structure. + - **If it is not the latest response** — The conversation is **forked**. A new conversation is created, and the new turn becomes the starting point of that conversation. + +**Moderation responses** (requests that fail shield moderation) follow the same conversation rules. However, only **valid (successful) responses** can be referenced via `previous_response_id`; moderation responses cannot be used as context for follow-up requests. + +Blocked turns still appear in conversation history via the Conversations API, but they **do not produce a referenceable response** for continuation or forking. They are also **excluded when determining the latest response** in a conversation. ## Examples diff --git a/src/app/endpoints/conversations_v1.py b/src/app/endpoints/conversations_v1.py index 1dc14cc95..158d312b6 100644 --- a/src/app/endpoints/conversations_v1.py +++ b/src/app/endpoints/conversations_v1.py @@ -16,7 +16,6 @@ from configuration import configuration from models.config import Action from models.database.conversations import ( - UserTurn, UserConversation, ) from models.requests import ConversationUpdateRequest @@ -38,6 +37,7 @@ check_configuration_loaded, delete_conversation, retrieve_conversation, + retrieve_conversation_turns, validate_and_retrieve_conversation, ) from utils.suid import ( @@ -45,7 +45,10 @@ normalize_conversation_id, to_llama_stack_conversation_id, ) -from utils.conversations import build_conversation_turns_from_items +from utils.conversations import ( + build_conversation_turns_from_items, + get_all_conversation_items, +) from log import get_logger logger = get_logger(__name__) @@ -236,46 +239,23 @@ async def get_conversation_endpoint_handler( # 
pylint: disable=too-many-locals, llama_stack_conv_id, ) - # Use Conversations API to retrieve conversation items - conversation_items_response = await client.conversations.items.list( - conversation_id=llama_stack_conv_id, - after=None, - include=None, - limit=None, - order="asc", # oldest first - ) + # Retrieve turns metadata from database (can be empty for legacy conversations) + db_turns = retrieve_conversation_turns(normalized_conv_id) - if not conversation_items_response.data: + # Use Conversations API to retrieve conversation items + items = await get_all_conversation_items(client, llama_stack_conv_id) + if not items: logger.error("No items found for conversation %s", conversation_id) response = NotFoundResponse( resource="conversation", resource_id=normalized_conv_id ).model_dump() raise HTTPException(**response) - items = conversation_items_response.data - logger.info( "Successfully retrieved %d items for conversation %s", len(items), conversation_id, ) - # Retrieve turns metadata from database - db_turns: list[UserTurn] = [] - try: - with get_session() as session: - db_turns = ( - session.query(UserTurn) - .filter_by(conversation_id=normalized_conv_id) - .order_by(UserTurn.turn_number) - .all() - ) - except SQLAlchemyError as e: - logger.error( - "Database error occurred while retrieving conversation turns for %s.", - normalized_conv_id, - ) - response = InternalServerErrorResponse.database_error() - raise HTTPException(**response.model_dump()) from e # Build conversation turns from items and populate turns metadata # Use conversation.created_at for legacy conversations without turn metadata diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py index a0a653c2c..d9635e791 100644 --- a/src/app/endpoints/query.py +++ b/src/app/endpoints/query.py @@ -1,5 +1,3 @@ -# pylint: disable=too-many-locals,too-many-branches,too-many-nested-blocks - """Handler for REST API call to provide answer to query using Response API.""" import datetime @@ -264,7 
+262,7 @@ async def query_endpoint_handler( ) -async def retrieve_response( # pylint: disable=too-many-locals +async def retrieve_response( client: AsyncLlamaStackClient, responses_params: ResponsesApiParams, moderation_result: ShieldModerationResult, @@ -291,7 +289,9 @@ async def retrieve_response( # pylint: disable=too-many-locals responses_params.input, [moderation_result.refusal_response], ) - return TurnSummary(llm_response=moderation_result.message) + return TurnSummary( + id=moderation_result.moderation_id, llm_response=moderation_result.message + ) try: response = await client.responses.create( **responses_params.model_dump(exclude_none=True) diff --git a/src/app/endpoints/responses.py b/src/app/endpoints/responses.py new file mode 100644 index 000000000..89a475f9a --- /dev/null +++ b/src/app/endpoints/responses.py @@ -0,0 +1,731 @@ +# pylint: disable=too-many-locals,too-many-branches,too-many-nested-blocks, too-many-arguments,too-many-positional-arguments + +"""Handler for REST API call to provide answer using Responses API (LCORE specification).""" + +import json +from datetime import UTC, datetime +from typing import Annotated, Any, Optional, cast +from collections.abc import AsyncIterator + +from fastapi import APIRouter, Depends, HTTPException, Request +from fastapi.responses import StreamingResponse +from llama_stack_api import ( + OpenAIResponseObject, + OpenAIResponseObjectStream, + OpenAIResponseObjectStreamResponseOutputItemAdded as OutputItemAddedChunk, + OpenAIResponseObjectStreamResponseOutputItemDone as OutputItemDoneChunk, +) +from llama_stack_client import ( + APIConnectionError, + APIStatusError as LLSApiStatusError, + AsyncLlamaStackClient, +) +from openai._exceptions import ( + APIStatusError as OpenAIAPIStatusError, +) + +from authentication import get_auth_dependency +from authentication.interface import AuthTuple +from authorization.azure_token_manager import AzureEntraIDManager +from authorization.middleware import authorize +from 
client import AsyncLlamaStackClientHolder +from configuration import configuration +from log import get_logger +from models.config import Action +from models.requests import ResponsesRequest +from models.responses import ( + ForbiddenResponse, + InternalServerErrorResponse, + NotFoundResponse, + PromptTooLongResponse, + QuotaExceededResponse, + ResponsesResponse, + ServiceUnavailableResponse, + UnauthorizedResponse, + UnprocessableEntityResponse, +) + +from utils.conversations import append_turn_items_to_conversation +from utils.endpoints import ( + check_configuration_loaded, + resolve_response_context, +) +from utils.mcp_headers import mcp_headers_dependency +from utils.mcp_oauth_probe import check_mcp_auth +from utils.query import ( + consume_query_tokens, + extract_provider_and_model_from_model_id, + handle_known_apistatus_errors, + store_query_results, + update_azure_token, + validate_model_provider_override, +) +from utils.quota import check_tokens_available, get_available_quotas +from utils.responses import ( + build_tool_call_summary, + build_turn_summary, + check_model_configured, + deduplicate_referenced_documents, + extract_attachments_text, + extract_text_from_response_items, + extract_token_usage, + extract_vector_store_ids_from_tools, + get_topic_summary, + get_zero_usage, + parse_referenced_documents, + resolve_tool_choice, + select_model_for_responses, +) +from utils.shields import run_shield_moderation +from utils.suid import ( + normalize_conversation_id, +) +from utils.types import ( + RAGContext, + ResponseInput, + ResponsesApiParams, + ShieldModerationBlocked, + ShieldModerationResult, + TurnSummary, +) +from utils.vector_search import ( + append_inline_rag_context_to_responses_input, + build_rag_context, +) + +logger = get_logger(__name__) +router = APIRouter(tags=["responses"]) + +responses_response: dict[int | str, dict[str, Any]] = { + 200: ResponsesResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + 
examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response( + examples=["endpoint", "conversation read", "model override"] + ), + 404: NotFoundResponse.openapi_response( + examples=["model", "conversation", "provider"] + ), + 413: PromptTooLongResponse.openapi_response(), + 422: UnprocessableEntityResponse.openapi_response(), + 429: QuotaExceededResponse.openapi_response(), + 500: InternalServerErrorResponse.openapi_response(examples=["configuration"]), + 503: ServiceUnavailableResponse.openapi_response(), +} + + +@router.post( + "/responses", + responses=responses_response, + response_model=None, + summary="Responses Endpoint Handler", +) +@authorize(Action.QUERY) +async def responses_endpoint_handler( + request: Request, + responses_request: ResponsesRequest, + auth: Annotated[AuthTuple, Depends(get_auth_dependency())], + mcp_headers: dict[str, dict[str, str]] = Depends(mcp_headers_dependency), +) -> ResponsesResponse | StreamingResponse: + """ + Handle request to the /responses endpoint using Responses API (LCORE specification). + + Processes a POST request to the responses endpoint, forwarding the + user's request to a selected Llama Stack LLM and returning the generated response + following the LCORE OpenAPI specification. + + Returns: + ResponsesResponse: Contains the response following LCORE specification (non-streaming). + StreamingResponse: SSE-formatted streaming response with enriched events (streaming). 
+ - response.created event includes conversation attribute + - response.completed event includes available_quotas attribute + + Raises: + HTTPException: + - 401: Unauthorized - Missing or invalid credentials + - 403: Forbidden - Insufficient permissions or model override not allowed + - 404: Not Found - Conversation, model, or provider not found + - 413: Prompt too long - Prompt exceeded model's context window size + - 422: Unprocessable Entity - Request validation failed + - 429: Quota limit exceeded - The token quota for model or user has been exceeded + - 500: Internal Server Error - Configuration not loaded or other server errors + - 503: Service Unavailable - Unable to connect to Llama Stack backend + """ + responses_request = responses_request.model_copy(deep=True) + check_configuration_loaded(configuration) + started_at = datetime.now(UTC) + user_id = auth[0] + + await check_mcp_auth(configuration, mcp_headers) + + # Check token availability + check_tokens_available(configuration.quota_limiters, user_id) + + # Enforce RBAC: optionally disallow overriding model in requests + validate_model_provider_override( + responses_request.model, + None, # provider specified as model prefix + request.state.authorized_actions, + ) + + response_context = await resolve_response_context( + user_id=user_id, + others_allowed=( + Action.READ_OTHERS_CONVERSATIONS in request.state.authorized_actions + ), + conversation_id=responses_request.conversation, + previous_response_id=responses_request.previous_response_id, + generate_topic_summary=responses_request.generate_topic_summary, + ) + responses_request.conversation = response_context.conversation + responses_request.generate_topic_summary = response_context.generate_topic_summary + client = AsyncLlamaStackClientHolder().get_client() + + # LCORE-specific: Automatically select model if not provided in request + # This extends the base LLS API which requires model to be specified. 
+ if not responses_request.model: + responses_request.model = await select_model_for_responses( + client, response_context.user_conversation + ) + if not await check_model_configured(client, responses_request.model): + _, model_id = extract_provider_and_model_from_model_id(responses_request.model) + error_response = NotFoundResponse(resource="model", resource_id=model_id) + raise HTTPException(**error_response.model_dump()) + + # Handle Azure token refresh if needed + if ( + responses_request.model.startswith("azure") + and AzureEntraIDManager().is_entra_id_configured + and AzureEntraIDManager().is_token_expired + and AzureEntraIDManager().refresh_token() + ): + client = await update_azure_token(client) + + input_text = ( + responses_request.input + if isinstance(responses_request.input, str) + else extract_text_from_response_items(responses_request.input) + ) + attachments_text = extract_attachments_text(responses_request.input) + + moderation_result = await run_shield_moderation( + client, + input_text + "\n\n" + attachments_text, + responses_request.shield_ids, + ) + + ( + responses_request.tools, + responses_request.tool_choice, + vector_store_ids, + ) = await resolve_tool_choice( + responses_request.tools, + responses_request.tool_choice, + auth[1], + mcp_headers, + request.headers, + ) + + # Build RAG context from Inline RAG sources + inline_rag_context = await build_rag_context( + client, + moderation_result.decision, + input_text, + vector_store_ids, + responses_request.solr, + ) + if moderation_result.decision == "passed": + responses_request.input = append_inline_rag_context_to_responses_input( + responses_request.input, inline_rag_context.context_text + ) + + response_handler = ( + handle_streaming_response + if responses_request.stream + else handle_non_streaming_response + ) + return await response_handler( + client=client, + request=responses_request, + auth=auth, + input_text=input_text, + started_at=started_at, + moderation_result=moderation_result, 
+ inline_rag_context=inline_rag_context, + ) + + +async def handle_streaming_response( + client: AsyncLlamaStackClient, + request: ResponsesRequest, + auth: AuthTuple, + input_text: str, + started_at: datetime, + moderation_result: ShieldModerationResult, + inline_rag_context: RAGContext, +) -> StreamingResponse: + """Handle streaming response from Responses API. + + Args: + client: The AsyncLlamaStackClient instance + request: ResponsesRequest (LCORE-specific fields e.g. generate_topic_summary) + auth: Authentication tuple + input_text: The extracted input text + started_at: Timestamp when the conversation started + moderation_result: Result of shield moderation check + inline_rag_context: Inline RAG context to be used for the response + Returns: + StreamingResponse with SSE-formatted events + """ + api_params = ResponsesApiParams.model_validate(request.model_dump()) + turn_summary = TurnSummary() + # Handle blocked response + if moderation_result.decision == "blocked": + turn_summary.id = moderation_result.moderation_id + turn_summary.llm_response = moderation_result.message + available_quotas = get_available_quotas( + quota_limiters=configuration.quota_limiters, user_id=auth[0] + ) + generator = shield_violation_generator( + moderation_result, + api_params.conversation, + request.echoed_params(), + started_at, + available_quotas, + ) + if api_params.store: + await append_turn_items_to_conversation( + client=client, + conversation_id=api_params.conversation, + user_input=request.input, + llm_output=[moderation_result.refusal_response], + ) + else: + try: + response = await client.responses.create( + **api_params.model_dump(exclude_none=True) + ) + generator = response_generator( + stream=cast(AsyncIterator[OpenAIResponseObjectStream], response), + user_input=request.input, + api_params=api_params, + user_id=auth[0], + turn_summary=turn_summary, + inline_rag_context=inline_rag_context, + ) + except RuntimeError as e: # library mode wraps 413 into runtime error + 
if "context_length" in str(e).lower(): + error_response = PromptTooLongResponse(model=api_params.model) + raise HTTPException(**error_response.model_dump()) from e + raise e + except APIConnectionError as e: + error_response = ServiceUnavailableResponse( + backend_name="Llama Stack", + cause=str(e), + ) + raise HTTPException(**error_response.model_dump()) from e + except (LLSApiStatusError, OpenAIAPIStatusError) as e: + error_response = handle_known_apistatus_errors(e, api_params.model) + raise HTTPException(**error_response.model_dump()) from e + + return StreamingResponse( + generate_response( + generator=generator, + turn_summary=turn_summary, + client=client, + auth=auth, + input_text=input_text, + started_at=started_at, + api_params=api_params, + generate_topic_summary=request.generate_topic_summary or False, + ), + media_type="text/event-stream", + ) + + +async def shield_violation_generator( + moderation_result: ShieldModerationBlocked, + conversation_id: str, + echoed_params: dict[str, Any], + created_at: datetime, + available_quotas: dict[str, int], +) -> AsyncIterator[str]: + """Generate SSE-formatted streaming response for shield-blocked requests. 
+ + Follows the Open Responses spec: + - Content-Type: text/event-stream + - Each event has 'event:' field matching the type in the event body + - Data objects are JSON-encoded strings + - Terminal event is the literal string [DONE] + - Emits full event sequence: response.created (in_progress), output_item.added, + output_item.done, response.completed (completed) + - Performs topic summary and persistence after [DONE] is emitted + + Args: + moderation_result: The moderation result + conversation_id: The conversation ID to include in the response + echoed_params: Echoed parameters from the request + created_at: Unix timestamp when the response was created + available_quotas: Available quotas dictionary for the user + Yields: + SSE-formatted strings for streaming events, ending with [DONE] + """ + normalized_conv_id = normalize_conversation_id(conversation_id) + + # 1. Send response.created event with status "in_progress" and empty output + created_response_object = ResponsesResponse.model_construct( + id=moderation_result.moderation_id, + created_at=int(created_at.timestamp()), + status="in_progress", + output=[], + conversation=normalized_conv_id, + available_quotas={}, + output_text="", + **echoed_params, + ) + created_response_dict = created_response_object.model_dump(exclude_none=True) + created_event = { + "type": "response.created", + "sequence_number": 0, + "response": created_response_dict, + } + data_json = json.dumps(created_event) + yield f"event: response.created\ndata: {data_json}\n\n" + + # 2. Send response.output_item.added event + item_added_event = OutputItemAddedChunk( + response_id=moderation_result.moderation_id, + item=moderation_result.refusal_response, + output_index=0, + sequence_number=1, + ) + data_json = json.dumps(item_added_event.model_dump(exclude_none=True)) + yield f"event: response.output_item.added\ndata: {data_json}\n\n" + + # 3. 
Send response.output_item.done event + item_done_event = OutputItemDoneChunk( + response_id=moderation_result.moderation_id, + item=moderation_result.refusal_response, + output_index=0, + sequence_number=2, + ) + data_json = json.dumps(item_done_event.model_dump(exclude_none=True)) + yield f"event: response.output_item.done\ndata: {data_json}\n\n" + + # 4. Send response.completed event with status "completed" and output populated + completed_response_object = ResponsesResponse.model_construct( + id=moderation_result.moderation_id, + created_at=int(created_at.timestamp()), + completed_at=int(datetime.now(UTC).timestamp()), + status="completed", + output=[moderation_result.refusal_response], + usage=get_zero_usage(), + conversation=normalized_conv_id, + available_quotas=available_quotas, + output_text=moderation_result.message, + **echoed_params, + ) + completed_response_dict = completed_response_object.model_dump(exclude_none=True) + completed_event = { + "type": "response.completed", + "sequence_number": 3, + "response": completed_response_dict, + } + data_json = json.dumps(completed_event) + yield f"event: response.completed\ndata: {data_json}\n\n" + + yield "data: [DONE]\n\n" + + +async def response_generator( + stream: AsyncIterator[OpenAIResponseObjectStream], + user_input: ResponseInput, + api_params: ResponsesApiParams, + user_id: str, + turn_summary: TurnSummary, + inline_rag_context: RAGContext, +) -> AsyncIterator[str]: + """Generate SSE-formatted streaming response with LCORE-enriched events. 
+ + Args: + stream: The streaming response from Llama Stack + user_input: User input to the response + api_params: ResponsesApiParams + user_id: User ID for quota retrieval + turn_summary: TurnSummary to populate during streaming + inline_rag_context: Inline RAG context to be used for the response + Yields: + SSE-formatted strings for streaming events, ending with [DONE] + """ + normalized_conv_id = normalize_conversation_id(api_params.conversation) + + logger.debug("Starting streaming response (Responses API) processing") + + latest_response_object: Optional[OpenAIResponseObject] = None + sequence_number = 0 + + async for chunk in stream: + event_type = getattr(chunk, "type", None) + logger.debug("Processing streaming chunk, type: %s", event_type) + + chunk_dict = chunk.model_dump(exclude_none=True) + + # Create own sequence number for chunks to maintain order + chunk_dict["sequence_number"] = sequence_number + sequence_number += 1 + + # Add conversation attribute to the response if chunk has it + if "response" in chunk_dict: + chunk_dict["response"]["conversation"] = normalized_conv_id + + # Intermediate response - no quota consumption and text yet + if event_type == "response.in_progress": + chunk_dict["response"]["available_quotas"] = {} + chunk_dict["response"]["output_text"] = "" + + # Handle completion, incomplete, and failed events - only quota handling here + if event_type in ( + "response.completed", + "response.incomplete", + "response.failed", + ): + latest_response_object = cast( + OpenAIResponseObject, cast(Any, chunk).response + ) + + # Extract and consume tokens if any were used + turn_summary.token_usage = extract_token_usage( + latest_response_object.usage, api_params.model + ) + consume_query_tokens( + user_id=user_id, + model_id=api_params.model, + token_usage=turn_summary.token_usage, + ) + + # Get available quotas after token consumption + available_quotas = get_available_quotas( + quota_limiters=configuration.quota_limiters, user_id=user_id + 
) + chunk_dict["response"]["available_quotas"] = available_quotas + turn_summary.llm_response = extract_text_from_response_items( + latest_response_object.output + ) + chunk_dict["response"]["output_text"] = turn_summary.llm_response + + data_json = json.dumps(chunk_dict) + yield f"event: {event_type or 'error'}\ndata: {data_json}\n\n" + + # Extract response metadata from final response object + if latest_response_object: + turn_summary.id = latest_response_object.id + vector_store_ids = extract_vector_store_ids_from_tools(api_params.tools) + tool_rag_docs = parse_referenced_documents( + latest_response_object, vector_store_ids, configuration.rag_id_mapping + ) + turn_summary.referenced_documents = deduplicate_referenced_documents( + inline_rag_context.referenced_documents + tool_rag_docs + ) + for item in latest_response_object.output: + tool_call, tool_result = build_tool_call_summary( + item, + turn_summary.rag_chunks, + vector_store_ids, + configuration.rag_id_mapping, + ) + if tool_call: + turn_summary.tool_calls.append(tool_call) + if tool_result: + turn_summary.tool_results.append(tool_result) + + turn_summary.rag_chunks.extend(inline_rag_context.rag_chunks) + + client = AsyncLlamaStackClientHolder().get_client() + # Explicitly append the turn to conversation if context passed by previous response + if api_params.store and api_params.previous_response_id and latest_response_object: + await append_turn_items_to_conversation( + client, api_params.conversation, user_input, latest_response_object.output + ) + + yield "data: [DONE]\n\n" + + +async def generate_response( + generator: AsyncIterator[str], + turn_summary: TurnSummary, + client: AsyncLlamaStackClient, + auth: AuthTuple, + input_text: str, + started_at: datetime, + api_params: ResponsesApiParams, + generate_topic_summary: bool, +) -> AsyncIterator[str]: + """Stream the response from the generator and persist conversation details. + + After streaming completes, conversation details are persisted. 
+ + Args: + generator: The SSE event generator + turn_summary: TurnSummary populated during streaming + client: The AsyncLlamaStackClient instance + auth: Authentication tuple + input_text: The extracted input text + started_at: Timestamp when the conversation started + api_params: ResponsesApiParams + generate_topic_summary: Whether to generate topic summary for new conversations + Yields: + SSE-formatted strings from the generator + """ + user_id, _, skip_userid_check, _ = auth + async for event in generator: + yield event + + # Get topic summary for new conversation + topic_summary = None + if generate_topic_summary: + logger.debug("Generating topic summary for new conversation") + topic_summary = await get_topic_summary(input_text, client, api_params.model) + + completed_at = datetime.now(UTC) + if api_params.store: + store_query_results( + user_id=user_id, + conversation_id=normalize_conversation_id(api_params.conversation), + model=api_params.model, + started_at=started_at.strftime("%Y-%m-%dT%H:%M:%SZ"), + completed_at=completed_at.strftime("%Y-%m-%dT%H:%M:%SZ"), + summary=turn_summary, + query=input_text, + attachments=[], + skip_userid_check=skip_userid_check, + topic_summary=topic_summary, + ) + + +async def handle_non_streaming_response( + client: AsyncLlamaStackClient, + request: ResponsesRequest, + auth: AuthTuple, + input_text: str, + started_at: datetime, + moderation_result: ShieldModerationResult, + inline_rag_context: RAGContext, +) -> ResponsesResponse: + """Handle non-streaming response from Responses API. 
+ + Args: + client: The AsyncLlamaStackClient instance + request: Request object + auth: Authentication tuple + input_text: The extracted input text + started_at: Timestamp when the conversation started + moderation_result: Result of shield moderation check + inline_rag_context: Inline RAG context to be used for the response + Returns: + ResponsesResponse with the completed response + """ + user_id, _, skip_userid_check, _ = auth + api_params = ResponsesApiParams.model_validate(request.model_dump()) + + # Fork: Get response object (blocked vs normal) + if moderation_result.decision == "blocked": + output_text = moderation_result.message + api_response = OpenAIResponseObject.model_construct( + id=moderation_result.moderation_id, + created_at=int(started_at.timestamp()), + status="completed", + output=[moderation_result.refusal_response], + usage=get_zero_usage(), + **request.echoed_params(), + ) + if api_params.store: + await append_turn_items_to_conversation( + client=client, + conversation_id=api_params.conversation, + user_input=request.input, + llm_output=[moderation_result.refusal_response], + ) + else: + try: + api_response = cast( + OpenAIResponseObject, + await client.responses.create( + **api_params.model_dump(exclude_none=True) + ), + ) + token_usage = extract_token_usage(api_response.usage, api_params.model) + logger.info("Consuming tokens") + consume_query_tokens( + user_id=user_id, + model_id=api_params.model, + token_usage=token_usage, + ) + output_text = extract_text_from_response_items(api_response.output) + # Explicitly append the turn to conversation if context passed by previous response + if api_params.store and api_params.previous_response_id: + await append_turn_items_to_conversation( + client, api_params.conversation, request.input, api_response.output + ) + + except RuntimeError as e: + if "context_length" in str(e).lower(): + error_response = PromptTooLongResponse(model=api_params.model) + raise HTTPException(**error_response.model_dump()) 
from e + raise e + except APIConnectionError as e: + error_response = ServiceUnavailableResponse( + backend_name="Llama Stack", + cause=str(e), + ) + raise HTTPException(**error_response.model_dump()) from e + except (LLSApiStatusError, OpenAIAPIStatusError) as e: + error_response = handle_known_apistatus_errors(e, api_params.model) + raise HTTPException(**error_response.model_dump()) from e + + # Get available quotas + logger.info("Getting available quotas") + available_quotas = get_available_quotas( + quota_limiters=configuration.quota_limiters, user_id=user_id + ) + # Get topic summary for new conversation + topic_summary = None + if request.generate_topic_summary: + logger.debug("Generating topic summary for new conversation") + topic_summary = await get_topic_summary(input_text, client, api_params.model) + + vector_store_ids = extract_vector_store_ids_from_tools(api_params.tools) + turn_summary = build_turn_summary( + api_response, + api_params.model, + vector_store_ids, + configuration.rag_id_mapping, + ) + turn_summary.referenced_documents = deduplicate_referenced_documents( + inline_rag_context.referenced_documents + turn_summary.referenced_documents + ) + turn_summary.rag_chunks.extend(inline_rag_context.rag_chunks) + completed_at = datetime.now(UTC) + if api_params.store: + store_query_results( + user_id=user_id, + conversation_id=normalize_conversation_id(api_params.conversation), + model=api_params.model, + started_at=started_at.strftime("%Y-%m-%dT%H:%M:%SZ"), + completed_at=completed_at.strftime("%Y-%m-%dT%H:%M:%SZ"), + summary=turn_summary, + query=input_text, + attachments=[], + skip_userid_check=skip_userid_check, + topic_summary=topic_summary, + ) + response = ResponsesResponse.model_validate( + { + **api_response.model_dump(exclude_none=True), + "available_quotas": available_quotas, + "conversation": normalize_conversation_id(api_params.conversation), + "completed_at": int(completed_at.timestamp()), + "output_text": output_text, + } + ) + return 
response diff --git a/src/app/endpoints/streaming_query.py b/src/app/endpoints/streaming_query.py index 99f0082c2..250a29b89 100644 --- a/src/app/endpoints/streaming_query.py +++ b/src/app/endpoints/streaming_query.py @@ -8,7 +8,7 @@ from fastapi import APIRouter, Depends, HTTPException, Request from fastapi.responses import StreamingResponse -from llama_stack_api.openai_responses import ( +from llama_stack_api import ( OpenAIResponseObject, OpenAIResponseObjectStream, OpenAIResponseObjectStreamResponseMcpCallArgumentsDone as MCPArgsDoneChunk, @@ -302,6 +302,7 @@ async def retrieve_response_generator( try: if context.moderation_result.decision == "blocked": turn_summary.llm_response = context.moderation_result.message + turn_summary.id = context.moderation_result.moderation_id await append_turn_items_to_conversation( context.client, responses_params.conversation, @@ -590,6 +591,7 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat turn_response: The streaming response from Llama Stack context: The response generator context turn_summary: TurnSummary to populate during streaming + Yields: SSE-formatted strings for tokens, tool calls, tool results, turn completion, and error events. 
diff --git a/src/app/routers.py b/src/app/routers.py index 78663e18f..97e3522f6 100644 --- a/src/app/routers.py +++ b/src/app/routers.py @@ -26,6 +26,7 @@ rlsapi_v1, # A2A (Agent-to-Agent) protocol support a2a, + responses, ) @@ -58,7 +59,7 @@ def include_routers(app: FastAPI) -> None: app.include_router(feedback.router, prefix="/v1") app.include_router(conversations_v1.router, prefix="/v1") app.include_router(conversations_v2.router, prefix="/v2") - + app.include_router(responses.router, prefix="/v1") # RHEL Lightspeed rlsapi v1 compatibility - stateless CLA (Command Line Assistant) endpoint app.include_router(rlsapi_v1.router, prefix="/v1") diff --git a/src/models/database/conversations.py b/src/models/database/conversations.py index b34c9eb53..baebf6aa9 100644 --- a/src/models/database/conversations.py +++ b/src/models/database/conversations.py @@ -31,6 +31,7 @@ class UserConversation(Base): # pylint: disable=too-few-public-methods DateTime(timezone=True), server_default=func.now(), # pylint: disable=not-callable ) + last_response_id: Mapped[str] = mapped_column(nullable=True) # The number of user messages in the conversation message_count: Mapped[int] = mapped_column(default=0) @@ -66,3 +67,7 @@ class UserTurn(Base): # pylint: disable=too-few-public-methods provider: Mapped[str] = mapped_column(nullable=False) model: Mapped[str] = mapped_column(nullable=False) + + # Llama Stack response ID for this turn (1:1); nullable for legacy turns without it. + # Indexed for fast lookup when resolving previous_response_id to conversation. 
+ response_id: Mapped[str] = mapped_column(nullable=True, index=True) diff --git a/src/models/requests.py b/src/models/requests.py index d65c0b49e..0e05e61d5 100644 --- a/src/models/requests.py +++ b/src/models/requests.py @@ -6,10 +6,11 @@ from llama_stack_api.openai_responses import ( OpenAIResponseInputToolChoice as ToolChoice, - OpenAIResponseInputToolChoiceMode as ToolChoiceMode, OpenAIResponseInputTool as InputTool, OpenAIResponsePrompt as Prompt, OpenAIResponseText as Text, + OpenAIResponseToolMCP as OutputToolMCP, + OpenAIResponseReasoning as Reasoning, ) from pydantic import BaseModel, Field, field_validator, model_validator @@ -20,6 +21,28 @@ logger = get_logger(__name__) +# Attribute names that are echoed back in the response. +_ECHOED_FIELDS = set( + { + "instructions", + "max_tool_calls", + "max_output_tokens", + "metadata", + "model", + "parallel_tool_calls", + "previous_response_id", + "prompt", + "reasoning", + "safety_identifier", + "temperature", + "top_p", + "truncation", + "text", + "tool_choice", + "store", + } +) + class Attachment(BaseModel): """Model representing an attachment that can be send from the UI as part of query. @@ -614,6 +637,7 @@ class ResponsesRequest(BaseModel): instructions: System instructions or guidelines provided to the model (acts as the system prompt). max_infer_iters: Maximum number of inference iterations the model can perform. + max_output_tokens: Maximum number of tokens allowed in the response. max_tool_calls: Maximum number of tool calls allowed in a single response. metadata: Custom metadata dictionary with key-value pairs for tracking or logging. parallel_tool_calls: Whether the model can make multiple tool calls in parallel. @@ -621,17 +645,21 @@ class ResponsesRequest(BaseModel): conversation. Mutually exclusive with conversation. prompt: Prompt object containing a template with variables for dynamic substitution. + reasoning: Reasoning configuration for the response. 
+ safety_identifier: Safety identifier for the response. store: Whether to store the response in conversation history. Defaults to True. stream: Whether to stream the response as it is generated. Defaults to False. temperature: Sampling temperature controlling randomness (typically 0.0–2.0). text: Text response configuration specifying output format constraints (JSON schema, JSON object, or plain text). tool_choice: Tool selection strategy ("auto", "required", "none", or specific - tool configuration). Defaults to "auto". + tool configuration). tools: List of tools available to the model (file search, web search, function calls, MCP tools). Defaults to all tools available to the model. generate_topic_summary: LCORE-specific flag indicating whether to generate a topic summary for new conversations. Defaults to True. + shield_ids: LCORE-specific list of safety shield IDs to apply. If None, all + configured shields are used. solr: LCORE-specific Solr vector_io provider query parameters (e.g. filter queries). Optional. 
""" @@ -642,18 +670,23 @@ class ResponsesRequest(BaseModel): include: Optional[list[IncludeParameter]] = None instructions: Optional[str] = None max_infer_iters: Optional[int] = None + max_output_tokens: Optional[int] = None max_tool_calls: Optional[int] = None metadata: Optional[dict[str, str]] = None parallel_tool_calls: Optional[bool] = None previous_response_id: Optional[str] = None prompt: Optional[Prompt] = None + reasoning: Optional[Reasoning] = None + safety_identifier: Optional[str] = None store: bool = True stream: bool = False temperature: Optional[float] = None text: Optional[Text] = None - tool_choice: Optional[ToolChoice] = ToolChoiceMode.auto + tool_choice: Optional[ToolChoice] = None tools: Optional[list[InputTool]] = None + # LCORE-specific attributes generate_topic_summary: Optional[bool] = True + shield_ids: Optional[list[str]] = None solr: Optional[dict[str, Any]] = None model_config = { @@ -661,40 +694,11 @@ class ResponsesRequest(BaseModel): "json_schema_extra": { "examples": [ { - "input": "What is Kubernetes?", + "input": "Hello World!", "model": "openai/gpt-4o-mini", - "conversation": "conv_0d21ba731f21f798dc9680125d5d6f493e4a7ab79f25670e", "instructions": "You are a helpful assistant", - "include": ["message.output_text.logprobs"], - "max_tool_calls": 5, - "metadata": {"source": "api"}, - "parallel_tool_calls": True, - "prompt": { - "id": "prompt_123", - "variables": { - "topic": {"type": "input_text", "text": "Kubernetes"} - }, - "version": "1.0", - }, "store": True, "stream": False, - "temperature": 0.7, - "text": { - "format": { - "type": "json_schema", - "schema": { - "type": "object", - "properties": {"answer": {"type": "string"}}, - }, - } - }, - "tool_choice": "auto", - "tools": [ - { - "type": "file_search", - "vector_store_ids": ["vs_123"], - } - ], "generate_topic_summary": True, } ] @@ -729,3 +733,32 @@ def check_suid(cls, value: Optional[str]) -> Optional[str]: if value and not suid.check_suid(value): raise 
ValueError(f"Improper conversation ID '{value}'") return value + + @field_validator("previous_response_id") + @classmethod + def check_previous_response_id(cls, value: Optional[str]) -> Optional[str]: + """Validate that previous_response_id does not start with 'modr'.""" + if value is not None and value.startswith("modr"): + raise ValueError("You cannot provide context by moderation response.") + return value + + def echoed_params(self) -> dict[str, Any]: + """Dump attributes that are echoed back in the response. + + The tools attribute is converted from input tool to output tool model. + + Returns: + Dict of echoed attributes. + """ + data = self.model_dump(include=_ECHOED_FIELDS) + if self.tools is not None: + data["tools"] = [ + ( + OutputToolMCP.model_validate(t.model_dump()).model_dump() + if t.type == "mcp" + else t.model_dump() + ) + for t in self.tools + ] + + return data diff --git a/src/models/responses.py b/src/models/responses.py index 9e87ad2cf..fd5ef955a 100644 --- a/src/models/responses.py +++ b/src/models/responses.py @@ -2,7 +2,7 @@ """Models for REST API responses.""" -from typing import Any, ClassVar, Literal, Optional +from typing import Any, ClassVar, Literal, Optional, cast from fastapi import status from llama_stack_api.openai_responses import ( @@ -13,6 +13,7 @@ OpenAIResponseText as Text, OpenAIResponseTool as OutputTool, OpenAIResponseUsage as Usage, + OpenAIResponseReasoning as Reasoning, ) from pydantic import BaseModel, Field from pydantic_core import SchemaError @@ -1412,31 +1413,35 @@ class ResponsesResponse(AbstractSuccessfulResponse): """Model representing a response from the Responses API following LCORE specification. Attributes: - id: Unique identifier for this response. - object: Object type identifier, always "response". created_at: Unix timestamp when the response was created. - status: Current status of the response (e.g., "completed", "blocked", - "in_progress"). 
completed_at: Unix timestamp when the response was completed, if applicable. + error: Error details if the response failed or was blocked. + id: Unique identifier for this response. model: Model identifier in "provider/model" format used for generation. + object: Object type identifier, always "response". output: List of structured output items containing messages, tool calls, and other content. This is the primary response content. - error: Error details if the response failed or was blocked. - instructions: System instructions or guidelines provided to the model. - max_tool_calls: Maximum number of tool calls allowed in a single response. - metadata: Additional metadata dictionary with custom key-value pairs. parallel_tool_calls: Whether the model can make multiple tool calls in parallel. previous_response_id: Identifier of the previous response in a multi-turn conversation. prompt: The input prompt object that was sent to the model. + status: Current status of the response (e.g., "completed", "blocked", + "in_progress"). temperature: Temperature parameter used for generation (controls randomness). text: Text response configuration object used for OpenAI responses. - tool_choice: Tool selection strategy used (e.g., "auto", "required", "none"). - tools: List of tools available to the model during generation. top_p: Top-p sampling parameter used for generation. + tools: List of tools available to the model during generation. + tool_choice: Tool selection strategy used (e.g., "auto", "required", "none"). truncation: Strategy used for handling content that exceeds context limits. usage: Token usage statistics including input_tokens, output_tokens, and total_tokens. + instructions: System instructions or guidelines provided to the model. + max_tool_calls: Maximum number of tool calls allowed in a single response. + reasoning: Reasoning configuration (effort level) used for the response. + max_output_tokens: Upper bound for tokens generated in the response. 
+ safety_identifier: Safety/guardrail identifier applied to the request. + metadata: Additional metadata dictionary with custom key-value pairs. + store: Whether the response was stored. conversation: Conversation ID linking this response to a conversation thread (LCORE-specific). available_quotas: Remaining token quotas for the user (LCORE-specific). @@ -1444,27 +1449,32 @@ class ResponsesResponse(AbstractSuccessfulResponse): output array. """ - id: str - object: Literal["response"] = "response" created_at: int - status: str completed_at: Optional[int] = None + error: Optional[Error] = None + id: str model: str + object: Literal["response"] = "response" output: list[Output] - error: Optional[Error] = None - instructions: Optional[str] = None - max_tool_calls: Optional[int] = None - metadata: Optional[dict[str, str]] = None parallel_tool_calls: bool = True previous_response_id: Optional[str] = None prompt: Optional[Prompt] = None + status: str temperature: Optional[float] = None text: Optional[Text] = None - tool_choice: Optional[ToolChoice] = None - tools: Optional[list[OutputTool]] = None top_p: Optional[float] = None + tools: Optional[list[OutputTool]] = None + tool_choice: Optional[ToolChoice] = None truncation: Optional[str] = None - usage: Usage + usage: Optional[Usage] = None + instructions: Optional[str] = None + max_tool_calls: Optional[int] = None + reasoning: Optional[Reasoning] = None + max_output_tokens: Optional[int] = None + safety_identifier: Optional[str] = None + metadata: Optional[dict[str, str]] = None + store: Optional[bool] = None + # LCORE-specific attributes conversation: Optional[str] = None available_quotas: dict[str, int] output_text: str @@ -1473,12 +1483,11 @@ class ResponsesResponse(AbstractSuccessfulResponse): "json_schema_extra": { "examples": [ { - "id": "resp_abc123", - "object": "response", "created_at": 1704067200, "completed_at": 1704067250, + "id": "resp_abc123", "model": "openai/gpt-4-turbo", - "status": "completed", + 
"object": "response", "output": [ { "type": "message", @@ -1494,21 +1503,89 @@ class ResponsesResponse(AbstractSuccessfulResponse): ], } ], + "parallel_tool_calls": True, + "status": "completed", + "temperature": 0.7, + "text": {"format": {"type": "text"}}, "usage": { "input_tokens": 100, "output_tokens": 50, "total_tokens": 150, }, "instructions": "You are a helpful assistant", - "temperature": 0.7, - "conversation": "conv_0d21ba731f21f798dc9680125d5d6f493e4a7ab79f25670e", + "store": True, + "conversation": "0d21ba731f21f798dc9680125d5d6f493e4a7ab79f25670e", "available_quotas": {"daily": 1000, "monthly": 50000}, - "output_text": "Kubernetes is an open-source container orchestration system...", + "output_text": ( + "Kubernetes is an open-source container " + "orchestration system..." + ), } - ] + ], + "sse_example": ( + "event: response.created\n" + 'data: {"type":"response.created","sequence_number":0,' + '"response":{"id":"resp_abc","created_at":1704067200,' + '"status":"in_progress","output":[],"conversation":' + '"0d21ba731f21f798dc9680125d5d6f49","available_quotas":{},' + '"output_text":""}}\n\n' + "event: response.output_item.added\n" + 'data: {"response_id":"resp_abc","item":{"type":"message",' + '"role":"assistant","content":[{"type":"output_text",' + '"text":"Hello! How can I help?"}]},"output_index":0,' + '"sequence_number":1}\n\n' + "event: response.output_item.done\n" + 'data: {"response_id":"resp_abc","item":{"type":"message",' + '"role":"assistant","content":[{"type":"output_text",' + '"text":"Hello! How can I help?"}]},"output_index":0,' + '"sequence_number":2}\n\n' + "event: response.completed\n" + 'data: {"type":"response.completed","sequence_number":3,' + '"response":{"id":"resp_abc","created_at":1704067200,' + '"completed_at":1704067250,"status":"completed",' + '"output":[{"type":"message","role":"assistant",' + '"content":[{"type":"output_text","text":"Hello! 
How can I help?"}]}],' + '"usage":{"input_tokens":10,"output_tokens":6,"total_tokens":16},' + '"conversation":"0d21ba731f21f798dc9680125d5d6f49",' + '"available_quotas":{"daily":1000,"monthly":50000},' + '"output_text":"Hello! How can I help?"}}\n\n' + "data: [DONE]\n\n" + ), } } + @classmethod + def openapi_response(cls) -> dict[str, Any]: + """ + Build OpenAPI response dict with application/json and text/event-stream. + + Uses the single JSON example from the model schema and adds + text/event-stream example from json_schema_extra.sse_example. + """ + schema = cls.model_json_schema() + model_examples = schema.get("examples", []) + json_example = model_examples[0] if model_examples else None + + schema_extra = ( + cast(dict[str, Any], dict(cls.model_config)).get("json_schema_extra") or {} + ) + sse_example = schema_extra.get("sse_example", "") + + content: dict[str, Any] = { + "application/json": {"example": json_example} if json_example else {}, + "text/event-stream": { + "schema": {"type": "string"}, + "description": "SSE stream of events", + "examples": {"stream": {"value": sse_example}} if sse_example else {}, + }, + } + + return { + "description": SUCCESSFUL_RESPONSE_DESCRIPTION, + "model": cls, + "content": content, + } + class DetailModel(BaseModel): """Nested detail model for error responses.""" diff --git a/src/utils/conversations.py b/src/utils/conversations.py index aa698aabe..ce882402c 100644 --- a/src/utils/conversations.py +++ b/src/utils/conversations.py @@ -476,3 +476,39 @@ async def append_turn_items_to_conversation( except APIStatusError as e: error_response = InternalServerErrorResponse.generic() raise HTTPException(**error_response.model_dump()) from e + + +async def get_all_conversation_items( + client: AsyncLlamaStackClient, + conversation_id_llama_stack: str, +) -> list[ItemListResponse]: + """Fetch all items for a conversation (Conversations API), paginating as needed. + + Args: + client: Llama Stack client. 
+ conversation_id_llama_stack: Conversation ID in Llama Stack format. + + Returns: + List of all items in the conversation, oldest first. + """ + try: + paginator = client.conversations.items.list( + conversation_id=conversation_id_llama_stack, + order="asc", + ) + first_page = await paginator + items: list[ItemListResponse] = list(first_page.data or []) + page = first_page + while page.has_next_page(): + page = await page.get_next_page() + items.extend(page.data or []) + return items + except APIConnectionError as e: + error_response = ServiceUnavailableResponse( + backend_name="Llama Stack", + cause=str(e), + ) + raise HTTPException(**error_response.model_dump()) from e + except APIStatusError as e: + error_response = InternalServerErrorResponse.generic() + raise HTTPException(**error_response.model_dump()) from e diff --git a/src/utils/endpoints.py b/src/utils/endpoints.py index 332002eeb..a8ffb9a64 100644 --- a/src/utils/endpoints.py +++ b/src/utils/endpoints.py @@ -6,17 +6,20 @@ from pydantic import AnyUrl, ValidationError from sqlalchemy.exc import SQLAlchemyError +from client import AsyncLlamaStackClientHolder import constants from app.database import get_session from configuration import AppConfig, LogicError from log import get_logger -from models.database.conversations import UserConversation +from models.database.conversations import UserConversation, UserTurn from models.responses import ( ForbiddenResponse, InternalServerErrorResponse, NotFoundResponse, ) -from utils.types import ReferencedDocument, TurnSummary +from utils.responses import create_new_conversation +from utils.suid import normalize_conversation_id, to_llama_stack_conversation_id +from utils.types import ReferencedDocument, ResponsesConversationContext, TurnSummary logger = get_logger(__name__) @@ -59,6 +62,35 @@ def retrieve_conversation(conversation_id: str) -> Optional[UserConversation]: return session.query(UserConversation).filter_by(id=conversation_id).first() +def 
retrieve_conversation_turns(conversation_id: str) -> list[UserTurn]: + """Retrieve all turns for a conversation from the database, ordered by turn number. + + Args: + conversation_id (str): The normalized conversation ID. + + Returns: + list[UserTurn]: The list of turns for the conversation, ordered by turn_number. + + Raises: + HTTPException: 500 if a database error occurs. + """ + try: + with get_session() as session: + return ( + session.query(UserTurn) + .filter_by(conversation_id=conversation_id) + .order_by(UserTurn.turn_number) + .all() + ) + except SQLAlchemyError as e: + logger.error( + "Database error occurred while retrieving conversation turns for %s.", + conversation_id, + ) + response = InternalServerErrorResponse.database_error() + raise HTTPException(**response.model_dump()) from e + + def validate_conversation_ownership( user_id: str, conversation_id: str, others_allowed: bool = False ) -> Optional[UserConversation]: @@ -179,6 +211,145 @@ def validate_and_retrieve_conversation( return user_conversation +async def resolve_response_context( + user_id: str, + others_allowed: bool, + conversation_id: Optional[str], + previous_response_id: Optional[str], + generate_topic_summary: Optional[bool], +) -> ResponsesConversationContext: + """Resolve conversation context for the responses endpoint without mutating the request. + + Parameters: + user_id: ID of the user making the request. + others_allowed: Whether the user can access conversations owned by others. + conversation_id: Conversation ID from the request, if any. + previous_response_id: Previous response ID from the request, if any. + generate_topic_summary: Resolved value for request.generate_topic_summary. + + Returns: + ResponsesConversationContext: Contains conversation, user_conversation, and + resolved generate_topic_summary to apply to the request. 
+ + Raises: + HTTPException: 404 if previous_response_id is set but the turn does not exist; + other HTTP exceptions from validate_and_retrieve_conversation. + """ + client = AsyncLlamaStackClientHolder().get_client() + # Context for the LLM passed by conversation + if conversation_id: + logger.info("Conversation ID specified in request: %s", conversation_id) + user_conversation = validate_and_retrieve_conversation( + normalized_conv_id=normalize_conversation_id(conversation_id), + user_id=user_id, + others_allowed=others_allowed, + ) + return ResponsesConversationContext( + conversation=to_llama_stack_conversation_id(user_conversation.id), + user_conversation=user_conversation, + generate_topic_summary=False, + ) + + # Context for the LLM passed by previous response id + if previous_response_id: + if not check_turn_existence(previous_response_id): + error_response = NotFoundResponse( + resource="response", resource_id=previous_response_id + ) + raise HTTPException(**error_response.model_dump()) + prev_user_turn = retrieve_turn_by_response_id(previous_response_id) + user_conversation = validate_and_retrieve_conversation( + normalized_conv_id=prev_user_turn.conversation_id, + user_id=user_id, + others_allowed=others_allowed, + ) + if ( + user_conversation.last_response_id is not None + and user_conversation.last_response_id != previous_response_id + ): + new_conv_id = await create_new_conversation(client) + want_topic_summary = ( + generate_topic_summary if generate_topic_summary is not None else True + ) + return ResponsesConversationContext( + conversation=new_conv_id, + user_conversation=user_conversation, + generate_topic_summary=want_topic_summary, + ) + return ResponsesConversationContext( + conversation=to_llama_stack_conversation_id(user_conversation.id), + user_conversation=user_conversation, + generate_topic_summary=False, + ) + + # No context passed, create new conversation + new_conv_id = await create_new_conversation(client) + want_topic_summary = ( + 
generate_topic_summary if generate_topic_summary is not None else True + ) + return ResponsesConversationContext( + conversation=new_conv_id, + user_conversation=None, + generate_topic_summary=want_topic_summary, + ) + + +def retrieve_turn_by_response_id(response_id: str) -> UserTurn: + """Retrieve a response's turn from the database by response ID. + + Looks up the turn that has this response_id to get its conversation. + Used for fork/previous_response_id resolution. + + Args: + response_id: The ID of the response (stored on UserTurn.response_id). + + Returns: + The UserTurn row for that response (has conversation_id). + + Raises: + HTTPException: 404 if no turn has this response_id; 500 on database error. + """ + try: + with get_session() as session: + turn = session.query(UserTurn).filter_by(response_id=response_id).first() + if turn is None: + logger.error("Response %s not found in database.", response_id) + response = NotFoundResponse( + resource="response", resource_id=response_id + ) + raise HTTPException(**response.model_dump()) + return turn + except SQLAlchemyError as e: + logger.exception( + "Database error while retrieving turn by response_id %s", response_id + ) + response = InternalServerErrorResponse.database_error() + raise HTTPException(**response.model_dump()) from e + + +def check_turn_existence(response_id: str) -> bool: + """Check if a turn exists for a given response ID. + + Args: + response_id: The ID of the response to check. + + Returns: + bool: True if the turn exists, False otherwise. 
+ """ + try: + with get_session() as session: + turn = session.query(UserTurn).filter_by(response_id=response_id).first() + return turn is not None + except SQLAlchemyError as e: + logger.exception( + "Database error while checking turn existence for response_id %s", + response_id, + ) + raise HTTPException( + **InternalServerErrorResponse.database_error().model_dump() + ) from e + + def check_configuration_loaded(config: AppConfig) -> None: """ Raise an error if the configuration is not loaded. diff --git a/src/utils/query.py b/src/utils/query.py index 8d96b5eb6..5bd96dbf7 100644 --- a/src/utils/query.py +++ b/src/utils/query.py @@ -44,6 +44,7 @@ create_transcript_metadata, store_transcript, ) +from utils.suid import is_moderation_id from utils.types import TurnSummary logger = get_logger(__name__) @@ -290,6 +291,7 @@ def store_query_results( # pylint: disable=too-many-arguments model_id=model_id, provider_id=provider_id, topic_summary=topic_summary, + response_id=summary.id, ) except SQLAlchemyError as e: logger.exception("Error persisting conversation details.") @@ -377,6 +379,7 @@ def persist_user_conversation_details( model_id: str, provider_id: str, topic_summary: Optional[str], + response_id: str, ) -> None: """Associate conversation to user in the database. 
@@ -388,6 +391,7 @@ def persist_user_conversation_details( model_id: The model identifier provider_id: The provider identifier topic_summary: Optional topic summary for the conversation + response_id: Response ID for the conversation """ # Normalize the conversation ID (strip 'conv_' prefix if present) normalized_id = normalize_conversation_id(conversation_id) @@ -402,7 +406,6 @@ def persist_user_conversation_details( existing_conversation = ( session.query(UserConversation).filter_by(id=normalized_id).first() ) - if not existing_conversation: conversation = UserConversation( id=normalized_id, @@ -411,6 +414,10 @@ def persist_user_conversation_details( last_used_provider=provider_id, topic_summary=topic_summary or "", message_count=1, + # For new conversation either current response or None if moderation-blocked + last_response_id=( + response_id if not is_moderation_id(response_id) else None + ), ) session.add(conversation) logger.debug( @@ -427,6 +434,9 @@ def persist_user_conversation_details( user_id, existing_conversation.message_count, ) + # Update last response id only if not moderation-blocked + if not is_moderation_id(response_id): + existing_conversation.last_response_id = response_id max_turn_number = ( session.query(func.max(UserTurn.turn_number)) @@ -441,6 +451,7 @@ def persist_user_conversation_details( completed_at=datetime.fromisoformat(completed_at), provider=provider_id, model=model_id, + response_id=response_id, ) session.add(turn) logger.debug( diff --git a/src/utils/responses.py b/src/utils/responses.py index 1b0d12464..04f4c96c1 100644 --- a/src/utils/responses.py +++ b/src/utils/responses.py @@ -7,11 +7,12 @@ from typing import Any, Optional, cast from fastapi import HTTPException +from llama_stack_api import OpenAIResponseObject from llama_stack_api.openai_responses import ( OpenAIResponseContentPartRefusal as ContentPartRefusal, OpenAIResponseInputMessageContent as InputMessageContent, + OpenAIResponseInputMessageContentFile as 
InputFilePart, OpenAIResponseInputMessageContentText as InputTextPart, - OpenAIResponseInputTool as InputTool, OpenAIResponseInputToolFileSearch as InputToolFileSearch, OpenAIResponseInputToolMCP as InputToolMCP, OpenAIResponseMCPApprovalRequest as MCPApprovalRequest, @@ -27,9 +28,15 @@ OpenAIResponseOutputMessageMCPListTools as MCPListTools, OpenAIResponseOutputMessageWebSearchToolCall as WebSearchCall, OpenAIResponseUsage as ResponseUsage, + OpenAIResponseInputTool as InputTool, + OpenAIResponseUsageInputTokensDetails as UsageInputTokensDetails, + OpenAIResponseUsageOutputTokensDetails as UsageOutputTokensDetails, + OpenAIResponseInputToolChoiceMode as ToolChoiceMode, + OpenAIResponseInputToolChoice as ToolChoice, ) from llama_stack_client import APIConnectionError, APIStatusError, AsyncLlamaStackClient +from client import AsyncLlamaStackClientHolder import constants import metrics from configuration import configuration @@ -55,6 +62,7 @@ from utils.types import ( RAGChunk, ReferencedDocument, + ResponseInput, ResponseItem, ResponsesApiParams, ToolCallSummary, @@ -377,6 +385,29 @@ def resolve_vector_store_ids( return [rag_id_to_vector_db_id.get(vs_id, vs_id) for vs_id in vector_store_ids] +def translate_tools_vector_store_ids( + tools: list[InputTool], byok_rags: list[ByokRag] +) -> list[InputTool]: + """Translate user-facing vector_store_ids to llama-stack IDs in each file_search tool. + + Parameters: + tools: List of request tools (may contain file_search with user-facing IDs). + byok_rags: BYOK RAG configuration for ID resolution. + + Returns: + New list of tools with file_search vector_store_ids translated; other tools + unchanged. 
+ """ + result: list[InputTool] = [] + for tool in tools: + if tool.type == "file_search": + resolved_ids = resolve_vector_store_ids(tool.vector_store_ids, byok_rags) + result.append(tool.model_copy(update={"vector_store_ids": resolved_ids})) + else: + result.append(tool) + return result + + def get_rag_tools(vector_store_ids: list[str]) -> Optional[list[InputToolFileSearch]]: """Convert vector store IDs to tools format for Responses API. @@ -1045,7 +1076,7 @@ async def select_model_for_responses( def build_turn_summary( - response: Optional[ResponseObject], + response: Optional[OpenAIResponseObject], model: str, vector_store_ids: Optional[list[str]] = None, rag_id_mapping: Optional[dict[str, str]] = None, @@ -1067,6 +1098,7 @@ def build_turn_summary( if response is None or response.output is None: return summary + summary.id = response.id # Extract text from output items summary.llm_response = extract_text_from_response_items(response.output) @@ -1118,15 +1150,12 @@ def extract_text_from_response_item(response_item: ResponseItem) -> str: response_item: A single item from request input or response output. Returns: - Extracted text content, or empty string if not a message or role is user. + Extracted text content, or empty string if not a message. """ if response_item.type != "message": return "" message_item = cast(ResponseMessage, response_item) - if message_item.role == "user": - return "" - return _extract_text_from_content(message_item.content) @@ -1178,3 +1207,130 @@ def deduplicate_referenced_documents( seen.add(key) out.append(d) return out + + +async def create_new_conversation( + client: AsyncLlamaStackClient, +) -> str: + """Create a new conversation via the Llama Stack Conversations API. + + Args: + client: The Llama Stack client used to create the conversation. + + Returns: + The new conversation's ID (string), as returned by the API. 
+ """ + try: + conversation = await client.conversations.create(metadata={}) + return conversation.id + except APIConnectionError as e: + error_response = ServiceUnavailableResponse( + backend_name="Llama Stack", + cause=str(e), + ) + raise HTTPException(**error_response.model_dump()) from e + except APIStatusError as e: + error_response = InternalServerErrorResponse.generic() + raise HTTPException(**error_response.model_dump()) from e + + +def get_zero_usage() -> ResponseUsage: + """Create a Usage object with zero values for input and output tokens. + + Returns: + Usage object with zero values for input and output tokens. + """ + return ResponseUsage( + input_tokens=0, + input_tokens_details=UsageInputTokensDetails(cached_tokens=0), + output_tokens=0, + output_tokens_details=UsageOutputTokensDetails(reasoning_tokens=0), + total_tokens=0, + ) + + +def extract_attachments_text(response_input: ResponseInput) -> str: + """Extract file_data from input_file parts inside message content. + + Args: + response_input: Response input (string or list of response items). + + Returns: + All present file_data values joined by double newline. + """ + if isinstance(response_input, str): + return "" + file_data_parts: list[str] = [] + for item in response_input: + if item.type != "message": + continue + message = cast(ResponseMessage, item) + content = message.content + if isinstance(content, str): + continue + for part in content: + if part.type == "input_file": + file_part = cast(InputFilePart, part) + if file_part.file_data: + file_data_parts.append(file_part.file_data) + return "\n\n".join(file_data_parts) + + +async def resolve_tool_choice( + tools: Optional[list[InputTool]], + tool_choice: Optional[ToolChoice], + token: str, + mcp_headers: Optional[McpHeaders] = None, + request_headers: Optional[Mapping[str, str]] = None, +) -> tuple[Optional[list[InputTool]], Optional[ToolChoice], Optional[list[str]]]: + """Resolve tools and tool_choice for the Responses API. 
+ + If the request includes tools, uses them as-is and derives vector_store_ids + from tool configs; otherwise loads tools via prepare_tools (using all + configured vector stores) and honors tool_choice "none" via the no_tools + flag. When no tools end up configured, tool_choice is cleared to None. + + Args: + tools: Tools from the request, or None to use LCORE-configured tools. + tool_choice: Requested tool choice (e.g. auto, required, none) or None. + token: User token for MCP/auth. + mcp_headers: Optional MCP headers to propagate. + request_headers: Optional request headers for tool resolution. + + Returns: + A tuple of (prepared_tools, prepared_tool_choice, vector_store_ids): + prepared_tools is the list of tools to use, or None if none configured; + prepared_tool_choice is the resolved tool choice, or None when there + are no tools; vector_store_ids is extracted from tools (in user-facing format) + when provided, otherwise None. + """ + prepared_tools: Optional[list[InputTool]] = None + client = AsyncLlamaStackClientHolder().get_client() + if tools: # explicitly specified in request + # Per-request override of vector stores (user-facing rag_ids) + vector_store_ids = extract_vector_store_ids_from_tools(tools) + # Translate user-facing rag_ids to llama-stack vector_store_ids in each file_search tool + byok_rags = configuration.configuration.byok_rag + prepared_tools = translate_tools_vector_store_ids(tools, byok_rags) + prepared_tool_choice = tool_choice or ToolChoiceMode.auto + else: + # Vector stores were not overwritten in request, use all configured vector stores + vector_store_ids = None + # Get all tools configured in LCORE (returns None or non-empty list) + no_tools = ( + isinstance(tool_choice, ToolChoiceMode) + and tool_choice == ToolChoiceMode.none + ) + # Vector stores are prepared in llama-stack format + prepared_tools = await prepare_tools( + client=client, + vector_store_ids=vector_store_ids, # allow all configured vector stores + 
no_tools=no_tools, + token=token, + mcp_headers=mcp_headers, + request_headers=request_headers, + ) + # If there are no tools, tool_choice cannot be set at all - LLS implicit behavior + prepared_tool_choice = tool_choice if prepared_tools else None + + return prepared_tools, prepared_tool_choice, vector_store_ids diff --git a/src/utils/shields.py b/src/utils/shields.py index a225cfd6c..19bd0c214 100644 --- a/src/utils/shields.py +++ b/src/utils/shields.py @@ -230,7 +230,6 @@ def create_refusal_response(refusal_message: str) -> OpenAIResponseMessage: OpenAIResponseMessage with refusal message. """ return OpenAIResponseMessage( - type="message", role="assistant", content=refusal_message, ) diff --git a/src/utils/suid.py b/src/utils/suid.py index fd3129401..f05fbe701 100644 --- a/src/utils/suid.py +++ b/src/utils/suid.py @@ -103,3 +103,11 @@ def to_llama_stack_conversation_id(conversation_id: str) -> str: if not conversation_id.startswith("conv_"): return f"conv_{conversation_id}" return conversation_id + + +def is_moderation_id(suid: str) -> bool: + """Check if given string is a moderation ID. + + Returns True if the string starts with 'modr'. 
+ """ + return suid.startswith("modr") diff --git a/src/utils/types.py b/src/utils/types.py index 52f3566c9..1ccc77f43 100644 --- a/src/utils/types.py +++ b/src/utils/types.py @@ -17,14 +17,16 @@ OpenAIResponseOutputMessageWebSearchToolCall as WebSearchToolCall, OpenAIResponsePrompt as Prompt, OpenAIResponseText as Text, + OpenAIResponseReasoning as Reasoning, ) from llama_stack_client.lib.agents.tool_parser import ToolParser from llama_stack_client.lib.agents.types import ( CompletionMessage as AgentCompletionMessage, ToolCall as AgentToolCall, ) -from pydantic import AnyUrl, BaseModel, Field +from pydantic import AnyUrl, BaseModel, ConfigDict, Field +from models.database.conversations import UserConversation from utils.token_counter import TokenCounter @@ -117,6 +119,31 @@ class ShieldModerationPassed(BaseModel): decision: Literal["passed"] = "passed" +class ResponsesConversationContext(BaseModel): + """Result of resolving conversation context for the responses endpoint. + + Holds the conversation ID to use for the LLM, the optional user conversation + record, and the resolved generate_topic_summary flag. Caller assigns these + to the request in outer scope instead of mutating the request inside the + resolver. + + Attributes: + conversation: Conversation ID in llama-stack format to use for the request. + user_conversation: Resolved user conversation record, or None for new ones. + generate_topic_summary: Resolved value for request.generate_topic_summary. 
+ """ + + conversation: str = Field(description="Conversation ID in llama-stack format") + user_conversation: Optional[UserConversation] = Field( + default=None, + description="Resolved user conversation record, or None for new conversations", + ) + generate_topic_summary: bool = Field( + description="Resolved value for request.generate_topic_summary", + ) + model_config = ConfigDict(arbitrary_types_allowed=True) + + class ShieldModerationBlocked(BaseModel): """Shield moderation blocked the content; refusal details are present.""" @@ -177,6 +204,10 @@ class ResponsesApiParams(BaseModel): default=None, description="Maximum number of inference iterations", ) + max_output_tokens: Optional[int] = Field( + default=None, + description="Maximum number of tokens allowed in the response", + ) max_tool_calls: Optional[int] = Field( default=None, description="Maximum tool calls allowed in a single response", @@ -197,6 +228,10 @@ class ResponsesApiParams(BaseModel): default=None, description="Prompt template with variables for dynamic substitution", ) + reasoning: Optional[Reasoning] = Field( + default=None, + description="Reasoning configuration for the response", + ) store: bool = Field(description="Whether to store the response") stream: bool = Field(description="Whether to stream the response") temperature: Optional[float] = Field( @@ -230,6 +265,10 @@ def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]: MCP servers. See LCORE-1414 / GitHub issue #1269. 
""" result = super().model_dump(*args, **kwargs) + # Only one context option is allowed, previous_response_id has priority + # Turn is added to conversation manually if previous_response_id is used + if self.previous_response_id: + result.pop("conversation", None) dumped_tools = result.get("tools") if not self.tools or not isinstance(dumped_tools, list): return result @@ -327,6 +366,7 @@ class RAGContext(BaseModel): class TurnSummary(BaseModel): """Summary of a turn in llama stack.""" + id: str = Field(default="", description="ID of the response") llm_response: str = "" tool_calls: list[ToolCallSummary] = Field(default_factory=list) tool_results: list[ToolResultSummary] = Field(default_factory=list) diff --git a/src/utils/vector_search.py b/src/utils/vector_search.py index b24b3214a..4a6b58f49 100644 --- a/src/utils/vector_search.py +++ b/src/utils/vector_search.py @@ -6,9 +6,12 @@ import asyncio import traceback -from typing import Any, Optional +from typing import Any, Optional, cast from urllib.parse import urljoin +from llama_stack_api.openai_responses import ( + OpenAIResponseMessage as ResponseMessage, +) from llama_stack_client import AsyncLlamaStackClient from pydantic import AnyUrl @@ -17,7 +20,7 @@ from log import get_logger from models.responses import ReferencedDocument from utils.responses import resolve_vector_store_ids -from utils.types import RAGChunk, RAGContext +from utils.types import RAGChunk, RAGContext, ResponseInput logger = get_logger(__name__) @@ -631,3 +634,39 @@ def _convert_solr_chunks_to_rag_format( ) return rag_chunks + + +def append_inline_rag_context_to_responses_input( + input_value: ResponseInput, + inline_rag_context_text: str, +) -> ResponseInput: + """Append inline RAG context to Responses API input. + + If input is str, appends the context text. + If input is a sequence of items, appends the context to the text of the first user message. + If there is no user message, returns the input unchanged. 
+ + Parameters: + input_value: The request input (string or list of ResponseItem). + inline_rag_context_text: RAG context string to inject. + + Returns: + The same type as input_value, with context merged in. + """ + if not inline_rag_context_text: + return input_value + if isinstance(input_value, str): + return input_value + "\n\n" + inline_rag_context_text + for item in input_value: + if item.type != "message" or item.role != "user": + continue + message = cast(ResponseMessage, item) + content = message.content + if isinstance(content, str): + message.content = content + "\n\n" + inline_rag_context_text + return input_value + for part in content: + if part.type == "input_text": + part.text = part.text + "\n\n" + inline_rag_context_text + return input_value + return input_value diff --git a/tests/e2e/features/responses.feature b/tests/e2e/features/responses.feature new file mode 100644 index 000000000..e1e0ccd61 --- /dev/null +++ b/tests/e2e/features/responses.feature @@ -0,0 +1,24 @@ +@Authorized +Feature: Responses endpoint API tests + + Background: + Given The service is started locally + And REST API service prefix is /v1 + + Scenario: Check if responses endpoint returns 200 for minimal request + Given The system is in default state + And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva + When I use "responses" to ask question with authorization header + """ + {"input": "Say hello", "model": "{PROVIDER}/{MODEL}", "stream": false} + """ + Then The status code of the response is 200 + + Scenario: Check if responses endpoint returns 200 for minimal streaming request + Given The system is in default state + And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva + When I use "responses" to ask question with authorization header + """ + {"input": "Say hello", "model": "{PROVIDER}/{MODEL}", "stream": true} + """ + Then The status 
code of the response is 200 \ No newline at end of file diff --git a/tests/e2e/test_list.txt b/tests/e2e/test_list.txt index 3f94d09e0..583cf387b 100644 --- a/tests/e2e/test_list.txt +++ b/tests/e2e/test_list.txt @@ -9,6 +9,7 @@ features/conversation_cache_v2.feature features/feedback.feature features/health.feature features/info.feature +features/responses.feature features/query.feature features/rlsapi_v1.feature features/rlsapi_v1_errors.feature diff --git a/tests/integration/test_openapi_json.py b/tests/integration/test_openapi_json.py index 17ff8ac66..05ccc83f8 100644 --- a/tests/integration/test_openapi_json.py +++ b/tests/integration/test_openapi_json.py @@ -231,6 +231,11 @@ def test_servers_section_present_from_url(spec_from_url: dict[str, Any]) -> None "post", {"200", "401", "403", "404"}, ), + ( + "/v1/responses", + "post", + {"200", "401", "403", "404", "413", "422", "429", "500", "503"}, + ), ("/v1/config", "get", {"200", "401", "403", "500"}), ("/v1/feedback", "post", {"200", "401", "403", "404", "500"}), ("/v1/feedback/status", "get", {"200"}), @@ -318,6 +323,11 @@ def test_paths_and_responses_exist_from_file( "post", {"200", "401", "403", "404"}, ), + ( + "/v1/responses", + "post", + {"200", "401", "403", "404", "413", "422", "429", "500", "503"}, + ), ("/v1/config", "get", {"200", "401", "403", "500"}), ("/v1/feedback", "post", {"200", "401", "403", "404", "500"}), ("/v1/feedback/status", "get", {"200"}), diff --git a/tests/unit/app/endpoints/test_conversations.py b/tests/unit/app/endpoints/test_conversations.py index 5ca4faf0b..9c75f0d2f 100644 --- a/tests/unit/app/endpoints/test_conversations.py +++ b/tests/unit/app/endpoints/test_conversations.py @@ -552,12 +552,8 @@ async def test_llama_stack_not_found_error( ) -> None: """Test the endpoint when LlamaStack returns NotFoundError. - Verify the GET /conversations/{conversation_id} handler raises an HTTP - 404 when the Llama Stack client reports the session as not found. 
- - Asserts that the raised HTTPException contains a response message - indicating the conversation was not found and a cause that includes - "does not exist" and the conversation ID. + When the Llama Stack client reports the session as not found, + get_all_conversation_items maps it to HTTP 500 (InternalServerError). """ mock_authorization_resolvers(mocker) mocker.patch( @@ -589,13 +585,13 @@ async def test_llama_stack_not_found_error( auth=MOCK_AUTH, ) - assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND - + assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Conversation not found" in detail["response"] # type: ignore - assert "does not exist" in detail["cause"] # type: ignore - assert VALID_CONVERSATION_ID in detail["cause"] # type: ignore + assert detail["response"] == "Internal server error" + assert detail["cause"] == ( + "An unexpected error occurred while processing the request." + ) @pytest.mark.asyncio async def test_get_conversation_forbidden( @@ -679,7 +675,10 @@ async def test_get_others_conversations_allowed_for_authorized_user( mock_item2.role = "assistant" mock_item2.content = "Hi there!" mock_items_response.data = [mock_item1, mock_item2] - mock_client.conversations.items.list.return_value = mock_items_response + mock_items_response.has_next_page.return_value = False + mock_client.conversations.items.list = mocker.AsyncMock( + return_value=mock_items_response + ) mock_client_holder = mocker.patch( "app.endpoints.conversations_v1.AsyncLlamaStackClientHolder" @@ -732,7 +731,8 @@ async def test_successful_conversation_retrieval( type="message", role="assistant", content="I'm doing well, thanks!" 
), ] - mock_client.conversations.items.list.return_value = mock_items + mock_items.has_next_page.return_value = False + mock_client.conversations.items.list = mocker.AsyncMock(return_value=mock_items) mock_client_holder = mocker.patch( "app.endpoints.conversations_v1.AsyncLlamaStackClientHolder" @@ -806,7 +806,10 @@ async def test_no_items_found_in_get_conversation( mock_client = mocker.AsyncMock() mock_items_response = mocker.Mock() mock_items_response.data = [] - mock_client.conversations.items.list.return_value = mock_items_response + mock_items_response.has_next_page.return_value = False + mock_client.conversations.items.list = mocker.AsyncMock( + return_value=mock_items_response + ) mock_client_holder = mocker.patch( "app.endpoints.conversations_v1.AsyncLlamaStackClientHolder" ) @@ -832,7 +835,10 @@ async def test_api_status_error_in_get_conversation( dummy_request: Request, mock_conversation: MockType, ) -> None: - """Test when APIStatusError is raised during conversation retrieval.""" + """Test when APIStatusError is raised during conversation retrieval. + + get_all_conversation_items maps APIStatusError to HTTP 500. 
+ """ mock_authorization_resolvers(mocker) mocker.patch( "app.endpoints.conversations_v1.configuration", setup_configuration @@ -863,10 +869,10 @@ async def test_api_status_error_in_get_conversation( auth=MOCK_AUTH, ) - assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND + assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Conversation not found" in detail["response"] # type: ignore + assert "response" in detail @pytest.mark.asyncio async def test_sqlalchemy_error_in_get_conversation( @@ -940,7 +946,7 @@ def query_side_effect(model_class: type[Any]) -> Any: mock_session_context.__enter__.return_value = mock_session mock_session_context.__exit__.return_value = None mocker.patch( - "app.endpoints.conversations_v1.get_session", + "utils.endpoints.get_session", return_value=mock_session_context, ) diff --git a/tests/unit/app/endpoints/test_responses.py b/tests/unit/app/endpoints/test_responses.py new file mode 100644 index 000000000..725e43a07 --- /dev/null +++ b/tests/unit/app/endpoints/test_responses.py @@ -0,0 +1,1374 @@ +# pylint: disable=redefined-outer-name, too-many-locals, too-many-lines +"""Unit tests for the /responses REST API endpoint (LCORE Responses API).""" + +from datetime import UTC, datetime +from typing import Any, cast + +import pytest +from fastapi import HTTPException, Request +from fastapi.responses import StreamingResponse +from llama_stack_api import OpenAIResponseObject +from llama_stack_api.openai_responses import OpenAIResponseMessage +from llama_stack_client import APIConnectionError, APIStatusError, AsyncLlamaStackClient +from pytest_mock import MockerFixture + +from app.endpoints.responses import ( + handle_non_streaming_response, + handle_streaming_response, + responses_endpoint_handler, +) +from configuration import AppConfig +from models.config import Action +from models.database.conversations import UserConversation +from 
models.requests import ResponsesRequest +from models.responses import ResponsesResponse +from utils.types import RAGContext, ResponsesConversationContext, TurnSummary + +MOCK_AUTH = ( + "00000001-0001-0001-0001-000000000001", + "mock_username", + False, + "mock_token", +) +VALID_CONV_ID = "conv_e6afd7aaa97b49ce8f4f96a801b07893d9cb784d72e53e3c" +VALID_CONV_ID_NORMALIZED = "e6afd7aaa97b49ce8f4f96a801b07893d9cb784d72e53e3c" +MODULE = "app.endpoints.responses" +ENDPOINTS_MODULE = "utils.endpoints" +UTILS_RESPONSES_MODULE = "utils.responses" + + +def _patch_base(mocker: MockerFixture, config: AppConfig) -> None: + """Patch configuration and mandatory checks for responses endpoint.""" + mocker.patch(f"{MODULE}.configuration", config) + mocker.patch(f"{MODULE}.check_configuration_loaded") + mocker.patch(f"{MODULE}.check_tokens_available") + mocker.patch(f"{MODULE}.validate_model_provider_override") + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mocker.Mock() + mocker.patch( + f"{UTILS_RESPONSES_MODULE}.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch( + f"{UTILS_RESPONSES_MODULE}.prepare_tools", + new=mocker.AsyncMock(return_value=None), + ) + + +def _patch_client(mocker: MockerFixture) -> Any: + """Patch AsyncLlamaStackClientHolder; return (mock_client, mock_holder).""" + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_vector_stores = mocker.Mock() + mock_vector_stores.list = mocker.AsyncMock(return_value=mocker.Mock(data=[])) + mock_client.vector_stores = mock_vector_stores + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch(f"{MODULE}.AsyncLlamaStackClientHolder", return_value=mock_holder) + return mock_client, mock_holder + + +def _patch_resolve_response_context( + mocker: MockerFixture, + *, + conversation: str = "conv_new", + user_conversation: UserConversation | None = None, + generate_topic_summary: bool = False, +) -> None: + """Patch 
resolve_response_context to return the given conversation context.""" + mocker.patch( + f"{MODULE}.resolve_response_context", + new=mocker.AsyncMock( + return_value=ResponsesConversationContext( + conversation=conversation, + user_conversation=user_conversation, + generate_topic_summary=generate_topic_summary, + ) + ), + ) + + +def _patch_rag( + mocker: MockerFixture, + *, + rag_context: str = "", +) -> None: + """Patch RAG for responses endpoint by mocking build_rag_context.""" + mocker.patch( + f"{MODULE}.build_rag_context", + new=mocker.AsyncMock( + return_value=RAGContext( + context_text=rag_context, + referenced_documents=[], + ), + ), + ) + + +def _patch_moderation(mocker: MockerFixture, decision: str = "passed") -> Any: + """Patch run_shield_moderation; return mock moderation result.""" + mock_moderation = mocker.Mock() + mock_moderation.decision = decision + mocker.patch( + f"{MODULE}.run_shield_moderation", + new=mocker.AsyncMock(return_value=mock_moderation), + ) + return mock_moderation + + +def _make_responses_response( + *, + output_text: str = "", + conversation: str = "", + model: str = "provider/model1", + **kwargs: Any, +) -> ResponsesResponse: + """Build a minimal valid ResponsesResponse for tests.""" + defaults = { + "id": "resp_1", + "object": "response", + "created_at": 0, + "status": "completed", + "model": model, + "output": [], + "conversation": conversation, + "completed_at": 0, + "output_text": output_text, + "available_quotas": {}, + } + defaults.update(kwargs) + return ResponsesResponse(**defaults) + + +def _patch_handle_non_streaming_common( + mocker: MockerFixture, config: AppConfig +) -> None: + """Patch deps used by handle_non_streaming_response (blocked and success).""" + mocker.patch(f"{MODULE}.configuration", config) + mocker.patch(f"{MODULE}.get_available_quotas", return_value={}) + mocker.patch( + f"{MODULE}.get_topic_summary", + new=mocker.AsyncMock(return_value=None), + ) + mocker.patch(f"{MODULE}.store_query_results") + + 
+@pytest.fixture(name="dummy_request") +def dummy_request_fixture() -> Request: + """Minimal FastAPI Request with authorized_actions for responses endpoint.""" + req = Request(scope={"type": "http", "headers": []}) + req.state.authorized_actions = {Action.QUERY, Action.READ_OTHERS_CONVERSATIONS} + return req + + +@pytest.fixture(name="minimal_config") +def minimal_config_fixture() -> AppConfig: + """Minimal AppConfig for responses endpoint tests.""" + cfg = AppConfig() + cfg.init_from_dict( + { + "name": "test", + "service": {"host": "localhost", "port": 8080}, + "llama_stack": { + "api_key": "test-key", + "url": "http://test.com:1234", + "use_as_library_client": False, + }, + "user_data_collection": {}, + "authentication": {"module": "noop"}, + "authorization": {"access_rules": []}, + } + ) + return cfg + + +def _request_with_model_and_conv( + input_text: str = "Hello", model: str = "provider/model1" +) -> ResponsesRequest: + """Build request with model and conversation set (as handler does).""" + return ResponsesRequest( + input=input_text, + model=model, + conversation=VALID_CONV_ID, + ) + + +def _request_with_previous_response_id( + input_text: str = "Hello", + model: str = "provider/model1", + previous_response_id: str = "resp_prev_123", + store: bool = True, +) -> ResponsesRequest: + """Build request with previous_response_id (conversation set by handler).""" + request = ResponsesRequest( + input=input_text, + model=model, + previous_response_id=previous_response_id, + store=store, + ) + request.conversation = VALID_CONV_ID + return request + + +class TestResponsesEndpointHandler: + """Unit tests for responses_endpoint_handler.""" + + @pytest.mark.asyncio + async def test_successful_responses_string_input_non_streaming( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test successful responses request with string input returns ResponsesResponse.""" + responses_request = ResponsesRequest(input="What is 
Kubernetes?") + _patch_base(mocker, minimal_config) + _patch_client(mocker) + _patch_resolve_response_context(mocker, conversation="conv_new_123") + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="provider/model1"), + ) + mocker.patch( + f"{MODULE}.check_model_configured", + new=mocker.AsyncMock(return_value=True), + ) + _patch_rag(mocker) + _patch_moderation(mocker, decision="passed") + + mock_response = _make_responses_response( + output_text="Kubernetes is a container orchestration platform.", + conversation="conv_new_123", + ) + mocker.patch( + f"{MODULE}.handle_non_streaming_response", + new=mocker.AsyncMock(return_value=mock_response), + ) + + response = await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + assert isinstance(response, ResponsesResponse) + assert ( + response.output_text == "Kubernetes is a container orchestration platform." + ) + assert response.conversation == "conv_new_123" + + @pytest.mark.asyncio + async def test_responses_with_conversation_validates_and_retrieves( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that providing conversation ID calls validate_and_retrieve_conversation.""" + responses_request = ResponsesRequest( + input="Follow-up question", + conversation=VALID_CONV_ID, + ) + _patch_base(mocker, minimal_config) + mock_user_conv = mocker.Mock(spec=UserConversation) + mock_user_conv.id = VALID_CONV_ID_NORMALIZED + mock_validate = mocker.patch( + f"{ENDPOINTS_MODULE}.validate_and_retrieve_conversation", + return_value=mock_user_conv, + ) + _, mock_holder = _patch_client(mocker) + mocker.patch( + f"{ENDPOINTS_MODULE}.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch( + f"{ENDPOINTS_MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mocker.patch( + 
f"{ENDPOINTS_MODULE}.to_llama_stack_conversation_id", + return_value=VALID_CONV_ID, + ) + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="provider/model1"), + ) + mocker.patch( + f"{MODULE}.check_model_configured", + new=mocker.AsyncMock(return_value=True), + ) + _patch_rag(mocker) + _patch_moderation(mocker, decision="passed") + mocker.patch( + f"{MODULE}.handle_non_streaming_response", + new=mocker.AsyncMock( + return_value=_make_responses_response( + output_text="Answer", + conversation=VALID_CONV_ID_NORMALIZED, + ) + ), + ) + + await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + + mock_validate.assert_called_once() + + @pytest.mark.asyncio + async def test_responses_model_not_configured_raises_404( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that unconfigured model leads to 404 HTTPException.""" + responses_request = ResponsesRequest(input="Hello", model="provider/unknown") + _patch_base(mocker, minimal_config) + _patch_client(mocker) + _patch_resolve_response_context(mocker) + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="provider/unknown"), + ) + mocker.patch( + f"{MODULE}.check_model_configured", + new=mocker.AsyncMock(return_value=False), + ) + mocker.patch( + f"{MODULE}.extract_provider_and_model_from_model_id", + return_value=("provider", "unknown"), + ) + + with pytest.raises(HTTPException) as exc_info: + await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + assert exc_info.value.status_code == 404 + + @pytest.mark.asyncio + async def test_responses_streaming_returns_streaming_response( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that stream=True delegates to 
handle_streaming_response.""" + responses_request = ResponsesRequest(input="Stream this", stream=True) + _patch_base(mocker, minimal_config) + _patch_client(mocker) + _patch_resolve_response_context(mocker) + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="provider/model1"), + ) + mocker.patch( + f"{MODULE}.check_model_configured", + new=mocker.AsyncMock(return_value=True), + ) + _patch_rag(mocker) + _patch_moderation(mocker, decision="passed") + mock_streaming = mocker.Mock(spec=StreamingResponse) + mocker.patch( + f"{MODULE}.handle_streaming_response", + new=mocker.AsyncMock(return_value=mock_streaming), + ) + + response = await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + assert response is mock_streaming + + @pytest.mark.asyncio + async def test_responses_azure_token_refresh( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that Azure token refresh is called when model starts with azure.""" + responses_request = ResponsesRequest(input="Hi", model="azure/some-model") + _patch_base(mocker, minimal_config) + _patch_client(mocker) + _patch_resolve_response_context(mocker) + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="azure/some-model"), + ) + mocker.patch( + f"{MODULE}.check_model_configured", + new=mocker.AsyncMock(return_value=True), + ) + mock_azure = mocker.Mock() + mock_azure.is_entra_id_configured = True + mock_azure.is_token_expired = True + mock_azure.refresh_token.return_value = True + mocker.patch(f"{MODULE}.AzureEntraIDManager", return_value=mock_azure) + updated_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_update_token = mocker.patch( + f"{MODULE}.update_azure_token", + new=mocker.AsyncMock(return_value=updated_client), + ) + _patch_rag(mocker) + _patch_moderation(mocker, decision="passed") + 
mocker.patch( + f"{MODULE}.handle_non_streaming_response", + new=mocker.AsyncMock( + return_value=_make_responses_response( + output_text="Ok", + conversation="conv_new", + model="azure/some-model", + ) + ), + ) + + await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + mock_update_token.assert_called_once() + + @pytest.mark.asyncio + async def test_responses_structured_input_appends_rag_message( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that non-string input uses extract_text and appends RAG message.""" + structured_input: list[Any] = [ + OpenAIResponseMessage(role="user", content="What is K8s?"), + ] + responses_request = ResponsesRequest( + input=cast(Any, structured_input), + ) + _patch_base(mocker, minimal_config) + _patch_client(mocker) + _patch_resolve_response_context(mocker) + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="provider/model1"), + ) + mocker.patch( + f"{MODULE}.check_model_configured", + new=mocker.AsyncMock(return_value=True), + ) + mock_build_rag = mocker.patch( + f"{MODULE}.build_rag_context", + new=mocker.AsyncMock( + return_value=RAGContext( + context_text="\n\nRelevant documentation:\nDoc1", + referenced_documents=[], + ), + ), + ) + _patch_moderation(mocker, decision="passed") + mocker.patch( + f"{MODULE}.handle_non_streaming_response", + new=mocker.AsyncMock( + return_value=_make_responses_response( + output_text="K8s is Kubernetes.", + conversation="conv_new", + ) + ), + ) + + await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + + mock_build_rag.assert_called_once() + call_args = mock_build_rag.call_args[0] + assert ( + call_args[2] == "What is K8s?" 
+ ) # input_text (3rd arg to build_rag_context) + + @pytest.mark.asyncio + async def test_responses_blocked_with_conversation_appends_refusal( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Blocked moderation with conversation calls append_turn_items_to_conversation.""" + responses_request = ResponsesRequest( + input="Bad", + conversation=VALID_CONV_ID, + stream=False, + model="provider/model1", + ) + _patch_base(mocker, minimal_config) + mock_user_conv = mocker.Mock(spec=UserConversation) + mock_user_conv.id = VALID_CONV_ID_NORMALIZED + mocker.patch( + f"{ENDPOINTS_MODULE}.validate_and_retrieve_conversation", + return_value=mock_user_conv, + ) + mock_client, mock_holder = _patch_client(mocker) + mocker.patch( + f"{ENDPOINTS_MODULE}.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch( + f"{ENDPOINTS_MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mocker.patch( + f"{ENDPOINTS_MODULE}.to_llama_stack_conversation_id", + return_value=VALID_CONV_ID, + ) + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="provider/model1"), + ) + mocker.patch( + f"{MODULE}.check_model_configured", + new=mocker.AsyncMock(return_value=True), + ) + _patch_rag(mocker) + mock_moderation = _patch_moderation(mocker, decision="blocked") + mock_moderation.message = "Blocked" + mock_moderation.moderation_id = "resp_blocked_123" + mock_moderation.refusal_response = OpenAIResponseMessage( + type="message", role="assistant", content="Blocked" + ) + mock_append = mocker.patch( + f"{MODULE}.append_turn_items_to_conversation", + new=mocker.AsyncMock(), + ) + mocker.patch(f"{MODULE}.store_query_results") + + response = await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + + mock_append.assert_awaited_once_with( + client=mock_client, + 
conversation_id=VALID_CONV_ID, + user_input=responses_request.input, + llm_output=[mock_moderation.refusal_response], + ) + assert isinstance(response, ResponsesResponse) + payload = response.model_dump() + assert "model" in payload, "Handler must set model on the response payload" + ResponsesResponse.model_validate(payload) + + +class TestHandleNonStreamingResponse: + """Unit tests for handle_non_streaming_response.""" + + @pytest.mark.asyncio + async def test_handle_non_streaming_blocked_returns_refusal( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that blocked moderation returns response with refusal message.""" + request = _request_with_model_and_conv("Bad input") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "blocked" + mock_moderation.message = "Content blocked" + mock_refusal = mocker.Mock(spec=OpenAIResponseMessage) + mock_refusal.type = "message" + mock_refusal.role = "assistant" + mock_refusal.content = "Content blocked" + mock_moderation.refusal_response = mock_refusal + + _patch_handle_non_streaming_common(mocker, minimal_config) + mock_client.conversations.items.create = mocker.AsyncMock() + mock_api_response = mocker.Mock() + mock_api_response.output = [mock_refusal] + mock_api_response.model_dump.return_value = { + "id": "resp_blocked", + "object": "response", + "created_at": 0, + "status": "completed", + "model": "provider/model1", + "output": [mock_refusal], + "usage": { + "input_tokens": 0, + "output_tokens": 0, + "total_tokens": 0, + "input_tokens_details": {"cached_tokens": 0}, + "output_tokens_details": {"reasoning_tokens": 0}, + }, + } + mocker.patch( + f"{MODULE}.OpenAIResponseObject.model_construct", + return_value=mock_api_response, + ) + + response = await handle_non_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Bad input", + started_at=datetime.now(UTC), + 
moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + assert isinstance(response, ResponsesResponse) + assert response.output_text == "Content blocked" + mock_client.responses.create.assert_not_called() + + @pytest.mark.asyncio + async def test_handle_non_streaming_success_returns_response( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test successful handle_non_streaming_response returns ResponsesResponse.""" + request = _request_with_model_and_conv("Hello") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + mock_api_response = mocker.Mock(spec=OpenAIResponseObject) + mock_api_response.output = [] + mock_api_response.usage = mocker.Mock( + input_tokens=1, output_tokens=2, total_tokens=3 + ) + mock_api_response.model_dump.return_value = { + "id": "resp_1", + "object": "response", + "created_at": 0, + "status": "completed", + "model": "provider/model1", + "output": [], + "usage": { + "input_tokens": 1, + "output_tokens": 2, + "total_tokens": 3, + "input_tokens_details": {"cached_tokens": 0}, + "output_tokens_details": {"reasoning_tokens": 0}, + }, + } + mock_client.responses.create = mocker.AsyncMock(return_value=mock_api_response) + + _patch_handle_non_streaming_common(mocker, minimal_config) + mocker.patch( + f"{MODULE}.extract_token_usage", + return_value=mocker.Mock(input_tokens=1, output_tokens=2), + ) + mocker.patch(f"{MODULE}.consume_query_tokens") + mocker.patch( + f"{MODULE}.build_turn_summary", + return_value=mocker.Mock(referenced_documents=[]), + ) + mocker.patch( + f"{MODULE}.extract_text_from_response_items", + return_value="Model reply", + ) + mocker.patch( + f"{MODULE}.extract_vector_store_ids_from_tools", + return_value=[], + ) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + + response = await handle_non_streaming_response( + client=mock_client, + 
request=request, + auth=MOCK_AUTH, + input_text="Hello", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + assert isinstance(response, ResponsesResponse) + assert response.output_text == "Model reply" + mock_client.responses.create.assert_called_once() + + @pytest.mark.asyncio + async def test_handle_non_streaming_with_previous_response_id_appends_turn( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test append_turn_items_to_conversation triggers with store and previous_response_id.""" + request = _request_with_previous_response_id("Hi", previous_response_id="r1") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + mock_api_response = mocker.Mock(spec=OpenAIResponseObject) + mock_api_response.output = [] + mock_api_response.id = "resp_1" + mock_api_response.usage = mocker.Mock( + input_tokens=1, output_tokens=2, total_tokens=3 + ) + mock_api_response.model_dump.return_value = { + "id": "resp_1", + "object": "response", + "created_at": 0, + "status": "completed", + "model": "provider/model1", + "output": [], + "usage": { + "input_tokens": 1, + "output_tokens": 2, + "total_tokens": 3, + "input_tokens_details": {"cached_tokens": 0}, + "output_tokens_details": {"reasoning_tokens": 0}, + }, + } + mock_client.responses.create = mocker.AsyncMock(return_value=mock_api_response) + + _patch_handle_non_streaming_common(mocker, minimal_config) + mocker.patch( + f"{MODULE}.extract_token_usage", + return_value=mocker.Mock(input_tokens=1, output_tokens=2), + ) + mocker.patch(f"{MODULE}.consume_query_tokens") + mocker.patch( + f"{MODULE}.build_turn_summary", + return_value=mocker.Mock(referenced_documents=[]), + ) + mocker.patch( + f"{MODULE}.extract_text_from_response_items", + return_value="Reply", + ) + mocker.patch( + f"{MODULE}.extract_vector_store_ids_from_tools", + return_value=[], + ) + 
mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mock_append = mocker.patch( + f"{MODULE}.append_turn_items_to_conversation", + new=mocker.AsyncMock(), + ) + + await handle_non_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + mock_append.assert_awaited_once() + call_args = mock_append.call_args[0] + assert call_args[1] == VALID_CONV_ID + assert call_args[3] == [] + + @pytest.mark.asyncio + async def test_handle_non_streaming_context_length_raises_413( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that RuntimeError with context_length raises 413.""" + request = _request_with_model_and_conv("Long input") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_client.responses.create = mocker.AsyncMock( + side_effect=RuntimeError("context_length exceeded") + ) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + _patch_handle_non_streaming_common(mocker, minimal_config) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + + with pytest.raises(HTTPException) as exc_info: + await handle_non_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Long input", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + assert exc_info.value.status_code == 413 + + @pytest.mark.asyncio + async def test_handle_non_streaming_connection_error_raises_503( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that APIConnectionError raises 503.""" + request = _request_with_model_and_conv("Hi") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_client.responses.create = mocker.AsyncMock( + 
side_effect=APIConnectionError( + message="Connection failed", + request=mocker.Mock(), + ) + ) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + _patch_handle_non_streaming_common(mocker, minimal_config) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + + with pytest.raises(HTTPException) as exc_info: + await handle_non_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + assert exc_info.value.status_code == 503 + + @pytest.mark.asyncio + async def test_handle_non_streaming_api_status_error_raises_http( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that APIStatusError is handled and re-raised as HTTPException.""" + request = _request_with_model_and_conv("Hi") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_client.responses.create = mocker.AsyncMock( + side_effect=APIStatusError( + message="API error", + response=mocker.Mock(request=None), + body=None, + ) + ) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + _patch_handle_non_streaming_common(mocker, minimal_config) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mocker.patch( + f"{MODULE}.handle_known_apistatus_errors", + return_value=mocker.Mock( + model_dump=lambda: { + "status_code": 500, + "detail": {"response": "Error", "cause": "API error"}, + } + ), + ) + + with pytest.raises(HTTPException) as exc_info: + await handle_non_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + assert exc_info.value.status_code == 500 + + @pytest.mark.asyncio + async def 
test_handle_non_streaming_runtime_error_without_context_reraises( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that RuntimeError without context_length is re-raised.""" + request = _request_with_model_and_conv("Hi") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_client.responses.create = mocker.AsyncMock( + side_effect=RuntimeError("Some other error") + ) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + _patch_handle_non_streaming_common(mocker, minimal_config) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + + with pytest.raises(RuntimeError, match="Some other error"): + await handle_non_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + +class TestHandleStreamingResponse: + """Unit tests for handle_streaming_response and streaming generators.""" + + @pytest.mark.asyncio + async def test_handle_streaming_blocked_returns_sse_consumes_shield_generator( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test streaming with blocked moderation yields SSE from shield_violation_generator.""" + request = _request_with_model_and_conv("Bad", model="provider/model1") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "blocked" + mock_moderation.message = "Blocked" + mock_moderation.moderation_id = "mod_123" + mock_refusal = OpenAIResponseMessage( + role="assistant", content="Blocked", type="message" + ) + mock_moderation.refusal_response = mock_refusal + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch(f"{MODULE}.get_available_quotas", return_value={}) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + 
mocker.patch( + f"{MODULE}.get_topic_summary", + new=mocker.AsyncMock(return_value=None), + ) + mocker.patch(f"{MODULE}.store_query_results") + + mock_client.conversations.items.create = mocker.AsyncMock() + response = await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Bad", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + assert isinstance(response, StreamingResponse) + assert response.media_type == "text/event-stream" + collected: list[str] = [] + async for part in response.body_iterator: + chunk_str = ( + part.decode("utf-8") + if isinstance(part, bytes) + else (part if isinstance(part, str) else bytes(part).decode("utf-8")) + ) + collected.append(chunk_str) + body = "".join(collected) + assert "event: response.created" in body + assert "event: response.output_item.added" in body + assert "event: response.output_item.done" in body + assert "event: response.completed" in body + assert "[DONE]" in body + mock_client.responses.create.assert_not_called() + + @pytest.mark.asyncio + async def test_handle_streaming_success_returns_sse_consumes_response_generator( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test streaming with passed moderation yields SSE from response_generator.""" + request = _request_with_model_and_conv("Hi", model="provider/model1") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + mock_chunk = mocker.Mock() + mock_chunk.type = "response.completed" + mock_chunk.response = mocker.Mock() + mock_chunk.response.id = "r1" + mock_chunk.response.output = [] + mock_chunk.response.usage = mocker.Mock( + input_tokens=1, output_tokens=2, total_tokens=3 + ) + mock_chunk.model_dump.return_value = { + "type": "response.completed", + "response": {"id": "r1", "usage": {"input_tokens": 1}}, + } + + async def mock_stream() -> Any: 
+ yield mock_chunk + + mock_client.responses.create = mocker.AsyncMock(return_value=mock_stream()) + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch(f"{MODULE}.get_available_quotas", return_value={}) + mocker.patch(f"{MODULE}.extract_token_usage", return_value=mocker.Mock()) + mocker.patch(f"{MODULE}.consume_query_tokens") + mocker.patch(f"{MODULE}.extract_vector_store_ids_from_tools", return_value=[]) + mocker.patch( + f"{MODULE}.build_turn_summary", + return_value=TurnSummary(referenced_documents=[]), + ) + mocker.patch( + f"{MODULE}.get_topic_summary", + new=mocker.AsyncMock(return_value=None), + ) + mocker.patch(f"{MODULE}.store_query_results") + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch(f"{MODULE}.AsyncLlamaStackClientHolder", return_value=mock_holder) + response = await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + assert isinstance(response, StreamingResponse) + collected: list[str] = [] + async for part in response.body_iterator: + chunk_str = ( + part.decode("utf-8") + if isinstance(part, bytes) + else (part if isinstance(part, str) else bytes(part).decode("utf-8")) + ) + collected.append(chunk_str) + body = "".join(collected) + assert "response.completed" in body or "event:" in body + assert "[DONE]" in body + mock_client.responses.create.assert_called_once() + + @pytest.mark.asyncio + async def test_handle_streaming_in_progress_chunk_sets_quotas_and_output_text( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test in_progress chunk includes available_quotas and output_text.""" + request = _request_with_model_and_conv("Hi", model="provider/model1") + mock_client = 
mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + in_progress_chunk = mocker.Mock() + in_progress_chunk.type = "response.in_progress" + in_progress_chunk.model_dump.return_value = { + "type": "response.in_progress", + "response": {"id": "r0"}, + } + + completed_chunk = mocker.Mock() + completed_chunk.type = "response.completed" + completed_chunk.response = mocker.Mock() + completed_chunk.response.id = "r1" + completed_chunk.response.output = [] + completed_chunk.response.usage = mocker.Mock( + input_tokens=1, output_tokens=2, total_tokens=3 + ) + completed_chunk.model_dump.return_value = { + "type": "response.completed", + "response": {"id": "r1", "usage": {"input_tokens": 1}}, + } + + async def mock_stream() -> Any: + yield in_progress_chunk + yield completed_chunk + + mock_client.responses.create = mocker.AsyncMock(return_value=mock_stream()) + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch(f"{MODULE}.get_available_quotas", return_value={}) + mocker.patch(f"{MODULE}.extract_token_usage", return_value=mocker.Mock()) + mocker.patch(f"{MODULE}.consume_query_tokens") + mocker.patch(f"{MODULE}.extract_vector_store_ids_from_tools", return_value=[]) + mocker.patch( + f"{MODULE}.build_turn_summary", + return_value=TurnSummary(referenced_documents=[]), + ) + mocker.patch( + f"{MODULE}.get_topic_summary", + new=mocker.AsyncMock(return_value=None), + ) + mocker.patch(f"{MODULE}.store_query_results") + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch(f"{MODULE}.AsyncLlamaStackClientHolder", return_value=mock_holder) + + response = await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + 
inline_rag_context=RAGContext(), + ) + collected: list[str] = [] + async for part in response.body_iterator: + chunk_str = ( + part.decode("utf-8") + if isinstance(part, bytes) + else (part if isinstance(part, str) else bytes(part).decode("utf-8")) + ) + collected.append(chunk_str) + body = "".join(collected) + assert "response.in_progress" in body + assert '"available_quotas":{}' in body or '"available_quotas": {}' in body + assert "[DONE]" in body + + @pytest.mark.asyncio + async def test_handle_streaming_builds_tool_call_summary_from_output( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that response output items are passed to build_tool_call_summary.""" + request = _request_with_model_and_conv("Hi", model="provider/model1") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + mock_output_item = mocker.Mock() + completed_chunk = mocker.Mock() + completed_chunk.type = "response.completed" + completed_chunk.response = mocker.Mock() + completed_chunk.response.id = "r1" + completed_chunk.response.output = [mock_output_item] + completed_chunk.response.usage = mocker.Mock( + input_tokens=1, output_tokens=2, total_tokens=3 + ) + completed_chunk.model_dump.return_value = { + "type": "response.completed", + "response": {"id": "r1", "usage": {"input_tokens": 1}}, + } + + async def mock_stream() -> Any: + yield completed_chunk + + mock_client.responses.create = mocker.AsyncMock(return_value=mock_stream()) + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch(f"{MODULE}.get_available_quotas", return_value={}) + mocker.patch(f"{MODULE}.extract_token_usage", return_value=mocker.Mock()) + mocker.patch(f"{MODULE}.consume_query_tokens") + mocker.patch(f"{MODULE}.extract_vector_store_ids_from_tools", return_value=[]) + mocker.patch( + f"{MODULE}.build_turn_summary", + return_value=TurnSummary(referenced_documents=[]), + ) + 
mock_build_tool_call = mocker.patch( + f"{MODULE}.build_tool_call_summary", + return_value=(mocker.Mock(), mocker.Mock()), + ) + mocker.patch( + f"{MODULE}.get_topic_summary", + new=mocker.AsyncMock(return_value=None), + ) + mocker.patch(f"{MODULE}.store_query_results") + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mocker.patch(f"{MODULE}.parse_referenced_documents", return_value=[]) + mocker.patch( + f"{MODULE}.deduplicate_referenced_documents", side_effect=lambda x: x + ) + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch(f"{MODULE}.AsyncLlamaStackClientHolder", return_value=mock_holder) + + response = await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + collected: list[str] = [] + async for part in response.body_iterator: + chunk_str = ( + part.decode("utf-8") + if isinstance(part, bytes) + else (part if isinstance(part, str) else bytes(part).decode("utf-8")) + ) + collected.append(chunk_str) + mock_build_tool_call.assert_called_once() + + @pytest.mark.asyncio + async def test_handle_streaming_with_previous_response_id_appends_turn( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that store=True and previous_response_id in streaming triggers append_turn_items.""" + request = _request_with_previous_response_id( + "Hi", previous_response_id="r_prev" + ) + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + completed_chunk = mocker.Mock() + completed_chunk.type = "response.completed" + completed_chunk.response = mocker.Mock() + completed_chunk.response.id = "r1" + completed_chunk.response.output = [] + completed_chunk.response.usage = mocker.Mock( + input_tokens=1, 
output_tokens=2, total_tokens=3 + ) + completed_chunk.model_dump.return_value = { + "type": "response.completed", + "response": {"id": "r1", "usage": {"input_tokens": 1}}, + } + + async def mock_stream() -> Any: + yield completed_chunk + + mock_client.responses.create = mocker.AsyncMock(return_value=mock_stream()) + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch(f"{MODULE}.get_available_quotas", return_value={}) + mocker.patch(f"{MODULE}.extract_token_usage", return_value=mocker.Mock()) + mocker.patch(f"{MODULE}.consume_query_tokens") + mocker.patch(f"{MODULE}.extract_vector_store_ids_from_tools", return_value=[]) + mocker.patch( + f"{MODULE}.build_turn_summary", + return_value=TurnSummary(referenced_documents=[]), + ) + mocker.patch( + f"{MODULE}.get_topic_summary", + new=mocker.AsyncMock(return_value=None), + ) + mocker.patch(f"{MODULE}.store_query_results") + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mock_append = mocker.patch( + f"{MODULE}.append_turn_items_to_conversation", + new=mocker.AsyncMock(), + ) + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch(f"{MODULE}.AsyncLlamaStackClientHolder", return_value=mock_holder) + + response = await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + collected: list[str] = [] + async for part in response.body_iterator: + chunk_str = ( + part.decode("utf-8") + if isinstance(part, bytes) + else (part if isinstance(part, str) else bytes(part).decode("utf-8")) + ) + collected.append(chunk_str) + mock_append.assert_called_once() + call_args = mock_append.call_args[0] + assert call_args[1] == VALID_CONV_ID + assert call_args[3] == [] + + @pytest.mark.asyncio + async def test_handle_streaming_context_length_raises_413( + self, + 
minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test streaming raises 413 when create raises RuntimeError context_length.""" + request = _request_with_model_and_conv("Long", model="provider/model1") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_client.responses.create = mocker.AsyncMock( + side_effect=RuntimeError("context_length exceeded") + ) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + with pytest.raises(HTTPException) as exc_info: + await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Long", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + assert exc_info.value.status_code == 413 + + @pytest.mark.asyncio + async def test_handle_streaming_connection_error_raises_503( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test streaming raises 503 when create raises APIConnectionError.""" + request = _request_with_model_and_conv("Hi", model="provider/model1") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_client.responses.create = mocker.AsyncMock( + side_effect=APIConnectionError( + message="Connection failed", + request=mocker.Mock(), + ) + ) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + with pytest.raises(HTTPException) as exc_info: + await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + assert exc_info.value.status_code 
== 503 diff --git a/tests/unit/app/test_routers.py b/tests/unit/app/test_routers.py index 754e3fdb2..668943a50 100644 --- a/tests/unit/app/test_routers.py +++ b/tests/unit/app/test_routers.py @@ -28,6 +28,7 @@ rlsapi_v1, a2a, query, + responses, ) @@ -53,7 +54,7 @@ def include_router( # pylint: disable=too-many-arguments prefix: str = "", tags: Optional[list] = None, dependencies: Optional[Sequence] = None, - responses: Optional[dict] = None, + responses: Optional[dict] = None, # pylint: disable=redefined-outer-name deprecated: Optional[bool] = None, include_in_schema: Optional[bool] = None, default_response_class: Optional[Any] = None, @@ -108,7 +109,7 @@ def test_include_routers() -> None: include_routers(app) # are all routers added? - assert len(app.routers) == 20 + assert len(app.routers) == 21 assert root.router in app.get_routers() assert info.router in app.get_routers() assert models.router in app.get_routers() @@ -129,6 +130,7 @@ def test_include_routers() -> None: assert rlsapi_v1.router in app.get_routers() assert a2a.router in app.get_routers() assert stream_interrupt.router in app.get_routers() + assert responses.router in app.get_routers() def test_check_prefixes() -> None: @@ -136,7 +138,7 @@ def test_check_prefixes() -> None: Verify that include_routers registers the expected routers with their configured URL prefixes. - Asserts that 16 routers are registered on a MockFastAPI instance and that + Asserts that 21 routers are registered on a MockFastAPI instance and that each router's prefix matches the expected value (e.g., root, health, authorized, metrics use an empty prefix; most API routers use "/v1"; conversations_v2 uses "/v2"). @@ -145,7 +147,7 @@ def test_check_prefixes() -> None: include_routers(app) # are all routers added? 
- assert len(app.routers) == 20 + assert len(app.routers) == 21 assert app.get_router_prefix(root.router) == "" assert app.get_router_prefix(info.router) == "/v1" assert app.get_router_prefix(models.router) == "/v1" @@ -167,3 +169,4 @@ def test_check_prefixes() -> None: assert app.get_router_prefix(rlsapi_v1.router) == "/v1" assert app.get_router_prefix(a2a.router) == "" assert app.get_router_prefix(stream_interrupt.router) == "/v1" + assert app.get_router_prefix(responses.router) == "/v1" diff --git a/tests/unit/utils/test_conversations.py b/tests/unit/utils/test_conversations.py index e4120f145..389793055 100644 --- a/tests/unit/utils/test_conversations.py +++ b/tests/unit/utils/test_conversations.py @@ -3,6 +3,9 @@ from datetime import datetime, UTC from typing import Any +from fastapi import HTTPException +from llama_stack_api import OpenAIResponseMessage +from llama_stack_client import APIConnectionError, APIStatusError import pytest from pytest_mock import MockerFixture @@ -11,7 +14,9 @@ from utils.conversations import ( _build_tool_call_summary_from_item, _extract_text_from_content, + append_turn_items_to_conversation, build_conversation_turns_from_items, + get_all_conversation_items, ) from utils.types import ToolCallSummary @@ -720,3 +725,133 @@ def test_legacy_conversation_without_metadata(self, mocker: MockerFixture) -> No # Timestamps should match conversation start time assert turn.started_at == "2024-01-01T10:00:00Z" assert turn.completed_at == "2024-01-01T10:00:00Z" + + +class TestAppendTurnItemsToConversation: # pylint: disable=too-few-public-methods + """Tests for append_turn_items_to_conversation function.""" + + @pytest.mark.asyncio + async def test_appends_user_input_and_llm_output( + self, mocker: MockerFixture + ) -> None: + """Test that append_turn_items_to_conversation creates conversation items correctly.""" + mock_client = mocker.Mock() + mock_client.conversations.items.create = mocker.AsyncMock(return_value=None) + assistant_msg = 
OpenAIResponseMessage( + type="message", + role="assistant", + content="I cannot help with that", + ) + + await append_turn_items_to_conversation( + mock_client, + conversation_id="conv-123", + user_input="Hello", + llm_output=[assistant_msg], + ) + + mock_client.conversations.items.create.assert_called_once() + call_args = mock_client.conversations.items.create.call_args + assert call_args[0][0] == "conv-123" + items = call_args[1]["items"] + assert len(items) == 2 + assert items[0]["type"] == "message" and items[0]["role"] == "user" + assert items[0]["content"] == "Hello" + assert items[1]["type"] == "message" and items[1]["role"] == "assistant" + assert items[1]["content"] == "I cannot help with that" + + +class TestGetAllConversationItems: + """Tests for get_all_conversation_items function.""" + + @pytest.mark.asyncio + async def test_returns_single_page_items(self, mocker: MockerFixture) -> None: + """Test that a single page of items is returned.""" + mock_client = mocker.Mock() + item_a = mocker.Mock(type="message", role="user", content="Hello") + item_b = mocker.Mock(type="message", role="assistant", content="Hi") + mock_page = mocker.Mock() + mock_page.data = [item_a, item_b] + mock_page.has_next_page.return_value = False + + mock_client.conversations.items.list = mocker.AsyncMock(return_value=mock_page) + + result = await get_all_conversation_items( + mock_client, "conv_0d21ba731f21f798dc9680125d5d6f49" + ) + + assert result == [item_a, item_b] + mock_client.conversations.items.list.assert_called_once_with( + conversation_id="conv_0d21ba731f21f798dc9680125d5d6f49", + order="asc", + ) + + @pytest.mark.asyncio + async def test_returns_all_items_across_pages(self, mocker: MockerFixture) -> None: + """Test that items from multiple pages are concatenated.""" + mock_client = mocker.Mock() + item_1 = mocker.Mock(type="message", role="user", content="First") + item_2 = mocker.Mock(type="message", role="assistant", content="Second") + item_3 = 
mocker.Mock(type="message", role="user", content="Third") + + first_page = mocker.Mock() + first_page.data = [item_1] + first_page.has_next_page.return_value = True + second_page = mocker.Mock() + second_page.data = [item_2, item_3] + second_page.has_next_page.return_value = False + + first_page.get_next_page = mocker.AsyncMock(return_value=second_page) + + mock_client.conversations.items.list = mocker.AsyncMock(return_value=first_page) + + result = await get_all_conversation_items(mock_client, "conv_abc") + + assert result == [item_1, item_2, item_3] + + @pytest.mark.asyncio + async def test_handles_empty_data(self, mocker: MockerFixture) -> None: + """Test that None or empty page data is handled.""" + mock_client = mocker.Mock() + mock_page = mocker.Mock() + mock_page.data = None + mock_page.has_next_page.return_value = False + + mock_client.conversations.items.list = mocker.AsyncMock(return_value=mock_page) + + result = await get_all_conversation_items(mock_client, "conv_empty") + + assert result == [] + + @pytest.mark.asyncio + async def test_handles_connection_error(self, mocker: MockerFixture) -> None: + """Test that APIConnectionError is converted to HTTPException 503.""" + mock_client = mocker.Mock() + mock_client.conversations.items.list = mocker.AsyncMock( + side_effect=APIConnectionError( + message="connection refused", request=mocker.Mock() + ) + ) + + with pytest.raises(HTTPException) as exc_info: + await get_all_conversation_items(mock_client, "conv_xyz") + + assert exc_info.value.status_code == 503 + assert "Llama Stack" in str(exc_info.value.detail) + + @pytest.mark.asyncio + async def test_handles_api_status_error(self, mocker: MockerFixture) -> None: + """Test that APIStatusError is converted to HTTPException 500.""" + mock_client = mocker.Mock() + mock_client.conversations.items.list = mocker.AsyncMock( + side_effect=APIStatusError( + message="internal error", + response=mocker.Mock(request=None), + body=None, + ) + ) + + with 
pytest.raises(HTTPException) as exc_info: + await get_all_conversation_items(mock_client, "conv_xyz") + + assert exc_info.value.status_code == 500 diff --git a/tests/unit/utils/test_endpoints.py b/tests/unit/utils/test_endpoints.py index cc092a1ed..cf8b46568 100644 --- a/tests/unit/utils/test_endpoints.py +++ b/tests/unit/utils/test_endpoints.py @@ -11,9 +11,9 @@ from pytest_mock import MockerFixture from sqlalchemy.exc import SQLAlchemyError -from models.database.conversations import UserConversation +from models.database.conversations import UserConversation, UserTurn from utils import endpoints -from utils.types import ReferencedDocument +from utils.types import ReferencedDocument, ResponsesConversationContext @pytest.fixture(name="input_file") @@ -451,3 +451,266 @@ def test_default_others_allowed_false(self, mocker: MockerFixture) -> None: mock_query.filter_by.assert_called_once_with( id=conversation_id, user_id=user_id ) + + +class TestResolveResponseContext: + """Tests for resolve_response_context function.""" + + @pytest.mark.asyncio + async def test_conversation_id_returns_context_with_existing_conversation( + self, mocker: MockerFixture + ) -> None: + """When conversation_id is set, validate and return context with it.""" + mock_holder = mocker.Mock() + mock_client = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + + mock_conv = mocker.Mock(spec=UserConversation) + mock_conv.id = "conv-normalized-123" + mocker.patch( + "utils.endpoints.normalize_conversation_id", + return_value="conv-normalized-123", + ) + mocker.patch( + "utils.endpoints.to_llama_stack_conversation_id", + return_value="conv_conv-normalized-123", + ) + mocker.patch( + "utils.endpoints.validate_and_retrieve_conversation", + return_value=mock_conv, + ) + + result = await endpoints.resolve_response_context( + user_id="user-1", + others_allowed=False, + 
conversation_id="conv-raw", + previous_response_id=None, + generate_topic_summary=None, + ) + + assert isinstance(result, ResponsesConversationContext) + assert result.conversation == "conv_conv-normalized-123" + assert result.user_conversation is mock_conv + assert result.generate_topic_summary is False + + @pytest.mark.asyncio + async def test_previous_response_id_turn_not_found_raises_404( + self, mocker: MockerFixture + ) -> None: + """When previous_response_id is set but turn does not exist, raise 404.""" + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mocker.Mock() + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch("utils.endpoints.check_turn_existence", return_value=False) + + with pytest.raises(HTTPException) as exc_info: + await endpoints.resolve_response_context( + user_id="user-1", + others_allowed=False, + conversation_id=None, + previous_response_id="resp-missing", + generate_topic_summary=None, + ) + + assert exc_info.value.status_code == 404 + assert isinstance(exc_info.value.detail, dict) + assert "resp-missing" in str(exc_info.value.detail["cause"]) + + @pytest.mark.asyncio + async def test_previous_response_id_same_as_last_returns_existing_conversation( + self, mocker: MockerFixture + ) -> None: + """When previous_response_id equals last_response_id, use existing conv.""" + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mocker.Mock() + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch("utils.endpoints.check_turn_existence", return_value=True) + + mock_turn = mocker.Mock(spec=UserTurn) + mock_turn.conversation_id = "conv-existing" + mocker.patch( + "utils.endpoints.retrieve_turn_by_response_id", + return_value=mock_turn, + ) + + mock_conv = mocker.Mock(spec=UserConversation) + mock_conv.id = "conv-existing" + mock_conv.last_response_id = "resp-123" # same as previous_response_id + 
mocker.patch( + "utils.endpoints.validate_and_retrieve_conversation", + return_value=mock_conv, + ) + mocker.patch( + "utils.endpoints.to_llama_stack_conversation_id", + return_value="conv_conv-existing", + ) + mock_create = mocker.patch( + "utils.endpoints.create_new_conversation", + new=mocker.AsyncMock(), + ) + + result = await endpoints.resolve_response_context( + user_id="user-1", + others_allowed=False, + conversation_id=None, + previous_response_id="resp-123", + generate_topic_summary=None, + ) + + assert result.conversation == "conv_conv-existing" + assert result.user_conversation is mock_conv + assert result.generate_topic_summary is False + mock_create.assert_not_called() + + @pytest.mark.asyncio + async def test_previous_response_id_fork_creates_new_conversation( + self, mocker: MockerFixture + ) -> None: + """When last_response_id differs from previous_response_id, fork to new conv.""" + mock_client = mocker.Mock() + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch("utils.endpoints.check_turn_existence", return_value=True) + + mock_turn = mocker.Mock(spec=UserTurn) + mock_turn.conversation_id = "conv-existing" + mocker.patch( + "utils.endpoints.retrieve_turn_by_response_id", + return_value=mock_turn, + ) + + mock_conv = mocker.Mock(spec=UserConversation) + mock_conv.id = "conv-existing" + mock_conv.last_response_id = "resp-latest" # fork: different from prev + mocker.patch( + "utils.endpoints.validate_and_retrieve_conversation", + return_value=mock_conv, + ) + mocker.patch( + "utils.endpoints.create_new_conversation", + new=mocker.AsyncMock(return_value="conv_new_fork"), + ) + + result = await endpoints.resolve_response_context( + user_id="user-1", + others_allowed=False, + conversation_id=None, + previous_response_id="resp-old", + generate_topic_summary=None, + ) + + assert result.conversation == "conv_new_fork" 
+ assert result.user_conversation is mock_conv + assert result.generate_topic_summary is True + + @pytest.mark.asyncio + async def test_previous_response_id_fork_respects_generate_topic_summary( + self, mocker: MockerFixture + ) -> None: + """Fork path uses request generate_topic_summary when provided.""" + mock_client = mocker.Mock() + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch("utils.endpoints.check_turn_existence", return_value=True) + + mock_turn = mocker.Mock(spec=UserTurn) + mock_turn.conversation_id = "conv-existing" + mocker.patch( + "utils.endpoints.retrieve_turn_by_response_id", + return_value=mock_turn, + ) + + mock_conv = mocker.Mock(spec=UserConversation) + mock_conv.id = "conv-existing" + mock_conv.last_response_id = "resp-latest" + mocker.patch( + "utils.endpoints.validate_and_retrieve_conversation", + return_value=mock_conv, + ) + mocker.patch( + "utils.endpoints.create_new_conversation", + new=mocker.AsyncMock(return_value="conv_new"), + ) + + result = await endpoints.resolve_response_context( + user_id="user-1", + others_allowed=False, + conversation_id=None, + previous_response_id="resp-old", + generate_topic_summary=False, + ) + + assert result.generate_topic_summary is False + + @pytest.mark.asyncio + async def test_no_context_creates_new_conversation( + self, mocker: MockerFixture + ) -> None: + """When neither conversation_id nor previous_response_id set, create new.""" + mock_client = mocker.Mock() + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch( + "utils.endpoints.create_new_conversation", + new=mocker.AsyncMock(return_value="conv_brand_new"), + ) + + result = await endpoints.resolve_response_context( + user_id="user-1", + others_allowed=False, + 
conversation_id=None, + previous_response_id=None, + generate_topic_summary=None, + ) + + assert result.conversation == "conv_brand_new" + assert result.user_conversation is None + assert result.generate_topic_summary is True + + @pytest.mark.asyncio + async def test_no_context_respects_generate_topic_summary( + self, mocker: MockerFixture + ) -> None: + """New conversation path uses generate_topic_summary when provided.""" + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mocker.Mock() + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch( + "utils.endpoints.create_new_conversation", + new=mocker.AsyncMock(return_value="conv_new"), + ) + + result = await endpoints.resolve_response_context( + user_id="user-1", + others_allowed=False, + conversation_id=None, + previous_response_id=None, + generate_topic_summary=False, + ) + + assert result.generate_topic_summary is False diff --git a/tests/unit/utils/test_query.py b/tests/unit/utils/test_query.py index 867a5c6c6..5c13a3a2d 100644 --- a/tests/unit/utils/test_query.py +++ b/tests/unit/utils/test_query.py @@ -407,6 +407,7 @@ def query_side_effect(*args: Any) -> Any: model_id="model1", provider_id="provider1", topic_summary="Topic", + response_id="resp_1", ) mock_session.add.assert_called() @@ -454,6 +455,7 @@ def query_side_effect(*args: Any) -> Any: model_id="new_model", provider_id="new_provider", topic_summary=None, + response_id="resp_1", ) assert existing_conv.last_used_model == "new_model" @@ -497,6 +499,7 @@ def query_side_effect(*args: Any) -> Any: model_id="model1", provider_id="provider1", topic_summary="Topic", + response_id="resp_1", ) # Verify that the turn number is incremented correctly diff --git a/tests/unit/utils/test_responses.py b/tests/unit/utils/test_responses.py index 6e4650408..79c8209e0 100644 --- a/tests/unit/utils/test_responses.py +++ b/tests/unit/utils/test_responses.py @@ -126,9 +126,8 @@ def make_content_part( 
("function_call", "assistant", "some text", ""), ("file_search_call", "assistant", "some text", ""), (None, "assistant", "some text", ""), - # User role messages are filtered out - return empty string - ("message", "user", "some text", ""), - # Valid assistant message with string content + # Message type extracts content regardless of role (input or output) + ("message", "user", "some text", "some text"), ("message", "assistant", "Hello, world!", "Hello, world!"), ("message", "assistant", "", ""), ], @@ -136,7 +135,7 @@ def make_content_part( "function_call_type_returns_empty", "file_search_call_type_returns_empty", "none_type_returns_empty", - "user_role_returns_empty", + "user_message_extracts_content", "valid_string_content", "empty_string_content", ], @@ -146,11 +145,7 @@ def test_extract_text_basic_cases( ) -> None: """Test basic extraction cases for different types, roles, and simple content. - Args: - item_type: Type of the output item - role: Role of the message - content: Content of the message - expected: Expected extracted text + Extraction works for both input and output items; role is not filtered. 
""" output_item = make_output_item(item_type=item_type, role=role, content=content) result = extract_text_from_response_item(output_item) # type: ignore[arg-type] @@ -306,8 +301,8 @@ def test_extract_text_from_response_items_filters_non_messages(self) -> None: result = extract_text_from_response_items([item1, item2]) # type: ignore[arg-type] assert result == "Valid message" - def test_extract_text_from_response_items_filters_user_messages(self) -> None: - """Test extract_text_from_response_items filters out user role messages.""" + def test_extract_text_from_response_items_includes_all_roles(self) -> None: + """Test extract_text_from_response_items extracts from all message roles.""" item1 = make_output_item( item_type="message", role="assistant", content="Assistant message" ) @@ -315,8 +310,8 @@ def test_extract_text_from_response_items_filters_user_messages(self) -> None: item_type="message", role="user", content="User message" ) result = extract_text_from_response_items([item1, item2]) # type: ignore[arg-type] - # User messages are filtered out - only assistant message is included - assert result == "Assistant message" + # All message items are included (generalizes for input and output) + assert result == "Assistant message User message" def test_extract_text_from_response_items_with_list_content(self) -> None: """Test extract_text_from_response_items with list-based content.""" diff --git a/tests/unit/utils/test_shields.py b/tests/unit/utils/test_shields.py index 333c96df0..5d68a73a2 100644 --- a/tests/unit/utils/test_shields.py +++ b/tests/unit/utils/test_shields.py @@ -1,8 +1,8 @@ """Unit tests for utils/shields.py functions.""" +from llama_stack_client import APIConnectionError, APIStatusError import pytest from fastapi import HTTPException, status -from llama_stack_client import APIConnectionError, APIStatusError from pytest_mock import MockerFixture from utils.shields import (