From 82c9ae3239648889a1a08ff4a6185211142442c8 Mon Sep 17 00:00:00 2001
From: Pavel Tisnovsky
Date: Wed, 11 Feb 2026 09:05:47 +0100
Subject: [PATCH 1/3] Better query parameter definition

---
 src/app/endpoints/models.py | 18 +++++++++++++-----
 src/models/requests.py      | 15 +++++++++++++++
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/src/app/endpoints/models.py b/src/app/endpoints/models.py
index 5176bbda..bfd52270 100644
--- a/src/app/endpoints/models.py
+++ b/src/app/endpoints/models.py
@@ -1,9 +1,9 @@
 """Handler for REST API call to list available models."""

 import logging
-from typing import Annotated, Any, Optional
+from typing import Annotated, Any

-from fastapi import APIRouter, HTTPException, Request
+from fastapi import APIRouter, HTTPException, Request, Query
 from fastapi.params import Depends
 from llama_stack_client import APIConnectionError

@@ -13,6 +13,7 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from models.config import Action
+from models.requests import ModelFilter
 from models.responses import (
     ForbiddenResponse,
     InternalServerErrorResponse,
@@ -76,7 +77,7 @@ def parse_llama_stack_model(model: Any) -> dict[str, Any]:
 async def models_endpoint_handler(
     request: Request,
     auth: Annotated[AuthTuple, Depends(get_auth_dependency())],
-    model_type: Optional[str] = None,
+    model_type: Annotated[ModelFilter, Query()],
 ) -> ModelsResponse:
     """
     Handle requests to the /models endpoint.
@@ -84,6 +85,11 @@ async def models_endpoint_handler(
     Process GET requests to the /models endpoint, returning a list of available
     models from the Llama Stack service.

+    Parameters:
+        request: The incoming HTTP request.
+        auth: Authentication tuple from the auth dependency.
+        model_type: Optional filter to return only models matching this type.
+
     Raises:
         HTTPException: If unable to connect to the Llama Stack server or if
             model retrieval fails for any reason.
@@ -112,9 +118,11 @@ async def models_endpoint_handler(
         parsed_models = [parse_llama_stack_model(model) for model in models]

         # optional filtering by model type
-        if model_type is not None:
+        if model_type.model_type is not None:
             parsed_models = [
-                model for model in parsed_models if model["model_type"] == model_type
+                model
+                for model in parsed_models
+                if model["model_type"] == model_type.model_type
             ]

         return ModelsResponse(models=parsed_models)
diff --git a/src/models/requests.py b/src/models/requests.py
index 18e5b4b6..38bdc9ac 100644
--- a/src/models/requests.py
+++ b/src/models/requests.py
@@ -521,3 +521,18 @@ class ConversationUpdateRequest(BaseModel):

     # Reject unknown fields
     model_config = {"extra": "forbid"}
+
+
+class ModelFilter(BaseModel):
+    """Model representing a query parameter to select models by type.
+
+    Attributes:
+        model_type: Optional model type, such as 'llm', 'embedding', etc.
+ """ + + model_config = {"extra": "forbid"} + model_type: Optional[str] = Field( + None, + description="Optional filter to return only models matching this type", + examples=["llm", "embeddings"], + ) From bbe2dfabe1ba9c625116ba71d57cf22672314be6 Mon Sep 17 00:00:00 2001 From: Pavel Tisnovsky Date: Wed, 11 Feb 2026 09:06:10 +0100 Subject: [PATCH 2/3] Updated unit tests accordingly --- tests/unit/app/endpoints/test_models.py | 85 ++++++++++++++++--------- 1 file changed, 54 insertions(+), 31 deletions(-) diff --git a/tests/unit/app/endpoints/test_models.py b/tests/unit/app/endpoints/test_models.py index af81aa74..39bd6167 100644 --- a/tests/unit/app/endpoints/test_models.py +++ b/tests/unit/app/endpoints/test_models.py @@ -6,7 +6,9 @@ from fastapi import HTTPException, Request, status from llama_stack_client import APIConnectionError from pytest_mock import MockerFixture +from pytest_subtests import SubTests +from models.requests import ModelFilter from app.endpoints.models import models_endpoint_handler from authentication.interface import AuthTuple from configuration import AppConfig @@ -48,7 +50,9 @@ async def test_models_endpoint_handler_configuration_not_loaded( auth: AuthTuple = ("test_user_id", "test_user", True, "test_token") with pytest.raises(HTTPException) as e: - await models_endpoint_handler(request=request, auth=auth) + await models_endpoint_handler( + request=request, auth=auth, model_type=ModelFilter(model_type=None) + ) assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR assert e.value.detail["response"] == "Configuration is not loaded" # type: ignore @@ -115,7 +119,9 @@ async def test_models_endpoint_handler_configuration_loaded( auth: AuthTuple = ("test_user_id", "test_user", True, "test_token") with pytest.raises(HTTPException) as e: - await models_endpoint_handler(request=request, auth=auth) + await models_endpoint_handler( + request=request, auth=auth, model_type=ModelFilter(model_type=None) + ) assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE assert e.value.detail["response"] == "Unable to connect to Llama Stack" # type: ignore @@ -173,7 +179,9 @@ async def test_models_endpoint_handler_unable_to_retrieve_models_list( # Authorization tuple required by URL endpoint handler auth: AuthTuple = ("test_user_id", "test_user", True, "test_token") - response = await models_endpoint_handler(request=request, auth=auth) + response = await models_endpoint_handler( + request=request, auth=auth, model_type=ModelFilter(model_type=None) + ) assert response is not None @@ -230,7 +238,7 @@ async def test_models_endpoint_handler_model_type_query_parameter( # Authorization tuple required by URL endpoint handler auth: AuthTuple = ("test_user_id", "test_user", True, "test_token") response = await models_endpoint_handler( - request=request, auth=auth, model_type="llm" + request=request, auth=auth, model_type=ModelFilter(model_type="llm") ) assert response is not None @@ -293,7 +301,9 @@ async def test_models_endpoint_handler_model_list_retrieved( # Authorization tuple required by URL endpoint handler auth: AuthTuple = ("test_user_id", "test_user", True, "test_token") - response = await models_endpoint_handler(request=request, auth=auth) + response = await models_endpoint_handler( + request=request, auth=auth, model_type=ModelFilter(model_type=None) + ) assert response is not None assert len(response.models) == 4 assert response.models[0]["identifier"] == "model1" @@ -309,6 +319,7 @@ async def test_models_endpoint_handler_model_list_retrieved( 
 @pytest.mark.asyncio
 async def test_models_endpoint_handler_model_list_retrieved_with_query_parameter(
     mocker: MockerFixture,
+    subtests: SubTests,
 ) -> None:
     """Test the models endpoint handler if model list can be retrieved."""
     mock_authorization_resolvers(mocker)
@@ -364,31 +375,41 @@ async def test_models_endpoint_handler_model_list_retrieved_with_query_parameter

     # Authorization tuple required by URL endpoint handler
     auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")
-    response = await models_endpoint_handler(
-        request=request, auth=auth, model_type="llm"
-    )
-    assert response is not None
-    assert len(response.models) == 2
-    assert response.models[0]["identifier"] == "model1"
-    assert response.models[0]["model_type"] == "llm"
-    assert response.models[1]["identifier"] == "model3"
-    assert response.models[1]["model_type"] == "llm"
-
-    response = await models_endpoint_handler(
-        request=request, auth=auth, model_type="embedding"
-    )
-    assert response is not None
-    assert len(response.models) == 2
-    assert response.models[0]["identifier"] == "model2"
-    assert response.models[0]["model_type"] == "embedding"
-    assert response.models[1]["identifier"] == "model4"
-    assert response.models[1]["model_type"] == "embedding"
-
-    response = await models_endpoint_handler(
-        request=request, auth=auth, model_type="xyzzy"
-    )
-    assert response is not None
-    assert len(response.models) == 0
+    with subtests.test(msg="Model type = 'llm'"):
+        response = await models_endpoint_handler(
+            request=request, auth=auth, model_type=ModelFilter(model_type="llm")
+        )
+        assert response is not None
+        assert len(response.models) == 2
+        assert response.models[0]["identifier"] == "model1"
+        assert response.models[0]["model_type"] == "llm"
+        assert response.models[1]["identifier"] == "model3"
+        assert response.models[1]["model_type"] == "llm"
+
+    with subtests.test(msg="Model type = 'embedding'"):
+        response = await models_endpoint_handler(
+            request=request, auth=auth, model_type=ModelFilter(model_type="embedding")
+        )
+        assert response is not None
+        assert len(response.models) == 2
+        assert response.models[0]["identifier"] == "model2"
+        assert response.models[0]["model_type"] == "embedding"
+        assert response.models[1]["identifier"] == "model4"
+        assert response.models[1]["model_type"] == "embedding"
+
+    with subtests.test(msg="Model type = 'xyzzy'"):
+        response = await models_endpoint_handler(
+            request=request, auth=auth, model_type=ModelFilter(model_type="xyzzy")
+        )
+        assert response is not None
+        assert len(response.models) == 0
+
+    with subtests.test(msg="Model type is empty string"):
+        response = await models_endpoint_handler(
+            request=request, auth=auth, model_type=ModelFilter(model_type="")
+        )
+        assert response is not None
+        assert len(response.models) == 0


 @pytest.mark.asyncio
@@ -445,7 +466,9 @@ async def test_models_endpoint_llama_stack_connection_error(
     auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")

     with pytest.raises(HTTPException) as e:
-        await models_endpoint_handler(request=request, auth=auth)
+        await models_endpoint_handler(
+            request=request, auth=auth, model_type=ModelFilter(model_type=None)
+        )
     assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE
     assert e.value.detail["response"] == "Unable to connect to Llama Stack"  # type: ignore
     assert "Unable to connect to Llama Stack" in e.value.detail["cause"]  # type: ignore

From 80d91bc703f2b59d4af4740604b5774c7727207b Mon Sep 17 00:00:00 2001
From: Pavel Tisnovsky
Date: Wed, 11 Feb 2026 09:06:29 +0100
Subject: [PATCH 3/3] Updated OpenAPI specification

---
 docs/openapi.json | 14 ++++++++++----
 docs/openapi.md   |  7 ++++++-
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/docs/openapi.json b/docs/openapi.json
index c5ca6721..714f3823 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -245,7 +245,7 @@
                     "models"
                 ],
                 "summary": "Models Endpoint Handler",
-                "description": "Handle requests to the /models endpoint.\n\nProcess GET requests to the /models endpoint, returning a list of available\nmodels from the Llama Stack service.\n\nRaises:\n    HTTPException: If unable to connect to the Llama Stack server or if\n        model retrieval fails for any reason.\n\nReturns:\n    ModelsResponse: An object containing the list of available models.",
+                "description": "Handle requests to the /models endpoint.\n\nProcess GET requests to the /models endpoint, returning a list of available\nmodels from the Llama Stack service.\n\nParameters:\n    request: The incoming HTTP request.\n    auth: Authentication tuple from the auth dependency.\n    model_type: Optional filter to return only models matching this type.\n\nRaises:\n    HTTPException: If unable to connect to the Llama Stack server or if\n        model retrieval fails for any reason.\n\nReturns:\n    ModelsResponse: An object containing the list of available models.",
                 "operationId": "models_endpoint_handler_v1_models_get",
                 "parameters": [
                     {
@@ -261,8 +261,14 @@
                                     "type": "null"
                                 }
                             ],
+                            "description": "Optional filter to return only models matching this type",
+                            "examples": [
+                                "llm",
+                                "embedding"
+                            ],
                             "title": "Model Type"
-                        }
+                        },
+                        "description": "Optional filter to return only models matching this type"
                     }
                 ],
                 "responses": {
@@ -4346,7 +4352,7 @@
                 ],
                 "summary": "Handle A2A Jsonrpc",
                 "description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n    request: FastAPI request object\n    auth: Authentication tuple\n    mcp_headers: MCP headers for context propagation\n\nReturns:\n    JSON-RPC response or streaming response",
-                "operationId": "handle_a2a_jsonrpc_a2a_post",
+                "operationId": "handle_a2a_jsonrpc_a2a_get",
                 "responses": {
                     "200": {
                         "description": "Successful Response",
@@ -4364,7 +4370,7 @@
                 ],
                 "summary": "Handle A2A Jsonrpc",
                 "description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n    request: FastAPI request object\n    auth: Authentication tuple\n    mcp_headers: MCP headers for context propagation\n\nReturns:\n    JSON-RPC response or streaming response",
-                "operationId": "handle_a2a_jsonrpc_a2a_post",
+                "operationId": "handle_a2a_jsonrpc_a2a_get",
                 "responses": {
                     "200": {
                         "description": "Successful Response",
diff --git a/docs/openapi.md b/docs/openapi.md
index be40a868..09ba5f6e 100644
--- a/docs/openapi.md
+++ b/docs/openapi.md
@@ -249,6 +249,11 @@ Handle requests to the /models endpoint.
 Process GET requests to the /models endpoint, returning a list of available
 models from the Llama Stack service.

+Parameters:
+    request: The incoming HTTP request.
+    auth: Authentication tuple from the auth dependency.
+    model_type: Optional filter to return only models matching this type.
+
 Raises:
     HTTPException: If unable to connect to the Llama Stack server or if
         model retrieval fails for any reason.
@@ -262,7 +267,7 @@ Returns:

 | Name | Type | Required | Description |
 |------|------|----------|-------------|
-| model_type |  | False |  |
+| model_type |  | False | Optional filter to return only models matching this type |

 ### ✅ Responses
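
With all three patches applied, GET /v1/models accepts an optional model_type
query parameter: FastAPI's Query() support for Pydantic models binds it to
ModelFilter before the handler runs, and because ModelFilter sets
extra = "forbid", unrecognized query parameters are rejected with a 422
instead of being silently ignored. A minimal sketch of exercising the
endpoint follows (assumptions: the FastAPI application object lives in
app.main, and the configured auth dependency admits the request; the unit
tests above mock it instead):

    # Sketch only: the app.main import is illustrative; point it at wherever
    # the service actually constructs its FastAPI application.
    from fastapi.testclient import TestClient

    from app.main import app  # hypothetical entry point, not part of these patches

    client = TestClient(app)

    # No filter: every model reported by Llama Stack is returned.
    all_models = client.get("/v1/models").json()["models"]

    # ?model_type=llm is parsed into ModelFilter(model_type="llm") by Query().
    llms = client.get("/v1/models", params={"model_type": "llm"}).json()["models"]
    assert all(m["model_type"] == "llm" for m in llms)

    # extra="forbid" on ModelFilter turns unknown query parameters into a 422.
    assert client.get("/v1/models", params={"bogus": "x"}).status_code == 422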