14 changes: 10 additions & 4 deletions docs/openapi.json
@@ -245,7 +245,7 @@
"models"
],
"summary": "Models Endpoint Handler",
"description": "Handle requests to the /models endpoint.\n\nProcess GET requests to the /models endpoint, returning a list of available\nmodels from the Llama Stack service.\n\nRaises:\n HTTPException: If unable to connect to the Llama Stack server or if\n model retrieval fails for any reason.\n\nReturns:\n ModelsResponse: An object containing the list of available models.",
"description": "Handle requests to the /models endpoint.\n\nProcess GET requests to the /models endpoint, returning a list of available\nmodels from the Llama Stack service.\n\nParameters:\n request: The incoming HTTP request.\n auth: Authentication tuple from the auth dependency.\n model_type: Optional filter to return only models matching this type.\n\nRaises:\n HTTPException: If unable to connect to the Llama Stack server or if\n model retrieval fails for any reason.\n\nReturns:\n ModelsResponse: An object containing the list of available models.",
"operationId": "models_endpoint_handler_v1_models_get",
"parameters": [
{
@@ -261,8 +261,14 @@
"type": "null"
}
],
"description": "Optional filter to return only models matching this type",
"examples": [
"llm",
"embeddings"
],
"title": "Model Type"
}
},
"description": "Optional filter to return only models matching this type"
}
],
"responses": {
@@ -4346,7 +4352,7 @@
],
"summary": "Handle A2A Jsonrpc",
"description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response",
"operationId": "handle_a2a_jsonrpc_a2a_post",
"operationId": "handle_a2a_jsonrpc_a2a_get",
"responses": {
"200": {
"description": "Successful Response",
@@ -4364,7 +4370,7 @@
],
"summary": "Handle A2A Jsonrpc",
"description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response",
"operationId": "handle_a2a_jsonrpc_a2a_post",
"operationId": "handle_a2a_jsonrpc_a2a_get",
"responses": {
"200": {
"description": "Successful Response",
7 changes: 6 additions & 1 deletion docs/openapi.md
@@ -249,6 +249,11 @@ Handle requests to the /models endpoint.
Process GET requests to the /models endpoint, returning a list of available
models from the Llama Stack service.

Parameters:
request: The incoming HTTP request.
auth: Authentication tuple from the auth dependency.
model_type: Optional filter to return only models matching this type.

Raises:
HTTPException: If unable to connect to the Llama Stack server or if
model retrieval fails for any reason.
@@ -262,7 +267,7 @@ Returns:

| Name | Type | Required | Description |
|------|------|----------|-------------|
| model_type | | False | |
| model_type | | False | Optional filter to return only models matching this type |


### ✅ Responses
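For orientation, here is a hedged sketch of how a client might exercise the query parameter documented in the table above. The base URL, port, and the use of httpx are assumptions for illustration and are not part of this PR.

```python
# Illustrative client call only; host, port, and the httpx dependency are assumed.
import httpx

# GET /v1/models with the optional model_type filter; omit the param to list all models.
response = httpx.get(
    "http://localhost:8080/v1/models",  # assumed base URL
    params={"model_type": "llm"},
)
response.raise_for_status()
for model in response.json()["models"]:
    print(model["identifier"], model["model_type"])
```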
18 changes: 13 additions & 5 deletions src/app/endpoints/models.py
@@ -1,9 +1,9 @@
"""Handler for REST API call to list available models."""

import logging
from typing import Annotated, Any, Optional
from typing import Annotated, Any

from fastapi import APIRouter, HTTPException, Request
from fastapi import APIRouter, HTTPException, Request, Query
from fastapi.params import Depends
from llama_stack_client import APIConnectionError

@@ -13,6 +13,7 @@
from client import AsyncLlamaStackClientHolder
from configuration import configuration
from models.config import Action
from models.requests import ModelFilter
from models.responses import (
ForbiddenResponse,
InternalServerErrorResponse,
@@ -76,14 +77,19 @@ def parse_llama_stack_model(model: Any) -> dict[str, Any]:
async def models_endpoint_handler(
request: Request,
auth: Annotated[AuthTuple, Depends(get_auth_dependency())],
model_type: Optional[str] = None,
model_type: Annotated[ModelFilter, Query()],
) -> ModelsResponse:
"""
Handle requests to the /models endpoint.

Process GET requests to the /models endpoint, returning a list of available
models from the Llama Stack service.

Parameters:
request: The incoming HTTP request.
auth: Authentication tuple from the auth dependency.
model_type: Optional filter to return only models matching this type.

Raises:
HTTPException: If unable to connect to the Llama Stack server or if
model retrieval fails for any reason.
@@ -112,9 +118,11 @@ async def models_endpoint_handler(
parsed_models = [parse_llama_stack_model(model) for model in models]

# optional filtering by model type
if model_type is not None:
if model_type.model_type is not None:
parsed_models = [
model for model in parsed_models if model["model_type"] == model_type
model
for model in parsed_models
if model["model_type"] == model_type.model_type
]

return ModelsResponse(models=parsed_models)
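As background on the mechanism used in the handler above: a minimal, self-contained sketch (not this repository's code) of how FastAPI 0.115+ maps a Pydantic model annotated with Query() onto individual query parameters. The app, route, and class names here are illustrative.

```python
# Minimal sketch of FastAPI's query-parameter-model binding (requires FastAPI >= 0.115).
from typing import Annotated, Optional

from fastapi import FastAPI, Query
from pydantic import BaseModel, Field

app = FastAPI()


class ExampleFilter(BaseModel):
    """Each field of the model becomes its own query parameter."""

    model_config = {"extra": "forbid"}  # unknown query keys are rejected with a 422
    model_type: Optional[str] = Field(
        None, description="Optional filter", examples=["llm", "embeddings"]
    )


@app.get("/models")
async def list_models(
    filters: Annotated[ExampleFilter, Query()],
) -> dict[str, Optional[str]]:
    # /models?model_type=llm  -> ExampleFilter(model_type="llm")
    # /models                 -> ExampleFilter(model_type=None)
    return {"model_type": filters.model_type}
```

Binding the whole filter object, rather than a bare string, keeps the handler signature stable if more filter fields are added later.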
15 changes: 15 additions & 0 deletions src/models/requests.py
@@ -521,3 +521,18 @@ class ConversationUpdateRequest(BaseModel):

# Reject unknown fields
model_config = {"extra": "forbid"}


class ModelFilter(BaseModel):
"""Model representing a query parameter to select models by its type.

Attributes:
model_type: Required model type, such as 'llm', 'embeddings' etc.
"""
Comment on lines +526 to +531
⚠️ Potential issue | 🟡 Minor

Docstring says "Required" but the field is Optional.

The Attributes section states "Required model type" but model_type is Optional[str] with a default of None. The field description also says "Optional filter." Update the docstring for consistency.

Proposed fix
-    """Model representing a query parameter to select models by its type.
+    """Model representing a query parameter to filter models by type.
 
     Attributes:
-        model_type: Required model type, such as 'llm', 'embeddings' etc.
+        model_type: Optional model type filter, such as 'llm', 'embeddings' etc.
     """
🤖 Prompt for AI Agents
In `@src/models/requests.py` around lines 526 - 531, The docstring for the
ModelFilter class is inconsistent: it calls model_type "Required" but the field
model_type is Optional[str] with default None and described elsewhere as an
optional filter; update the class docstring (ModelFilter) Attributes section so
model_type is described as an optional filter (e.g., "Optional model type, such
as 'llm', 'embeddings', etc.") and remove or replace the word "Required" to
reflect the Optional[str] typing and default None.


model_config = {"extra": "forbid"}
model_type: Optional[str] = Field(
None,
description="Optional filter to return only models matching this type",
examples=["llm", "embeddings"],
)
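A quick, hedged illustration of how a filter model shaped like this behaves as plain Pydantic, independent of FastAPI; the snippet below is illustrative and not part of the PR.

```python
# Standalone behaviour of a query-filter model shaped like ModelFilter (illustrative).
from typing import Optional

from pydantic import BaseModel, Field, ValidationError


class ExampleModelFilter(BaseModel):
    model_config = {"extra": "forbid"}
    model_type: Optional[str] = Field(None, examples=["llm", "embeddings"])


print(ExampleModelFilter())                   # model_type=None, i.e. no filtering
print(ExampleModelFilter(model_type="llm"))   # model_type='llm'

try:
    ExampleModelFilter(model_typo="llm")      # misspelled key is rejected
except ValidationError as exc:
    print(exc.errors()[0]["type"])            # "extra_forbidden"
```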
85 changes: 54 additions & 31 deletions tests/unit/app/endpoints/test_models.py
@@ -6,7 +6,9 @@
from fastapi import HTTPException, Request, status
from llama_stack_client import APIConnectionError
from pytest_mock import MockerFixture
from pytest_subtests import SubTests

from models.requests import ModelFilter
from app.endpoints.models import models_endpoint_handler
from authentication.interface import AuthTuple
from configuration import AppConfig
@@ -48,7 +50,9 @@ async def test_models_endpoint_handler_configuration_not_loaded(
auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")

with pytest.raises(HTTPException) as e:
await models_endpoint_handler(request=request, auth=auth)
await models_endpoint_handler(
request=request, auth=auth, model_type=ModelFilter(model_type=None)
)
assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR
assert e.value.detail["response"] == "Configuration is not loaded" # type: ignore
Comment on lines 52 to 57
⚠️ Potential issue | 🟠 Major

Assertions inside the pytest.raises block are unreachable.

Lines 56-57 are inside the with pytest.raises(HTTPException) block. When the await on line 53 raises HTTPException, the context manager catches it and control exits the with block — the assertions on lines 56-57 never execute. This is a pre-existing issue, but since you're modifying this block, consider moving the assertions outside:

Proposed fix
     with pytest.raises(HTTPException) as e:
         await models_endpoint_handler(
             request=request, auth=auth, model_type=ModelFilter(model_type=None)
         )
-        assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR
-        assert e.value.detail["response"] == "Configuration is not loaded"  # type: ignore
+    assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR
+    assert e.value.detail["response"] == "Configuration is not loaded"  # type: ignore
🤖 Prompt for AI Agents
In `@tests/unit/app/endpoints/test_models.py` around lines 52 - 57, The assertions
inside the pytest.raises context are unreachable; move the two asserts that
check e.value.status_code and e.value.detail outside the with block so they run
after the exception is caught. Keep the with pytest.raises(HTTPException) as e:
wrapper around the await models_endpoint_handler(...) call (which uses
ModelFilter(model_type=None)), then dedent the two assertions so they execute
against e.value after the with block completes.
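To make the reviewer's point concrete, here is a tiny standalone pytest sketch (not taken from this PR) contrasting the unreachable-assertion pattern with the corrected one.

```python
# Standalone illustration of the review comment above; not part of this PR.
import pytest


def boom() -> None:
    raise ValueError("bad input")


def test_assertions_inside_raises_never_run() -> None:
    with pytest.raises(ValueError) as exc_info:
        boom()
        # boom() raised, so control has already left the with-block;
        # this assertion is dead code and would hide a wrong message.
        assert str(exc_info.value) == "wrong message"


def test_assertions_after_raises_do_run() -> None:
    with pytest.raises(ValueError) as exc_info:
        boom()
    # The context manager has captured the exception; assert on it here.
    assert str(exc_info.value) == "bad input"
```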


@@ -115,7 +119,9 @@ async def test_models_endpoint_handler_configuration_loaded(
auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")

with pytest.raises(HTTPException) as e:
await models_endpoint_handler(request=request, auth=auth)
await models_endpoint_handler(
request=request, auth=auth, model_type=ModelFilter(model_type=None)
)
assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE
assert e.value.detail["response"] == "Unable to connect to Llama Stack" # type: ignore

@@ -173,7 +179,9 @@ async def test_models_endpoint_handler_unable_to_retrieve_models_list(
# Authorization tuple required by URL endpoint handler
auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")

response = await models_endpoint_handler(request=request, auth=auth)
response = await models_endpoint_handler(
request=request, auth=auth, model_type=ModelFilter(model_type=None)
)
assert response is not None


Expand Down Expand Up @@ -230,7 +238,7 @@ async def test_models_endpoint_handler_model_type_query_parameter(
# Authorization tuple required by URL endpoint handler
auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")
response = await models_endpoint_handler(
request=request, auth=auth, model_type="llm"
request=request, auth=auth, model_type=ModelFilter(model_type="llm")
)
assert response is not None

Expand Down Expand Up @@ -293,7 +301,9 @@ async def test_models_endpoint_handler_model_list_retrieved(
# Authorization tuple required by URL endpoint handler
auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")

response = await models_endpoint_handler(request=request, auth=auth)
response = await models_endpoint_handler(
request=request, auth=auth, model_type=ModelFilter(model_type=None)
)
assert response is not None
assert len(response.models) == 4
assert response.models[0]["identifier"] == "model1"
@@ -309,6 +319,7 @@
@pytest.mark.asyncio
async def test_models_endpoint_handler_model_list_retrieved_with_query_parameter(
mocker: MockerFixture,
subtests: SubTests,
) -> None:
"""Test the models endpoint handler if model list can be retrieved."""
mock_authorization_resolvers(mocker)
@@ -364,31 +375,41 @@ async def test_models_endpoint_handler_model_list_retrieved_with_query_parameter
# Authorization tuple required by URL endpoint handler
auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")

response = await models_endpoint_handler(
request=request, auth=auth, model_type="llm"
)
assert response is not None
assert len(response.models) == 2
assert response.models[0]["identifier"] == "model1"
assert response.models[0]["model_type"] == "llm"
assert response.models[1]["identifier"] == "model3"
assert response.models[1]["model_type"] == "llm"

response = await models_endpoint_handler(
request=request, auth=auth, model_type="embedding"
)
assert response is not None
assert len(response.models) == 2
assert response.models[0]["identifier"] == "model2"
assert response.models[0]["model_type"] == "embedding"
assert response.models[1]["identifier"] == "model4"
assert response.models[1]["model_type"] == "embedding"

response = await models_endpoint_handler(
request=request, auth=auth, model_type="xyzzy"
)
assert response is not None
assert len(response.models) == 0
with subtests.test(msg="Model type = 'llm'"):
response = await models_endpoint_handler(
request=request, auth=auth, model_type=ModelFilter(model_type="llm")
)
assert response is not None
assert len(response.models) == 2
assert response.models[0]["identifier"] == "model1"
assert response.models[0]["model_type"] == "llm"
assert response.models[1]["identifier"] == "model3"
assert response.models[1]["model_type"] == "llm"

with subtests.test(msg="Model type = 'embedding'"):
response = await models_endpoint_handler(
request=request, auth=auth, model_type=ModelFilter(model_type="embedding")
)
assert response is not None
assert len(response.models) == 2
assert response.models[0]["identifier"] == "model2"
assert response.models[0]["model_type"] == "embedding"
assert response.models[1]["identifier"] == "model4"
assert response.models[1]["model_type"] == "embedding"

with subtests.test(msg="Model type = 'xyzzy'"):
response = await models_endpoint_handler(
request=request, auth=auth, model_type=ModelFilter(model_type="xyzzy")
)
assert response is not None
assert len(response.models) == 0

with subtests.test(msg="Model type is empty string"):
response = await models_endpoint_handler(
request=request, auth=auth, model_type=ModelFilter(model_type="")
)
assert response is not None
assert len(response.models) == 0


@pytest.mark.asyncio
@@ -445,7 +466,9 @@ async def test_models_endpoint_llama_stack_connection_error(
auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")

with pytest.raises(HTTPException) as e:
await models_endpoint_handler(request=request, auth=auth)
await models_endpoint_handler(
request=request, auth=auth, model_type=ModelFilter(model_type=None)
)
assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE
assert e.value.detail["response"] == "Unable to connect to Llama Stack" # type: ignore
Comment on lines 468 to 473
⚠️ Potential issue | 🟠 Major

Same unreachable-assertions bug as above.

Lines 472-473 are inside the with pytest.raises block and will never execute. Move them outside the with block, consistent with the correct pattern used in test_models_endpoint_handler_configuration_loaded (lines 121-126).

Proposed fix
     with pytest.raises(HTTPException) as e:
         await models_endpoint_handler(
             request=request, auth=auth, model_type=ModelFilter(model_type=None)
         )
-        assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE
-        assert e.value.detail["response"] == "Unable to connect to Llama Stack"  # type: ignore
-        assert "Unable to connect to Llama Stack" in e.value.detail["cause"]  # type: ignore
+    assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE
+    assert e.value.detail["response"] == "Unable to connect to Llama Stack"  # type: ignore
+    assert "Unable to connect to Llama Stack" in e.value.detail["cause"]  # type: ignore
🤖 Prompt for AI Agents
In `@tests/unit/app/endpoints/test_models.py` around lines 468 - 473, The two
assertions that check the raised HTTPException's status and detail are
incorrectly placed inside the pytest.raises context and therefore never
executed; after calling models_endpoint_handler inside the with
pytest.raises(HTTPException) as e: block, move the asserts that reference
e.value (status_code and detail["response"]) to immediately after the with block
so they run against the captured exception (reference: models_endpoint_handler
and the pytest.raises usage in this test).

assert "Unable to connect to Llama Stack" in e.value.detail["cause"] # type: ignore