14 changes: 10 additions & 4 deletions docs/openapi.json
@@ -245,7 +245,7 @@
"models"
],
"summary": "Models Endpoint Handler",
"description": "Handle requests to the /models endpoint.\n\nProcess GET requests to the /models endpoint, returning a list of available\nmodels from the Llama Stack service.\n\nRaises:\n HTTPException: If unable to connect to the Llama Stack server or if\n model retrieval fails for any reason.\n\nReturns:\n ModelsResponse: An object containing the list of available models.",
"description": "Handle requests to the /models endpoint.\n\nProcess GET requests to the /models endpoint, returning a list of available\nmodels from the Llama Stack service.\n\nParameters:\n request: The incoming HTTP request.\n auth: Authentication tuple from the auth dependency.\n model_type: Optional filter to return only models matching this type.\n\nRaises:\n HTTPException: If unable to connect to the Llama Stack server or if\n model retrieval fails for any reason.\n\nReturns:\n ModelsResponse: An object containing the list of available models.",
"operationId": "models_endpoint_handler_v1_models_get",
"parameters": [
{
@@ -261,8 +261,14 @@
"type": "null"
}
],
"description": "Optional filter to return only models matching this type",
"examples": [
"llm",
"embeddings"
],
"title": "Model Type"
}
},
"description": "Optional filter to return only models matching this type"
}
],
"responses": {
@@ -4346,7 +4352,7 @@
],
"summary": "Handle A2A Jsonrpc",
"description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response",
"operationId": "handle_a2a_jsonrpc_a2a_post",
"operationId": "handle_a2a_jsonrpc_a2a_get",
"responses": {
"200": {
"description": "Successful Response",
@@ -4364,7 +4370,7 @@
],
"summary": "Handle A2A Jsonrpc",
"description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response",
"operationId": "handle_a2a_jsonrpc_a2a_post",
"operationId": "handle_a2a_jsonrpc_a2a_get",
"responses": {
"200": {
"description": "Successful Response",
7 changes: 6 additions & 1 deletion docs/openapi.md
@@ -249,6 +249,11 @@ Handle requests to the /models endpoint.
Process GET requests to the /models endpoint, returning a list of available
models from the Llama Stack service.

Parameters:
request: The incoming HTTP request.
auth: Authentication tuple from the auth dependency.
model_type: Optional filter to return only models matching this type.

Raises:
HTTPException: If unable to connect to the Llama Stack server or if
model retrieval fails for any reason.
@@ -262,7 +267,7 @@ Returns:

| Name | Type | Required | Description |
|------|------|----------|-------------|
| model_type | | False | |
| model_type | | False | Optional filter to return only models matching this type |


### ✅ Responses
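For orientation, here is a hedged sketch of how a client might exercise the query parameter documented in the table above. The base URL, port, and the use of httpx are assumptions for illustration and are not part of this PR.

```python
# Illustrative client call only; host, port, and the httpx dependency are assumed.
import httpx

# GET /v1/models with the optional model_type filter; omit the param to list all models.
response = httpx.get(
    "http://localhost:8080/v1/models",  # assumed base URL
    params={"model_type": "llm"},
)
response.raise_for_status()
for model in response.json()["models"]:
    print(model["identifier"], model["model_type"])
```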
18 changes: 13 additions & 5 deletions src/app/endpoints/models.py
@@ -1,9 +1,9 @@
"""Handler for REST API call to list available models."""

import logging
from typing import Annotated, Any, Optional
from typing import Annotated, Any

from fastapi import APIRouter, HTTPException, Request
from fastapi import APIRouter, HTTPException, Request, Query
from fastapi.params import Depends
from llama_stack_client import APIConnectionError

@@ -13,6 +13,7 @@
from client import AsyncLlamaStackClientHolder
from configuration import configuration
from models.config import Action
from models.requests import ModelFilter
from models.responses import (
ForbiddenResponse,
InternalServerErrorResponse,
@@ -76,14 +77,19 @@ def parse_llama_stack_model(model: Any) -> dict[str, Any]:
async def models_endpoint_handler(
request: Request,
auth: Annotated[AuthTuple, Depends(get_auth_dependency())],
model_type: Optional[str] = None,
model_type: Annotated[ModelFilter, Query()],
) -> ModelsResponse:
"""
Handle requests to the /models endpoint.

Process GET requests to the /models endpoint, returning a list of available
models from the Llama Stack service.

Parameters:
request: The incoming HTTP request.
auth: Authentication tuple from the auth dependency.
model_type: Optional filter to return only models matching this type.

Raises:
HTTPException: If unable to connect to the Llama Stack server or if
model retrieval fails for any reason.
@@ -112,9 +118,11 @@ async def models_endpoint_handler(
parsed_models = [parse_llama_stack_model(model) for model in models]

# optional filtering by model type
if model_type is not None:
if model_type.model_type is not None:
parsed_models = [
model for model in parsed_models if model["model_type"] == model_type
model
for model in parsed_models
if model["model_type"] == model_type.model_type
]

return ModelsResponse(models=parsed_models)
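As background on the mechanism used in the handler above: a minimal, self-contained sketch (not this repository's code) of how FastAPI 0.115+ maps a Pydantic model annotated with Query() onto individual query parameters. The app, route, and class names here are illustrative.

```python
# Minimal sketch of FastAPI's query-parameter-model binding (requires FastAPI >= 0.115).
from typing import Annotated, Optional

from fastapi import FastAPI, Query
from pydantic import BaseModel, Field

app = FastAPI()


class ExampleFilter(BaseModel):
    """Each field of the model becomes its own query parameter."""

    model_config = {"extra": "forbid"}  # unknown query keys are rejected with a 422
    model_type: Optional[str] = Field(
        None, description="Optional filter", examples=["llm", "embeddings"]
    )


@app.get("/models")
async def list_models(
    filters: Annotated[ExampleFilter, Query()],
) -> dict[str, Optional[str]]:
    # /models?model_type=llm  -> ExampleFilter(model_type="llm")
    # /models                 -> ExampleFilter(model_type=None)
    return {"model_type": filters.model_type}
```

Binding the whole filter object, rather than a bare string, keeps the handler signature stable if more filter fields are added later.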
15 changes: 15 additions & 0 deletions src/models/requests.py
@@ -521,3 +521,18 @@ class ConversationUpdateRequest(BaseModel):

# Reject unknown fields
model_config = {"extra": "forbid"}


class ModelFilter(BaseModel):
"""Model representing a query parameter to select models by its type.

Attributes:
model_type: Required model type, such as 'llm', 'embeddings' etc.
"""
Comment on lines +526 to +531
⚠️ Potential issue | 🟡 Minor

Docstring says "Required" but the field is Optional.

The Attributes section states "Required model type" but model_type is Optional[str] with a default of None. The field description also says "Optional filter." Update the docstring for consistency.

Proposed fix
-    """Model representing a query parameter to select models by its type.
+    """Model representing a query parameter to filter models by type.
 
     Attributes:
-        model_type: Required model type, such as 'llm', 'embeddings' etc.
+        model_type: Optional model type filter, such as 'llm', 'embeddings' etc.
     """
🤖 Prompt for AI Agents
In `@src/models/requests.py` around lines 526 - 531, The docstring for the
ModelFilter class is inconsistent: it calls model_type "Required" but the field
model_type is Optional[str] with default None and described elsewhere as an
optional filter; update the class docstring (ModelFilter) Attributes section so
model_type is described as an optional filter (e.g., "Optional model type, such
as 'llm', 'embeddings', etc.") and remove or replace the word "Required" to
reflect the Optional[str] typing and default None.


model_config = {"extra": "forbid"}
model_type: Optional[str] = Field(
None,
description="Optional filter to return only models matching this type",
examples=["llm", "embeddings"],
)
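A quick, hedged illustration of how a filter model shaped like this behaves as plain Pydantic, independent of FastAPI; the snippet below is illustrative and not part of the PR.

```python
# Standalone behaviour of a query-filter model shaped like ModelFilter (illustrative).
from typing import Optional

from pydantic import BaseModel, Field, ValidationError


class ExampleModelFilter(BaseModel):
    model_config = {"extra": "forbid"}
    model_type: Optional[str] = Field(None, examples=["llm", "embeddings"])


print(ExampleModelFilter())                   # model_type=None, i.e. no filtering
print(ExampleModelFilter(model_type="llm"))   # model_type='llm'

try:
    ExampleModelFilter(model_typo="llm")      # misspelled key is rejected
except ValidationError as exc:
    print(exc.errors()[0]["type"])            # "extra_forbidden"
```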
85 changes: 54 additions & 31 deletions tests/unit/app/endpoints/test_models.py
@@ -6,7 +6,9 @@
from fastapi import HTTPException, Request, status
from llama_stack_client import APIConnectionError
from pytest_mock import MockerFixture
from pytest_subtests import SubTests

from models.requests import ModelFilter
from app.endpoints.models import models_endpoint_handler
from authentication.interface import AuthTuple
from configuration import AppConfig
@@ -48,7 +50,9 @@ async def test_models_endpoint_handler_configuration_not_loaded(
auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")

with pytest.raises(HTTPException) as e:
await models_endpoint_handler(request=request, auth=auth)
await models_endpoint_handler(
request=request, auth=auth, model_type=ModelFilter(model_type=None)
)
assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR
assert e.value.detail["response"] == "Configuration is not loaded" # type: ignore
Comment on lines 52 to 57
⚠️ Potential issue | 🟠 Major

Assertions inside the pytest.raises block are unreachable.

Lines 56-57 are inside the with pytest.raises(HTTPException) block. When the await on line 53 raises HTTPException, the context manager catches it and control exits the with block — the assertions on lines 56-57 never execute. This is a pre-existing issue, but since you're modifying this block, consider moving the assertions outside:

Proposed fix
     with pytest.raises(HTTPException) as e:
         await models_endpoint_handler(
             request=request, auth=auth, model_type=ModelFilter(model_type=None)
         )
-        assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR
-        assert e.value.detail["response"] == "Configuration is not loaded"  # type: ignore
+    assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR
+    assert e.value.detail["response"] == "Configuration is not loaded"  # type: ignore
🤖 Prompt for AI Agents
In `@tests/unit/app/endpoints/test_models.py` around lines 52 - 57, The assertions
inside the pytest.raises context are unreachable; move the two asserts that
check e.value.status_code and e.value.detail outside the with block so they run
after the exception is caught. Keep the with pytest.raises(HTTPException) as e:
wrapper around the await models_endpoint_handler(...) call (which uses
ModelFilter(model_type=None)), then dedent the two assertions so they execute
against e.value after the with block completes.
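To make the reviewer's point concrete, here is a tiny standalone pytest sketch (not taken from this PR) contrasting the unreachable-assertion pattern with the corrected one.

```python
# Standalone illustration of the review comment above; not part of this PR.
import pytest


def boom() -> None:
    raise ValueError("bad input")


def test_assertions_inside_raises_never_run() -> None:
    with pytest.raises(ValueError) as exc_info:
        boom()
        # boom() raised, so control has already left the with-block;
        # this assertion is dead code and would hide a wrong message.
        assert str(exc_info.value) == "wrong message"


def test_assertions_after_raises_do_run() -> None:
    with pytest.raises(ValueError) as exc_info:
        boom()
    # The context manager has captured the exception; assert on it here.
    assert str(exc_info.value) == "bad input"
```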


@@ -115,7 +119,9 @@ async def test_models_endpoint_handler_configuration_loaded(
auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")

with pytest.raises(HTTPException) as e:
await models_endpoint_handler(request=request, auth=auth)
await models_endpoint_handler(
request=request, auth=auth, model_type=ModelFilter(model_type=None)
)
assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE
assert e.value.detail["response"] == "Unable to connect to Llama Stack" # type: ignore

@@ -173,7 +179,9 @@ async def test_models_endpoint_handler_unable_to_retrieve_models_list(
# Authorization tuple required by URL endpoint handler
auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")

response = await models_endpoint_handler(request=request, auth=auth)
response = await models_endpoint_handler(
request=request, auth=auth, model_type=ModelFilter(model_type=None)
)
assert response is not None


Expand Down Expand Up @@ -230,7 +238,7 @@ async def test_models_endpoint_handler_model_type_query_parameter(
# Authorization tuple required by URL endpoint handler
auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")
response = await models_endpoint_handler(
request=request, auth=auth, model_type="llm"
request=request, auth=auth, model_type=ModelFilter(model_type="llm")
)
assert response is not None

Expand Down Expand Up @@ -293,7 +301,9 @@ async def test_models_endpoint_handler_model_list_retrieved(
# Authorization tuple required by URL endpoint handler
auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")

response = await models_endpoint_handler(request=request, auth=auth)
response = await models_endpoint_handler(
request=request, auth=auth, model_type=ModelFilter(model_type=None)
)
assert response is not None
assert len(response.models) == 4
assert response.models[0]["identifier"] == "model1"
@@ -309,6 +319,7 @@
@pytest.mark.asyncio
async def test_models_endpoint_handler_model_list_retrieved_with_query_parameter(
mocker: MockerFixture,
subtests: SubTests,
) -> None:
"""Test the models endpoint handler if model list can be retrieved."""
mock_authorization_resolvers(mocker)
@@ -364,31 +375,41 @@ async def test_models_endpoint_handler_model_list_retrieved_with_query_parameter
# Authorization tuple required by URL endpoint handler
auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")

response = await models_endpoint_handler(
request=request, auth=auth, model_type="llm"
)
assert response is not None
assert len(response.models) == 2
assert response.models[0]["identifier"] == "model1"
assert response.models[0]["model_type"] == "llm"
assert response.models[1]["identifier"] == "model3"
assert response.models[1]["model_type"] == "llm"

response = await models_endpoint_handler(
request=request, auth=auth, model_type="embedding"
)
assert response is not None
assert len(response.models) == 2
assert response.models[0]["identifier"] == "model2"
assert response.models[0]["model_type"] == "embedding"
assert response.models[1]["identifier"] == "model4"
assert response.models[1]["model_type"] == "embedding"

response = await models_endpoint_handler(
request=request, auth=auth, model_type="xyzzy"
)
assert response is not None
assert len(response.models) == 0
with subtests.test(msg="Model type = 'llm'"):
response = await models_endpoint_handler(
request=request, auth=auth, model_type=ModelFilter(model_type="llm")
)
assert response is not None
assert len(response.models) == 2
assert response.models[0]["identifier"] == "model1"
assert response.models[0]["model_type"] == "llm"
assert response.models[1]["identifier"] == "model3"
assert response.models[1]["model_type"] == "llm"

with subtests.test(msg="Model type = 'embedding'"):
response = await models_endpoint_handler(
request=request, auth=auth, model_type=ModelFilter(model_type="embedding")
)
assert response is not None
assert len(response.models) == 2
assert response.models[0]["identifier"] == "model2"
assert response.models[0]["model_type"] == "embedding"
assert response.models[1]["identifier"] == "model4"
assert response.models[1]["model_type"] == "embedding"

with subtests.test(msg="Model type = 'xyzzy'"):
response = await models_endpoint_handler(
request=request, auth=auth, model_type=ModelFilter(model_type="xyzzy")
)
assert response is not None
assert len(response.models) == 0

with subtests.test(msg="Model type is empty string"):
response = await models_endpoint_handler(
request=request, auth=auth, model_type=ModelFilter(model_type="")
)
assert response is not None
assert len(response.models) == 0


@pytest.mark.asyncio
@@ -445,7 +466,9 @@ async def test_models_endpoint_llama_stack_connection_error(
auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")

with pytest.raises(HTTPException) as e:
await models_endpoint_handler(request=request, auth=auth)
await models_endpoint_handler(
request=request, auth=auth, model_type=ModelFilter(model_type=None)
)
assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE
assert e.value.detail["response"] == "Unable to connect to Llama Stack" # type: ignore
Comment on lines 468 to 473
⚠️ Potential issue | 🟠 Major

Same unreachable-assertions bug as above.

Lines 472-473 are inside the with pytest.raises block and will never execute. Move them outside the with block, consistent with the correct pattern used in test_models_endpoint_handler_configuration_loaded (lines 121-126).

Proposed fix
     with pytest.raises(HTTPException) as e:
         await models_endpoint_handler(
             request=request, auth=auth, model_type=ModelFilter(model_type=None)
         )
-        assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE
-        assert e.value.detail["response"] == "Unable to connect to Llama Stack"  # type: ignore
-        assert "Unable to connect to Llama Stack" in e.value.detail["cause"]  # type: ignore
+    assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE
+    assert e.value.detail["response"] == "Unable to connect to Llama Stack"  # type: ignore
+    assert "Unable to connect to Llama Stack" in e.value.detail["cause"]  # type: ignore
🤖 Prompt for AI Agents
In `@tests/unit/app/endpoints/test_models.py` around lines 468 - 473, The two
assertions that check the raised HTTPException's status and detail are
incorrectly placed inside the pytest.raises context and therefore never
executed; after calling models_endpoint_handler inside the with
pytest.raises(HTTPException) as e: block, move the asserts that reference
e.value (status_code and detail["response"]) to immediately after the with block
so they run against the captured exception (reference: models_endpoint_handler
and the pytest.raises usage in this test).

assert "Unable to connect to Llama Stack" in e.value.detail["cause"] # type: ignore