diff --git a/frontend/public/mockServiceWorker.js b/frontend/public/mockServiceWorker.js index 723b0714cdef..7e23102e0b28 100644 --- a/frontend/public/mockServiceWorker.js +++ b/frontend/public/mockServiceWorker.js @@ -7,7 +7,7 @@ * - Please do NOT modify this file. */ -const PACKAGE_VERSION = '2.10.5' +const PACKAGE_VERSION = '2.11.1' const INTEGRITY_CHECKSUM = 'f5825c521429caf22a4dd13b66e243af' const IS_MOCKED_RESPONSE = Symbol('isMockedResponse') const activeClientIds = new Set() diff --git a/frontend/src/api/conversation-service/conversation-service.api.ts b/frontend/src/api/conversation-service/conversation-service.api.ts index f426b56a4892..e3765b3d5e43 100644 --- a/frontend/src/api/conversation-service/conversation-service.api.ts +++ b/frontend/src/api/conversation-service/conversation-service.api.ts @@ -12,6 +12,7 @@ import { FileUploadSuccessResponse, GetFilesResponse, GetFileResponse, + ConversationMetricsResponse, } from "../open-hands.types"; import { openHands } from "../open-hands-axios"; import { Provider } from "#/types/settings"; @@ -422,6 +423,21 @@ class ConversationService { ); return response.data; } + + /** + * Get comprehensive metrics data for a conversation + * @param conversationId ID of the conversation + * @returns Comprehensive metrics data including cost, token usage, and latency + */ + static async getConversationMetrics( + conversationId: string, + ): Promise { + const url = `${this.getConversationUrl(conversationId)}/metrics`; + const { data } = await openHands.get(url, { + headers: this.getConversationHeaders(), + }); + return data; + } } export default ConversationService; diff --git a/frontend/src/api/open-hands.types.ts b/frontend/src/api/open-hands.types.ts index b7680029f886..62bc4a670923 100644 --- a/frontend/src/api/open-hands.types.ts +++ b/frontend/src/api/open-hands.types.ts @@ -139,3 +139,41 @@ export type GetFilesResponse = string[]; export interface GetFileResponse { code: string; } + +export interface TokenUsageResponse { + model: string; + prompt_tokens: number; + completion_tokens: number; + cache_read_tokens: number; + cache_write_tokens: number; + context_window: number; + per_turn_token: number; +} + +export interface CostResponse { + model: string; + cost: number; + timestamp: number; +} + +export interface ResponseLatencyResponse { + model: string; + latency: number; + response_id: string; +} + +export interface MetricsResponse { + accumulated_cost: number; + max_budget_per_task?: number; + accumulated_token_usage: TokenUsageResponse; + costs: CostResponse[]; + response_latencies: ResponseLatencyResponse[]; + token_usages: TokenUsageResponse[]; +} + +export interface ConversationMetricsResponse { + conversation_id: string; + metrics?: MetricsResponse; + service_metrics: Record; + has_active_session: boolean; +} diff --git a/openhands/server/data_models/conversation_info.py b/openhands/server/data_models/conversation_info.py index c16c2a4186fe..d5d9fe2fe8d8 100644 --- a/openhands/server/data_models/conversation_info.py +++ b/openhands/server/data_models/conversation_info.py @@ -27,3 +27,8 @@ class ConversationInfo: session_api_key: str | None = None created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) pr_number: list[int] = field(default_factory=list) + # Cost and token metrics from conversation metadata + accumulated_cost: float = 0.0 + prompt_tokens: int = 0 + completion_tokens: int = 0 + total_tokens: int = 0 diff --git a/openhands/server/data_models/metrics_response.py b/openhands/server/data_models/metrics_response.py new file mode 100644 index 000000000000..3e6ff30c2c27 --- /dev/null +++ b/openhands/server/data_models/metrics_response.py @@ -0,0 +1,75 @@ +from typing import Optional + +from pydantic import BaseModel, Field + + +class TokenUsageResponse(BaseModel): + """Response model for token usage metrics.""" + + model: str = Field(default='', description='The LLM model used') + prompt_tokens: int = Field(default=0, description='Number of tokens in the prompt') + completion_tokens: int = Field( + default=0, description='Number of tokens in the completion' + ) + cache_read_tokens: int = Field( + default=0, description='Number of tokens read from cache' + ) + cache_write_tokens: int = Field( + default=0, description='Number of tokens written to cache' + ) + context_window: int = Field(default=0, description='Total context window size') + per_turn_token: int = Field( + default=0, description='Tokens used in the current turn' + ) + + +class CostResponse(BaseModel): + """Response model for cost metrics.""" + + model: str = Field(description='The LLM model used') + cost: float = Field(description='Cost for this specific call') + timestamp: float = Field(description='Timestamp when the cost was recorded') + + +class ResponseLatencyResponse(BaseModel): + """Response model for response latency metrics.""" + + model: str = Field(description='The LLM model used') + latency: float = Field(description='Response latency in seconds') + response_id: str = Field(description='Unique identifier for this response') + + +class MetricsResponse(BaseModel): + """Response model for comprehensive metrics data.""" + + accumulated_cost: float = Field(default=0.0, description='Total accumulated cost') + max_budget_per_task: Optional[float] = Field( + default=None, description='Maximum budget per task' + ) + accumulated_token_usage: TokenUsageResponse = Field( + description='Accumulated token usage across all calls' + ) + costs: list[CostResponse] = Field( + default_factory=list, description='List of individual cost entries' + ) + response_latencies: list[ResponseLatencyResponse] = Field( + default_factory=list, description='List of response latency entries' + ) + token_usages: list[TokenUsageResponse] = Field( + default_factory=list, description='List of individual token usage entries' + ) + + +class ConversationMetricsResponse(BaseModel): + """Response model for conversation-level metrics.""" + + conversation_id: str = Field(description='The conversation ID') + metrics: Optional[MetricsResponse] = Field( + default=None, description='Combined metrics for the conversation' + ) + service_metrics: dict[str, MetricsResponse] = Field( + default_factory=dict, description='Metrics broken down by service ID' + ) + has_active_session: bool = Field( + default=False, description='Whether the conversation has an active session' + ) diff --git a/openhands/server/routes/conversation.py b/openhands/server/routes/conversation.py index 1de94c5ba188..66bc26dfe92e 100644 --- a/openhands/server/routes/conversation.py +++ b/openhands/server/routes/conversation.py @@ -9,6 +9,13 @@ from openhands.memory.memory import Memory from openhands.microagent.types import InputMetadata from openhands.runtime.base import Runtime +from openhands.server.data_models.metrics_response import ( + ConversationMetricsResponse, + CostResponse, + MetricsResponse, + ResponseLatencyResponse, + TokenUsageResponse, +) from openhands.server.dependencies import get_dependencies from openhands.server.session.conversation import ServerConversation from openhands.server.shared import conversation_manager, file_store @@ -268,3 +275,147 @@ async def get_microagents( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, content={'error': f'Error getting microagents: {e}'}, ) + + +def _convert_token_usage_to_response(token_usage) -> TokenUsageResponse: + """Convert a TokenUsage object to TokenUsageResponse.""" + if not token_usage: + return TokenUsageResponse() + + return TokenUsageResponse( + model=getattr(token_usage, 'model', ''), + prompt_tokens=getattr(token_usage, 'prompt_tokens', 0), + completion_tokens=getattr(token_usage, 'completion_tokens', 0), + cache_read_tokens=getattr(token_usage, 'cache_read_tokens', 0), + cache_write_tokens=getattr(token_usage, 'cache_write_tokens', 0), + context_window=getattr(token_usage, 'context_window', 0), + per_turn_token=getattr(token_usage, 'per_turn_token', 0), + ) + + +def _convert_cost_to_response(cost) -> CostResponse: + """Convert a Cost object to CostResponse.""" + return CostResponse( + model=getattr(cost, 'model', ''), + cost=getattr(cost, 'cost', 0.0), + timestamp=getattr(cost, 'timestamp', 0.0), + ) + + +def _convert_latency_to_response(latency) -> ResponseLatencyResponse: + """Convert a ResponseLatency object to ResponseLatencyResponse.""" + return ResponseLatencyResponse( + model=getattr(latency, 'model', ''), + latency=getattr(latency, 'latency', 0.0), + response_id=getattr(latency, 'response_id', ''), + ) + + +def _convert_metrics_to_response(metrics) -> MetricsResponse: + """Convert a Metrics object to MetricsResponse.""" + if not metrics: + return MetricsResponse( + accumulated_cost=0.0, + accumulated_token_usage=TokenUsageResponse(), + costs=[], + response_latencies=[], + token_usages=[], + ) + + return MetricsResponse( + accumulated_cost=getattr(metrics, 'accumulated_cost', 0.0), + max_budget_per_task=getattr(metrics, 'max_budget_per_task', None), + accumulated_token_usage=_convert_token_usage_to_response( + getattr(metrics, 'accumulated_token_usage', None) + ), + costs=[ + _convert_cost_to_response(cost) for cost in getattr(metrics, 'costs', []) + ], + response_latencies=[ + _convert_latency_to_response(latency) + for latency in getattr(metrics, 'response_latencies', []) + ], + token_usages=[ + _convert_token_usage_to_response(usage) + for usage in getattr(metrics, 'token_usages', []) + ], + ) + + +@app.get('/stats') +async def get_conversation_stats( + conversation_id: str, + request: Request, +) -> ConversationMetricsResponse: + """Get conversation statistics from stored pickle data. + + Returns metrics data from the conversation_stats pickle file. + """ + try: + # Get the file store from the session manager + session_manager = request.app.state.session_manager + file_store = ( + getattr(session_manager, 'file_store', None) if session_manager else None + ) + + if not file_store: + raise HTTPException(status_code=500, detail='File store not available') + + # Get user_id from the conversation metadata + conversation_store = request.app.state.conversation_store + user_id = None + if conversation_store: + try: + conversation = await conversation_store.get_conversation_metadata( + conversation_id + ) + user_id = getattr(conversation, 'user_id', None) + except Exception: + pass # Continue without user_id + + # Create ConversationStats to load the pickle data + from openhands.llm.metrics import Metrics + from openhands.server.services.conversation_stats import ConversationStats + + stats = ConversationStats( + file_store=file_store, + conversation_id=conversation_id, + user_id=user_id, + ) + + # Get combined metrics from restored data + combined_metrics = None + service_metrics = {} + + # Check if we have any metrics data + if stats.restored_metrics or stats.service_to_metrics: + # Get combined metrics + if stats.service_to_metrics: + # If we have active service metrics, use those + combined_metrics = _convert_metrics_to_response( + stats.get_combined_metrics() + ) + for service_id, metrics in stats.service_to_metrics.items(): + service_metrics[service_id] = _convert_metrics_to_response(metrics) + elif stats.restored_metrics: + # If we only have restored metrics, combine those + total_metrics = Metrics() + for metrics in stats.restored_metrics.values(): + total_metrics.merge(metrics) + combined_metrics = _convert_metrics_to_response(total_metrics) + for service_id, metrics in stats.restored_metrics.items(): + service_metrics[service_id] = _convert_metrics_to_response(metrics) + + return ConversationMetricsResponse( + conversation_id=conversation_id, + metrics=combined_metrics, + service_metrics=service_metrics, + has_active_session=conversation_id + in (session_manager.sessions if session_manager else {}), + ) + + except Exception as e: + logger.error(f'Error getting conversation stats: {e}') + raise HTTPException( + status_code=500, detail=f'Error getting conversation stats: {str(e)}' + ) diff --git a/openhands/server/routes/manage_conversations.py b/openhands/server/routes/manage_conversations.py index d996feecd951..758f8b227559 100644 --- a/openhands/server/routes/manage_conversations.py +++ b/openhands/server/routes/manage_conversations.py @@ -461,6 +461,11 @@ async def _get_conversation_info( url=agent_loop_info.url if agent_loop_info else None, session_api_key=getattr(agent_loop_info, 'session_api_key', None), pr_number=conversation.pr_number, + # Include metrics data from conversation metadata + accumulated_cost=getattr(conversation, 'accumulated_cost', 0.0), + prompt_tokens=getattr(conversation, 'prompt_tokens', 0), + completion_tokens=getattr(conversation, 'completion_tokens', 0), + total_tokens=getattr(conversation, 'total_tokens', 0), ) except Exception as e: logger.error( diff --git a/tests/unit/server/routes/test_conversation_stats_endpoint.py b/tests/unit/server/routes/test_conversation_stats_endpoint.py new file mode 100644 index 000000000000..ff143a34507c --- /dev/null +++ b/tests/unit/server/routes/test_conversation_stats_endpoint.py @@ -0,0 +1,156 @@ +"""Tests for the conversation stats API endpoint.""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from fastapi import HTTPException + +from openhands.llm.metrics import Metrics +from openhands.server.data_models.metrics_response import ConversationMetricsResponse +from openhands.server.routes.conversation import get_conversation_stats +from openhands.storage.memory import InMemoryFileStore + + +@pytest.mark.asyncio +async def test_get_conversation_stats_success(): + """Test successful retrieval of conversation stats.""" + # Create mock file store with metrics data + mock_file_store = InMemoryFileStore({}) + + # Create mock session manager + mock_session_manager = MagicMock() + mock_session_manager.file_store = mock_file_store + mock_session_manager.sessions = {} + + # Create mock conversation store + mock_conversation_store = MagicMock() + mock_conversation_store.get_conversation_metadata = AsyncMock(return_value=None) + + # Create mock request + mock_request = MagicMock() + mock_request.app.state.session_manager = mock_session_manager + mock_request.app.state.conversation_store = mock_conversation_store + + # Mock ConversationStats to return some test data + with patch( + 'openhands.server.services.conversation_stats.ConversationStats' + ) as mock_stats_class: + mock_stats = MagicMock() + mock_stats.restored_metrics = {} + mock_stats.service_to_metrics = {} + mock_stats_class.return_value = mock_stats + + # Call the endpoint + result = await get_conversation_stats('test-conversation-id', mock_request) + + # Verify the result + assert isinstance(result, ConversationMetricsResponse) + assert result.conversation_id == 'test-conversation-id' + assert result.metrics is None + assert result.service_metrics == {} + assert result.has_active_session is False + + +@pytest.mark.asyncio +async def test_get_conversation_stats_with_metrics(): + """Test retrieval of conversation stats with actual metrics data.""" + # Create mock file store + mock_file_store = InMemoryFileStore({}) + + # Create mock session manager + mock_session_manager = MagicMock() + mock_session_manager.file_store = mock_file_store + mock_session_manager.sessions = {'test-conversation-id': MagicMock()} + + # Create mock conversation store + mock_conversation_store = MagicMock() + mock_conversation_store.get_conversation_metadata = AsyncMock(return_value=None) + + # Create mock request + mock_request = MagicMock() + mock_request.app.state.session_manager = mock_session_manager + mock_request.app.state.conversation_store = mock_conversation_store + + # Create test metrics + test_metrics = Metrics(model_name='gpt-4') + test_metrics.add_cost(0.05) + test_metrics.add_token_usage( + prompt_tokens=100, + completion_tokens=50, + cache_read_tokens=0, + cache_write_tokens=0, + context_window=8000, + response_id='test-response', + ) + + # Mock ConversationStats to return test metrics + with patch( + 'openhands.server.services.conversation_stats.ConversationStats' + ) as mock_stats_class: + mock_stats = MagicMock() + mock_stats.restored_metrics = {} + mock_stats.service_to_metrics = {'test-service': test_metrics} + mock_stats.get_combined_metrics.return_value = test_metrics + mock_stats_class.return_value = mock_stats + + # Call the endpoint + result = await get_conversation_stats('test-conversation-id', mock_request) + + # Verify the result + assert isinstance(result, ConversationMetricsResponse) + assert result.conversation_id == 'test-conversation-id' + assert result.metrics is not None + assert result.metrics.accumulated_cost == 0.05 + assert result.service_metrics['test-service'].accumulated_cost == 0.05 + assert result.has_active_session is True + + +@pytest.mark.asyncio +async def test_get_conversation_stats_no_file_store(): + """Test error handling when file store is not available.""" + # Create mock request with no file store + mock_request = MagicMock() + mock_request.app.state.session_manager = None + + # Call the endpoint and expect an HTTPException + with pytest.raises(HTTPException) as exc_info: + await get_conversation_stats('test-conversation-id', mock_request) + + assert exc_info.value.status_code == 500 + assert 'File store not available' in str(exc_info.value.detail) + + +@pytest.mark.asyncio +async def test_get_conversation_stats_exception_handling(): + """Test error handling when an exception occurs.""" + # Create mock file store + mock_file_store = InMemoryFileStore({}) + + # Create mock session manager + mock_session_manager = MagicMock() + mock_session_manager.file_store = mock_file_store + mock_session_manager.sessions = {} + + # Create mock conversation store + mock_conversation_store = MagicMock() + mock_conversation_store.get_conversation_metadata = AsyncMock(return_value=None) + + # Create mock request + mock_request = MagicMock() + mock_request.app.state.session_manager = mock_session_manager + mock_request.app.state.conversation_store = mock_conversation_store + + # Mock ConversationStats to raise an exception + with patch( + 'openhands.server.services.conversation_stats.ConversationStats' + ) as mock_stats_class: + mock_stats_class.side_effect = Exception('Test error') + + # Call the endpoint and expect an HTTPException + with pytest.raises(HTTPException) as exc_info: + await get_conversation_stats('test-conversation-id', mock_request) + + assert exc_info.value.status_code == 500 + assert 'Error getting conversation stats: Test error' in str( + exc_info.value.detail + )