frontend/public/mockServiceWorker.js (1 addition & 1 deletion)
@@ -7,7 +7,7 @@
  * - Please do NOT modify this file.
  */

-const PACKAGE_VERSION = '2.10.5'
+const PACKAGE_VERSION = '2.11.1'
 const INTEGRITY_CHECKSUM = 'f5825c521429caf22a4dd13b66e243af'
 const IS_MOCKED_RESPONSE = Symbol('isMockedResponse')
 const activeClientIds = new Set()
@@ -12,6 +12,7 @@ import {
  FileUploadSuccessResponse,
  GetFilesResponse,
  GetFileResponse,
  ConversationMetricsResponse,
} from "../open-hands.types";
import { openHands } from "../open-hands-axios";
import { Provider } from "#/types/settings";

@@ -422,6 +423,21 @@ class ConversationService {
    );
    return response.data;
  }

  /**
   * Get comprehensive metrics data for a conversation
   * @param conversationId ID of the conversation
   * @returns Comprehensive metrics data including cost, token usage, and latency
   */
  static async getConversationMetrics(
    conversationId: string,
  ): Promise<ConversationMetricsResponse> {
    const url = `${this.getConversationUrl(conversationId)}/metrics`;
    const { data } = await openHands.get<ConversationMetricsResponse>(url, {
      headers: this.getConversationHeaders(),
    });
    return data;
  }
}

export default ConversationService;
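A minimal usage sketch for the new method, assuming `ConversationService` is imported from this module and the call happens in an async context (the helper name below is illustrative, not part of the PR):

```typescript
// Log headline cost figures for one conversation.
async function logConversationCost(conversationId: string): Promise<void> {
  const stats = await ConversationService.getConversationMetrics(conversationId);
  if (stats.metrics) {
    console.log(
      `Conversation ${stats.conversation_id} cost so far: $${stats.metrics.accumulated_cost.toFixed(4)}`,
    );
  } else {
    console.log("No metrics recorded for this conversation yet");
  }
}
```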
frontend/src/api/open-hands.types.ts (38 additions & 0 deletions)

@@ -139,3 +139,41 @@ export type GetFilesResponse = string[];
export interface GetFileResponse {
  code: string;
}

export interface TokenUsageResponse {
  model: string;
  prompt_tokens: number;
  completion_tokens: number;
  cache_read_tokens: number;
  cache_write_tokens: number;
  context_window: number;
  per_turn_token: number;
}

export interface CostResponse {
  model: string;
  cost: number;
  timestamp: number;
}

export interface ResponseLatencyResponse {
  model: string;
  latency: number;
  response_id: string;
}

export interface MetricsResponse {
  accumulated_cost: number;
  max_budget_per_task?: number;
  accumulated_token_usage: TokenUsageResponse;
  costs: CostResponse[];
  response_latencies: ResponseLatencyResponse[];
  token_usages: TokenUsageResponse[];
}

export interface ConversationMetricsResponse {
  conversation_id: string;
  metrics?: MetricsResponse;
  service_metrics: Record<string, MetricsResponse>;
  has_active_session: boolean;
}
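Since `token_usages` holds one entry per recorded LLM call, a consumer can derive per-model totals client-side; a sketch against these type definitions (the function name is illustrative):

```typescript
import { ConversationMetricsResponse } from "./open-hands.types";

// Sum prompt and completion tokens per model across all recorded calls.
function tokensByModel(
  response: ConversationMetricsResponse,
): Record<string, number> {
  const totals: Record<string, number> = {};
  for (const usage of response.metrics?.token_usages ?? []) {
    totals[usage.model] =
      (totals[usage.model] ?? 0) + usage.prompt_tokens + usage.completion_tokens;
  }
  return totals;
}
```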
openhands/server/data_models/conversation_info.py (5 additions & 0 deletions)

@@ -27,3 +27,8 @@ class ConversationInfo:
    session_api_key: str | None = None
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    pr_number: list[int] = field(default_factory=list)
    # Cost and token metrics from conversation metadata
    accumulated_cost: float = 0.0
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0
openhands/server/data_models/metrics_response.py (new file, 75 additions)

@@ -0,0 +1,75 @@
from typing import Optional

from pydantic import BaseModel, Field


class TokenUsageResponse(BaseModel):
    """Response model for token usage metrics."""

    model: str = Field(default='', description='The LLM model used')
    prompt_tokens: int = Field(default=0, description='Number of tokens in the prompt')
    completion_tokens: int = Field(
        default=0, description='Number of tokens in the completion'
    )
    cache_read_tokens: int = Field(
        default=0, description='Number of tokens read from cache'
    )
    cache_write_tokens: int = Field(
        default=0, description='Number of tokens written to cache'
    )
    context_window: int = Field(default=0, description='Total context window size')
    per_turn_token: int = Field(
        default=0, description='Tokens used in the current turn'
    )


class CostResponse(BaseModel):
    """Response model for cost metrics."""

    model: str = Field(description='The LLM model used')
    cost: float = Field(description='Cost for this specific call')
    timestamp: float = Field(description='Timestamp when the cost was recorded')


class ResponseLatencyResponse(BaseModel):
    """Response model for response latency metrics."""

    model: str = Field(description='The LLM model used')
    latency: float = Field(description='Response latency in seconds')
    response_id: str = Field(description='Unique identifier for this response')


class MetricsResponse(BaseModel):
    """Response model for comprehensive metrics data."""

    accumulated_cost: float = Field(default=0.0, description='Total accumulated cost')
    max_budget_per_task: Optional[float] = Field(
        default=None, description='Maximum budget per task'
    )
    accumulated_token_usage: TokenUsageResponse = Field(
        description='Accumulated token usage across all calls'
    )
    costs: list[CostResponse] = Field(
        default_factory=list, description='List of individual cost entries'
    )
    response_latencies: list[ResponseLatencyResponse] = Field(
        default_factory=list, description='List of response latency entries'
    )
    token_usages: list[TokenUsageResponse] = Field(
        default_factory=list, description='List of individual token usage entries'
    )


class ConversationMetricsResponse(BaseModel):
    """Response model for conversation-level metrics."""

    conversation_id: str = Field(description='The conversation ID')
    metrics: Optional[MetricsResponse] = Field(
        default=None, description='Combined metrics for the conversation'
    )
    service_metrics: dict[str, MetricsResponse] = Field(
        default_factory=dict, description='Metrics broken down by service ID'
    )
    has_active_session: bool = Field(
        default=False, description='Whether the conversation has an active session'
    )
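These Pydantic models define the JSON shape that the frontend types above mirror; where a payload arrives untyped on the frontend, a minimal runtime guard might look like this (a sketch; the import path is assumed):

```typescript
import { ConversationMetricsResponse } from "./open-hands.types"; // path assumed

// Narrow an unknown payload to ConversationMetricsResponse before use.
function isConversationMetricsResponse(
  value: unknown,
): value is ConversationMetricsResponse {
  if (typeof value !== "object" || value === null) return false;
  const v = value as Record<string, unknown>;
  return (
    typeof v.conversation_id === "string" &&
    typeof v.has_active_session === "boolean" &&
    typeof v.service_metrics === "object" &&
    v.service_metrics !== null
  );
}
```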
openhands/server/routes/conversation.py (151 additions & 0 deletions)

@@ -9,6 +9,13 @@
from openhands.memory.memory import Memory
from openhands.microagent.types import InputMetadata
from openhands.runtime.base import Runtime
from openhands.server.data_models.metrics_response import (
    ConversationMetricsResponse,
    CostResponse,
    MetricsResponse,
    ResponseLatencyResponse,
    TokenUsageResponse,
)
from openhands.server.dependencies import get_dependencies
from openhands.server.session.conversation import ServerConversation
from openhands.server.shared import conversation_manager, file_store

@@ -268,3 +275,147 @@ async def get_microagents(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content={'error': f'Error getting microagents: {e}'},
        )


def _convert_token_usage_to_response(token_usage) -> TokenUsageResponse:
    """Convert a TokenUsage object to TokenUsageResponse."""
    if not token_usage:
        return TokenUsageResponse()

    return TokenUsageResponse(
        model=getattr(token_usage, 'model', ''),
        prompt_tokens=getattr(token_usage, 'prompt_tokens', 0),
        completion_tokens=getattr(token_usage, 'completion_tokens', 0),
        cache_read_tokens=getattr(token_usage, 'cache_read_tokens', 0),
        cache_write_tokens=getattr(token_usage, 'cache_write_tokens', 0),
        context_window=getattr(token_usage, 'context_window', 0),
        per_turn_token=getattr(token_usage, 'per_turn_token', 0),
    )


def _convert_cost_to_response(cost) -> CostResponse:
    """Convert a Cost object to CostResponse."""
    return CostResponse(
        model=getattr(cost, 'model', ''),
        cost=getattr(cost, 'cost', 0.0),
        timestamp=getattr(cost, 'timestamp', 0.0),
    )


def _convert_latency_to_response(latency) -> ResponseLatencyResponse:
    """Convert a ResponseLatency object to ResponseLatencyResponse."""
    return ResponseLatencyResponse(
        model=getattr(latency, 'model', ''),
        latency=getattr(latency, 'latency', 0.0),
        response_id=getattr(latency, 'response_id', ''),
    )


def _convert_metrics_to_response(metrics) -> MetricsResponse:
    """Convert a Metrics object to MetricsResponse."""
    if not metrics:
        return MetricsResponse(
            accumulated_cost=0.0,
            accumulated_token_usage=TokenUsageResponse(),
            costs=[],
            response_latencies=[],
            token_usages=[],
        )

    return MetricsResponse(
        accumulated_cost=getattr(metrics, 'accumulated_cost', 0.0),
        max_budget_per_task=getattr(metrics, 'max_budget_per_task', None),
        accumulated_token_usage=_convert_token_usage_to_response(
            getattr(metrics, 'accumulated_token_usage', None)
        ),
        costs=[
            _convert_cost_to_response(cost) for cost in getattr(metrics, 'costs', [])
        ],
        response_latencies=[
            _convert_latency_to_response(latency)
            for latency in getattr(metrics, 'response_latencies', [])
        ],
        token_usages=[
            _convert_token_usage_to_response(usage)
            for usage in getattr(metrics, 'token_usages', [])
        ],
    )


@app.get('/stats')
async def get_conversation_stats(
    conversation_id: str,
    request: Request,
) -> ConversationMetricsResponse:
    """Get conversation statistics from stored pickle data.

    Returns metrics data from the conversation_stats pickle file.
    """
    try:
        # Get the file store from the session manager
        session_manager = request.app.state.session_manager
        file_store = (
            getattr(session_manager, 'file_store', None) if session_manager else None
        )

        if not file_store:
            raise HTTPException(status_code=500, detail='File store not available')

        # Get user_id from the conversation metadata
        conversation_store = request.app.state.conversation_store
        user_id = None
        if conversation_store:
            try:
                conversation = await conversation_store.get_conversation_metadata(
                    conversation_id
                )
                user_id = getattr(conversation, 'user_id', None)
            except Exception:
                pass  # Continue without user_id

        # Create ConversationStats to load the pickle data
        from openhands.llm.metrics import Metrics
        from openhands.server.services.conversation_stats import ConversationStats

        stats = ConversationStats(
            file_store=file_store,
            conversation_id=conversation_id,
            user_id=user_id,
        )

        # Get combined metrics from restored data
        combined_metrics = None
        service_metrics = {}

        # Check if we have any metrics data
        if stats.restored_metrics or stats.service_to_metrics:
            if stats.service_to_metrics:
                # If we have active service metrics, use those
                combined_metrics = _convert_metrics_to_response(
                    stats.get_combined_metrics()
                )
                for service_id, metrics in stats.service_to_metrics.items():
                    service_metrics[service_id] = _convert_metrics_to_response(metrics)
            elif stats.restored_metrics:
                # If we only have restored metrics, combine those
                total_metrics = Metrics()
                for metrics in stats.restored_metrics.values():
                    total_metrics.merge(metrics)
                combined_metrics = _convert_metrics_to_response(total_metrics)
                for service_id, metrics in stats.restored_metrics.items():
                    service_metrics[service_id] = _convert_metrics_to_response(metrics)

        return ConversationMetricsResponse(
            conversation_id=conversation_id,
            metrics=combined_metrics,
            service_metrics=service_metrics,
            has_active_session=conversation_id
            in (session_manager.sessions if session_manager else {}),
        )

    except HTTPException:
        # Re-raise HTTP errors (e.g. the missing-file-store 500 above) unchanged
        # instead of wrapping them in a generic 500 below.
        raise
    except Exception as e:
        logger.error(f'Error getting conversation stats: {e}')
        raise HTTPException(
            status_code=500, detail=f'Error getting conversation stats: {str(e)}'
        ) from e
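The route can also be exercised without the frontend service wrapper; a sketch assuming the conversation router is mounted under `/api/conversations/{conversation_id}` (the mount prefix is not shown in this diff):

```typescript
import { ConversationMetricsResponse } from "./open-hands.types"; // path assumed

// Fetch the new stats route directly (mount prefix assumed).
async function fetchConversationStats(
  conversationId: string,
): Promise<ConversationMetricsResponse> {
  const res = await fetch(`/api/conversations/${conversationId}/stats`);
  if (!res.ok) {
    throw new Error(`Stats request failed with HTTP ${res.status}`);
  }
  return (await res.json()) as ConversationMetricsResponse;
}
```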
openhands/server/routes/manage_conversations.py (5 additions & 0 deletions)

@@ -461,6 +461,11 @@ async def _get_conversation_info(
            url=agent_loop_info.url if agent_loop_info else None,
            session_api_key=getattr(agent_loop_info, 'session_api_key', None),
            pr_number=conversation.pr_number,
            # Include metrics data from conversation metadata
            accumulated_cost=getattr(conversation, 'accumulated_cost', 0.0),
            prompt_tokens=getattr(conversation, 'prompt_tokens', 0),
            completion_tokens=getattr(conversation, 'completion_tokens', 0),
            total_tokens=getattr(conversation, 'total_tokens', 0),
        )
    except Exception as e:
        logger.error(
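With these fields on `ConversationInfo`, a conversation list can surface cost without fetching full metrics; a sketch assuming the serialized payload mirrors the dataclass fields added above (the interface and function below are illustrative):

```typescript
// Slice of the conversation-info payload carrying the new metric fields.
interface ConversationCostSummary {
  accumulated_cost: number;
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
}

// Format a one-line cost summary for a conversation list row.
function formatCostSummary(info: ConversationCostSummary): string {
  return `$${info.accumulated_cost.toFixed(4)} (${info.total_tokens} tokens)`;
}
```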