mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
3 Commits
openhands/
...
api/conver
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f6d9d9c70d | ||
|
|
bf79cd1fad | ||
|
|
624018e04a |
@@ -7,7 +7,7 @@
|
||||
* - Please do NOT modify this file.
|
||||
*/
|
||||
|
||||
const PACKAGE_VERSION = '2.10.5'
|
||||
const PACKAGE_VERSION = '2.11.1'
|
||||
const INTEGRITY_CHECKSUM = 'f5825c521429caf22a4dd13b66e243af'
|
||||
const IS_MOCKED_RESPONSE = Symbol('isMockedResponse')
|
||||
const activeClientIds = new Set()
|
||||
|
||||
@@ -12,6 +12,7 @@ import {
|
||||
FileUploadSuccessResponse,
|
||||
GetFilesResponse,
|
||||
GetFileResponse,
|
||||
ConversationMetricsResponse,
|
||||
} from "../open-hands.types";
|
||||
import { openHands } from "../open-hands-axios";
|
||||
import { Provider } from "#/types/settings";
|
||||
@@ -422,6 +423,21 @@ class ConversationService {
|
||||
);
|
||||
return response.data;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get comprehensive metrics data for a conversation
|
||||
* @param conversationId ID of the conversation
|
||||
* @returns Comprehensive metrics data including cost, token usage, and latency
|
||||
*/
|
||||
static async getConversationMetrics(
|
||||
conversationId: string,
|
||||
): Promise<ConversationMetricsResponse> {
|
||||
const url = `${this.getConversationUrl(conversationId)}/metrics`;
|
||||
const { data } = await openHands.get<ConversationMetricsResponse>(url, {
|
||||
headers: this.getConversationHeaders(),
|
||||
});
|
||||
return data;
|
||||
}
|
||||
}
|
||||
|
||||
export default ConversationService;
|
||||
|
||||
@@ -139,3 +139,41 @@ export type GetFilesResponse = string[];
|
||||
export interface GetFileResponse {
|
||||
code: string;
|
||||
}
|
||||
|
||||
export interface TokenUsageResponse {
|
||||
model: string;
|
||||
prompt_tokens: number;
|
||||
completion_tokens: number;
|
||||
cache_read_tokens: number;
|
||||
cache_write_tokens: number;
|
||||
context_window: number;
|
||||
per_turn_token: number;
|
||||
}
|
||||
|
||||
export interface CostResponse {
|
||||
model: string;
|
||||
cost: number;
|
||||
timestamp: number;
|
||||
}
|
||||
|
||||
export interface ResponseLatencyResponse {
|
||||
model: string;
|
||||
latency: number;
|
||||
response_id: string;
|
||||
}
|
||||
|
||||
export interface MetricsResponse {
|
||||
accumulated_cost: number;
|
||||
max_budget_per_task?: number;
|
||||
accumulated_token_usage: TokenUsageResponse;
|
||||
costs: CostResponse[];
|
||||
response_latencies: ResponseLatencyResponse[];
|
||||
token_usages: TokenUsageResponse[];
|
||||
}
|
||||
|
||||
export interface ConversationMetricsResponse {
|
||||
conversation_id: string;
|
||||
metrics?: MetricsResponse;
|
||||
service_metrics: Record<string, MetricsResponse>;
|
||||
has_active_session: boolean;
|
||||
}
|
||||
|
||||
@@ -27,3 +27,8 @@ class ConversationInfo:
|
||||
session_api_key: str | None = None
|
||||
created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
|
||||
pr_number: list[int] = field(default_factory=list)
|
||||
# Cost and token metrics from conversation metadata
|
||||
accumulated_cost: float = 0.0
|
||||
prompt_tokens: int = 0
|
||||
completion_tokens: int = 0
|
||||
total_tokens: int = 0
|
||||
|
||||
75
openhands/server/data_models/metrics_response.py
Normal file
75
openhands/server/data_models/metrics_response.py
Normal file
@@ -0,0 +1,75 @@
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class TokenUsageResponse(BaseModel):
|
||||
"""Response model for token usage metrics."""
|
||||
|
||||
model: str = Field(default='', description='The LLM model used')
|
||||
prompt_tokens: int = Field(default=0, description='Number of tokens in the prompt')
|
||||
completion_tokens: int = Field(
|
||||
default=0, description='Number of tokens in the completion'
|
||||
)
|
||||
cache_read_tokens: int = Field(
|
||||
default=0, description='Number of tokens read from cache'
|
||||
)
|
||||
cache_write_tokens: int = Field(
|
||||
default=0, description='Number of tokens written to cache'
|
||||
)
|
||||
context_window: int = Field(default=0, description='Total context window size')
|
||||
per_turn_token: int = Field(
|
||||
default=0, description='Tokens used in the current turn'
|
||||
)
|
||||
|
||||
|
||||
class CostResponse(BaseModel):
|
||||
"""Response model for cost metrics."""
|
||||
|
||||
model: str = Field(description='The LLM model used')
|
||||
cost: float = Field(description='Cost for this specific call')
|
||||
timestamp: float = Field(description='Timestamp when the cost was recorded')
|
||||
|
||||
|
||||
class ResponseLatencyResponse(BaseModel):
|
||||
"""Response model for response latency metrics."""
|
||||
|
||||
model: str = Field(description='The LLM model used')
|
||||
latency: float = Field(description='Response latency in seconds')
|
||||
response_id: str = Field(description='Unique identifier for this response')
|
||||
|
||||
|
||||
class MetricsResponse(BaseModel):
|
||||
"""Response model for comprehensive metrics data."""
|
||||
|
||||
accumulated_cost: float = Field(default=0.0, description='Total accumulated cost')
|
||||
max_budget_per_task: Optional[float] = Field(
|
||||
default=None, description='Maximum budget per task'
|
||||
)
|
||||
accumulated_token_usage: TokenUsageResponse = Field(
|
||||
description='Accumulated token usage across all calls'
|
||||
)
|
||||
costs: list[CostResponse] = Field(
|
||||
default_factory=list, description='List of individual cost entries'
|
||||
)
|
||||
response_latencies: list[ResponseLatencyResponse] = Field(
|
||||
default_factory=list, description='List of response latency entries'
|
||||
)
|
||||
token_usages: list[TokenUsageResponse] = Field(
|
||||
default_factory=list, description='List of individual token usage entries'
|
||||
)
|
||||
|
||||
|
||||
class ConversationMetricsResponse(BaseModel):
|
||||
"""Response model for conversation-level metrics."""
|
||||
|
||||
conversation_id: str = Field(description='The conversation ID')
|
||||
metrics: Optional[MetricsResponse] = Field(
|
||||
default=None, description='Combined metrics for the conversation'
|
||||
)
|
||||
service_metrics: dict[str, MetricsResponse] = Field(
|
||||
default_factory=dict, description='Metrics broken down by service ID'
|
||||
)
|
||||
has_active_session: bool = Field(
|
||||
default=False, description='Whether the conversation has an active session'
|
||||
)
|
||||
@@ -9,6 +9,13 @@ from openhands.events.serialization.event import event_to_dict
|
||||
from openhands.memory.memory import Memory
|
||||
from openhands.microagent.types import InputMetadata
|
||||
from openhands.runtime.base import Runtime
|
||||
from openhands.server.data_models.metrics_response import (
|
||||
ConversationMetricsResponse,
|
||||
CostResponse,
|
||||
MetricsResponse,
|
||||
ResponseLatencyResponse,
|
||||
TokenUsageResponse,
|
||||
)
|
||||
from openhands.server.dependencies import get_dependencies
|
||||
from openhands.server.session.conversation import ServerConversation
|
||||
from openhands.server.shared import conversation_manager, file_store
|
||||
@@ -268,3 +275,147 @@ async def get_microagents(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
content={'error': f'Error getting microagents: {e}'},
|
||||
)
|
||||
|
||||
|
||||
def _convert_token_usage_to_response(token_usage) -> TokenUsageResponse:
|
||||
"""Convert a TokenUsage object to TokenUsageResponse."""
|
||||
if not token_usage:
|
||||
return TokenUsageResponse()
|
||||
|
||||
return TokenUsageResponse(
|
||||
model=getattr(token_usage, 'model', ''),
|
||||
prompt_tokens=getattr(token_usage, 'prompt_tokens', 0),
|
||||
completion_tokens=getattr(token_usage, 'completion_tokens', 0),
|
||||
cache_read_tokens=getattr(token_usage, 'cache_read_tokens', 0),
|
||||
cache_write_tokens=getattr(token_usage, 'cache_write_tokens', 0),
|
||||
context_window=getattr(token_usage, 'context_window', 0),
|
||||
per_turn_token=getattr(token_usage, 'per_turn_token', 0),
|
||||
)
|
||||
|
||||
|
||||
def _convert_cost_to_response(cost) -> CostResponse:
|
||||
"""Convert a Cost object to CostResponse."""
|
||||
return CostResponse(
|
||||
model=getattr(cost, 'model', ''),
|
||||
cost=getattr(cost, 'cost', 0.0),
|
||||
timestamp=getattr(cost, 'timestamp', 0.0),
|
||||
)
|
||||
|
||||
|
||||
def _convert_latency_to_response(latency) -> ResponseLatencyResponse:
|
||||
"""Convert a ResponseLatency object to ResponseLatencyResponse."""
|
||||
return ResponseLatencyResponse(
|
||||
model=getattr(latency, 'model', ''),
|
||||
latency=getattr(latency, 'latency', 0.0),
|
||||
response_id=getattr(latency, 'response_id', ''),
|
||||
)
|
||||
|
||||
|
||||
def _convert_metrics_to_response(metrics) -> MetricsResponse:
|
||||
"""Convert a Metrics object to MetricsResponse."""
|
||||
if not metrics:
|
||||
return MetricsResponse(
|
||||
accumulated_cost=0.0,
|
||||
accumulated_token_usage=TokenUsageResponse(),
|
||||
costs=[],
|
||||
response_latencies=[],
|
||||
token_usages=[],
|
||||
)
|
||||
|
||||
return MetricsResponse(
|
||||
accumulated_cost=getattr(metrics, 'accumulated_cost', 0.0),
|
||||
max_budget_per_task=getattr(metrics, 'max_budget_per_task', None),
|
||||
accumulated_token_usage=_convert_token_usage_to_response(
|
||||
getattr(metrics, 'accumulated_token_usage', None)
|
||||
),
|
||||
costs=[
|
||||
_convert_cost_to_response(cost) for cost in getattr(metrics, 'costs', [])
|
||||
],
|
||||
response_latencies=[
|
||||
_convert_latency_to_response(latency)
|
||||
for latency in getattr(metrics, 'response_latencies', [])
|
||||
],
|
||||
token_usages=[
|
||||
_convert_token_usage_to_response(usage)
|
||||
for usage in getattr(metrics, 'token_usages', [])
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
@app.get('/stats')
|
||||
async def get_conversation_stats(
|
||||
conversation_id: str,
|
||||
request: Request,
|
||||
) -> ConversationMetricsResponse:
|
||||
"""Get conversation statistics from stored pickle data.
|
||||
|
||||
Returns metrics data from the conversation_stats pickle file.
|
||||
"""
|
||||
try:
|
||||
# Get the file store from the session manager
|
||||
session_manager = request.app.state.session_manager
|
||||
file_store = (
|
||||
getattr(session_manager, 'file_store', None) if session_manager else None
|
||||
)
|
||||
|
||||
if not file_store:
|
||||
raise HTTPException(status_code=500, detail='File store not available')
|
||||
|
||||
# Get user_id from the conversation metadata
|
||||
conversation_store = request.app.state.conversation_store
|
||||
user_id = None
|
||||
if conversation_store:
|
||||
try:
|
||||
conversation = await conversation_store.get_conversation_metadata(
|
||||
conversation_id
|
||||
)
|
||||
user_id = getattr(conversation, 'user_id', None)
|
||||
except Exception:
|
||||
pass # Continue without user_id
|
||||
|
||||
# Create ConversationStats to load the pickle data
|
||||
from openhands.llm.metrics import Metrics
|
||||
from openhands.server.services.conversation_stats import ConversationStats
|
||||
|
||||
stats = ConversationStats(
|
||||
file_store=file_store,
|
||||
conversation_id=conversation_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
# Get combined metrics from restored data
|
||||
combined_metrics = None
|
||||
service_metrics = {}
|
||||
|
||||
# Check if we have any metrics data
|
||||
if stats.restored_metrics or stats.service_to_metrics:
|
||||
# Get combined metrics
|
||||
if stats.service_to_metrics:
|
||||
# If we have active service metrics, use those
|
||||
combined_metrics = _convert_metrics_to_response(
|
||||
stats.get_combined_metrics()
|
||||
)
|
||||
for service_id, metrics in stats.service_to_metrics.items():
|
||||
service_metrics[service_id] = _convert_metrics_to_response(metrics)
|
||||
elif stats.restored_metrics:
|
||||
# If we only have restored metrics, combine those
|
||||
total_metrics = Metrics()
|
||||
for metrics in stats.restored_metrics.values():
|
||||
total_metrics.merge(metrics)
|
||||
combined_metrics = _convert_metrics_to_response(total_metrics)
|
||||
for service_id, metrics in stats.restored_metrics.items():
|
||||
service_metrics[service_id] = _convert_metrics_to_response(metrics)
|
||||
|
||||
return ConversationMetricsResponse(
|
||||
conversation_id=conversation_id,
|
||||
metrics=combined_metrics,
|
||||
service_metrics=service_metrics,
|
||||
has_active_session=conversation_id
|
||||
in (session_manager.sessions if session_manager else {}),
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f'Error getting conversation stats: {e}')
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f'Error getting conversation stats: {str(e)}'
|
||||
)
|
||||
|
||||
@@ -461,6 +461,11 @@ async def _get_conversation_info(
|
||||
url=agent_loop_info.url if agent_loop_info else None,
|
||||
session_api_key=getattr(agent_loop_info, 'session_api_key', None),
|
||||
pr_number=conversation.pr_number,
|
||||
# Include metrics data from conversation metadata
|
||||
accumulated_cost=getattr(conversation, 'accumulated_cost', 0.0),
|
||||
prompt_tokens=getattr(conversation, 'prompt_tokens', 0),
|
||||
completion_tokens=getattr(conversation, 'completion_tokens', 0),
|
||||
total_tokens=getattr(conversation, 'total_tokens', 0),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
|
||||
156
tests/unit/server/routes/test_conversation_stats_endpoint.py
Normal file
156
tests/unit/server/routes/test_conversation_stats_endpoint.py
Normal file
@@ -0,0 +1,156 @@
|
||||
"""Tests for the conversation stats API endpoint."""
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from fastapi import HTTPException
|
||||
|
||||
from openhands.llm.metrics import Metrics
|
||||
from openhands.server.data_models.metrics_response import ConversationMetricsResponse
|
||||
from openhands.server.routes.conversation import get_conversation_stats
|
||||
from openhands.storage.memory import InMemoryFileStore
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_conversation_stats_success():
|
||||
"""Test successful retrieval of conversation stats."""
|
||||
# Create mock file store with metrics data
|
||||
mock_file_store = InMemoryFileStore({})
|
||||
|
||||
# Create mock session manager
|
||||
mock_session_manager = MagicMock()
|
||||
mock_session_manager.file_store = mock_file_store
|
||||
mock_session_manager.sessions = {}
|
||||
|
||||
# Create mock conversation store
|
||||
mock_conversation_store = MagicMock()
|
||||
mock_conversation_store.get_conversation_metadata = AsyncMock(return_value=None)
|
||||
|
||||
# Create mock request
|
||||
mock_request = MagicMock()
|
||||
mock_request.app.state.session_manager = mock_session_manager
|
||||
mock_request.app.state.conversation_store = mock_conversation_store
|
||||
|
||||
# Mock ConversationStats to return some test data
|
||||
with patch(
|
||||
'openhands.server.services.conversation_stats.ConversationStats'
|
||||
) as mock_stats_class:
|
||||
mock_stats = MagicMock()
|
||||
mock_stats.restored_metrics = {}
|
||||
mock_stats.service_to_metrics = {}
|
||||
mock_stats_class.return_value = mock_stats
|
||||
|
||||
# Call the endpoint
|
||||
result = await get_conversation_stats('test-conversation-id', mock_request)
|
||||
|
||||
# Verify the result
|
||||
assert isinstance(result, ConversationMetricsResponse)
|
||||
assert result.conversation_id == 'test-conversation-id'
|
||||
assert result.metrics is None
|
||||
assert result.service_metrics == {}
|
||||
assert result.has_active_session is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_conversation_stats_with_metrics():
|
||||
"""Test retrieval of conversation stats with actual metrics data."""
|
||||
# Create mock file store
|
||||
mock_file_store = InMemoryFileStore({})
|
||||
|
||||
# Create mock session manager
|
||||
mock_session_manager = MagicMock()
|
||||
mock_session_manager.file_store = mock_file_store
|
||||
mock_session_manager.sessions = {'test-conversation-id': MagicMock()}
|
||||
|
||||
# Create mock conversation store
|
||||
mock_conversation_store = MagicMock()
|
||||
mock_conversation_store.get_conversation_metadata = AsyncMock(return_value=None)
|
||||
|
||||
# Create mock request
|
||||
mock_request = MagicMock()
|
||||
mock_request.app.state.session_manager = mock_session_manager
|
||||
mock_request.app.state.conversation_store = mock_conversation_store
|
||||
|
||||
# Create test metrics
|
||||
test_metrics = Metrics(model_name='gpt-4')
|
||||
test_metrics.add_cost(0.05)
|
||||
test_metrics.add_token_usage(
|
||||
prompt_tokens=100,
|
||||
completion_tokens=50,
|
||||
cache_read_tokens=0,
|
||||
cache_write_tokens=0,
|
||||
context_window=8000,
|
||||
response_id='test-response',
|
||||
)
|
||||
|
||||
# Mock ConversationStats to return test metrics
|
||||
with patch(
|
||||
'openhands.server.services.conversation_stats.ConversationStats'
|
||||
) as mock_stats_class:
|
||||
mock_stats = MagicMock()
|
||||
mock_stats.restored_metrics = {}
|
||||
mock_stats.service_to_metrics = {'test-service': test_metrics}
|
||||
mock_stats.get_combined_metrics.return_value = test_metrics
|
||||
mock_stats_class.return_value = mock_stats
|
||||
|
||||
# Call the endpoint
|
||||
result = await get_conversation_stats('test-conversation-id', mock_request)
|
||||
|
||||
# Verify the result
|
||||
assert isinstance(result, ConversationMetricsResponse)
|
||||
assert result.conversation_id == 'test-conversation-id'
|
||||
assert result.metrics is not None
|
||||
assert result.metrics.accumulated_cost == 0.05
|
||||
assert result.service_metrics['test-service'].accumulated_cost == 0.05
|
||||
assert result.has_active_session is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_conversation_stats_no_file_store():
|
||||
"""Test error handling when file store is not available."""
|
||||
# Create mock request with no file store
|
||||
mock_request = MagicMock()
|
||||
mock_request.app.state.session_manager = None
|
||||
|
||||
# Call the endpoint and expect an HTTPException
|
||||
with pytest.raises(HTTPException) as exc_info:
|
||||
await get_conversation_stats('test-conversation-id', mock_request)
|
||||
|
||||
assert exc_info.value.status_code == 500
|
||||
assert 'File store not available' in str(exc_info.value.detail)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_conversation_stats_exception_handling():
|
||||
"""Test error handling when an exception occurs."""
|
||||
# Create mock file store
|
||||
mock_file_store = InMemoryFileStore({})
|
||||
|
||||
# Create mock session manager
|
||||
mock_session_manager = MagicMock()
|
||||
mock_session_manager.file_store = mock_file_store
|
||||
mock_session_manager.sessions = {}
|
||||
|
||||
# Create mock conversation store
|
||||
mock_conversation_store = MagicMock()
|
||||
mock_conversation_store.get_conversation_metadata = AsyncMock(return_value=None)
|
||||
|
||||
# Create mock request
|
||||
mock_request = MagicMock()
|
||||
mock_request.app.state.session_manager = mock_session_manager
|
||||
mock_request.app.state.conversation_store = mock_conversation_store
|
||||
|
||||
# Mock ConversationStats to raise an exception
|
||||
with patch(
|
||||
'openhands.server.services.conversation_stats.ConversationStats'
|
||||
) as mock_stats_class:
|
||||
mock_stats_class.side_effect = Exception('Test error')
|
||||
|
||||
# Call the endpoint and expect an HTTPException
|
||||
with pytest.raises(HTTPException) as exc_info:
|
||||
await get_conversation_stats('test-conversation-id', mock_request)
|
||||
|
||||
assert exc_info.value.status_code == 500
|
||||
assert 'Error getting conversation stats: Test error' in str(
|
||||
exc_info.value.detail
|
||||
)
|
||||
Reference in New Issue
Block a user