From ced4ee3038441a1d8432768c8f90640d413d14ae Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Fri, 11 Apr 2025 07:38:47 -0700 Subject: [PATCH] Fix: Display accumulated token usage in frontend metrics (#7803) Co-authored-by: openhands --- openhands/controller/agent_controller.py | 42 +++++++++++++----------- tests/unit/test_agent_controller.py | 31 +++++++++++++---- 2 files changed, 46 insertions(+), 27 deletions(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 320c49ebe5..9f86664c7c 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -66,7 +66,7 @@ from openhands.events.observation import ( ) from openhands.events.serialization.event import event_to_trajectory, truncate_content from openhands.llm.llm import LLM -from openhands.llm.metrics import Metrics, TokenUsage +from openhands.llm.metrics import Metrics # note: RESUME is only available on web GUI TRAFFIC_CONTROL_REMINDER = ( @@ -1114,36 +1114,38 @@ class AgentController: To avoid performance issues with long conversations, we only keep: - accumulated_cost: The current total cost - - latest token_usage: Token statistics from the most recent API call + - accumulated_token_usage: Accumulated token statistics across all API calls Args: action: The action to attach metrics to """ + # Create a minimal metrics object with just what the frontend needs metrics = Metrics(model_name=self.agent.llm.metrics.model_name) metrics.accumulated_cost = self.agent.llm.metrics.accumulated_cost - if self.agent.llm.metrics.token_usages: - latest_usage = self.agent.llm.metrics.token_usages[-1] - metrics.add_token_usage( - prompt_tokens=latest_usage.prompt_tokens, - completion_tokens=latest_usage.completion_tokens, - cache_read_tokens=latest_usage.cache_read_tokens, - cache_write_tokens=latest_usage.cache_write_tokens, - response_id=latest_usage.response_id, - ) + metrics._accumulated_token_usage = ( + self.agent.llm.metrics.accumulated_token_usage + ) + action.llm_metrics = metrics - # Log the metrics information for frontend display - log_usage: TokenUsage | None = ( - metrics.token_usages[-1] if metrics.token_usages else None - ) + # Log the metrics information for debugging + # Get the latest usage directly from the agent's metrics + latest_usage = None + if self.agent.llm.metrics.token_usages: + latest_usage = self.agent.llm.metrics.token_usages[-1] + + accumulated_usage = self.agent.llm.metrics.accumulated_token_usage self.log( 'debug', f'Action metrics - accumulated_cost: {metrics.accumulated_cost}, ' - f'tokens (prompt/completion/cache_read/cache_write): ' - f'{log_usage.prompt_tokens if log_usage else 0}/' - f'{log_usage.completion_tokens if log_usage else 0}/' - f'{log_usage.cache_read_tokens if log_usage else 0}/' - f'{log_usage.cache_write_tokens if log_usage else 0}', + f'latest tokens (prompt/completion/cache_read/cache_write): ' + f'{latest_usage.prompt_tokens if latest_usage else 0}/' + f'{latest_usage.completion_tokens if latest_usage else 0}/' + f'{latest_usage.cache_read_tokens if latest_usage else 0}/' + f'{latest_usage.cache_write_tokens if latest_usage else 0}, ' + f'accumulated tokens (prompt/completion): ' + f'{accumulated_usage.prompt_tokens}/' + f'{accumulated_usage.completion_tokens}', extra={'msg_type': 'METRICS'}, ) diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py index de1a8a634f..43f6c9a8ee 100644 --- a/tests/unit/test_agent_controller.py +++ b/tests/unit/test_agent_controller.py @@ -1089,6 +1089,16 @@ async def test_action_metrics_copy(): metrics.token_usages = [usage1, usage2] + # Set the accumulated token usage + metrics._accumulated_token_usage = TokenUsage( + model='test-model', + prompt_tokens=15, # 5 + 10 + completion_tokens=30, # 10 + 20 + cache_read_tokens=7, # 2 + 5 + cache_write_tokens=7, # 2 + 5 + response_id='accumulated', + ) + # Add a cost instance - should not be included in action metrics # This will increase accumulated_cost by 0.02 metrics.add_cost(0.02) @@ -1131,13 +1141,20 @@ async def test_action_metrics_copy(): last_action.llm_metrics.accumulated_cost == 0.07 ) # 0.05 initial + 0.02 from add_cost - # Should include the last token usage - assert len(last_action.llm_metrics.token_usages) == 1 - assert last_action.llm_metrics.token_usages[0].prompt_tokens == 10 - assert last_action.llm_metrics.token_usages[0].completion_tokens == 20 - assert last_action.llm_metrics.token_usages[0].cache_read_tokens == 5 - assert last_action.llm_metrics.token_usages[0].cache_write_tokens == 5 - assert last_action.llm_metrics.token_usages[0].response_id == 'test-id-2' + # Should not include individual token usages anymore (after the fix) + assert len(last_action.llm_metrics.token_usages) == 0 + + # But should include the accumulated token usage + assert last_action.llm_metrics.accumulated_token_usage.prompt_tokens == 15 # 5 + 10 + assert ( + last_action.llm_metrics.accumulated_token_usage.completion_tokens == 30 + ) # 10 + 20 + assert ( + last_action.llm_metrics.accumulated_token_usage.cache_read_tokens == 7 + ) # 2 + 5 + assert ( + last_action.llm_metrics.accumulated_token_usage.cache_write_tokens == 7 + ) # 2 + 5 # Should not include the cost history assert len(last_action.llm_metrics.costs) == 0