Compare commits

...

3 Commits

Author SHA1 Message Date
openhands
668de03c33 Replace hardcoded MODEL_CONTEXT_SIZES with DEFAULT_CONTEXT_WINDOW_SIZE 2025-03-27 18:36:56 +00:00
openhands
833f3e74a0 Add context window usage metrics to cost modal 2025-03-27 18:31:47 +00:00
openhands
593539a8e2 Add context window usage metrics to cost modal 2025-03-27 18:24:34 +00:00
6 changed files with 324 additions and 9 deletions

View File

@@ -0,0 +1,149 @@
import React from "react";
import { render, screen } from "@testing-library/react";
import { Provider } from "react-redux";
import { configureStore } from "@reduxjs/toolkit";
import userEvent from "@testing-library/user-event";
import { vi, describe, it, expect } from "vitest";
import { ConversationCard } from "../conversation-card";
import metricsReducer, {
DEFAULT_CONTEXT_WINDOW_SIZE,
} from "#/state/metrics-slice";
// Replace formatTimeDelta with a stub that always returns "5 minutes"
// (presumably so rendered relative timestamps stay deterministic — confirm).
vi.mock("#/utils/format-time-delta", () => ({
formatTimeDelta: () => "5 minutes",
}));
// Replace posthog-js with a module that exposes only a spied `capture` function.
// NOTE(review): this factory provides a named `capture` export and no `default`;
// if the code under test imports posthog via its default export, the mock
// would need a `default` key as well — verify against the component.
vi.mock("posthog-js", () => ({
capture: vi.fn(),
}));
describe("Metrics Modal", () => {
  // Accumulated usage for the whole conversation.
  const totalUsage = {
    prompt_tokens: 1000,
    completion_tokens: 500,
    cache_read_tokens: 100,
    cache_write_tokens: 200,
  };

  // Usage of the latest prompt only; the context window percentage is derived
  // from these numbers.
  const mostRecentUsage = {
    prompt_tokens: 300,
    completion_tokens: 150,
    cache_read_tokens: 50,
    cache_write_tokens: 100,
  };

  /**
   * Builds a store whose metrics slice is preloaded with the fixtures above.
   * Keys passed in `initialState` override the defaults.
   */
  const createStore = (initialState = {}) =>
    configureStore({
      reducer: {
        metrics: metricsReducer,
      },
      preloadedState: {
        metrics: {
          cost: 0.05,
          usage: totalUsage,
          mostRecentUsage,
          modelName: "claude-3-sonnet-20240229",
          // Explicit so the preloaded state has every key of the slice state.
          modelInfo: null,
          ...initialState,
        },
      },
    });

  /**
   * Renders a ConversationCard inside a store provider and opens the metrics
   * modal through the ellipsis menu, exactly as a user would.
   */
  const renderAndOpenMetricsModal = async (initialState = {}) => {
    render(
      <Provider store={createStore(initialState)}>
        <ConversationCard
          title="Test Conversation"
          selectedRepository={null}
          lastUpdatedAt={new Date().toISOString()}
          createdAt={new Date().toISOString()}
          showOptions
        />
      </Provider>,
    );

    await userEvent.click(screen.getByTestId("ellipsis-button"));
    await userEvent.click(screen.getByTestId("display-cost-button"));
  };

  /** Mirrors the component's formatting of the context window percentage. */
  const formatPercentage = (contextSize: number) => {
    const totalTokens =
      mostRecentUsage.prompt_tokens + mostRecentUsage.completion_tokens;
    return `${((totalTokens / contextSize) * 100).toFixed(2)}%`;
  };

  it("should display total input and output tokens for the conversation", async () => {
    await renderAndOpenMetricsModal();

    // Check if the modal is open
    expect(screen.getByTestId("metrics-modal")).toBeInTheDocument();

    // Check if total input tokens are displayed
    expect(screen.getByText("Total Input Tokens:")).toBeInTheDocument();
    expect(screen.getByText("1,000")).toBeInTheDocument();

    // Check if total output tokens are displayed
    expect(screen.getByText("Total Output Tokens:")).toBeInTheDocument();
    expect(screen.getByText("500")).toBeInTheDocument();
  });

  it("should display most recent prompt metrics", async () => {
    await renderAndOpenMetricsModal();

    // Check if the most recent prompt section is displayed
    expect(screen.getByText("Most Recent Prompt")).toBeInTheDocument();

    // Check if most recent input tokens are displayed
    expect(screen.getAllByText("Input Tokens:").length).toBeGreaterThan(0);
    expect(screen.getByText("300")).toBeInTheDocument();

    // Check if most recent output tokens are displayed
    expect(screen.getAllByText("Output Tokens:").length).toBeGreaterThan(0);
    expect(screen.getByText("150")).toBeInTheDocument();
  });

  it("should display context window usage percentage", async () => {
    await renderAndOpenMetricsModal();

    // Without model info the component falls back to the default window size.
    expect(screen.getByText("Context Window Usage:")).toBeInTheDocument();
    expect(
      screen.getByText(formatPercentage(DEFAULT_CONTEXT_WINDOW_SIZE)),
    ).toBeInTheDocument();
  });

  it("should use modelInfo.max_input_tokens as the context window size when available", async () => {
    const maxInputTokens = 1000;
    await renderAndOpenMetricsModal({
      modelInfo: { max_input_tokens: maxInputTokens },
    });

    expect(screen.getByText("Context Window Usage:")).toBeInTheDocument();
    expect(
      screen.getByText(formatPercentage(maxInputTokens)),
    ).toBeInTheDocument();
  });
});

View File

@@ -12,6 +12,7 @@ import { ConversationCardContextMenu } from "./conversation-card-context-menu";
import { cn } from "#/utils/utils";
import { BaseModal } from "../../shared/modals/base-modal/base-modal";
import { RootState } from "#/store";
import { DEFAULT_CONTEXT_WINDOW_SIZE } from "#/state/metrics-slice";
interface ConversationCardProps {
onClick?: () => void;
@@ -282,7 +283,7 @@ export function ConversationCard({
</span>
</div>
<div className="flex justify-between items-center pt-1">
<div className="flex justify-between items-center pt-1 pb-2">
<span className="font-semibold">Total Tokens:</span>
<span className="font-bold">
{(
@@ -293,6 +294,113 @@ export function ConversationCard({
</div>
</>
)}
{/* Most Recent Prompt Metrics */}
{metrics?.mostRecentUsage &&
  (() => {
    // Compute every displayed figure once so the percentage label and the
    // progress bar can never disagree.
    const {
      prompt_tokens: promptTokens,
      completion_tokens: completionTokens,
    } = metrics.mostRecentUsage;
    const totalTokens = promptTokens + completionTokens;

    // Context window size: max_input_tokens, then max_tokens, then the
    // default. `||` (not `??`) is deliberate: a reported size of 0 must fall
    // through to the next candidate instead of causing a division by zero.
    const contextSize =
      metrics.modelInfo?.max_input_tokens ||
      metrics.modelInfo?.max_tokens ||
      DEFAULT_CONTEXT_WINDOW_SIZE;
    const percentage = (totalTokens / contextSize) * 100;

    return (
      <div className="border-t border-neutral-700 pt-4 pb-2">
        <h3 className="text-lg font-semibold mb-3">Most Recent Prompt</h3>

        <div className="flex justify-between items-center pb-2">
          <span>Input Tokens:</span>
          <span className="font-semibold">
            {promptTokens.toLocaleString()}
          </span>
        </div>

        <div className="flex justify-between items-center pb-2">
          <span>Output Tokens:</span>
          <span className="font-semibold">
            {completionTokens.toLocaleString()}
          </span>
        </div>

        <div className="flex justify-between items-center pb-2">
          <span>Total Tokens:</span>
          <span className="font-bold">{totalTokens.toLocaleString()}</span>
        </div>

        {/* Context Window Usage */}
        <div className="mt-3 pt-2 border-t border-neutral-700">
          <div className="flex justify-between items-center">
            <span>Context Window Usage:</span>
            <span className="font-semibold">
              {`${percentage.toFixed(2)}%`}
            </span>
          </div>

          {/* Progress bar for context window usage; the width is capped at
              100% while the label above shows the uncapped value. */}
          <div className="w-full bg-neutral-700 rounded-full h-2.5 mt-2">
            <div
              className="bg-blue-600 h-2.5 rounded-full"
              style={{ width: `${Math.min(percentage, 100)}%` }}
            />
          </div>
        </div>
      </div>
    );
  })()}
</div>
</div>
)}

View File

@@ -91,6 +91,9 @@ export function handleActionMessage(message: ActionMessage) {
// Build the setMetrics payload from the message's optional llm_metrics.
// Missing scalar fields become null; a missing token_usages becomes an empty
// array, which the metrics reducer treats as "no new most-recent usage".
const metrics = {
cost: message.llm_metrics?.accumulated_cost ?? null,
usage: message.llm_metrics?.accumulated_token_usage ?? null,
token_usages: message.llm_metrics?.token_usages ?? [],
model_name: message.llm_metrics?.model_name ?? null,
model_info: message.llm_metrics?.model_info ?? null,
};
// Push the payload into the Redux metrics slice.
store.dispatch(setMetrics(metrics));
}

View File

@@ -1,27 +1,74 @@
import { createSlice, PayloadAction } from "@reduxjs/toolkit";
/**
 * Context window size, in tokens, used as the fallback when no model info
 * supplies one.
 */
export const DEFAULT_CONTEXT_WINDOW_SIZE = 100_000;
/**
 * Token counters for LLM usage. The same shape is used for the accumulated
 * conversation usage and for the most recent prompt.
 */
interface TokenUsage {
  /** Presumably the model this usage is attributed to — confirm with backend. */
  model?: string;
  /** Prompt tokens; shown in the UI as "Input Tokens". */
  prompt_tokens: number;
  /** Completion tokens; shown in the UI as "Output Tokens". */
  completion_tokens: number;
  /** Cache read token count (exact semantics defined by the backend). */
  cache_read_tokens: number;
  /** Cache write token count (exact semantics defined by the backend). */
  cache_write_tokens: number;
}
/**
 * Model metadata forwarded by the backend. Only the token limits are typed;
 * any other key is accepted as `unknown`.
 */
interface ModelInfo {
  /** Any additional, untyped model metadata. */
  [key: string]: unknown;
  /** Preferred source for the context window size when present. */
  max_input_tokens?: number;
  /** Used as the context window size when `max_input_tokens` is absent. */
  max_tokens?: number;
  /** Output token limit, when reported. */
  max_output_tokens?: number;
}
/** Shape of the Redux `metrics` slice. Every field is null until reported. */
interface MetricsState {
  /** Accumulated cost of the conversation. */
  cost: number | null;
  /** Accumulated token usage of the conversation. */
  usage: TokenUsage | null;
  /** Token usage of the most recent prompt (last reported usage entry). */
  mostRecentUsage: TokenUsage | null;
  /** Name of the model that produced the metrics. */
  modelName: string | null;
  /** Model metadata, used to determine the context window size. */
  modelInfo: ModelInfo | null;
}
// Initial slice state: every metric is null until a setMetrics action
// provides a value for it.
const initialState: MetricsState = {
cost: null,
usage: null,
mostRecentUsage: null,
modelName: null,
modelInfo: null,
};
const metricsSlice = createSlice({
  name: "metrics",
  initialState,
  reducers: {
    /**
     * Stores the latest metrics reported by the backend.
     *
     * `cost` and `usage` are always overwritten. `model_name`, `model_info`
     * and the most recent usage are only updated when the payload carries a
     * usable value, so a message without them keeps what is already stored.
     * `null` is accepted for the optional fields because the action handler
     * sends `null` when the backend omits them.
     */
    setMetrics: (
      state,
      action: PayloadAction<{
        cost: number | null;
        usage: TokenUsage | null;
        token_usages?: TokenUsage[];
        model_name?: string | null;
        model_info?: ModelInfo | null;
      }>,
    ) => {
      const {
        cost,
        usage,
        token_usages: tokenUsages,
        model_name: modelName,
        model_info: modelInfo,
      } = action.payload;

      state.cost = cost;
      state.usage = usage;

      // Set the model name if provided
      if (modelName) {
        state.modelName = modelName;
      }

      // Set the model info if provided
      if (modelInfo) {
        state.modelInfo = modelInfo;
      }

      // The last entry of token_usages is the most recent prompt's usage
      if (tokenUsages && tokenUsages.length > 0) {
        state.mostRecentUsage = tokenUsages[tokenUsages.length - 1];
      }
    },
  },
});

View File

@@ -1178,7 +1178,14 @@ class AgentController:
cache_write_tokens=latest_usage.cache_write_tokens,
response_id=latest_usage.response_id,
)
action.llm_metrics = metrics
# Add model_info to metrics if available
if hasattr(self.agent.llm, 'model_info') and self.agent.llm.model_info:
# Add model_info to metrics
metrics_dict = metrics.get()
metrics_dict['model_info'] = self.agent.llm.model_info
action.llm_metrics = metrics_dict
else:
action.llm_metrics = metrics
# Log the metrics information for frontend display
log_usage: TokenUsage | None = (

View File

@@ -161,6 +161,7 @@ class Metrics:
latency.model_dump() for latency in self._response_latencies
],
'token_usages': [usage.model_dump() for usage in self._token_usages],
'model_name': self.model_name,
}
def reset(self):