mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 668de03c33 | |||
| 833f3e74a0 | |||
| 593539a8e2 |
@@ -0,0 +1,149 @@
|
||||
import React from "react";
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import { Provider } from "react-redux";
|
||||
import { configureStore } from "@reduxjs/toolkit";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { vi, describe, it, expect } from "vitest";
|
||||
import { ConversationCard } from "../conversation-card";
|
||||
import metricsReducer, {
|
||||
DEFAULT_CONTEXT_WINDOW_SIZE,
|
||||
} from "#/state/metrics-slice";
|
||||
|
||||
// Pin the relative-time label to a fixed string so rendered output does not
// depend on the timestamps passed to the component.
vi.mock("#/utils/format-time-delta", () => ({
  formatTimeDelta: () => "5 minutes",
}));

// Replace posthog-js with a stub. Vitest requires an explicit `default` key in
// the factory for modules that are consumed through a default import; without
// it, reading the default export of the mocked module throws. The named
// `capture` export is kept so direct named imports keep working as before.
// NOTE(review): presumably app code uses `import posthog from "posthog-js"` —
// confirm against the component under test.
vi.mock("posthog-js", () => {
  const capture = vi.fn();
  return {
    default: { capture },
    capture,
  };
});
|
||||
|
||||
describe("Metrics Modal", () => {
  // Accumulated token counts for the whole conversation.
  const TOTAL_USAGE = {
    prompt_tokens: 1000,
    completion_tokens: 500,
    cache_read_tokens: 100,
    cache_write_tokens: 200,
  };

  // Token counts for the most recent prompt. Also used to derive the expected
  // context-window percentage so the fixture and the assertion cannot drift.
  const MOST_RECENT_USAGE = {
    prompt_tokens: 300,
    completion_tokens: 150,
    cache_read_tokens: 50,
    cache_write_tokens: 100,
  };

  /**
   * Builds a store that only contains the metrics slice, preloaded with the
   * fixtures above. Keys in `initialState` override the default metrics.
   */
  const createStore = (initialState = {}) =>
    configureStore({
      reducer: {
        metrics: metricsReducer,
      },
      preloadedState: {
        metrics: {
          cost: 0.05,
          usage: { ...TOTAL_USAGE },
          mostRecentUsage: { ...MOST_RECENT_USAGE },
          modelName: "claude-3-sonnet-20240229",
          // No model info: the component falls back to
          // DEFAULT_CONTEXT_WINDOW_SIZE for the context window.
          modelInfo: null,
          ...initialState,
        },
      },
    });

  /**
   * Renders a ConversationCard backed by `store`, then opens the metrics modal
   * through the ellipsis menu and its "display cost" entry.
   */
  const renderWithMetricsModalOpen = async (store = createStore()) => {
    const now = new Date().toISOString();

    render(
      <Provider store={store}>
        <ConversationCard
          title="Test Conversation"
          selectedRepository={null}
          lastUpdatedAt={now}
          createdAt={now}
          showOptions
        />
      </Provider>,
    );

    await userEvent.click(screen.getByTestId("ellipsis-button"));
    await userEvent.click(screen.getByTestId("display-cost-button"));
  };

  it("should display total input and output tokens for the conversation", async () => {
    await renderWithMetricsModalOpen();

    // The modal itself must be present before checking its contents.
    expect(screen.getByTestId("metrics-modal")).toBeInTheDocument();

    expect(screen.getByText("Total Input Tokens:")).toBeInTheDocument();
    expect(screen.getByText("1,000")).toBeInTheDocument();

    expect(screen.getByText("Total Output Tokens:")).toBeInTheDocument();
    expect(screen.getByText("500")).toBeInTheDocument();
  });

  it("should display most recent prompt metrics", async () => {
    await renderWithMetricsModalOpen();

    expect(screen.getByText("Most Recent Prompt")).toBeInTheDocument();

    // The labels are looked up with getAllByText and only required to appear
    // at least once; the values are unique to the most recent prompt.
    expect(screen.getAllByText("Input Tokens:").length).toBeGreaterThan(0);
    expect(screen.getByText("300")).toBeInTheDocument();

    expect(screen.getAllByText("Output Tokens:").length).toBeGreaterThan(0);
    expect(screen.getByText("150")).toBeInTheDocument();
  });

  it("should display context window usage percentage", async () => {
    // prompt_tokens + completion_tokens of the most recent prompt, measured
    // against the default window because the fixture carries no model info.
    const totalTokens =
      MOST_RECENT_USAGE.prompt_tokens + MOST_RECENT_USAGE.completion_tokens;
    const expectedPercentage = `${(
      (totalTokens / DEFAULT_CONTEXT_WINDOW_SIZE) *
      100
    ).toFixed(2)}%`;

    await renderWithMetricsModalOpen();

    expect(screen.getByText("Context Window Usage:")).toBeInTheDocument();
    expect(screen.getByText(expectedPercentage)).toBeInTheDocument();
  });
});
|
||||
@@ -12,6 +12,7 @@ import { ConversationCardContextMenu } from "./conversation-card-context-menu";
|
||||
import { cn } from "#/utils/utils";
|
||||
import { BaseModal } from "../../shared/modals/base-modal/base-modal";
|
||||
import { RootState } from "#/store";
|
||||
import { DEFAULT_CONTEXT_WINDOW_SIZE } from "#/state/metrics-slice";
|
||||
|
||||
interface ConversationCardProps {
|
||||
onClick?: () => void;
|
||||
@@ -282,7 +283,7 @@ export function ConversationCard({
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div className="flex justify-between items-center pt-1">
|
||||
<div className="flex justify-between items-center pt-1 pb-2">
|
||||
<span className="font-semibold">Total Tokens:</span>
|
||||
<span className="font-bold">
|
||||
{(
|
||||
@@ -293,6 +294,113 @@ export function ConversationCard({
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
|
||||
{/* Most Recent Prompt Metrics */}
|
||||
{metrics?.mostRecentUsage && (
|
||||
<div className="border-t border-neutral-700 pt-4 pb-2">
|
||||
<h3 className="text-lg font-semibold mb-3">
|
||||
Most Recent Prompt
|
||||
</h3>
|
||||
|
||||
<div className="flex justify-between items-center pb-2">
|
||||
<span>Input Tokens:</span>
|
||||
<span className="font-semibold">
|
||||
{metrics.mostRecentUsage.prompt_tokens.toLocaleString()}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div className="flex justify-between items-center pb-2">
|
||||
<span>Output Tokens:</span>
|
||||
<span className="font-semibold">
|
||||
{metrics.mostRecentUsage.completion_tokens.toLocaleString()}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div className="flex justify-between items-center pb-2">
|
||||
<span>Total Tokens:</span>
|
||||
<span className="font-bold">
|
||||
{(
|
||||
metrics.mostRecentUsage.prompt_tokens +
|
||||
metrics.mostRecentUsage.completion_tokens
|
||||
).toLocaleString()}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Context Window Usage */}
|
||||
{metrics.mostRecentUsage && (
|
||||
<div className="mt-3 pt-2 border-t border-neutral-700">
|
||||
<div className="flex justify-between items-center">
|
||||
<span>Context Window Usage:</span>
|
||||
<span className="font-semibold">
|
||||
{(() => {
|
||||
// Get context window size from model_info if available, otherwise use default
|
||||
let contextSize = DEFAULT_CONTEXT_WINDOW_SIZE;
|
||||
|
||||
if (
|
||||
metrics.modelInfo &&
|
||||
metrics.modelInfo.max_input_tokens
|
||||
) {
|
||||
contextSize =
|
||||
metrics.modelInfo.max_input_tokens;
|
||||
} else if (
|
||||
metrics.modelInfo &&
|
||||
metrics.modelInfo.max_tokens
|
||||
) {
|
||||
contextSize = metrics.modelInfo.max_tokens;
|
||||
} else if (metrics.modelName) {
|
||||
// No additional fallback needed, using DEFAULT_CONTEXT_WINDOW_SIZE
|
||||
}
|
||||
|
||||
const totalTokens =
|
||||
metrics.mostRecentUsage.prompt_tokens +
|
||||
metrics.mostRecentUsage.completion_tokens;
|
||||
const percentage =
|
||||
(totalTokens / contextSize) * 100;
|
||||
|
||||
return `${percentage.toFixed(2)}%`;
|
||||
})()}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Progress bar for context window usage */}
|
||||
<div className="w-full bg-neutral-700 rounded-full h-2.5 mt-2">
|
||||
<div
|
||||
className="bg-blue-600 h-2.5 rounded-full"
|
||||
style={{
|
||||
width: (() => {
|
||||
// Get context window size from model_info if available, otherwise use default
|
||||
let contextSize = DEFAULT_CONTEXT_WINDOW_SIZE;
|
||||
|
||||
if (
|
||||
metrics.modelInfo &&
|
||||
metrics.modelInfo.max_input_tokens
|
||||
) {
|
||||
contextSize =
|
||||
metrics.modelInfo.max_input_tokens;
|
||||
} else if (
|
||||
metrics.modelInfo &&
|
||||
metrics.modelInfo.max_tokens
|
||||
) {
|
||||
contextSize = metrics.modelInfo.max_tokens;
|
||||
} else if (metrics.modelName) {
|
||||
// No additional fallback needed, using DEFAULT_CONTEXT_WINDOW_SIZE
|
||||
}
|
||||
|
||||
const totalTokens =
|
||||
metrics.mostRecentUsage.prompt_tokens +
|
||||
metrics.mostRecentUsage.completion_tokens;
|
||||
const percentage =
|
||||
(totalTokens / contextSize) * 100;
|
||||
|
||||
return `${Math.min(percentage, 100)}%`;
|
||||
})(),
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -91,6 +91,9 @@ export function handleActionMessage(message: ActionMessage) {
|
||||
const metrics = {
|
||||
cost: message.llm_metrics?.accumulated_cost ?? null,
|
||||
usage: message.llm_metrics?.accumulated_token_usage ?? null,
|
||||
token_usages: message.llm_metrics?.token_usages ?? [],
|
||||
model_name: message.llm_metrics?.model_name ?? null,
|
||||
model_info: message.llm_metrics?.model_info ?? null,
|
||||
};
|
||||
store.dispatch(setMetrics(metrics));
|
||||
}
|
||||
|
||||
@@ -1,27 +1,74 @@
|
||||
import { createSlice, PayloadAction } from "@reduxjs/toolkit";
|
||||
|
||||
/** Context window size (in tokens) assumed when no model info is available. */
export const DEFAULT_CONTEXT_WINDOW_SIZE = 100000;
|
||||
|
||||
/** Token counts reported for one LLM call or accumulated over several. */
interface TokenUsage {
  /** Shown in the metrics modal as input tokens. */
  prompt_tokens: number;
  /** Shown in the metrics modal as output tokens. */
  completion_tokens: number;
  /** NOTE(review): presumably tokens served from the prompt cache — confirm. */
  cache_read_tokens: number;
  /** NOTE(review): presumably tokens written to the prompt cache — confirm. */
  cache_write_tokens: number;
  /** NOTE(review): presumably the model that produced this usage — confirm. */
  model?: string;
}
|
||||
|
||||
/**
 * Model limits forwarded with the metrics. All known fields are optional and
 * any additional keys are accepted as opaque values.
 */
interface ModelInfo {
  /** Fallback context size when `max_input_tokens` is not set. */
  max_tokens?: number;
  /** Preferred source for the context window size. */
  max_input_tokens?: number;
  max_output_tokens?: number;
  [key: string]: unknown;
}
|
||||
|
||||
/** Shape of the `metrics` slice. Every field is `null` until it is reported. */
interface MetricsState {
  /** Accumulated cost of the conversation. */
  cost: number | null;
  /** Accumulated token usage of the conversation. */
  usage: TokenUsage | null;
  /** Last entry of the most recently received non-empty `token_usages` list. */
  mostRecentUsage: TokenUsage | null;
  modelName: string | null;
  /** Model limits used to size the context window, when provided. */
  modelInfo: ModelInfo | null;
}
|
||||
|
||||
// Nothing has been reported yet, so every metric starts out as null.
const initialState: MetricsState = {
  cost: null,
  usage: null,
  mostRecentUsage: null,
  modelName: null,
  modelInfo: null,
};
|
||||
|
||||
const metricsSlice = createSlice({
  name: "metrics",
  initialState,
  reducers: {
    /**
     * Stores the metrics carried by an incoming message.
     *
     * `cost` and `usage` are always overwritten. `modelName`, `modelInfo` and
     * `mostRecentUsage` are only updated when the payload carries a usable
     * value, so a message without them keeps the previously stored values.
     *
     * `model_name` and `model_info` accept `null` because the dispatcher
     * normalises missing values with `?? null`.
     */
    setMetrics: (
      state,
      action: PayloadAction<{
        cost: number | null;
        usage: TokenUsage | null;
        token_usages?: TokenUsage[];
        model_name?: string | null;
        model_info?: ModelInfo | null;
      }>,
    ) => {
      const {
        cost,
        usage,
        token_usages: tokenUsages,
        model_name: modelName,
        model_info: modelInfo,
      } = action.payload;

      state.cost = cost;
      state.usage = usage;

      if (modelName) {
        state.modelName = modelName;
      }

      if (modelInfo) {
        state.modelInfo = modelInfo;
      }

      // The last entry of the list is treated as the most recent prompt.
      if (tokenUsages && tokenUsages.length > 0) {
        state.mostRecentUsage = tokenUsages[tokenUsages.length - 1];
      }
    },
  },
});
|
||||
|
||||
@@ -1178,7 +1178,14 @@ class AgentController:
|
||||
cache_write_tokens=latest_usage.cache_write_tokens,
|
||||
response_id=latest_usage.response_id,
|
||||
)
|
||||
action.llm_metrics = metrics
|
||||
# Add model_info to metrics if available
|
||||
if hasattr(self.agent.llm, 'model_info') and self.agent.llm.model_info:
|
||||
# Add model_info to metrics
|
||||
metrics_dict = metrics.get()
|
||||
metrics_dict['model_info'] = self.agent.llm.model_info
|
||||
action.llm_metrics = metrics_dict
|
||||
else:
|
||||
action.llm_metrics = metrics
|
||||
|
||||
# Log the metrics information for frontend display
|
||||
log_usage: TokenUsage | None = (
|
||||
|
||||
@@ -161,6 +161,7 @@ class Metrics:
|
||||
latency.model_dump() for latency in self._response_latencies
|
||||
],
|
||||
'token_usages': [usage.model_dump() for usage in self._token_usages],
|
||||
'model_name': self.model_name,
|
||||
}
|
||||
|
||||
def reset(self):
|
||||
|
||||
Reference in New Issue
Block a user