fix: use last API call's cache tokens for context-size display (#13698) (#13805)

The UsageAccumulator sums cacheRead/cacheWrite across all API calls
within a single turn. With Anthropic prompt caching, each call reports
cacheRead ≈ current_context_size, so after N tool-call round-trips the
accumulated total becomes N × actual_context, which gets clamped to
contextWindow (200k) by deriveSessionTotalTokens().
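
Illustrative sketch of the inflation (hypothetical numbers and a simplified CallUsage type; this is not the real accumulator or deriveSessionTotalTokens(), the actual code is in the diff below):

// Sketch only: four tool-call round-trips in one turn, each reporting
// cacheRead close to the current context size (~60k tokens here).
type CallUsage = { input: number; output: number; cacheRead: number; cacheWrite: number };

const calls: CallUsage[] = [
  { input: 1_200, output: 350, cacheRead: 58_000, cacheWrite: 1_200 },
  { input: 900, output: 410, cacheRead: 59_200, cacheWrite: 900 },
  { input: 1_100, output: 520, cacheRead: 60_400, cacheWrite: 1_100 },
  { input: 1_000, output: 550, cacheRead: 61_500, cacheWrite: 1_000 },
];

// Old behavior: cacheRead is summed across calls, so the apparent context
// grows to roughly N × the real context (≈239k for these numbers)...
const summedCacheRead = calls.reduce((sum, c) => sum + c.cacheRead, 0);

// ...and is then clamped to the model's window (assumed 200k here),
// so /status pins near the cap as the turn accumulates round-trips.
const CONTEXT_WINDOW = 200_000;
const displayedContext = Math.min(summedCacheRead, CONTEXT_WINDOW); // 200_000

// What the last call actually says the context is (≈63.5k).
const last = calls[calls.length - 1];
const actualContext = last.input + last.cacheRead + last.cacheWrite;

console.log({ summedCacheRead, displayedContext, actualContext });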

Fix: track the most recent API call's cache fields separately and use
them in toNormalizedUsage() for context-size reporting. This makes the
Context figure shown by /status accurate while still preserving the
accumulated output token counts for the turn.
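
For the same example numbers as the sketch above, the normalized result is now built from the last call's prompt-side fields plus the turn's accumulated output (made-up values, mirroring the arithmetic in the diff below):

// Sketch of the new shape; field names follow the diff, numbers are hypothetical.
const normalizedUsage = {
  input: 1_000,        // last call's input, not the sum over all calls
  output: 1_830,       // still accumulated across every call in the turn
  cacheRead: 61_500,   // last call's cacheRead ≈ current context size
  cacheWrite: 1_000,   // last call's cacheWrite
  // total = lastPromptTokens + accumulated output = (1_000 + 61_500 + 1_000) + 1_830
  total: 65_330,
};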

Fixes #13698
Fixes #13782

Co-authored-by: akari-musubi <259925157+akari-musubi@users.noreply.github.com>
Authored by Akari on 2026-02-12 23:01:36 +09:00, committed by GitHub
parent 4c350bc4c8
commit 455bc1ebba


@@ -80,6 +80,10 @@ type UsageAccumulator = {
   cacheRead: number;
   cacheWrite: number;
   total: number;
+  /** Cache fields from the most recent API call (not accumulated). */
+  lastCacheRead: number;
+  lastCacheWrite: number;
+  lastInput: number;
 };
 const createUsageAccumulator = (): UsageAccumulator => ({
@@ -88,6 +92,9 @@ const createUsageAccumulator = (): UsageAccumulator => ({
   cacheRead: 0,
   cacheWrite: 0,
   total: 0,
+  lastCacheRead: 0,
+  lastCacheWrite: 0,
+  lastInput: 0,
 });
 const hasUsageValues = (
@@ -112,6 +119,12 @@ const mergeUsageIntoAccumulator = (
   target.total +=
     usage.total ??
     (usage.input ?? 0) + (usage.output ?? 0) + (usage.cacheRead ?? 0) + (usage.cacheWrite ?? 0);
+  // Track the most recent API call's cache fields for accurate context-size reporting.
+  // Accumulated cache totals inflate context size when there are multiple tool-call round-trips,
+  // since each call reports cacheRead ≈ current_context_size.
+  target.lastCacheRead = usage.cacheRead ?? 0;
+  target.lastCacheWrite = usage.cacheWrite ?? 0;
+  target.lastInput = usage.input ?? 0;
 };
 const toNormalizedUsage = (usage: UsageAccumulator) => {
@@ -124,13 +137,21 @@ const toNormalizedUsage = (usage: UsageAccumulator) => {
   if (!hasUsage) {
     return undefined;
   }
-  const derivedTotal = usage.input + usage.output + usage.cacheRead + usage.cacheWrite;
+  // Use the LAST API call's cache fields for context-size calculation.
+  // The accumulated cacheRead/cacheWrite inflate context size because each tool-call
+  // round-trip reports cacheRead ≈ current_context_size, and summing N calls gives
+  // N × context_size which gets clamped to contextWindow (e.g. 200k).
+  // See: https://github.com/openclaw/openclaw/issues/13698
+  //
+  // We use lastInput/lastCacheRead/lastCacheWrite (from the most recent API call) for
+  // cache-related fields, but keep accumulated output (total generated text this turn).
+  const lastPromptTokens = usage.lastInput + usage.lastCacheRead + usage.lastCacheWrite;
   return {
-    input: usage.input || undefined,
+    input: usage.lastInput || undefined,
     output: usage.output || undefined,
-    cacheRead: usage.cacheRead || undefined,
-    cacheWrite: usage.cacheWrite || undefined,
-    total: usage.total || derivedTotal || undefined,
+    cacheRead: usage.lastCacheRead || undefined,
+    cacheWrite: usage.lastCacheWrite || undefined,
+    total: lastPromptTokens + usage.output || undefined,
   };
 };