fix: update totalTokens after compaction using last-call usage (#15018)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 9214291bf7
Co-authored-by: shtse8 <8020099+shtse8@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras
This commit is contained in:
Kyle Tse
2026-02-12 23:02:30 +00:00
committed by GitHub
parent 033d5b5c15
commit a10f228a5b
10 changed files with 602 additions and 19 deletions

View File

@@ -820,11 +820,18 @@ export async function runEmbeddedPiAgent(
}
const usage = toNormalizedUsage(usageAccumulator);
// Extract the last individual API call's usage for context-window
// utilization display. The accumulated `usage` sums input tokens
// across all calls (tool-use loops, compaction retries), which
// overstates the actual context size. `lastCallUsage` reflects only
// the final call, giving an accurate snapshot of current context.
const lastCallUsage = normalizeUsage(lastAssistant?.usage as UsageLike);
const agentMeta: EmbeddedPiAgentMeta = {
sessionId: sessionIdUsed,
provider: lastAssistant?.provider ?? provider,
model: lastAssistant?.model ?? model.id,
usage,
lastCallUsage: lastCallUsage ?? undefined,
compactionCount: autoCompactionCount > 0 ? autoCompactionCount : undefined,
};

View File

@@ -13,6 +13,20 @@ export type EmbeddedPiAgentMeta = {
cacheWrite?: number;
total?: number;
};
/**
* Usage from the last individual API call (not accumulated across tool-use
* loops or compaction retries). Used for context-window utilization display
* (`totalTokens` in sessions.json) because the accumulated `usage.input`
* sums input tokens from every API call in the run, which overstates the
* actual context size.
*/
lastCallUsage?: {
input?: number;
output?: number;
cacheRead?: number;
cacheWrite?: number;
total?: number;
};
};
export type EmbeddedPiRunMeta = {

View File

@@ -0,0 +1,240 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it, vi } from "vitest";
import type { TemplateContext } from "../templating.js";
import type { FollowupRun, QueueSettings } from "./queue.js";
import { createMockTypingController } from "./test-helpers.js";
const runEmbeddedPiAgentMock = vi.fn();
type EmbeddedRunParams = {
prompt?: string;
extraSystemPrompt?: string;
onAgentEvent?: (evt: { stream?: string; data?: { phase?: string; willRetry?: boolean } }) => void;
};
vi.mock("../../agents/model-fallback.js", () => ({
runWithModelFallback: async ({
provider,
model,
run,
}: {
provider: string;
model: string;
run: (provider: string, model: string) => Promise<unknown>;
}) => ({
result: await run(provider, model),
provider,
model,
}),
}));
vi.mock("../../agents/cli-runner.js", () => ({
runCliAgent: vi.fn(),
}));
vi.mock("../../agents/pi-embedded.js", () => ({
queueEmbeddedPiMessage: vi.fn().mockReturnValue(false),
runEmbeddedPiAgent: (params: unknown) => runEmbeddedPiAgentMock(params),
}));
vi.mock("./queue.js", async () => {
const actual = await vi.importActual<typeof import("./queue.js")>("./queue.js");
return {
...actual,
enqueueFollowupRun: vi.fn(),
scheduleFollowupDrain: vi.fn(),
};
});
import { runReplyAgent } from "./agent-runner.js";
/** Writes a one-entry sessions.json store to disk, creating parent dirs as needed. */
async function seedSessionStore(params: {
  storePath: string;
  sessionKey: string;
  entry: Record<string, unknown>;
}) {
  const { storePath, sessionKey, entry } = params;
  await fs.mkdir(path.dirname(storePath), { recursive: true });
  const payload = JSON.stringify({ [sessionKey]: entry }, null, 2);
  await fs.writeFile(storePath, payload, "utf-8");
}
/**
 * Builds the shared fixtures for a runReplyAgent invocation: a mock typing
 * controller, a WhatsApp template context, an interrupt-mode queue, and a
 * queued followup run. Only `config` influences the produced run;
 * `storePath`/`sessionEntry` are accepted for caller symmetry.
 */
function createBaseRun(params: {
  storePath: string;
  sessionEntry: Record<string, unknown>;
  config?: Record<string, unknown>;
}) {
  const typing = createMockTypingController();
  const sessionCtx = {
    Provider: "whatsapp",
    OriginatingTo: "+15550001111",
    AccountId: "primary",
    MessageSid: "msg",
  } as unknown as TemplateContext;
  const resolvedQueue = { mode: "interrupt" } as unknown as QueueSettings;
  // Minimal embedded-run descriptor; paths point at throwaway /tmp locations.
  const run = {
    agentId: "main",
    agentDir: "/tmp/agent",
    sessionId: "session",
    sessionKey: "main",
    messageProvider: "whatsapp",
    sessionFile: "/tmp/session.jsonl",
    workspaceDir: "/tmp",
    config: params.config ?? {},
    skillsSnapshot: {},
    provider: "anthropic",
    model: "claude",
    thinkLevel: "low",
    verboseLevel: "off",
    elevatedLevel: "off",
    bashElevated: { enabled: false, allowed: false, defaultLevel: "off" },
    timeoutMs: 1_000,
    blockReplyBreak: "message_end",
  };
  const followupRun = {
    prompt: "hello",
    summaryLine: "hello",
    enqueuedAt: Date.now(),
    run,
  } as unknown as FollowupRun;
  return { typing, sessionCtx, resolvedQueue, followupRun };
}
describe("runReplyAgent auto-compaction token update", () => {
  it("updates totalTokens after auto-compaction using lastCallUsage", async () => {
    // The mock is module-level and shared across tests — reset before seeding
    // a fresh implementation so earlier calls/implementations can't leak in.
    runEmbeddedPiAgentMock.mockReset();
    const tmp = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-compact-tokens-"));
    const storePath = path.join(tmp, "sessions.json");
    const sessionKey = "main";
    // Stale pre-compaction entry: 181k tokens recorded, no compactions yet.
    const sessionEntry = {
      sessionId: "session",
      updatedAt: Date.now(),
      totalTokens: 181_000,
      compactionCount: 0,
    };
    await seedSessionStore({ storePath, sessionKey, entry: sessionEntry });
    runEmbeddedPiAgentMock.mockImplementation(async (params: EmbeddedRunParams) => {
      // Simulate auto-compaction during agent run
      params.onAgentEvent?.({ stream: "compaction", data: { phase: "start" } });
      params.onAgentEvent?.({ stream: "compaction", data: { phase: "end", willRetry: false } });
      return {
        payloads: [{ text: "done" }],
        meta: {
          agentMeta: {
            // Accumulated usage across pre+post compaction calls — inflated
            usage: { input: 190_000, output: 8_000, total: 198_000 },
            // Last individual API call's usage — actual post-compaction context
            lastCallUsage: { input: 10_000, output: 3_000, total: 13_000 },
            compactionCount: 1,
          },
        },
      };
    });
    // Disable memory flush so we isolate the auto-compaction path
    const config = {
      agents: { defaults: { compaction: { memoryFlush: { enabled: false } } } },
    };
    const { typing, sessionCtx, resolvedQueue, followupRun } = createBaseRun({
      storePath,
      sessionEntry,
      config,
    });
    await runReplyAgent({
      commandBody: "hello",
      followupRun,
      queueKey: "main",
      resolvedQueue,
      shouldSteer: false,
      shouldFollowup: false,
      isActive: false,
      isStreaming: false,
      typing,
      sessionCtx,
      sessionEntry,
      sessionStore: { [sessionKey]: sessionEntry },
      sessionKey,
      storePath,
      defaultModel: "anthropic/claude-opus-4-5",
      agentCfgContextTokens: 200_000,
      resolvedVerboseLevel: "off",
      isNewSession: false,
      blockStreamingEnabled: false,
      resolvedBlockStreamingBreak: "message_end",
      shouldInjectGroupIntro: false,
      typingMode: "instant",
    });
    // Read the store back from disk — the assertion targets what was
    // persisted, not the in-memory sessionStore object.
    const stored = JSON.parse(await fs.readFile(storePath, "utf-8"));
    // totalTokens should reflect actual post-compaction context (~10k), not
    // the stale pre-compaction value (181k) or the inflated accumulated (190k)
    expect(stored[sessionKey].totalTokens).toBe(10_000);
    // compactionCount should be incremented
    expect(stored[sessionKey].compactionCount).toBe(1);
  });
  it("updates totalTokens from lastCallUsage even without compaction", async () => {
    runEmbeddedPiAgentMock.mockReset();
    const tmp = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-usage-last-"));
    const storePath = path.join(tmp, "sessions.json");
    const sessionKey = "main";
    const sessionEntry = {
      sessionId: "session",
      updatedAt: Date.now(),
      totalTokens: 50_000,
    };
    await seedSessionStore({ storePath, sessionKey, entry: sessionEntry });
    // No compaction events fired here — this exercises the plain usage-persist
    // path rather than the compaction-count path of the first test.
    runEmbeddedPiAgentMock.mockImplementation(async (_params: EmbeddedRunParams) => ({
      payloads: [{ text: "ok" }],
      meta: {
        agentMeta: {
          // Tool-use loop: accumulated input is higher than last call's input
          usage: { input: 75_000, output: 5_000, total: 80_000 },
          lastCallUsage: { input: 55_000, output: 2_000, total: 57_000 },
        },
      },
    }));
    const { typing, sessionCtx, resolvedQueue, followupRun } = createBaseRun({
      storePath,
      sessionEntry,
    });
    await runReplyAgent({
      commandBody: "hello",
      followupRun,
      queueKey: "main",
      resolvedQueue,
      shouldSteer: false,
      shouldFollowup: false,
      isActive: false,
      isStreaming: false,
      typing,
      sessionCtx,
      sessionEntry,
      sessionStore: { [sessionKey]: sessionEntry },
      sessionKey,
      storePath,
      defaultModel: "anthropic/claude-opus-4-5",
      agentCfgContextTokens: 200_000,
      resolvedVerboseLevel: "off",
      isNewSession: false,
      blockStreamingEnabled: false,
      resolvedBlockStreamingBreak: "message_end",
      shouldInjectGroupIntro: false,
      typingMode: "instant",
    });
    const stored = JSON.parse(await fs.readFile(storePath, "utf-8"));
    // totalTokens should use lastCallUsage (55k), not accumulated (75k)
    expect(stored[sessionKey].totalTokens).toBe(55_000);
  });
});

View File

@@ -38,8 +38,7 @@ import { resolveBlockStreamingCoalescing } from "./block-streaming.js";
import { createFollowupRunner } from "./followup-runner.js";
import { enqueueFollowupRun, type FollowupRun, type QueueSettings } from "./queue.js";
import { createReplyToModeFilterForChannel, resolveReplyToMode } from "./reply-threading.js";
import { incrementCompactionCount } from "./session-updates.js";
import { persistSessionUsageUpdate } from "./session-usage.js";
import { incrementRunCompactionCount, persistRunSessionUsage } from "./session-run-accounting.js";
import { createTypingSignaler } from "./typing-mode.js";
const BLOCK_REPLY_SEND_TIMEOUT_MS = 15_000;
@@ -384,10 +383,11 @@ export async function runReplyAgent(params: {
activeSessionEntry?.contextTokens ??
DEFAULT_CONTEXT_TOKENS;
await persistSessionUsageUpdate({
await persistRunSessionUsage({
storePath,
sessionKey,
usage,
lastCallUsage: runResult.meta.agentMeta?.lastCallUsage,
modelUsed,
providerUsed,
contextTokensUsed,
@@ -495,11 +495,13 @@ export async function runReplyAgent(params: {
let finalPayloads = replyPayloads;
const verboseEnabled = resolvedVerboseLevel !== "off";
if (autoCompactionCompleted) {
const count = await incrementCompactionCount({
const count = await incrementRunCompactionCount({
sessionEntry: activeSessionEntry,
sessionStore: activeSessionStore,
sessionKey,
storePath,
lastCallUsage: runResult.meta.agentMeta?.lastCallUsage,
contextTokensUsed,
});
if (verboseEnabled) {
const suffix = typeof count === "number" ? ` (count ${count})` : "";

View File

@@ -131,6 +131,68 @@ describe("createFollowupRunner compaction", () => {
expect(onBlockReply.mock.calls[0][0].text).toContain("Auto-compaction complete");
expect(sessionStore.main.compactionCount).toBe(1);
});
it("updates totalTokens after auto-compaction using lastCallUsage", async () => {
const storePath = path.join(
await fs.mkdtemp(path.join(tmpdir(), "openclaw-followup-compaction-")),
"sessions.json",
);
const sessionKey = "main";
const sessionEntry: SessionEntry = {
sessionId: "session",
updatedAt: Date.now(),
totalTokens: 180_000,
compactionCount: 0,
};
const sessionStore: Record<string, SessionEntry> = { [sessionKey]: sessionEntry };
await saveSessionStore(storePath, sessionStore);
const onBlockReply = vi.fn(async () => {});
runEmbeddedPiAgentMock.mockImplementationOnce(
async (params: {
onAgentEvent?: (evt: { stream: string; data: Record<string, unknown> }) => void;
}) => {
params.onAgentEvent?.({
stream: "compaction",
data: { phase: "end", willRetry: false },
});
return {
payloads: [{ text: "done" }],
meta: {
agentMeta: {
// Accumulated usage across pre+post compaction calls.
usage: { input: 190_000, output: 8_000, total: 198_000 },
// Last call usage reflects post-compaction context.
lastCallUsage: { input: 11_000, output: 2_000, total: 13_000 },
model: "claude-opus-4-5",
provider: "anthropic",
},
},
};
},
);
const runner = createFollowupRunner({
opts: { onBlockReply },
typing: createMockTypingController(),
typingMode: "instant",
sessionEntry,
sessionStore,
sessionKey,
storePath,
defaultModel: "anthropic/claude-opus-4-5",
agentCfgContextTokens: 200_000,
});
await runner(baseQueuedRun());
const store = loadSessionStore(storePath, { skipCache: true });
expect(store[sessionKey]?.compactionCount).toBe(1);
expect(store[sessionKey]?.totalTokens).toBe(11_000);
// We only keep the total estimate after compaction.
expect(store[sessionKey]?.inputTokens).toBeUndefined();
expect(store[sessionKey]?.outputTokens).toBeUndefined();
});
});
describe("createFollowupRunner messaging tool dedupe", () => {
@@ -212,7 +274,8 @@ describe("createFollowupRunner messaging tool dedupe", () => {
messagingToolSentTargets: [{ tool: "slack", provider: "slack", to: "channel:C1" }],
meta: {
agentMeta: {
usage: { input: 10, output: 5 },
usage: { input: 1_000, output: 50 },
lastCallUsage: { input: 400, output: 20 },
model: "claude-opus-4-5",
provider: "anthropic",
},
@@ -234,7 +297,11 @@ describe("createFollowupRunner messaging tool dedupe", () => {
expect(onBlockReply).not.toHaveBeenCalled();
const store = loadSessionStore(storePath, { skipCache: true });
expect(store[sessionKey]?.totalTokens ?? 0).toBeGreaterThan(0);
// totalTokens should reflect the last call usage snapshot, not the accumulated input.
expect(store[sessionKey]?.totalTokens).toBe(400);
expect(store[sessionKey]?.model).toBe("claude-opus-4-5");
// Accumulated usage is still stored for usage/cost tracking.
expect(store[sessionKey]?.inputTokens).toBe(1_000);
expect(store[sessionKey]?.outputTokens).toBe(50);
});
});

View File

@@ -22,8 +22,7 @@ import {
} from "./reply-payloads.js";
import { resolveReplyToMode } from "./reply-threading.js";
import { isRoutableChannel, routeReply } from "./route-reply.js";
import { incrementCompactionCount } from "./session-updates.js";
import { persistSessionUsageUpdate } from "./session-usage.js";
import { incrementRunCompactionCount, persistRunSessionUsage } from "./session-run-accounting.js";
import { createTypingSignaler } from "./typing-mode.js";
export function createFollowupRunner(params: {
@@ -194,19 +193,20 @@ export function createFollowupRunner(params: {
return;
}
if (storePath && sessionKey) {
const usage = runResult.meta.agentMeta?.usage;
const modelUsed = runResult.meta.agentMeta?.model ?? fallbackModel ?? defaultModel;
const contextTokensUsed =
agentCfgContextTokens ??
lookupContextTokens(modelUsed) ??
sessionEntry?.contextTokens ??
DEFAULT_CONTEXT_TOKENS;
const usage = runResult.meta.agentMeta?.usage;
const modelUsed = runResult.meta.agentMeta?.model ?? fallbackModel ?? defaultModel;
const contextTokensUsed =
agentCfgContextTokens ??
lookupContextTokens(modelUsed) ??
sessionEntry?.contextTokens ??
DEFAULT_CONTEXT_TOKENS;
await persistSessionUsageUpdate({
if (storePath && sessionKey) {
await persistRunSessionUsage({
storePath,
sessionKey,
usage,
lastCallUsage: runResult.meta.agentMeta?.lastCallUsage,
modelUsed,
providerUsed: fallbackProvider,
contextTokensUsed,
@@ -263,11 +263,13 @@ export function createFollowupRunner(params: {
}
if (autoCompactionCompleted) {
const count = await incrementCompactionCount({
const count = await incrementRunCompactionCount({
sessionEntry,
sessionStore,
sessionKey,
storePath,
lastCallUsage: runResult.meta.agentMeta?.lastCallUsage,
contextTokensUsed,
});
if (queued.run.verboseLevel && queued.run.verboseLevel !== "off") {
const suffix = typeof count === "number" ? ` (count ${count})` : "";

View File

@@ -0,0 +1,46 @@
import { deriveSessionTotalTokens, type NormalizedUsage } from "../../agents/usage.js";
import { incrementCompactionCount } from "./session-updates.js";
import { persistSessionUsageUpdate } from "./session-usage.js";
/** Same shape as the params of `persistSessionUsageUpdate` — forwarded as-is. */
type PersistRunSessionUsageParams = Parameters<typeof persistSessionUsageUpdate>[0];
/**
 * Params of `incrementCompactionCount`, except `tokensAfter` is replaced by
 * the inputs needed to derive it: the run's last-call usage snapshot and the
 * model's context-window size.
 */
type IncrementRunCompactionCountParams = Omit<
  Parameters<typeof incrementCompactionCount>[0],
  "tokensAfter"
> & {
  lastCallUsage?: NormalizedUsage;
  contextTokensUsed?: number;
};
/**
 * Persists a run's accumulated usage (plus the last-call snapshot used for
 * context-window tracking) to the session store.
 *
 * Forwards the params object verbatim rather than re-listing each field:
 * `PersistRunSessionUsageParams` is by definition the exact parameter type of
 * `persistSessionUsageUpdate`, and a field-by-field copy would silently drop
 * any field added to that function later.
 */
export async function persistRunSessionUsage(params: PersistRunSessionUsageParams): Promise<void> {
  await persistSessionUsageUpdate(params);
}
/**
 * Bumps the session's compaction counter and, when the run reported a
 * last-call usage snapshot, derives the post-compaction token total from it
 * so the stored `totalTokens` reflects the compacted context.
 *
 * @returns the updated compaction count, when one could be determined.
 */
export async function incrementRunCompactionCount(
  params: IncrementRunCompactionCountParams,
): Promise<number | undefined> {
  const { sessionEntry, sessionStore, sessionKey, storePath, lastCallUsage, contextTokensUsed } =
    params;
  // Without a last-call snapshot we leave tokensAfter undefined so the
  // stored totalTokens is not touched.
  const tokensAfter = lastCallUsage
    ? deriveSessionTotalTokens({ usage: lastCallUsage, contextTokens: contextTokensUsed })
    : undefined;
  return incrementCompactionCount({
    sessionEntry,
    sessionStore,
    sessionKey,
    storePath,
    tokensAfter,
  });
}

View File

@@ -0,0 +1,98 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it } from "vitest";
import type { SessionEntry } from "../../config/sessions.js";
import { incrementCompactionCount } from "./session-updates.js";
/** Seeds a sessions.json file on disk with a single keyed entry. */
async function seedSessionStore(params: {
  storePath: string;
  sessionKey: string;
  entry: Record<string, unknown>;
}) {
  const dir = path.dirname(params.storePath);
  await fs.mkdir(dir, { recursive: true });
  const store = { [params.sessionKey]: params.entry };
  await fs.writeFile(params.storePath, JSON.stringify(store, null, 2), "utf-8");
}
describe("incrementCompactionCount", () => {
  it("increments compaction count", async () => {
    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-compact-"));
    const storePath = path.join(dir, "sessions.json");
    const sessionKey = "main";
    const entry = { sessionId: "s1", updatedAt: Date.now(), compactionCount: 2 } as SessionEntry;
    const sessionStore: Record<string, SessionEntry> = { [sessionKey]: entry };
    await seedSessionStore({ storePath, sessionKey, entry });
    const count = await incrementCompactionCount({
      sessionEntry: entry,
      sessionStore,
      sessionKey,
      storePath,
    });
    // The count bumps from 2 to 3 both in the return value and on disk.
    expect(count).toBe(3);
    const onDisk = JSON.parse(await fs.readFile(storePath, "utf-8"));
    expect(onDisk[sessionKey].compactionCount).toBe(3);
  });
  it("updates totalTokens when tokensAfter is provided", async () => {
    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-compact-"));
    const storePath = path.join(dir, "sessions.json");
    const sessionKey = "main";
    const entry = {
      sessionId: "s1",
      updatedAt: Date.now(),
      compactionCount: 0,
      totalTokens: 180_000,
      inputTokens: 170_000,
      outputTokens: 10_000,
    } as SessionEntry;
    const sessionStore: Record<string, SessionEntry> = { [sessionKey]: entry };
    await seedSessionStore({ storePath, sessionKey, entry });
    await incrementCompactionCount({
      sessionEntry: entry,
      sessionStore,
      sessionKey,
      storePath,
      tokensAfter: 12_000,
    });
    const onDisk = JSON.parse(await fs.readFile(storePath, "utf-8"));
    expect(onDisk[sessionKey].compactionCount).toBe(1);
    // totalTokens snaps to the post-compaction estimate…
    expect(onDisk[sessionKey].totalTokens).toBe(12_000);
    // …and input/output are cleared since only the total estimate remains.
    expect(onDisk[sessionKey].inputTokens).toBeUndefined();
    expect(onDisk[sessionKey].outputTokens).toBeUndefined();
  });
  it("does not update totalTokens when tokensAfter is not provided", async () => {
    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-compact-"));
    const storePath = path.join(dir, "sessions.json");
    const sessionKey = "main";
    const entry = {
      sessionId: "s1",
      updatedAt: Date.now(),
      compactionCount: 0,
      totalTokens: 180_000,
    } as SessionEntry;
    const sessionStore: Record<string, SessionEntry> = { [sessionKey]: entry };
    await seedSessionStore({ storePath, sessionKey, entry });
    await incrementCompactionCount({
      sessionEntry: entry,
      sessionStore,
      sessionKey,
      storePath,
    });
    const onDisk = JSON.parse(await fs.readFile(storePath, "utf-8"));
    expect(onDisk[sessionKey].compactionCount).toBe(1);
    // totalTokens is left untouched without a post-compaction estimate.
    expect(onDisk[sessionKey].totalTokens).toBe(180_000);
  });
});

View File

@@ -0,0 +1,95 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it } from "vitest";
import { persistSessionUsageUpdate } from "./session-usage.js";
/** Seeds a one-entry sessions.json store, creating parent directories first. */
async function seedSessionStore({
  storePath,
  sessionKey,
  entry,
}: {
  storePath: string;
  sessionKey: string;
  entry: Record<string, unknown>;
}) {
  await fs.mkdir(path.dirname(storePath), { recursive: true });
  await fs.writeFile(storePath, JSON.stringify({ [sessionKey]: entry }, null, 2), "utf-8");
}
describe("persistSessionUsageUpdate", () => {
  it("uses lastCallUsage for totalTokens when provided", async () => {
    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-usage-"));
    const storePath = path.join(dir, "sessions.json");
    const sessionKey = "main";
    await seedSessionStore({
      storePath,
      sessionKey,
      entry: { sessionId: "s1", updatedAt: Date.now(), totalTokens: 100_000 },
    });
    // Accumulated run usage sums every API call, so it is inflated.
    const accumulatedUsage = { input: 180_000, output: 10_000, total: 190_000 };
    // The final call's usage is the actual current context size.
    const lastCallUsage = { input: 12_000, output: 2_000, total: 14_000 };
    await persistSessionUsageUpdate({
      storePath,
      sessionKey,
      usage: accumulatedUsage,
      lastCallUsage,
      contextTokensUsed: 200_000,
    });
    const onDisk = JSON.parse(await fs.readFile(storePath, "utf-8"));
    // Context tracking follows lastCallUsage.input, not the accumulated input.
    expect(onDisk[sessionKey].totalTokens).toBe(12_000);
    // Cost tracking still records the accumulated input/output.
    expect(onDisk[sessionKey].inputTokens).toBe(180_000);
    expect(onDisk[sessionKey].outputTokens).toBe(10_000);
  });
  it("falls back to accumulated usage for totalTokens when lastCallUsage not provided", async () => {
    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-usage-"));
    const storePath = path.join(dir, "sessions.json");
    const sessionKey = "main";
    await seedSessionStore({
      storePath,
      sessionKey,
      entry: { sessionId: "s1", updatedAt: Date.now() },
    });
    await persistSessionUsageUpdate({
      storePath,
      sessionKey,
      usage: { input: 50_000, output: 5_000, total: 55_000 },
      contextTokensUsed: 200_000,
    });
    const onDisk = JSON.parse(await fs.readFile(storePath, "utf-8"));
    expect(onDisk[sessionKey].totalTokens).toBe(50_000);
  });
  it("caps totalTokens at context window even with lastCallUsage", async () => {
    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-usage-"));
    const storePath = path.join(dir, "sessions.json");
    const sessionKey = "main";
    await seedSessionStore({
      storePath,
      sessionKey,
      entry: { sessionId: "s1", updatedAt: Date.now() },
    });
    await persistSessionUsageUpdate({
      storePath,
      sessionKey,
      usage: { input: 300_000, output: 10_000, total: 310_000 },
      lastCallUsage: { input: 250_000, output: 5_000, total: 255_000 },
      contextTokensUsed: 200_000,
    });
    const onDisk = JSON.parse(await fs.readFile(storePath, "utf-8"));
    // Even the last-call snapshot is clamped to the context window.
    expect(onDisk[sessionKey].totalTokens).toBe(200_000);
  });
});

View File

@@ -15,6 +15,13 @@ export async function persistSessionUsageUpdate(params: {
storePath?: string;
sessionKey?: string;
usage?: NormalizedUsage;
/**
* Usage from the last individual API call (not accumulated). When provided,
* this is used for `totalTokens` instead of the accumulated `usage` so that
* context-window utilization reflects the actual current context size rather
* than the sum of input tokens across all API calls in the run.
*/
lastCallUsage?: NormalizedUsage;
modelUsed?: string;
providerUsed?: string;
contextTokensUsed?: number;
@@ -37,12 +44,17 @@ export async function persistSessionUsageUpdate(params: {
const input = params.usage?.input ?? 0;
const output = params.usage?.output ?? 0;
const resolvedContextTokens = params.contextTokensUsed ?? entry.contextTokens;
// Use last-call usage for totalTokens when available. The accumulated
// `usage.input` sums input tokens from every API call in the run
// (tool-use loops, compaction retries), overstating actual context.
// `lastCallUsage` reflects only the final API call — the true context.
const usageForContext = params.lastCallUsage ?? params.usage;
const patch: Partial<SessionEntry> = {
inputTokens: input,
outputTokens: output,
totalTokens:
deriveSessionTotalTokens({
usage: params.usage,
usage: usageForContext,
contextTokens: resolvedContextTokens,
}) ?? input,
modelProvider: params.providerUsed ?? entry.modelProvider,