Files
openclaw/src/agents/pi-embedded-runner/run.ts
Kyle Tse a10f228a5b fix: update totalTokens after compaction using last-call usage (#15018)
Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 9214291bf7
Co-authored-by: shtse8 <8020099+shtse8@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras
2026-02-12 18:02:30 -05:00

898 lines
35 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import fs from "node:fs/promises";
import type { ThinkLevel } from "../../auto-reply/thinking.js";
import type { RunEmbeddedPiAgentParams } from "./run/params.js";
import type { EmbeddedPiAgentMeta, EmbeddedPiRunResult } from "./types.js";
import { enqueueCommandInLane } from "../../process/command-queue.js";
import { isMarkdownCapableMessageChannel } from "../../utils/message-channel.js";
import { resolveOpenClawAgentDir } from "../agent-paths.js";
import {
isProfileInCooldown,
markAuthProfileFailure,
markAuthProfileGood,
markAuthProfileUsed,
} from "../auth-profiles.js";
import {
CONTEXT_WINDOW_HARD_MIN_TOKENS,
CONTEXT_WINDOW_WARN_BELOW_TOKENS,
evaluateContextWindowGuard,
resolveContextWindowInfo,
} from "../context-window-guard.js";
import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
import { FailoverError, resolveFailoverStatus } from "../failover-error.js";
import {
ensureAuthProfileStore,
getApiKeyForModel,
resolveAuthProfileOrder,
type ResolvedProviderAuth,
} from "../model-auth.js";
import { normalizeProviderId } from "../model-selection.js";
import { ensureOpenClawModelsJson } from "../models-config.js";
import {
formatBillingErrorMessage,
classifyFailoverReason,
formatAssistantErrorText,
isAuthAssistantError,
isBillingAssistantError,
isCompactionFailureError,
isContextOverflowError,
isFailoverAssistantError,
isFailoverErrorMessage,
parseImageSizeError,
parseImageDimensionError,
isRateLimitAssistantError,
isTimeoutErrorMessage,
pickFallbackThinkingLevel,
type FailoverReason,
} from "../pi-embedded-helpers.js";
import { normalizeUsage, type UsageLike } from "../usage.js";
import { redactRunIdentifier, resolveRunWorkspaceDir } from "../workspace-run.js";
import { compactEmbeddedPiSessionDirect } from "./compact.js";
import { resolveGlobalLane, resolveSessionLane } from "./lanes.js";
import { log } from "./logger.js";
import { resolveModel } from "./model.js";
import { runEmbeddedAttempt } from "./run/attempt.js";
import { buildEmbeddedRunPayloads } from "./run/payloads.js";
import {
truncateOversizedToolResultsInSession,
sessionLikelyHasOversizedToolResults,
} from "./tool-result-truncation.js";
import { describeUnknownError } from "./utils.js";
type ApiKeyInfo = ResolvedProviderAuth;
// Avoid Anthropic's refusal test token poisoning session transcripts.
const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL";
const ANTHROPIC_MAGIC_STRING_REPLACEMENT = "ANTHROPIC MAGIC STRING TRIGGER REFUSAL (redacted)";
/**
 * Replace every occurrence of Anthropic's refusal-trigger magic string with a
 * redacted marker so it never reaches the provider verbatim. Prompts that do
 * not contain the token are returned unchanged.
 */
function scrubAnthropicRefusalMagic(prompt: string): string {
  return prompt.includes(ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL)
    ? prompt.replaceAll(
        ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL,
        ANTHROPIC_MAGIC_STRING_REPLACEMENT,
      )
    : prompt;
}
/** Mutable running token totals for one agent turn (across retries/tool loops). */
type UsageAccumulator = {
  input: number;
  output: number;
  cacheRead: number;
  cacheWrite: number;
  total: number;
  /** Cache fields from the most recent API call (not accumulated). */
  lastCacheRead: number;
  lastCacheWrite: number;
  lastInput: number;
};
/** Build a fresh accumulator with every counter zeroed. */
function createUsageAccumulator(): UsageAccumulator {
  const zeroed: UsageAccumulator = {
    input: 0,
    output: 0,
    cacheRead: 0,
    cacheWrite: 0,
    total: 0,
    lastCacheRead: 0,
    lastCacheWrite: 0,
    lastInput: 0,
  };
  return zeroed;
}
/**
 * Type guard: true when the normalized usage object exists and carries at
 * least one positive, finite token count in any of its fields.
 */
const hasUsageValues = (
  usage: ReturnType<typeof normalizeUsage>,
): usage is NonNullable<ReturnType<typeof normalizeUsage>> => {
  if (!usage) {
    return false;
  }
  const counts = [usage.input, usage.output, usage.cacheRead, usage.cacheWrite, usage.total];
  return counts.some(
    (count) => typeof count === "number" && Number.isFinite(count) && count > 0,
  );
};
/**
 * Fold one API call's normalized usage into the running accumulator.
 * No-ops when the payload is absent or carries no positive, finite count.
 */
const mergeUsageIntoAccumulator = (
  target: UsageAccumulator,
  usage: ReturnType<typeof normalizeUsage>,
) => {
  // Guard (inlined hasUsageValues check): skip empty/absent usage payloads.
  if (!usage) {
    return;
  }
  const samples = [usage.input, usage.output, usage.cacheRead, usage.cacheWrite, usage.total];
  if (!samples.some((v) => typeof v === "number" && Number.isFinite(v) && v > 0)) {
    return;
  }
  const input = usage.input ?? 0;
  const output = usage.output ?? 0;
  const cacheRead = usage.cacheRead ?? 0;
  const cacheWrite = usage.cacheWrite ?? 0;
  target.input += input;
  target.output += output;
  target.cacheRead += cacheRead;
  target.cacheWrite += cacheWrite;
  // Trust the reported total when present; otherwise derive it.
  target.total += usage.total ?? input + output + cacheRead + cacheWrite;
  // Track the most recent API call's cache fields for accurate context-size
  // reporting. Accumulated cache totals inflate context size across multiple
  // tool-call round-trips, since each call reports cacheRead ≈ current
  // context size.
  target.lastCacheRead = cacheRead;
  target.lastCacheWrite = cacheWrite;
  target.lastInput = input;
};
/**
 * Convert the accumulator into a usage summary object, or `undefined` when
 * nothing was recorded this turn.
 *
 * Cache-related fields come from the LAST API call rather than the running
 * sums: each tool-call round-trip reports cacheRead ≈ current_context_size,
 * so summing N calls would overstate context size by ~N× (which then gets
 * clamped to the context window, e.g. 200k).
 * See: https://github.com/openclaw/openclaw/issues/13698
 *
 * `output` stays accumulated — it is the total text generated this turn.
 */
const toNormalizedUsage = (usage: UsageAccumulator) => {
  const recorded = [usage.input, usage.output, usage.cacheRead, usage.cacheWrite, usage.total];
  if (!recorded.some((value) => value > 0)) {
    return undefined;
  }
  const lastPromptTokens = usage.lastInput + usage.lastCacheRead + usage.lastCacheWrite;
  return {
    input: usage.lastInput || undefined,
    output: usage.output || undefined,
    cacheRead: usage.lastCacheRead || undefined,
    cacheWrite: usage.lastCacheWrite || undefined,
    total: lastPromptTokens + usage.output || undefined,
  };
};
/**
 * Run one embedded Pi agent turn end-to-end: resolve workspace, model, and
 * auth; submit the prompt via runEmbeddedAttempt; and retry across auth
 * profiles, thinking-level fallbacks, and context-overflow recovery
 * (auto-compaction, then tool-result truncation) until an attempt succeeds
 * or a terminal/failover error is thrown.
 *
 * Work is serialized through a per-session lane nested inside a global lane
 * (unless the caller supplies its own `enqueue`).
 *
 * @param params Run configuration, session identity, callbacks, and limits.
 * @returns Reply payloads plus run metadata (usage, timing, error info).
 * @throws FailoverError when model fallbacks are configured and the failure
 *   is classified as recoverable by switching models; otherwise rethrows the
 *   underlying error.
 */
export async function runEmbeddedPiAgent(
  params: RunEmbeddedPiAgentParams,
): Promise<EmbeddedPiRunResult> {
  const sessionLane = resolveSessionLane(params.sessionKey?.trim() || params.sessionId);
  const globalLane = resolveGlobalLane(params.lane);
  // A caller-supplied enqueue replaces BOTH lane queues.
  const enqueueGlobal =
    params.enqueue ?? ((task, opts) => enqueueCommandInLane(globalLane, task, opts));
  const enqueueSession =
    params.enqueue ?? ((task, opts) => enqueueCommandInLane(sessionLane, task, opts));
  const channelHint = params.messageChannel ?? params.messageProvider;
  // Render tool results as markdown only when the target channel supports it.
  const resolvedToolResultFormat =
    params.toolResultFormat ??
    (channelHint
      ? isMarkdownCapableMessageChannel(channelHint)
        ? "markdown"
        : "plain"
      : "markdown");
  const isProbeSession = params.sessionId?.startsWith("probe-") ?? false;
  return enqueueSession(() =>
    enqueueGlobal(async () => {
      const started = Date.now();
      const workspaceResolution = resolveRunWorkspaceDir({
        workspaceDir: params.workspaceDir,
        sessionKey: params.sessionKey,
        agentId: params.agentId,
        config: params.config,
      });
      const resolvedWorkspace = workspaceResolution.workspaceDir;
      // Redact identifiers before they reach log output.
      const redactedSessionId = redactRunIdentifier(params.sessionId);
      const redactedSessionKey = redactRunIdentifier(params.sessionKey);
      const redactedWorkspace = redactRunIdentifier(resolvedWorkspace);
      if (workspaceResolution.usedFallback) {
        log.warn(
          `[workspace-fallback] caller=runEmbeddedPiAgent reason=${workspaceResolution.fallbackReason} run=${params.runId} session=${redactedSessionId} sessionKey=${redactedSessionKey} agent=${workspaceResolution.agentId} workspace=${redactedWorkspace}`,
        );
      }
      // Restored in the outer finally — presumably the attempt may chdir
      // into the workspace (not visible here; confirm in runEmbeddedAttempt).
      const prevCwd = process.cwd();
      const provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER;
      const modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL;
      const agentDir = params.agentDir ?? resolveOpenClawAgentDir();
      // Model fallbacks being configured changes error handling below:
      // recoverable failures become FailoverError instead of plain throws.
      const fallbackConfigured =
        (params.config?.agents?.defaults?.model?.fallbacks?.length ?? 0) > 0;
      await ensureOpenClawModelsJson(params.config, agentDir);
      const { model, error, authStorage, modelRegistry } = resolveModel(
        provider,
        modelId,
        agentDir,
        params.config,
      );
      if (!model) {
        throw new Error(error ?? `Unknown model: ${provider}/${modelId}`);
      }
      // Guard against models whose context window is too small to be usable.
      const ctxInfo = resolveContextWindowInfo({
        cfg: params.config,
        provider,
        modelId,
        modelContextWindow: model.contextWindow,
        defaultTokens: DEFAULT_CONTEXT_TOKENS,
      });
      const ctxGuard = evaluateContextWindowGuard({
        info: ctxInfo,
        warnBelowTokens: CONTEXT_WINDOW_WARN_BELOW_TOKENS,
        hardMinTokens: CONTEXT_WINDOW_HARD_MIN_TOKENS,
      });
      if (ctxGuard.shouldWarn) {
        log.warn(
          `low context window: ${provider}/${modelId} ctx=${ctxGuard.tokens} (warn<${CONTEXT_WINDOW_WARN_BELOW_TOKENS}) source=${ctxGuard.source}`,
        );
      }
      if (ctxGuard.shouldBlock) {
        log.error(
          `blocked model (context window too small): ${provider}/${modelId} ctx=${ctxGuard.tokens} (min=${CONTEXT_WINDOW_HARD_MIN_TOKENS}) source=${ctxGuard.source}`,
        );
        throw new FailoverError(
          `Model context window too small (${ctxGuard.tokens} tokens). Minimum is ${CONTEXT_WINDOW_HARD_MIN_TOKENS}.`,
          { reason: "unknown", provider, model: modelId },
        );
      }
      // Auth profile selection. A user-pinned profile is "locked" (never
      // rotated away from); otherwise profiles rotate in resolved order on
      // failure.
      const authStore = ensureAuthProfileStore(agentDir, { allowKeychainPrompt: false });
      const preferredProfileId = params.authProfileId?.trim();
      let lockedProfileId = params.authProfileIdSource === "user" ? preferredProfileId : undefined;
      if (lockedProfileId) {
        const lockedProfile = authStore.profiles[lockedProfileId];
        if (
          !lockedProfile ||
          normalizeProviderId(lockedProfile.provider) !== normalizeProviderId(provider)
        ) {
          // Pinned profile is missing or belongs to a different provider:
          // drop the lock and fall back to normal rotation.
          lockedProfileId = undefined;
        }
      }
      const profileOrder = resolveAuthProfileOrder({
        cfg: params.config,
        store: authStore,
        provider,
        preferredProfile: preferredProfileId,
      });
      if (lockedProfileId && !profileOrder.includes(lockedProfileId)) {
        throw new Error(`Auth profile "${lockedProfileId}" is not configured for ${provider}.`);
      }
      // A single `undefined` candidate means "no profile" (provider default
      // auth resolution inside getApiKeyForModel).
      const profileCandidates = lockedProfileId
        ? [lockedProfileId]
        : profileOrder.length > 0
          ? profileOrder
          : [undefined];
      let profileIndex = 0;
      const initialThinkLevel = params.thinkLevel ?? "off";
      let thinkLevel = initialThinkLevel;
      // Thinking levels already tried, so fallback never repeats one.
      const attemptedThinking = new Set<ThinkLevel>();
      let apiKeyInfo: ApiKeyInfo | null = null;
      let lastProfileId: string | undefined;
      // Map an auth-profile failure to a failover reason; all-profiles-in-
      // cooldown is reported as rate limiting. (NOTE: the `params` parameter
      // here shadows the outer function's `params`.)
      const resolveAuthProfileFailoverReason = (params: {
        allInCooldown: boolean;
        message: string;
      }): FailoverReason => {
        if (params.allInCooldown) {
          return "rate_limit";
        }
        const classified = classifyFailoverReason(params.message);
        return classified ?? "auth";
      };
      // Raise the appropriate terminal error when no auth profile is usable:
      // FailoverError when model fallbacks are configured, otherwise the
      // original error (or a descriptive Error). Never returns.
      const throwAuthProfileFailover = (params: {
        allInCooldown: boolean;
        message?: string;
        error?: unknown;
      }): never => {
        const fallbackMessage = `No available auth profile for ${provider} (all in cooldown or unavailable).`;
        const message =
          params.message?.trim() ||
          (params.error ? describeUnknownError(params.error).trim() : "") ||
          fallbackMessage;
        const reason = resolveAuthProfileFailoverReason({
          allInCooldown: params.allInCooldown,
          message,
        });
        if (fallbackConfigured) {
          throw new FailoverError(message, {
            reason,
            provider,
            model: modelId,
            status: resolveFailoverStatus(reason),
            cause: params.error,
          });
        }
        if (params.error instanceof Error) {
          throw params.error;
        }
        throw new Error(message);
      };
      const resolveApiKeyForCandidate = async (candidate?: string) => {
        return getApiKeyForModel({
          model,
          cfg: params.config,
          profileId: candidate,
          store: authStore,
          agentDir,
        });
      };
      // Resolve credentials for a candidate profile and install them on the
      // runtime auth storage. Records the resolved profile in lastProfileId.
      const applyApiKeyInfo = async (candidate?: string): Promise<void> => {
        apiKeyInfo = await resolveApiKeyForCandidate(candidate);
        const resolvedProfileId = apiKeyInfo.profileId ?? candidate;
        if (!apiKeyInfo.apiKey) {
          // aws-sdk mode authenticates without an explicit key; any other
          // mode without a key is a hard error.
          if (apiKeyInfo.mode !== "aws-sdk") {
            throw new Error(
              `No API key resolved for provider "${model.provider}" (auth mode: ${apiKeyInfo.mode}).`,
            );
          }
          lastProfileId = resolvedProfileId;
          return;
        }
        if (model.provider === "github-copilot") {
          // Copilot: exchange the GitHub token for the actual API token.
          const { resolveCopilotApiToken } =
            await import("../../providers/github-copilot-token.js");
          const copilotToken = await resolveCopilotApiToken({
            githubToken: apiKeyInfo.apiKey,
          });
          authStorage.setRuntimeApiKey(model.provider, copilotToken.token);
        } else {
          authStorage.setRuntimeApiKey(model.provider, apiKeyInfo.apiKey);
        }
        lastProfileId = apiKeyInfo.profileId;
      };
      // Advance to the next usable (non-cooldown) candidate profile. Resets
      // the thinking-level fallback state on success. Returns false when the
      // profile is locked or candidates are exhausted.
      const advanceAuthProfile = async (): Promise<boolean> => {
        if (lockedProfileId) {
          return false;
        }
        let nextIndex = profileIndex + 1;
        while (nextIndex < profileCandidates.length) {
          const candidate = profileCandidates[nextIndex];
          if (candidate && isProfileInCooldown(authStore, candidate)) {
            nextIndex += 1;
            continue;
          }
          try {
            await applyApiKeyInfo(candidate);
            profileIndex = nextIndex;
            thinkLevel = initialThinkLevel;
            attemptedThinking.clear();
            return true;
          } catch (err) {
            if (candidate && candidate === lockedProfileId) {
              throw err;
            }
            // Credential resolution failed for this candidate; try the next.
            nextIndex += 1;
          }
        }
        return false;
      };
      // Initial profile selection: skip cooled-down candidates (a locked
      // profile bypasses the cooldown check), then install credentials.
      try {
        while (profileIndex < profileCandidates.length) {
          const candidate = profileCandidates[profileIndex];
          if (
            candidate &&
            candidate !== lockedProfileId &&
            isProfileInCooldown(authStore, candidate)
          ) {
            profileIndex += 1;
            continue;
          }
          await applyApiKeyInfo(profileCandidates[profileIndex]);
          break;
        }
        if (profileIndex >= profileCandidates.length) {
          throwAuthProfileFailover({ allInCooldown: true });
        }
      } catch (err) {
        if (err instanceof FailoverError) {
          throw err;
        }
        if (profileCandidates[profileIndex] === lockedProfileId) {
          throwAuthProfileFailover({ allInCooldown: false, error: err });
        }
        const advanced = await advanceAuthProfile();
        if (!advanced) {
          throwAuthProfileFailover({ allInCooldown: false, error: err });
        }
      }
      const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
      let overflowCompactionAttempts = 0;
      let toolResultTruncationAttempted = false;
      const usageAccumulator = createUsageAccumulator();
      let autoCompactionCount = 0;
      try {
        // Main attempt loop. Each `continue` retries the prompt with
        // adjusted state: a rotated auth profile, a lower thinking level, or
        // a compacted/truncated session.
        while (true) {
          attemptedThinking.add(thinkLevel);
          await fs.mkdir(resolvedWorkspace, { recursive: true });
          const prompt =
            provider === "anthropic" ? scrubAnthropicRefusalMagic(params.prompt) : params.prompt;
          const attempt = await runEmbeddedAttempt({
            sessionId: params.sessionId,
            sessionKey: params.sessionKey,
            messageChannel: params.messageChannel,
            messageProvider: params.messageProvider,
            agentAccountId: params.agentAccountId,
            messageTo: params.messageTo,
            messageThreadId: params.messageThreadId,
            groupId: params.groupId,
            groupChannel: params.groupChannel,
            groupSpace: params.groupSpace,
            spawnedBy: params.spawnedBy,
            senderIsOwner: params.senderIsOwner,
            currentChannelId: params.currentChannelId,
            currentThreadTs: params.currentThreadTs,
            replyToMode: params.replyToMode,
            hasRepliedRef: params.hasRepliedRef,
            sessionFile: params.sessionFile,
            workspaceDir: resolvedWorkspace,
            agentDir,
            config: params.config,
            skillsSnapshot: params.skillsSnapshot,
            prompt,
            images: params.images,
            disableTools: params.disableTools,
            provider,
            modelId,
            model,
            authStorage,
            modelRegistry,
            agentId: workspaceResolution.agentId,
            thinkLevel,
            verboseLevel: params.verboseLevel,
            reasoningLevel: params.reasoningLevel,
            toolResultFormat: resolvedToolResultFormat,
            execOverrides: params.execOverrides,
            bashElevated: params.bashElevated,
            timeoutMs: params.timeoutMs,
            runId: params.runId,
            abortSignal: params.abortSignal,
            shouldEmitToolResult: params.shouldEmitToolResult,
            shouldEmitToolOutput: params.shouldEmitToolOutput,
            onPartialReply: params.onPartialReply,
            onAssistantMessageStart: params.onAssistantMessageStart,
            onBlockReply: params.onBlockReply,
            onBlockReplyFlush: params.onBlockReplyFlush,
            blockReplyBreak: params.blockReplyBreak,
            blockReplyChunking: params.blockReplyChunking,
            onReasoningStream: params.onReasoningStream,
            onToolResult: params.onToolResult,
            onAgentEvent: params.onAgentEvent,
            extraSystemPrompt: params.extraSystemPrompt,
            streamParams: params.streamParams,
            ownerNumbers: params.ownerNumbers,
            enforceFinalTag: params.enforceFinalTag,
          });
          const { aborted, promptError, timedOut, sessionIdUsed, lastAssistant } = attempt;
          // Accumulate usage across retries; fall back to the last assistant
          // message's raw usage when the attempt reported none.
          mergeUsageIntoAccumulator(
            usageAccumulator,
            attempt.attemptUsage ?? normalizeUsage(lastAssistant?.usage as UsageLike),
          );
          autoCompactionCount += Math.max(0, attempt.compactionCount ?? 0);
          const formattedAssistantErrorText = lastAssistant
            ? formatAssistantErrorText(lastAssistant, {
                cfg: params.config,
                sessionKey: params.sessionKey ?? params.sessionId,
                provider,
              })
            : undefined;
          const assistantErrorText =
            lastAssistant?.stopReason === "error"
              ? lastAssistant.errorMessage?.trim() || formattedAssistantErrorText
              : undefined;
          // Detect a context-window overflow from either the prompt
          // submission error or the assistant's own error (never when the
          // run was aborted).
          const contextOverflowError = !aborted
            ? (() => {
                if (promptError) {
                  const errorText = describeUnknownError(promptError);
                  if (isContextOverflowError(errorText)) {
                    return { text: errorText, source: "promptError" as const };
                  }
                  // Prompt submission failed with a non-overflow error. Do not
                  // inspect prior assistant errors from history for this attempt.
                  return null;
                }
                if (assistantErrorText && isContextOverflowError(assistantErrorText)) {
                  return { text: assistantErrorText, source: "assistantError" as const };
                }
                return null;
              })()
            : null;
          if (contextOverflowError) {
            const errorText = contextOverflowError.text;
            const msgCount = attempt.messagesSnapshot?.length ?? 0;
            log.warn(
              `[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
                `provider=${provider}/${modelId} source=${contextOverflowError.source} ` +
                `messages=${msgCount} sessionFile=${params.sessionFile} ` +
                `compactionAttempts=${overflowCompactionAttempts} error=${errorText.slice(0, 200)}`,
            );
            const isCompactionFailure = isCompactionFailureError(errorText);
            // Attempt auto-compaction on context overflow (not compaction_failure)
            if (
              !isCompactionFailure &&
              overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
            ) {
              overflowCompactionAttempts++;
              log.warn(
                `context overflow detected (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); attempting auto-compaction for ${provider}/${modelId}`,
              );
              const compactResult = await compactEmbeddedPiSessionDirect({
                sessionId: params.sessionId,
                sessionKey: params.sessionKey,
                messageChannel: params.messageChannel,
                messageProvider: params.messageProvider,
                agentAccountId: params.agentAccountId,
                authProfileId: lastProfileId,
                sessionFile: params.sessionFile,
                workspaceDir: resolvedWorkspace,
                agentDir,
                config: params.config,
                skillsSnapshot: params.skillsSnapshot,
                senderIsOwner: params.senderIsOwner,
                provider,
                model: modelId,
                thinkLevel,
                reasoningLevel: params.reasoningLevel,
                bashElevated: params.bashElevated,
                extraSystemPrompt: params.extraSystemPrompt,
                ownerNumbers: params.ownerNumbers,
              });
              if (compactResult.compacted) {
                autoCompactionCount += 1;
                log.info(`auto-compaction succeeded for ${provider}/${modelId}; retrying prompt`);
                continue;
              }
              log.warn(
                `auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`,
              );
            }
            // Fallback: try truncating oversized tool results in the session.
            // This handles the case where a single tool result exceeds the
            // context window and compaction cannot reduce it further.
            if (!toolResultTruncationAttempted) {
              const contextWindowTokens = ctxInfo.tokens;
              const hasOversized = attempt.messagesSnapshot
                ? sessionLikelyHasOversizedToolResults({
                    messages: attempt.messagesSnapshot,
                    contextWindowTokens,
                  })
                : false;
              if (hasOversized) {
                toolResultTruncationAttempted = true;
                log.warn(
                  `[context-overflow-recovery] Attempting tool result truncation for ${provider}/${modelId} ` +
                    `(contextWindow=${contextWindowTokens} tokens)`,
                );
                const truncResult = await truncateOversizedToolResultsInSession({
                  sessionFile: params.sessionFile,
                  contextWindowTokens,
                  sessionId: params.sessionId,
                  sessionKey: params.sessionKey,
                });
                if (truncResult.truncated) {
                  log.info(
                    `[context-overflow-recovery] Truncated ${truncResult.truncatedCount} tool result(s); retrying prompt`,
                  );
                  // Session is now smaller; allow compaction retries again.
                  overflowCompactionAttempts = 0;
                  continue;
                }
                log.warn(
                  `[context-overflow-recovery] Tool result truncation did not help: ${truncResult.reason ?? "unknown"}`,
                );
              }
            }
            // Recovery exhausted: report the overflow to the user.
            const kind = isCompactionFailure ? "compaction_failure" : "context_overflow";
            return {
              payloads: [
                {
                  text:
                    "Context overflow: prompt too large for the model. " +
                    "Try /reset (or /new) to start a fresh session, or use a larger-context model.",
                  isError: true,
                },
              ],
              meta: {
                durationMs: Date.now() - started,
                agentMeta: {
                  sessionId: sessionIdUsed,
                  provider,
                  model: model.id,
                },
                systemPromptReport: attempt.systemPromptReport,
                error: { kind, message: errorText },
              },
            };
          }
          if (promptError && !aborted) {
            const errorText = describeUnknownError(promptError);
            // Handle role ordering errors with a user-friendly message
            if (/incorrect role information|roles must alternate/i.test(errorText)) {
              return {
                payloads: [
                  {
                    text:
                      "Message ordering conflict - please try again. " +
                      "If this persists, use /new to start a fresh session.",
                    isError: true,
                  },
                ],
                meta: {
                  durationMs: Date.now() - started,
                  agentMeta: {
                    sessionId: sessionIdUsed,
                    provider,
                    model: model.id,
                  },
                  systemPromptReport: attempt.systemPromptReport,
                  error: { kind: "role_ordering", message: errorText },
                },
              };
            }
            // Handle image size errors with a user-friendly message (no retry needed)
            const imageSizeError = parseImageSizeError(errorText);
            if (imageSizeError) {
              const maxMb = imageSizeError.maxMb;
              const maxMbLabel =
                typeof maxMb === "number" && Number.isFinite(maxMb) ? `${maxMb}` : null;
              const maxBytesHint = maxMbLabel ? ` (max ${maxMbLabel}MB)` : "";
              return {
                payloads: [
                  {
                    text:
                      `Image too large for the model${maxBytesHint}. ` +
                      "Please compress or resize the image and try again.",
                    isError: true,
                  },
                ],
                meta: {
                  durationMs: Date.now() - started,
                  agentMeta: {
                    sessionId: sessionIdUsed,
                    provider,
                    model: model.id,
                  },
                  systemPromptReport: attempt.systemPromptReport,
                  error: { kind: "image_size", message: errorText },
                },
              };
            }
            // Record the failure against the profile (timeouts excluded so a
            // slow call doesn't put a profile into cooldown).
            const promptFailoverReason = classifyFailoverReason(errorText);
            if (promptFailoverReason && promptFailoverReason !== "timeout" && lastProfileId) {
              await markAuthProfileFailure({
                store: authStore,
                profileId: lastProfileId,
                reason: promptFailoverReason,
                cfg: params.config,
                agentDir: params.agentDir,
              });
            }
            // Rotate to the next auth profile for failover-class errors.
            if (
              isFailoverErrorMessage(errorText) &&
              promptFailoverReason !== "timeout" &&
              (await advanceAuthProfile())
            ) {
              continue;
            }
            // Retry at a lower thinking level if the error indicates the
            // current level is unsupported.
            const fallbackThinking = pickFallbackThinkingLevel({
              message: errorText,
              attempted: attemptedThinking,
            });
            if (fallbackThinking) {
              log.warn(
                `unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
              );
              thinkLevel = fallbackThinking;
              continue;
            }
            // FIX: Throw FailoverError for prompt errors when fallbacks configured
            // This enables model fallback for quota/rate limit errors during prompt submission
            if (fallbackConfigured && isFailoverErrorMessage(errorText)) {
              throw new FailoverError(errorText, {
                reason: promptFailoverReason ?? "unknown",
                provider,
                model: modelId,
                profileId: lastProfileId,
                status: resolveFailoverStatus(promptFailoverReason ?? "unknown"),
              });
            }
            throw promptError;
          }
          // Thinking-level fallback for assistant-reported errors.
          const fallbackThinking = pickFallbackThinkingLevel({
            message: lastAssistant?.errorMessage,
            attempted: attemptedThinking,
          });
          if (fallbackThinking && !aborted) {
            log.warn(
              `unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
            );
            thinkLevel = fallbackThinking;
            continue;
          }
          // Classify the assistant failure (if any) for rotation/failover.
          const authFailure = isAuthAssistantError(lastAssistant);
          const rateLimitFailure = isRateLimitAssistantError(lastAssistant);
          const billingFailure = isBillingAssistantError(lastAssistant);
          const failoverFailure = isFailoverAssistantError(lastAssistant);
          const assistantFailoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? "");
          const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError;
          const imageDimensionError = parseImageDimensionError(lastAssistant?.errorMessage ?? "");
          if (imageDimensionError && lastProfileId) {
            // Log which message/content part exceeded the dimension limit.
            const details = [
              imageDimensionError.messageIndex !== undefined
                ? `message=${imageDimensionError.messageIndex}`
                : null,
              imageDimensionError.contentIndex !== undefined
                ? `content=${imageDimensionError.contentIndex}`
                : null,
              imageDimensionError.maxDimensionPx !== undefined
                ? `limit=${imageDimensionError.maxDimensionPx}px`
                : null,
            ]
              .filter(Boolean)
              .join(" ");
            log.warn(
              `Profile ${lastProfileId} rejected image payload${details ? ` (${details})` : ""}.`,
            );
          }
          // Treat timeout as potential rate limit (Antigravity hangs on rate limit)
          const shouldRotate = (!aborted && failoverFailure) || timedOut;
          if (shouldRotate) {
            if (lastProfileId) {
              const reason =
                timedOut || assistantFailoverReason === "timeout"
                  ? "timeout"
                  : (assistantFailoverReason ?? "unknown");
              await markAuthProfileFailure({
                store: authStore,
                profileId: lastProfileId,
                reason,
                cfg: params.config,
                agentDir: params.agentDir,
              });
              if (timedOut && !isProbeSession) {
                log.warn(
                  `Profile ${lastProfileId} timed out (possible rate limit). Trying next account...`,
                );
              }
              if (cloudCodeAssistFormatError) {
                log.warn(
                  `Profile ${lastProfileId} hit Cloud Code Assist format error. Tool calls will be sanitized on retry.`,
                );
              }
            }
            const rotated = await advanceAuthProfile();
            if (rotated) {
              continue;
            }
            // No profile left to rotate to: surface a FailoverError so a
            // configured fallback model can take over.
            if (fallbackConfigured) {
              // Prefer formatted error message (user-friendly) over raw errorMessage
              const message =
                (lastAssistant
                  ? formatAssistantErrorText(lastAssistant, {
                      cfg: params.config,
                      sessionKey: params.sessionKey ?? params.sessionId,
                      provider,
                    })
                  : undefined) ||
                lastAssistant?.errorMessage?.trim() ||
                (timedOut
                  ? "LLM request timed out."
                  : rateLimitFailure
                    ? "LLM request rate limited."
                    : billingFailure
                      ? formatBillingErrorMessage(provider)
                      : authFailure
                        ? "LLM request unauthorized."
                        : "LLM request failed.");
              const status =
                resolveFailoverStatus(assistantFailoverReason ?? "unknown") ??
                (isTimeoutErrorMessage(message) ? 408 : undefined);
              throw new FailoverError(message, {
                reason: assistantFailoverReason ?? "unknown",
                provider,
                model: modelId,
                profileId: lastProfileId,
                status,
              });
            }
          }
          // Success path: assemble usage, metadata, and reply payloads.
          const usage = toNormalizedUsage(usageAccumulator);
          // Extract the last individual API call's usage for context-window
          // utilization display. The accumulated `usage` sums input tokens
          // across all calls (tool-use loops, compaction retries), which
          // overstates the actual context size. `lastCallUsage` reflects only
          // the final call, giving an accurate snapshot of current context.
          const lastCallUsage = normalizeUsage(lastAssistant?.usage as UsageLike);
          const agentMeta: EmbeddedPiAgentMeta = {
            sessionId: sessionIdUsed,
            provider: lastAssistant?.provider ?? provider,
            model: lastAssistant?.model ?? model.id,
            usage,
            lastCallUsage: lastCallUsage ?? undefined,
            compactionCount: autoCompactionCount > 0 ? autoCompactionCount : undefined,
          };
          const payloads = buildEmbeddedRunPayloads({
            assistantTexts: attempt.assistantTexts,
            toolMetas: attempt.toolMetas,
            lastAssistant: attempt.lastAssistant,
            lastToolError: attempt.lastToolError,
            config: params.config,
            sessionKey: params.sessionKey ?? params.sessionId,
            provider,
            verboseLevel: params.verboseLevel,
            reasoningLevel: params.reasoningLevel,
            toolResultFormat: resolvedToolResultFormat,
            inlineToolResultsAllowed: false,
          });
          log.debug(
            `embedded run done: runId=${params.runId} sessionId=${params.sessionId} durationMs=${Date.now() - started} aborted=${aborted}`,
          );
          // Mark the profile healthy and record last-used time.
          if (lastProfileId) {
            await markAuthProfileGood({
              store: authStore,
              provider,
              profileId: lastProfileId,
              agentDir: params.agentDir,
            });
            await markAuthProfileUsed({
              store: authStore,
              profileId: lastProfileId,
              agentDir: params.agentDir,
            });
          }
          return {
            payloads: payloads.length ? payloads : undefined,
            meta: {
              durationMs: Date.now() - started,
              agentMeta,
              aborted,
              systemPromptReport: attempt.systemPromptReport,
              // Handle client tool calls (OpenResponses hosted tools)
              stopReason: attempt.clientToolCall ? "tool_calls" : undefined,
              pendingToolCalls: attempt.clientToolCall
                ? [
                    {
                      id: `call_${Date.now()}`,
                      name: attempt.clientToolCall.name,
                      arguments: JSON.stringify(attempt.clientToolCall.params),
                    },
                  ]
                : undefined,
            },
            didSendViaMessagingTool: attempt.didSendViaMessagingTool,
            messagingToolSentTexts: attempt.messagingToolSentTexts,
            messagingToolSentTargets: attempt.messagingToolSentTargets,
          };
        }
      } finally {
        // Always restore the working directory, even on throw/failover.
        process.chdir(prevCwd);
      }
    }),
  );
}