Fix subagent announce failover race (always emit lifecycle end + treat timeout=0 as no-timeout) (#6621)

* Fix subagent announce race and timeout handling

Bug 1: Subagent announce fires before model failover retries finish
- Problem: CLI provider emitted lifecycle error on each attempt, causing
  subagent registry to prematurely call beginSubagentCleanup() and announce
  with incorrect status before failover retries completed
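- Fix: Removed lifecycle error emission from CLI provider's attempt-level
  .catch() in agent-runner-execution.ts. Errors still propagate to
  runWithModelFallback for retry, but no intermediate lifecycle events
  are emitted. Only the final outcome (after all retries) emits lifecycle
  events.
  (Note: the follow-up commit "fix: emit lifecycle:error for CLI failures"
  in this same change re-adds a lifecycle error event when a CLI run
  fails, so the final diff does emit lifecycle:error from the CLI path.)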

Bug 2: Hard 600s per-prompt timeout ignores runTimeoutSeconds=0
- Problem: When runTimeoutSeconds=0 (meaning 'no timeout'), the code
  returned the default 600s timeout instead of respecting the 0 setting
- Fix: Modified resolveAgentTimeoutMs() to treat 0 as 'no timeout' and
  return a very large timeout value (30 days) instead of the default.
  This avoids setTimeout issues with Infinity while effectively providing
  unlimited time for long-running tasks.

* fix: emit lifecycle:error for CLI failures (#6621) (thanks @tyler6204)

* chore: satisfy format/lint gates (#6621) (thanks @tyler6204)

* fix: restore build after upstream type changes (#6621) (thanks @tyler6204)

* test: fix createSystemPromptOverride tests to match new return type (#6621) (thanks @tyler6204)
This commit is contained in:
Tyler Yust
2026-02-02 02:06:14 -08:00
committed by GitHub
parent d5f6caba3f
commit 8d2f98fb01
14 changed files with 81 additions and 42 deletions

View File

@@ -2167,10 +2167,9 @@ async function processMessage(
sendBlueBubblesTyping(chatGuidForActions, true, {
cfg: config,
accountId: account.accountId,
})
.catch((err) => {
runtime.error?.(`[bluebubbles] typing restart failed: ${String(err)}`);
});
}).catch((err) => {
runtime.error?.(`[bluebubbles] typing restart failed: ${String(err)}`);
});
}, typingRestartDelayMs);
};
try {

View File

@@ -3,11 +3,13 @@
## 2026.2.1
### Changes
- Version alignment with core OpenClaw release numbers.
## 2026.1.31
### Changes
- Version alignment with core OpenClaw release numbers.
## 2026.1.30

View File

@@ -3,11 +3,13 @@
## 2026.2.1
### Changes
- Version alignment with core OpenClaw release numbers.
## 2026.1.31
### Changes
- Version alignment with core OpenClaw release numbers.
## 2026.1.30

View File

@@ -3,11 +3,13 @@
## 2026.2.1
### Changes
- Version alignment with core OpenClaw release numbers.
## 2026.1.31
### Changes
- Version alignment with core OpenClaw release numbers.
## 2026.1.30

View File

@@ -3,11 +3,13 @@
## 2026.2.1
### Changes
- Version alignment with core OpenClaw release numbers.
## 2026.1.31
### Changes
- Version alignment with core OpenClaw release numbers.
## 2026.1.30

View File

@@ -3,11 +3,13 @@
## 2026.2.1
### Changes
- Version alignment with core OpenClaw release numbers.
## 2026.1.31
### Changes
- Version alignment with core OpenClaw release numbers.
## 2026.1.30

View File

@@ -3,11 +3,13 @@
## 2026.2.1
### Changes
- Version alignment with core OpenClaw release numbers.
## 2026.1.31
### Changes
- Version alignment with core OpenClaw release numbers.
## 2026.1.30

View File

@@ -3,11 +3,13 @@
## 2026.2.1
### Changes
- Version alignment with core OpenClaw release numbers.
## 2026.1.31
### Changes
- Version alignment with core OpenClaw release numbers.
## 2026.1.30

View File

@@ -15,12 +15,10 @@ import { ensureAuthStoreFile, resolveAuthStorePath } from "./paths.js";
import { suggestOAuthProfileIdForLegacyDefault } from "./repair.js";
import { ensureAuthProfileStore, saveAuthProfileStore } from "./store.js";
const OAUTH_PROVIDER_IDS = new Set<OAuthProvider>(
getOAuthProviders().map((provider) => provider.id),
);
const OAUTH_PROVIDER_IDS = new Set<string>(getOAuthProviders().map((provider) => provider.id));
const isOAuthProvider = (provider: string): provider is OAuthProvider =>
OAUTH_PROVIDER_IDS.has(provider as OAuthProvider);
OAUTH_PROVIDER_IDS.has(provider);
const resolveOAuthProvider = (provider: string): OAuthProvider | null =>
isOAuthProvider(provider) ? provider : null;

View File

@@ -99,12 +99,12 @@ const _readSessionMessages = async (sessionFile: string) => {
};
describe("createSystemPromptOverride", () => {
it("returns the override prompt regardless of default prompt", () => {
it("returns the override prompt trimmed", () => {
const override = createSystemPromptOverride("OVERRIDE");
expect(override("DEFAULT")).toBe("OVERRIDE");
expect(override).toBe("OVERRIDE");
});
it("returns an empty string for blank overrides", () => {
const override = createSystemPromptOverride(" \n ");
expect(override("DEFAULT")).toBe("");
expect(override).toBe("");
});
});

View File

@@ -74,11 +74,8 @@ export function buildEmbeddedSystemPrompt(params: {
});
}
export function createSystemPromptOverride(
systemPrompt: string,
): (defaultPrompt?: string) => string {
const override = systemPrompt.trim();
return (_defaultPrompt?: string) => override;
/**
 * Normalizes a system prompt override by stripping leading and trailing
 * whitespace.
 *
 * @param systemPrompt - Raw override text.
 * @returns The trimmed override; an empty string when the input contains
 *   only whitespace.
 */
export function createSystemPromptOverride(systemPrompt: string): string {
  const normalizedOverride = systemPrompt.trim();
  return normalizedOverride;
}
export function applySystemPromptOverrideToSession(session: AgentSession, override: string) {

View File

@@ -40,9 +40,9 @@ export function toToolDefinitions(tools: AnyAgentTool[]): ToolDefinition[] {
execute: async (
toolCallId,
params,
signal: AbortSignal | undefined,
onUpdate: AgentToolUpdateCallback<unknown> | undefined,
_ctx,
signal,
): Promise<AgentToolResult<unknown>> => {
try {
return await tool.execute(toolCallId, params, signal, onUpdate);
@@ -91,9 +91,9 @@ export function toClientToolDefinitions(
execute: async (
toolCallId,
params,
_signal: AbortSignal | undefined,
_onUpdate: AgentToolUpdateCallback<unknown> | undefined,
_ctx,
_signal,
): Promise<AgentToolResult<unknown>> => {
const outcome = await runBeforeToolCallHook({
toolName: func.name,

View File

@@ -19,16 +19,25 @@ export function resolveAgentTimeoutMs(opts: {
}): number {
const minMs = Math.max(normalizeNumber(opts.minMs) ?? 1, 1);
const defaultMs = resolveAgentTimeoutSeconds(opts.cfg) * 1000;
// Use a very large timeout value (30 days) to represent "no timeout"
// when explicitly set to 0. This avoids setTimeout issues with Infinity.
// NOTE(review): 30 days is 2,592,000,000 ms, which is above the 2^31 - 1 ms
// (~24.8 days) maximum delay accepted by Node.js timers; a larger delay is
// set to 1 ms. How this value is consumed is not visible here — TODO confirm
// that callers clamp it before passing it to setTimeout.
const NO_TIMEOUT_MS = 30 * 24 * 60 * 60 * 1000;
const overrideMs = normalizeNumber(opts.overrideMs);
if (overrideMs !== undefined) {
if (overrideMs <= 0) {
if (overrideMs === 0) {
return NO_TIMEOUT_MS;
}
if (overrideMs < 0) {
return defaultMs;
}
return Math.max(overrideMs, minMs);
}
const overrideSeconds = normalizeNumber(opts.overrideSeconds);
if (overrideSeconds !== undefined) {
if (overrideSeconds <= 0) {
if (overrideSeconds === 0) {
return NO_TIMEOUT_MS;
}
if (overrideSeconds < 0) {
return defaultMs;
}
return Math.max(overrideSeconds * 1000, minMs);

View File

@@ -172,24 +172,27 @@ export async function runAgentTurnWithFallback(params: {
},
});
const cliSessionId = getCliSessionId(params.getActiveSessionEntry(), provider);
return runCliAgent({
sessionId: params.followupRun.run.sessionId,
sessionKey: params.sessionKey,
sessionFile: params.followupRun.run.sessionFile,
workspaceDir: params.followupRun.run.workspaceDir,
config: params.followupRun.run.config,
prompt: params.commandBody,
provider,
model,
thinkLevel: params.followupRun.run.thinkLevel,
timeoutMs: params.followupRun.run.timeoutMs,
runId,
extraSystemPrompt: params.followupRun.run.extraSystemPrompt,
ownerNumbers: params.followupRun.run.ownerNumbers,
cliSessionId,
images: params.opts?.images,
})
.then((result) => {
return (async () => {
let lifecycleTerminalEmitted = false;
try {
const result = await runCliAgent({
sessionId: params.followupRun.run.sessionId,
sessionKey: params.sessionKey,
sessionFile: params.followupRun.run.sessionFile,
workspaceDir: params.followupRun.run.workspaceDir,
config: params.followupRun.run.config,
prompt: params.commandBody,
provider,
model,
thinkLevel: params.followupRun.run.thinkLevel,
timeoutMs: params.followupRun.run.timeoutMs,
runId,
extraSystemPrompt: params.followupRun.run.extraSystemPrompt,
ownerNumbers: params.followupRun.run.ownerNumbers,
cliSessionId,
images: params.opts?.images,
});
// CLI backends don't emit streaming assistant events, so we need to
// emit one with the final text so server-chat can populate its buffer
// and send the response to TUI/WebSocket clients.
@@ -201,6 +204,7 @@ export async function runAgentTurnWithFallback(params: {
data: { text: cliText },
});
}
emitAgentEvent({
runId,
stream: "lifecycle",
@@ -210,9 +214,10 @@ export async function runAgentTurnWithFallback(params: {
endedAt: Date.now(),
},
});
lifecycleTerminalEmitted = true;
return result;
})
.catch((err) => {
} catch (err) {
emitAgentEvent({
runId,
stream: "lifecycle",
@@ -220,11 +225,28 @@ export async function runAgentTurnWithFallback(params: {
phase: "error",
startedAt,
endedAt: Date.now(),
error: err instanceof Error ? err.message : String(err),
error: String(err),
},
});
lifecycleTerminalEmitted = true;
throw err;
});
} finally {
// Defensive backstop: never let a CLI run complete without a terminal
// lifecycle event, otherwise downstream consumers can hang.
if (!lifecycleTerminalEmitted) {
emitAgentEvent({
runId,
stream: "lifecycle",
data: {
phase: "error",
startedAt,
endedAt: Date.now(),
error: "CLI run completed without lifecycle terminal event",
},
});
}
}
})();
}
const authProfileId =
provider === params.followupRun.run.provider