fix(agents): align session lock hold budget with run timeouts

This commit is contained in:
Peter Steinberger
2026-02-17 03:09:56 +01:00
parent ce4b4d947c
commit fb6e415d0c
5 changed files with 63 additions and 13 deletions

View File

@@ -23,6 +23,7 @@ Docs: https://docs.openclaw.ai
- Scripts/UI/Windows: fix `pnpm ui:*` spawn `EINVAL` failures by restoring shell-backed launch for `.cmd`/`.bat` runners, narrowing shell usage to launcher types that require it, and rejecting unsafe forwarded shell metacharacters in UI script args. (#18594)
- Hooks/Session-memory: recover `/new` conversation summaries when session pointers are reset-path or missing `sessionFile`, and consistently prefer the newest `.jsonl.reset.*` transcript candidate for fallback extraction. (#18088)
- Slack: restrict forwarded-attachment ingestion to explicit shared-message attachments and skip non-Slack forwarded `image_url` fetches, preventing non-forward attachment unfurls from polluting inbound agent context while preserving forwarded message handling.
- Agents/Sessions: align session lock watchdog hold windows with run and compaction timeout budgets (plus grace), preventing valid long-running turns from being force-unlocked mid-run while still recovering hung lock owners. (#18060)
- Cron/Heartbeat: canonicalize session-scoped reminder `sessionKey` routing and preserve explicit flat `sessionKey` cron tool inputs, preventing enqueue/wake namespace drift for session-targeted reminders. (#18637) Thanks @vignesh07.
- OpenClawKit/iOS ChatUI: accept canonical session-key completion events for local pending runs and preserve message IDs across history refreshes, preventing stuck "thinking" state and message flicker after gateway replies. (#18165) Thanks @mbelinky.
- iOS/Onboarding: add QR-first onboarding wizard with setup-code deep link support, pairing/auth issue guidance, and device-pair QR generation improvements for Telegram/Web/TUI fallback flows. (#18162) Thanks @mbelinky and @Marvae.

View File

@@ -1,5 +1,3 @@
import fs from "node:fs/promises";
import os from "node:os";
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import {
createAgentSession,
@@ -7,10 +5,14 @@ import {
SessionManager,
SettingsManager,
} from "@mariozechner/pi-coding-agent";
import { resolveHeartbeatPrompt } from "../../auto-reply/heartbeat.js";
import fs from "node:fs/promises";
import os from "node:os";
import type { ReasoningLevel, ThinkLevel } from "../../auto-reply/thinking.js";
import { resolveChannelCapabilities } from "../../config/channel-capabilities.js";
import type { OpenClawConfig } from "../../config/config.js";
import type { ExecElevatedDefaults } from "../bash-tools.js";
import type { EmbeddedPiCompactResult } from "./types.js";
import { resolveHeartbeatPrompt } from "../../auto-reply/heartbeat.js";
import { resolveChannelCapabilities } from "../../config/channel-capabilities.js";
import { getMachineDisplayName } from "../../infra/machine-name.js";
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
import { type enqueueCommand, enqueueCommandInLane } from "../../process/command-queue.js";
@@ -24,7 +26,6 @@ import { normalizeMessageChannel } from "../../utils/message-channel.js";
import { isReasoningTagProvider } from "../../utils/provider-utils.js";
import { resolveOpenClawAgentDir } from "../agent-paths.js";
import { resolveSessionAgentIds } from "../agent-scope.js";
import type { ExecElevatedDefaults } from "../bash-tools.js";
import { makeBootstrapWarn, resolveBootstrapContextForRun } from "../bootstrap-files.js";
import { listChannelSupportedActions, resolveChannelMessageToolHints } from "../channel-tools.js";
import { formatUserTime, resolveUserTimeFormat, resolveUserTimezone } from "../date-time.js";
@@ -46,7 +47,10 @@ import { resolveSandboxContext } from "../sandbox.js";
import { repairSessionFileIfNeeded } from "../session-file-repair.js";
import { guardSessionManager } from "../session-tool-result-guard-wrapper.js";
import { sanitizeToolUseResultPairing } from "../session-transcript-repair.js";
import { acquireSessionWriteLock } from "../session-write-lock.js";
import {
acquireSessionWriteLock,
resolveSessionLockMaxHoldFromTimeout,
} from "../session-write-lock.js";
import { detectRuntimeShell } from "../shell-utils.js";
import {
applySkillEnvOverrides,
@@ -56,7 +60,10 @@ import {
type SkillSnapshot,
} from "../skills.js";
import { resolveTranscriptPolicy } from "../transcript-policy.js";
import { compactWithSafetyTimeout } from "./compaction-safety-timeout.js";
import {
compactWithSafetyTimeout,
EMBEDDED_COMPACTION_TIMEOUT_MS,
} from "./compaction-safety-timeout.js";
import { buildEmbeddedExtensionPaths } from "./extensions.js";
import {
logToolSchemasForGoogle,
@@ -75,7 +82,6 @@ import {
createSystemPromptOverride,
} from "./system-prompt.js";
import { splitSdkTools } from "./tool-split.js";
import type { EmbeddedPiCompactResult } from "./types.js";
import { describeUnknownError, mapThinkingLevel } from "./utils.js";
import { flushPendingToolResultsAfterIdle } from "./wait-for-idle-before-flush.js";
@@ -503,6 +509,9 @@ export async function compactEmbeddedPiSessionDirect(
const sessionLock = await acquireSessionWriteLock({
sessionFile: params.sessionFile,
maxHoldMs: resolveSessionLockMaxHoldFromTimeout({
timeoutMs: EMBEDDED_COMPACTION_TIMEOUT_MS,
}),
});
try {
await repairSessionFileIfNeeded({

View File

@@ -1,9 +1,10 @@
import fs from "node:fs/promises";
import os from "node:os";
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { ImageContent } from "@mariozechner/pi-ai";
import { streamSimple } from "@mariozechner/pi-ai";
import { createAgentSession, SessionManager, SettingsManager } from "@mariozechner/pi-coding-agent";
import fs from "node:fs/promises";
import os from "node:os";
import type { EmbeddedRunAttemptParams, EmbeddedRunAttemptResult } from "./types.js";
import { resolveHeartbeatPrompt } from "../../../auto-reply/heartbeat.js";
import { resolveChannelCapabilities } from "../../../config/channel-capabilities.js";
import { getMachineDisplayName } from "../../../infra/machine-name.js";
@@ -54,7 +55,10 @@ import { resolveSandboxRuntimeStatus } from "../../sandbox/runtime-status.js";
import { repairSessionFileIfNeeded } from "../../session-file-repair.js";
import { guardSessionManager } from "../../session-tool-result-guard-wrapper.js";
import { sanitizeToolUseResultPairing } from "../../session-transcript-repair.js";
import { acquireSessionWriteLock } from "../../session-write-lock.js";
import {
acquireSessionWriteLock,
resolveSessionLockMaxHoldFromTimeout,
} from "../../session-write-lock.js";
import { detectRuntimeShell } from "../../shell-utils.js";
import {
applySkillEnvOverrides,
@@ -100,7 +104,6 @@ import {
shouldFlagCompactionTimeout,
} from "./compaction-timeout.js";
import { detectAndLoadPromptImages } from "./images.js";
import type { EmbeddedRunAttemptParams, EmbeddedRunAttemptResult } from "./types.js";
export function injectHistoryImagesIntoMessages(
messages: AgentMessage[],
@@ -482,6 +485,9 @@ export async function runEmbeddedAttempt(
const sessionLock = await acquireSessionWriteLock({
sessionFile: params.sessionFile,
maxHoldMs: resolveSessionLockMaxHoldFromTimeout({
timeoutMs: params.timeoutMs,
}),
});
let sessionManager: ReturnType<typeof guardSessionManager> | undefined;

View File

@@ -2,7 +2,12 @@ import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it, vi } from "vitest";
import { __testing, acquireSessionWriteLock, cleanStaleLockFiles } from "./session-write-lock.js";
import {
__testing,
acquireSessionWriteLock,
cleanStaleLockFiles,
resolveSessionLockMaxHoldFromTimeout,
} from "./session-write-lock.js";
describe("acquireSessionWriteLock", () => {
it("reuses locks across symlinked session paths", async () => {
@@ -103,6 +108,19 @@ describe("acquireSessionWriteLock", () => {
}
});
// The expected values below follow from the module's 2-minute default grace
// (DEFAULT_TIMEOUT_GRACE_MS = 120_000): hold = max(minMs, timeoutMs + grace).
it("derives max hold from timeout plus grace", () => {
  // 600_000 + 120_000 grace = 720_000 (above the default minimum).
  expect(resolveSessionLockMaxHoldFromTimeout({ timeoutMs: 600_000 })).toBe(720_000);
  // 1_000 + 120_000 grace = 121_000, which already exceeds the explicit 5_000 floor.
  expect(resolveSessionLockMaxHoldFromTimeout({ timeoutMs: 1_000, minMs: 5_000 })).toBe(121_000);
});
// A timeout already at the module's hold ceiling must not grow past it once
// the grace period is added.
it("clamps max hold for effectively no-timeout runs", () => {
  const effectivelyUnboundedTimeoutMs = 2_147_000_000;
  const resolvedHoldMs = resolveSessionLockMaxHoldFromTimeout({
    timeoutMs: effectivelyUnboundedTimeoutMs,
  });
  expect(resolvedHoldMs).toBe(2_147_000_000);
});
it("cleans stale .jsonl lock files in sessions directories", async () => {
const root = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-lock-"));
const sessionsDir = path.join(root, "sessions");

View File

@@ -38,6 +38,8 @@ const WATCHDOG_STATE_KEY = Symbol.for("openclaw.sessionWriteLockWatchdogState");
// 30 minutes. NOTE(review): presumably the age after which a lock is treated as stale — confirm at the use site.
const DEFAULT_STALE_MS = 30 * 60_000;
// 5 minutes. Also serves as the default floor in resolveSessionLockMaxHoldFromTimeout.
const DEFAULT_MAX_HOLD_MS = 5 * 60_000;
// 1 minute. NOTE(review): presumably the watchdog polling period — confirm at the use site.
const DEFAULT_WATCHDOG_INTERVAL_MS = 60 * 1000;
// 2 minutes added on top of a run/compaction timeout when deriving a lock hold window.
const DEFAULT_TIMEOUT_GRACE_MS = 2 * 60_000;
// Hard ceiling for any derived hold window; also returned for infinite timeouts.
// NOTE(review): looks chosen to stay below the 32-bit timer delay limit (2_147_483_647 ms) — confirm.
const MAX_LOCK_HOLD_MS = 2_147_000_000;
type CleanupState = {
registered: boolean;
@@ -95,6 +97,20 @@ function resolvePositiveMs(
return value;
}
/**
 * Derives the maximum time a session write lock may be held from the timeout
 * budget of the operation that owns it.
 *
 * The result is `timeoutMs + graceMs`, raised to at least `minMs` and capped
 * at `MAX_LOCK_HOLD_MS`. An infinite timeout maps straight to the cap.
 *
 * @param params.timeoutMs - Timeout budget of the run or compaction, in ms.
 * @param params.graceMs - Extra slack added on top of the timeout; resolved
 *   against `DEFAULT_TIMEOUT_GRACE_MS`.
 * @param params.minMs - Lower bound for the result; resolved against
 *   `DEFAULT_MAX_HOLD_MS`.
 * @returns The hold window in milliseconds, never above `MAX_LOCK_HOLD_MS`.
 */
export function resolveSessionLockMaxHoldFromTimeout(params: {
  timeoutMs: number;
  graceMs?: number;
  minMs?: number;
}): number {
  const { timeoutMs: rawTimeoutMs, graceMs: rawGraceMs, minMs: rawMinMs } = params;

  // NOTE(review): resolvePositiveMs presumably substitutes its second argument
  // when the value is not a usable positive number — confirm against its body.
  const floorMs = resolvePositiveMs(rawMinMs, DEFAULT_MAX_HOLD_MS);
  const budgetMs = resolvePositiveMs(rawTimeoutMs, floorMs, { allowInfinity: true });

  if (budgetMs !== Number.POSITIVE_INFINITY) {
    const slackMs = resolvePositiveMs(rawGraceMs, DEFAULT_TIMEOUT_GRACE_MS);
    const atLeastFloorMs = Math.max(floorMs, budgetMs + slackMs);
    return Math.min(MAX_LOCK_HOLD_MS, atLeastFloorMs);
  }

  // Unbounded runs still get a finite hold window.
  return MAX_LOCK_HOLD_MS;
}
async function releaseHeldLock(
normalizedSessionFile: string,
held: HeldLock,