From e98ccc8e17a4f91cd000606f9a8b51ca0f7a4efd Mon Sep 17 00:00:00 2001 From: Mariano <132747814+mbelinky@users.noreply.github.com> Date: Thu, 19 Feb 2026 20:20:28 +0000 Subject: [PATCH] iOS/Gateway: stabilize background wake and reconnect behavior (#21226) Merged via /review-pr -> /prepare-pr -> /merge-pr. Prepared head SHA: 7705a7741e06335197a2015593355a7f4f9170ab Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com> Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com> Reviewed-by: @mbelinky --- CHANGELOG.md | 2 + apps/ios/README.md | 31 +++ apps/ios/Sources/Info.plist | 4 + .../Location/SignificantLocationMonitor.swift | 6 +- apps/ios/Sources/Model/NodeAppModel.swift | 263 ++++++++++++++++-- apps/ios/Sources/OpenClawApp.swift | 62 +++++ .../Sources/OpenClawKit/GatewayChannel.swift | 38 ++- .../GatewayNodeSessionTests.swift | 4 + .../server-methods/nodes.invoke-wake.test.ts | 25 +- src/gateway/server-methods/nodes.ts | 223 +++++++++++++-- 10 files changed, 604 insertions(+), 54 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 45c636d091..9727eb0946 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ Docs: https://docs.openclaw.ai ### Changes +- iOS/Gateway: stabilize background wake and reconnect behavior with background reconnect suppression/lease windows, BGAppRefresh wake fallback, location wake hook throttling, and APNs wake retry+nudge instrumentation. (#21226) thanks @mbelinky. + ### Fixes - Discord/Gateway: handle close code 4014 (missing privileged gateway intents) without crashing the gateway. Thanks @thewilloftheshadow. diff --git a/apps/ios/README.md b/apps/ios/README.md index b870bdcea5..c7c501fcbf 100644 --- a/apps/ios/README.md +++ b/apps/ios/README.md @@ -67,6 +67,37 @@ pnpm ios:open - iPhone node commands in foreground: camera snap/clip, canvas present/navigate/eval/snapshot, screen record, location, contacts, calendar, reminders, photos, motion, local notifications. - Share extension deep-link forwarding into the connected gateway session. +## Location Automation Use Case (Testing) + +Use this for automation signals ("I moved", "I arrived", "I left"), not as a keep-awake mechanism. + +- Product intent: + - movement-aware automations driven by iOS location events + - example: arrival/exit geofence, significant movement, visit detection +- Non-goal: + - continuous GPS polling just to keep the app alive + +Test path to include in QA runs: + +1. Enable location permission in app: + - set `Always` permission + - verify background location capability is enabled in the build profile +2. Background the app and trigger movement: + - walk/drive enough for a significant location update, or cross a configured geofence +3. Validate gateway side effects: + - node reconnect/wake if needed + - expected location/movement event arrives at gateway + - automation trigger executes once (no duplicate storm) +4. Validate resource impact: + - no sustained high thermal state + - no excessive background battery drain over a short observation window + +Pass criteria: + +- movement events are delivered reliably enough for automation UX +- no location-driven reconnect spam loops +- app remains stable after repeated background/foreground transitions + ## Known Issues / Limitations / Problems - Foreground-first: iOS can suspend sockets in background; reconnect recovery is still being tuned. diff --git a/apps/ios/Sources/Info.plist b/apps/ios/Sources/Info.plist index fe086049a8..0d74308a8b 100644 --- a/apps/ios/Sources/Info.plist +++ b/apps/ios/Sources/Info.plist @@ -64,6 +64,10 @@ audio remote-notification + BGTaskSchedulerPermittedIdentifiers + + ai.openclaw.ios.bgrefresh + UILaunchScreen UISupportedInterfaceOrientations diff --git a/apps/ios/Sources/Location/SignificantLocationMonitor.swift b/apps/ios/Sources/Location/SignificantLocationMonitor.swift index f12a157dc6..1b8d5ca2a0 100644 --- a/apps/ios/Sources/Location/SignificantLocationMonitor.swift +++ b/apps/ios/Sources/Location/SignificantLocationMonitor.swift @@ -10,7 +10,8 @@ enum SignificantLocationMonitor { static func startIfNeeded( locationService: any LocationServicing, locationMode: OpenClawLocationMode, - gateway: GatewayNodeSession + gateway: GatewayNodeSession, + beforeSend: (@MainActor @Sendable () async -> Void)? = nil ) { guard locationMode == .always else { return } let status = locationService.authorizationStatus() @@ -31,6 +32,9 @@ enum SignificantLocationMonitor { let json = String(data: data, encoding: .utf8) else { return } Task { @MainActor in + if let beforeSend { + await beforeSend() + } await gateway.sendEvent(event: "location.update", payloadJSON: json) } } diff --git a/apps/ios/Sources/Model/NodeAppModel.swift b/apps/ios/Sources/Model/NodeAppModel.swift index ef2f375296..d9206c41ef 100644 --- a/apps/ios/Sources/Model/NodeAppModel.swift +++ b/apps/ios/Sources/Model/NodeAppModel.swift @@ -42,6 +42,7 @@ private final class NotificationInvokeLatch: @unchecked Sendable { final class NodeAppModel { private let deepLinkLogger = Logger(subsystem: "ai.openclaw.ios", category: "DeepLink") private let pushWakeLogger = Logger(subsystem: "ai.openclaw.ios", category: "PushWake") + private let locationWakeLogger = Logger(subsystem: "ai.openclaw.ios", category: "LocationWake") enum CameraHUDKind { case photo case recording @@ -103,6 +104,11 @@ final class NodeAppModel { private var backgroundTalkKeptActive = false private var backgroundedAt: Date? private var reconnectAfterBackgroundArmed = false + private var backgroundGraceTaskID: UIBackgroundTaskIdentifier = .invalid + @ObservationIgnored private var backgroundGraceTaskTimer: Task? + private var backgroundReconnectSuppressed = false + private var backgroundReconnectLeaseUntil: Date? + private var lastSignificantLocationWakeAt: Date? private var gatewayConnected = false private var operatorConnected = false @@ -271,6 +277,7 @@ final class NodeAppModel { self.stopGatewayHealthMonitor() self.backgroundedAt = Date() self.reconnectAfterBackgroundArmed = true + self.beginBackgroundConnectionGracePeriod() // Release voice wake mic in background. self.backgroundVoiceWakeSuspended = self.voiceWake.suspendForExternalAudioCapture() let shouldKeepTalkActive = keepTalkActive && self.talkMode.isEnabled @@ -278,6 +285,8 @@ final class NodeAppModel { self.backgroundTalkSuspended = self.talkMode.suspendForBackground(keepActive: shouldKeepTalkActive) case .active, .inactive: self.isBackgrounded = false + self.endBackgroundConnectionGracePeriod(reason: "scene_foreground") + self.clearBackgroundReconnectSuppression(reason: "scene_foreground") if self.operatorConnected { self.startGatewayHealthMonitor() } @@ -329,9 +338,98 @@ final class NodeAppModel { } @unknown default: self.isBackgrounded = false + self.endBackgroundConnectionGracePeriod(reason: "scene_unknown") + self.clearBackgroundReconnectSuppression(reason: "scene_unknown") } } + private func beginBackgroundConnectionGracePeriod(seconds: TimeInterval = 25) { + self.grantBackgroundReconnectLease(seconds: seconds, reason: "scene_background_grace") + self.endBackgroundConnectionGracePeriod(reason: "restart") + let taskID = UIApplication.shared.beginBackgroundTask(withName: "gateway-background-grace") { [weak self] in + Task { @MainActor in + self?.suppressBackgroundReconnect( + reason: "background_grace_expired", + disconnectIfNeeded: true) + self?.endBackgroundConnectionGracePeriod(reason: "expired") + } + } + guard taskID != .invalid else { + self.pushWakeLogger.info("Background grace unavailable: beginBackgroundTask returned invalid") + return + } + self.backgroundGraceTaskID = taskID + self.pushWakeLogger.info("Background grace started seconds=\(seconds, privacy: .public)") + self.backgroundGraceTaskTimer = Task { [weak self] in + guard let self else { return } + try? await Task.sleep(nanoseconds: UInt64(max(1, seconds) * 1_000_000_000)) + await MainActor.run { + self.suppressBackgroundReconnect(reason: "background_grace_timer", disconnectIfNeeded: true) + self.endBackgroundConnectionGracePeriod(reason: "timer") + } + } + } + + private func endBackgroundConnectionGracePeriod(reason: String) { + self.backgroundGraceTaskTimer?.cancel() + self.backgroundGraceTaskTimer = nil + guard self.backgroundGraceTaskID != .invalid else { return } + UIApplication.shared.endBackgroundTask(self.backgroundGraceTaskID) + self.backgroundGraceTaskID = .invalid + self.pushWakeLogger.info("Background grace ended reason=\(reason, privacy: .public)") + } + + private func grantBackgroundReconnectLease(seconds: TimeInterval, reason: String) { + guard self.isBackgrounded else { return } + let leaseSeconds = max(5, seconds) + let leaseUntil = Date().addingTimeInterval(leaseSeconds) + if let existing = self.backgroundReconnectLeaseUntil, existing > leaseUntil { + // Keep the longer lease if one is already active. + } else { + self.backgroundReconnectLeaseUntil = leaseUntil + } + let wasSuppressed = self.backgroundReconnectSuppressed + self.backgroundReconnectSuppressed = false + self.pushWakeLogger.info( + "Background reconnect lease reason=\(reason, privacy: .public) seconds=\(leaseSeconds, privacy: .public) wasSuppressed=\(wasSuppressed, privacy: .public)") + } + + private func suppressBackgroundReconnect(reason: String, disconnectIfNeeded: Bool) { + guard self.isBackgrounded else { return } + let hadLease = self.backgroundReconnectLeaseUntil != nil + let changed = hadLease || !self.backgroundReconnectSuppressed + self.backgroundReconnectLeaseUntil = nil + self.backgroundReconnectSuppressed = true + guard changed else { return } + self.pushWakeLogger.info( + "Background reconnect suppressed reason=\(reason, privacy: .public) disconnect=\(disconnectIfNeeded, privacy: .public)") + guard disconnectIfNeeded else { return } + Task { [weak self] in + guard let self else { return } + await self.operatorGateway.disconnect() + await self.nodeGateway.disconnect() + await MainActor.run { + self.operatorConnected = false + self.gatewayConnected = false + self.talkMode.updateGatewayConnected(false) + if self.isBackgrounded { + self.gatewayStatusText = "Background idle" + self.gatewayServerName = nil + self.gatewayRemoteAddress = nil + self.showLocalCanvasOnDisconnect() + } + } + } + } + + private func clearBackgroundReconnectSuppression(reason: String) { + let changed = self.backgroundReconnectSuppressed || self.backgroundReconnectLeaseUntil != nil + self.backgroundReconnectSuppressed = false + self.backgroundReconnectLeaseUntil = nil + guard changed else { return } + self.pushWakeLogger.info("Background reconnect cleared reason=\(reason, privacy: .public)") + } + func setVoiceWakeEnabled(_ enabled: Bool) { self.voiceWake.setEnabled(enabled) if enabled { @@ -568,7 +666,7 @@ final class NodeAppModel { } catch { if let gatewayError = error as? GatewayResponseError { let lower = gatewayError.message.lowercased() - if lower.contains("unauthorized role") { + if lower.contains("unauthorized role") || lower.contains("missing scope") { await self.setGatewayHealthMonitorDisabled(true) return true } @@ -601,7 +699,7 @@ final class NodeAppModel { } catch { if let gatewayError = error as? GatewayResponseError { let lower = gatewayError.message.lowercased() - if lower.contains("unauthorized role") { + if lower.contains("unauthorized role") || lower.contains("missing scope") { await self.setGatewayHealthMonitorDisabled(true) return } @@ -1725,6 +1823,23 @@ private extension NodeAppModel { self.apnsLastRegisteredTokenHex = nil } + func refreshBackgroundReconnectSuppressionIfNeeded(source: String) { + guard self.isBackgrounded else { return } + guard !self.backgroundReconnectSuppressed else { return } + guard let leaseUntil = self.backgroundReconnectLeaseUntil else { + self.suppressBackgroundReconnect(reason: "\(source):no_lease", disconnectIfNeeded: true) + return + } + if Date() >= leaseUntil { + self.suppressBackgroundReconnect(reason: "\(source):lease_expired", disconnectIfNeeded: true) + } + } + + func shouldPauseReconnectLoopInBackground(source: String) -> Bool { + self.refreshBackgroundReconnectSuppressionIfNeeded(source: source) + return self.isBackgrounded && self.backgroundReconnectSuppressed + } + func startOperatorGatewayLoop( url: URL, stableID: String, @@ -1747,6 +1862,7 @@ private extension NodeAppModel { try? await Task.sleep(nanoseconds: 1_000_000_000) continue } + if self.shouldPauseReconnectLoopInBackground(source: "operator_loop") { try? await Task.sleep(nanoseconds: 2_000_000_000); continue } if await self.isOperatorConnected() { try? await Task.sleep(nanoseconds: 1_000_000_000) continue @@ -1834,6 +1950,7 @@ private extension NodeAppModel { try? await Task.sleep(nanoseconds: 1_000_000_000) continue } + if self.shouldPauseReconnectLoopInBackground(source: "node_loop") { try? await Task.sleep(nanoseconds: 2_000_000_000); continue } if await self.isGatewayConnected() { try? await Task.sleep(nanoseconds: 1_000_000_000) continue @@ -1883,7 +2000,15 @@ private extension NodeAppModel { } await self.showA2UIOnConnectIfNeeded() await self.onNodeGatewayConnected() - await MainActor.run { SignificantLocationMonitor.startIfNeeded(locationService: self.locationService, locationMode: self.locationMode(), gateway: self.nodeGateway) } + await MainActor.run { + SignificantLocationMonitor.startIfNeeded( + locationService: self.locationService, + locationMode: self.locationMode(), + gateway: self.nodeGateway, + beforeSend: { [weak self] in + await self?.handleSignificantLocationWakeIfNeeded() + }) + } }, onDisconnected: { [weak self] reason in guard let self else { return } @@ -2135,12 +2260,59 @@ extension NodeAppModel { } func handleSilentPushWake(_ userInfo: [AnyHashable: Any]) async -> Bool { + let wakeId = Self.makePushWakeAttemptID() guard Self.isSilentPushPayload(userInfo) else { - self.pushWakeLogger.info("Ignored APNs payload: not silent push") + self.pushWakeLogger.info("Ignored APNs payload wakeId=\(wakeId, privacy: .public): not silent push") return false } - self.pushWakeLogger.info("Silent push received; attempting reconnect if needed") - return await self.reconnectGatewaySessionsForSilentPushIfNeeded() + let pushKind = Self.openclawPushKind(userInfo) + self.pushWakeLogger.info( + "Silent push received wakeId=\(wakeId, privacy: .public) kind=\(pushKind, privacy: .public) backgrounded=\(self.isBackgrounded, privacy: .public) autoReconnect=\(self.gatewayAutoReconnectEnabled, privacy: .public)") + let result = await self.reconnectGatewaySessionsForSilentPushIfNeeded(wakeId: wakeId) + self.pushWakeLogger.info( + "Silent push outcome wakeId=\(wakeId, privacy: .public) applied=\(result.applied, privacy: .public) reason=\(result.reason, privacy: .public) durationMs=\(result.durationMs, privacy: .public)") + return result.applied + } + + func handleBackgroundRefreshWake(trigger: String = "bg_app_refresh") async -> Bool { + let wakeId = Self.makePushWakeAttemptID() + self.pushWakeLogger.info( + "Background refresh wake received wakeId=\(wakeId, privacy: .public) trigger=\(trigger, privacy: .public) backgrounded=\(self.isBackgrounded, privacy: .public) autoReconnect=\(self.gatewayAutoReconnectEnabled, privacy: .public)") + let result = await self.reconnectGatewaySessionsForSilentPushIfNeeded(wakeId: wakeId) + self.pushWakeLogger.info( + "Background refresh wake outcome wakeId=\(wakeId, privacy: .public) applied=\(result.applied, privacy: .public) reason=\(result.reason, privacy: .public) durationMs=\(result.durationMs, privacy: .public)") + return result.applied + } + + func handleSignificantLocationWakeIfNeeded() async { + let wakeId = Self.makePushWakeAttemptID() + let now = Date() + let throttleWindowSeconds: TimeInterval = 180 + + if await self.isGatewayConnected() { + self.locationWakeLogger.info( + "Location wake no-op wakeId=\(wakeId, privacy: .public): already connected") + return + } + if let last = self.lastSignificantLocationWakeAt, + now.timeIntervalSince(last) < throttleWindowSeconds + { + self.locationWakeLogger.info( + "Location wake throttled wakeId=\(wakeId, privacy: .public) elapsedSec=\(now.timeIntervalSince(last), privacy: .public)") + return + } + self.lastSignificantLocationWakeAt = now + + self.locationWakeLogger.info( + "Location wake begin wakeId=\(wakeId, privacy: .public) backgrounded=\(self.isBackgrounded, privacy: .public) autoReconnect=\(self.gatewayAutoReconnectEnabled, privacy: .public)") + let result = await self.reconnectGatewaySessionsForSilentPushIfNeeded(wakeId: wakeId) + self.locationWakeLogger.info( + "Location wake trigger wakeId=\(wakeId, privacy: .public) applied=\(result.applied, privacy: .public) reason=\(result.reason, privacy: .public) durationMs=\(result.durationMs, privacy: .public)") + + guard result.applied else { return } + let connected = await self.waitForGatewayConnection(timeoutMs: 5000, pollMs: 250) + self.locationWakeLogger.info( + "Location wake post-check wakeId=\(wakeId, privacy: .public) connected=\(connected, privacy: .public)") } func updateAPNsDeviceToken(_ tokenData: Data) { @@ -2210,28 +2382,83 @@ extension NodeAppModel { return false } - private func reconnectGatewaySessionsForSilentPushIfNeeded() async -> Bool { - guard self.isBackgrounded else { - self.pushWakeLogger.info("Wake no-op: app not backgrounded") - return false + private static func makePushWakeAttemptID() -> String { + let raw = UUID().uuidString.replacingOccurrences(of: "-", with: "") + return String(raw.prefix(8)) + } + + private static func openclawPushKind(_ userInfo: [AnyHashable: Any]) -> String { + if let payload = userInfo["openclaw"] as? [String: Any], + let kind = payload["kind"] as? String + { + let trimmed = kind.trimmingCharacters(in: .whitespacesAndNewlines) + if !trimmed.isEmpty { return trimmed } } - guard self.gatewayAutoReconnectEnabled else { - self.pushWakeLogger.info("Wake no-op: auto reconnect disabled") - return false + if let payload = userInfo["openclaw"] as? [AnyHashable: Any], + let kind = payload["kind"] as? String + { + let trimmed = kind.trimmingCharacters(in: .whitespacesAndNewlines) + if !trimmed.isEmpty { return trimmed } } - guard self.activeGatewayConnectConfig != nil else { - self.pushWakeLogger.info("Wake no-op: no active gateway config") - return false + return "unknown" + } + + private struct SilentPushWakeAttemptResult { + var applied: Bool + var reason: String + var durationMs: Int + } + + private func waitForGatewayConnection(timeoutMs: Int, pollMs: Int) async -> Bool { + let clampedTimeoutMs = max(0, timeoutMs) + let pollIntervalNs = UInt64(max(50, pollMs)) * 1_000_000 + let deadline = Date().addingTimeInterval(Double(clampedTimeoutMs) / 1000.0) + while Date() < deadline { + if await self.isGatewayConnected() { + return true + } + try? await Task.sleep(nanoseconds: pollIntervalNs) + } + return await self.isGatewayConnected() + } + + private func reconnectGatewaySessionsForSilentPushIfNeeded( + wakeId: String + ) async -> SilentPushWakeAttemptResult { + let startedAt = Date() + let makeResult: (Bool, String) -> SilentPushWakeAttemptResult = { applied, reason in + let durationMs = Int(Date().timeIntervalSince(startedAt) * 1000) + return SilentPushWakeAttemptResult( + applied: applied, + reason: reason, + durationMs: max(0, durationMs)) } + guard self.isBackgrounded else { + self.pushWakeLogger.info("Wake no-op wakeId=\(wakeId, privacy: .public): app not backgrounded") + return makeResult(false, "not_backgrounded") + } + guard self.gatewayAutoReconnectEnabled else { + self.pushWakeLogger.info("Wake no-op wakeId=\(wakeId, privacy: .public): auto reconnect disabled") + return makeResult(false, "auto_reconnect_disabled") + } + guard let cfg = self.activeGatewayConnectConfig else { + self.pushWakeLogger.info("Wake no-op wakeId=\(wakeId, privacy: .public): no active gateway config") + return makeResult(false, "no_active_gateway_config") + } + + self.pushWakeLogger.info( + "Wake reconnect begin wakeId=\(wakeId, privacy: .public) stableID=\(cfg.stableID, privacy: .public)") + self.grantBackgroundReconnectLease(seconds: 30, reason: "wake_\(wakeId)") await self.operatorGateway.disconnect() await self.nodeGateway.disconnect() self.operatorConnected = false self.gatewayConnected = false self.gatewayStatusText = "Reconnecting…" self.talkMode.updateGatewayConnected(false) - self.pushWakeLogger.info("Wake reconnect trigger applied") - return true + self.applyGatewayConnectConfig(cfg) + self.pushWakeLogger.info("Wake reconnect trigger applied wakeId=\(wakeId, privacy: .public)") + return makeResult(true, "reconnect_triggered") } } diff --git a/apps/ios/Sources/OpenClawApp.swift b/apps/ios/Sources/OpenClawApp.swift index 091c1b90fd..ade0cadad3 100644 --- a/apps/ios/Sources/OpenClawApp.swift +++ b/apps/ios/Sources/OpenClawApp.swift @@ -2,9 +2,13 @@ import SwiftUI import Foundation import os import UIKit +import BackgroundTasks final class OpenClawAppDelegate: NSObject, UIApplicationDelegate { private let logger = Logger(subsystem: "ai.openclaw.ios", category: "Push") + private let backgroundWakeLogger = Logger(subsystem: "ai.openclaw.ios", category: "BackgroundWake") + private static let wakeRefreshTaskIdentifier = "ai.openclaw.ios.bgrefresh" + private var backgroundWakeTask: Task? private var pendingAPNsDeviceToken: Data? weak var appModel: NodeAppModel? { didSet { @@ -21,6 +25,7 @@ final class OpenClawAppDelegate: NSObject, UIApplicationDelegate { didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]? = nil ) -> Bool { + self.registerBackgroundWakeRefreshTask() application.registerForRemoteNotifications() return true } @@ -49,14 +54,70 @@ final class OpenClawAppDelegate: NSObject, UIApplicationDelegate { Task { @MainActor in guard let appModel = self.appModel else { self.logger.info("APNs wake skipped: appModel unavailable") + self.scheduleBackgroundWakeRefresh(afterSeconds: 90, reason: "silent_push_no_model") completionHandler(.noData) return } let handled = await appModel.handleSilentPushWake(userInfo) self.logger.info("APNs wake handled=\(handled, privacy: .public)") + if !handled { + self.scheduleBackgroundWakeRefresh(afterSeconds: 90, reason: "silent_push_not_applied") + } completionHandler(handled ? .newData : .noData) } } + + func scenePhaseChanged(_ phase: ScenePhase) { + if phase == .background { + self.scheduleBackgroundWakeRefresh(afterSeconds: 120, reason: "scene_background") + } + } + + private func registerBackgroundWakeRefreshTask() { + BGTaskScheduler.shared.register( + forTaskWithIdentifier: Self.wakeRefreshTaskIdentifier, + using: nil + ) { [weak self] task in + guard let refreshTask = task as? BGAppRefreshTask else { + task.setTaskCompleted(success: false) + return + } + self?.handleBackgroundWakeRefresh(task: refreshTask) + } + } + + private func scheduleBackgroundWakeRefresh(afterSeconds delay: TimeInterval, reason: String) { + let request = BGAppRefreshTaskRequest(identifier: Self.wakeRefreshTaskIdentifier) + request.earliestBeginDate = Date().addingTimeInterval(max(60, delay)) + do { + try BGTaskScheduler.shared.submit(request) + self.backgroundWakeLogger.info( + "Scheduled background wake refresh reason=\(reason, privacy: .public) delaySeconds=\(max(60, delay), privacy: .public)") + } catch { + self.backgroundWakeLogger.error( + "Failed scheduling background wake refresh reason=\(reason, privacy: .public) error=\(error.localizedDescription, privacy: .public)") + } + } + + private func handleBackgroundWakeRefresh(task: BGAppRefreshTask) { + self.scheduleBackgroundWakeRefresh(afterSeconds: 15 * 60, reason: "reschedule") + self.backgroundWakeTask?.cancel() + + let wakeTask = Task { @MainActor [weak self] in + guard let self, let appModel = self.appModel else { return false } + return await appModel.handleBackgroundRefreshWake(trigger: "bg_app_refresh") + } + self.backgroundWakeTask = wakeTask + task.expirationHandler = { + wakeTask.cancel() + } + Task { + let applied = await wakeTask.value + task.setTaskCompleted(success: applied) + self.backgroundWakeLogger.info( + "Background wake refresh finished applied=\(applied, privacy: .public)") + } + } } @main @@ -89,6 +150,7 @@ struct OpenClawApp: App { .onChange(of: self.scenePhase) { _, newValue in self.appModel.setScenePhase(newValue) self.gatewayController.setScenePhase(newValue) + self.appDelegate.scenePhaseChanged(newValue) } } } diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayChannel.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayChannel.swift index 9682a31aa4..fc0be4a94a 100644 --- a/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayChannel.swift +++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayChannel.swift @@ -7,6 +7,7 @@ public protocol WebSocketTasking: AnyObject { func resume() func cancel(with closeCode: URLSessionWebSocketTask.CloseCode, reason: Data?) func send(_ message: URLSessionWebSocketTask.Message) async throws + func sendPing(pongReceiveHandler: @escaping @Sendable (Error?) -> Void) func receive() async throws -> URLSessionWebSocketTask.Message func receive(completionHandler: @escaping @Sendable (Result) -> Void) } @@ -40,6 +41,18 @@ public struct WebSocketTaskBox: @unchecked Sendable { { self.task.receive(completionHandler: completionHandler) } + + public func sendPing() async throws { + try await withCheckedThrowingContinuation { (continuation: CheckedContinuation) in + self.task.sendPing { error in + if let error { + continuation.resume(throwing: error) + } else { + continuation.resume(returning: ()) + } + } + } + } } public protocol WebSocketSessioning: AnyObject { @@ -213,7 +226,7 @@ public actor GatewayChannelActor { private func watchdogLoop() async { // Keep nudging reconnect in case exponential backoff stalls. while self.shouldReconnect { - try? await Task.sleep(nanoseconds: 30 * 1_000_000_000) // 30s cadence + guard await self.sleepUnlessCancelled(nanoseconds: 30 * 1_000_000_000) else { return } // 30s cadence guard self.shouldReconnect else { return } if self.connected { continue } do { @@ -285,13 +298,15 @@ public actor GatewayChannelActor { private func keepaliveLoop() async { while self.shouldReconnect { - try? await Task.sleep(nanoseconds: UInt64(self.keepaliveIntervalSeconds * 1_000_000_000)) + guard await self.sleepUnlessCancelled( + nanoseconds: UInt64(self.keepaliveIntervalSeconds * 1_000_000_000)) + else { return } guard self.shouldReconnect else { return } guard self.connected else { continue } - // Best-effort outbound message to keep intermediate NAT/proxy state alive. - // We intentionally ignore the response. + guard let task = self.task else { continue } + // Best-effort ping keeps NAT/proxy state alive without generating RPC load. do { - try await self.send(method: "health", params: nil) + try await task.sendPing() } catch { // Avoid spamming logs; the reconnect paths will surface meaningful errors. } @@ -593,7 +608,7 @@ public actor GatewayChannelActor { private func watchTicks() async { let tolerance = self.tickIntervalMs * 2 while self.connected { - try? await Task.sleep(nanoseconds: UInt64(tolerance * 1_000_000)) + guard await self.sleepUnlessCancelled(nanoseconds: UInt64(tolerance * 1_000_000)) else { return } guard self.connected else { return } if let last = self.lastTick { let delta = Date().timeIntervalSince(last) * 1000 @@ -616,7 +631,7 @@ public actor GatewayChannelActor { guard self.shouldReconnect else { return } let delay = self.backoffMs / 1000 self.backoffMs = min(self.backoffMs * 2, 30000) - try? await Task.sleep(nanoseconds: UInt64(delay * 1_000_000_000)) + guard await self.sleepUnlessCancelled(nanoseconds: UInt64(delay * 1_000_000_000)) else { return } guard self.shouldReconnect else { return } do { try await self.connect() @@ -627,6 +642,15 @@ public actor GatewayChannelActor { } } + private nonisolated func sleepUnlessCancelled(nanoseconds: UInt64) async -> Bool { + do { + try await Task.sleep(nanoseconds: nanoseconds) + } catch { + return false + } + return !Task.isCancelled + } + public func request( method: String, params: [String: AnyCodable]?, diff --git a/apps/shared/OpenClawKit/Tests/OpenClawKitTests/GatewayNodeSessionTests.swift b/apps/shared/OpenClawKit/Tests/OpenClawKitTests/GatewayNodeSessionTests.swift index fc6461cdfa..08a6ea2162 100644 --- a/apps/shared/OpenClawKit/Tests/OpenClawKitTests/GatewayNodeSessionTests.swift +++ b/apps/shared/OpenClawKit/Tests/OpenClawKitTests/GatewayNodeSessionTests.swift @@ -75,6 +75,10 @@ private final class FakeGatewayWebSocketTask: WebSocketTasking, @unchecked Senda } } + func sendPing(pongReceiveHandler: @escaping @Sendable (Error?) -> Void) { + pongReceiveHandler(nil) + } + func receive() async throws -> URLSessionWebSocketTask.Message { let phase = self.lock.withLock { () -> Int in let current = self.receivePhase diff --git a/src/gateway/server-methods/nodes.invoke-wake.test.ts b/src/gateway/server-methods/nodes.invoke-wake.test.ts index 147e1df86d..82bf3cee99 100644 --- a/src/gateway/server-methods/nodes.invoke-wake.test.ts +++ b/src/gateway/server-methods/nodes.invoke-wake.test.ts @@ -13,6 +13,7 @@ const mocks = vi.hoisted(() => ({ loadApnsRegistration: vi.fn(), resolveApnsAuthConfigFromEnv: vi.fn(), sendApnsBackgroundWake: vi.fn(), + sendApnsAlert: vi.fn(), })); vi.mock("../../config/config.js", () => ({ @@ -32,6 +33,7 @@ vi.mock("../../infra/push-apns.js", () => ({ loadApnsRegistration: mocks.loadApnsRegistration, resolveApnsAuthConfigFromEnv: mocks.resolveApnsAuthConfigFromEnv, sendApnsBackgroundWake: mocks.sendApnsBackgroundWake, + sendApnsAlert: mocks.sendApnsAlert, })); type RespondCall = [ @@ -81,12 +83,17 @@ async function invokeNode(params: { requestParams?: Partial>; }) { const respond = vi.fn(); + const logGateway = { + info: vi.fn(), + warn: vi.fn(), + }; await nodeHandlers["node.invoke"]({ params: makeNodeInvokeParams(params.requestParams), respond: respond as never, context: { nodeRegistry: params.nodeRegistry, execApprovalManager: undefined, + logGateway, } as never, client: null, req: { type: "req", id: "req-node-invoke", method: "node.invoke" }, @@ -135,6 +142,7 @@ describe("node.invoke APNs wake path", () => { mocks.loadApnsRegistration.mockReset(); mocks.resolveApnsAuthConfigFromEnv.mockReset(); mocks.sendApnsBackgroundWake.mockReset(); + mocks.sendApnsAlert.mockReset(); }); afterEach(() => { @@ -202,7 +210,7 @@ describe("node.invoke APNs wake path", () => { expect(call?.[1]).toMatchObject({ ok: true, nodeId: "ios-node-reconnect" }); }); - it("throttles repeated wake attempts for the same disconnected node", async () => { + it("forces one retry wake when the first wake still fails to reconnect", async () => { vi.useFakeTimers(); mockSuccessfulWakeConfig("ios-node-throttle"); @@ -211,21 +219,14 @@ describe("node.invoke APNs wake path", () => { invoke: vi.fn().mockResolvedValue({ ok: true }), }; - const first = invokeNode({ + const invokePromise = invokeNode({ nodeRegistry, requestParams: { nodeId: "ios-node-throttle", idempotencyKey: "idem-throttle-1" }, }); - await vi.advanceTimersByTimeAsync(WAKE_WAIT_TIMEOUT_MS); - await first; + await vi.advanceTimersByTimeAsync(20_000); + await invokePromise; - const second = invokeNode({ - nodeRegistry, - requestParams: { nodeId: "ios-node-throttle", idempotencyKey: "idem-throttle-2" }, - }); - await vi.advanceTimersByTimeAsync(WAKE_WAIT_TIMEOUT_MS); - await second; - - expect(mocks.sendApnsBackgroundWake).toHaveBeenCalledTimes(1); + expect(mocks.sendApnsBackgroundWake).toHaveBeenCalledTimes(2); expect(nodeRegistry.invoke).not.toHaveBeenCalled(); }); }); diff --git a/src/gateway/server-methods/nodes.ts b/src/gateway/server-methods/nodes.ts index 1ea705365e..9bb2704968 100644 --- a/src/gateway/server-methods/nodes.ts +++ b/src/gateway/server-methods/nodes.ts @@ -11,6 +11,7 @@ import { import { loadApnsRegistration, resolveApnsAuthConfigFromEnv, + sendApnsAlert, sendApnsBackgroundWake, } from "../../infra/push-apns.js"; import { isNodeCommandAllowed, resolveNodeCommandAllowlist } from "../node-command-policy.js"; @@ -40,15 +41,36 @@ import { import type { GatewayRequestHandlers } from "./types.js"; const NODE_WAKE_RECONNECT_WAIT_MS = 3_000; +const NODE_WAKE_RECONNECT_RETRY_WAIT_MS = 12_000; const NODE_WAKE_RECONNECT_POLL_MS = 150; const NODE_WAKE_THROTTLE_MS = 15_000; +const NODE_WAKE_NUDGE_THROTTLE_MS = 10 * 60_000; type NodeWakeState = { lastWakeAtMs: number; - inFlight?: Promise; + inFlight?: Promise; }; const nodeWakeById = new Map(); +const nodeWakeNudgeById = new Map(); + +type NodeWakeAttempt = { + available: boolean; + throttled: boolean; + path: "throttled" | "no-registration" | "no-auth" | "sent" | "send-error"; + durationMs: number; + apnsStatus?: number; + apnsReason?: string; +}; + +type NodeWakeNudgeAttempt = { + sent: boolean; + throttled: boolean; + reason: "throttled" | "no-registration" | "no-auth" | "send-error" | "apns-not-ok" | "sent"; + durationMs: number; + apnsStatus?: number; + apnsReason?: string; +}; function isNodeEntry(entry: { role?: string; roles?: string[] }) { if (entry.role === "node") { @@ -64,7 +86,10 @@ async function delayMs(ms: number): Promise { await new Promise((resolve) => setTimeout(resolve, ms)); } -async function maybeWakeNodeWithApns(nodeId: string): Promise { +async function maybeWakeNodeWithApns( + nodeId: string, + opts?: { force?: boolean }, +): Promise { const state = nodeWakeById.get(nodeId) ?? { lastWakeAtMs: 0 }; nodeWakeById.set(nodeId, state); @@ -73,36 +98,75 @@ async function maybeWakeNodeWithApns(nodeId: string): Promise { } const now = Date.now(); - if (state.lastWakeAtMs > 0 && now - state.lastWakeAtMs < NODE_WAKE_THROTTLE_MS) { - return true; + const force = opts?.force === true; + if (!force && state.lastWakeAtMs > 0 && now - state.lastWakeAtMs < NODE_WAKE_THROTTLE_MS) { + return { available: true, throttled: true, path: "throttled", durationMs: 0 }; } state.inFlight = (async () => { + const startedAtMs = Date.now(); + const withDuration = (attempt: Omit): NodeWakeAttempt => ({ + ...attempt, + durationMs: Math.max(0, Date.now() - startedAtMs), + }); + try { const registration = await loadApnsRegistration(nodeId); if (!registration) { - return false; + return withDuration({ available: false, throttled: false, path: "no-registration" }); } const auth = await resolveApnsAuthConfigFromEnv(process.env); if (!auth.ok) { - return false; + return withDuration({ + available: false, + throttled: false, + path: "no-auth", + apnsReason: auth.error, + }); } state.lastWakeAtMs = Date.now(); - await sendApnsBackgroundWake({ + const wakeResult = await sendApnsBackgroundWake({ auth: auth.value, registration, nodeId, wakeReason: "node.invoke", }); - } catch { - // Best-effort wake only. - if (state.lastWakeAtMs === 0) { - return false; + if (!wakeResult.ok) { + return withDuration({ + available: true, + throttled: false, + path: "send-error", + apnsStatus: wakeResult.status, + apnsReason: wakeResult.reason, + }); } + return withDuration({ + available: true, + throttled: false, + path: "sent", + apnsStatus: wakeResult.status, + apnsReason: wakeResult.reason, + }); + } catch (err) { + // Best-effort wake only. + const message = err instanceof Error ? err.message : String(err); + if (state.lastWakeAtMs === 0) { + return withDuration({ + available: false, + throttled: false, + path: "send-error", + apnsReason: message, + }); + } + return withDuration({ + available: true, + throttled: false, + path: "send-error", + apnsReason: message, + }); } - return true; })(); try { @@ -112,6 +176,70 @@ async function maybeWakeNodeWithApns(nodeId: string): Promise { } } +async function maybeSendNodeWakeNudge(nodeId: string): Promise { + const startedAtMs = Date.now(); + const withDuration = ( + attempt: Omit, + ): NodeWakeNudgeAttempt => ({ + ...attempt, + durationMs: Math.max(0, Date.now() - startedAtMs), + }); + + const lastNudgeAtMs = nodeWakeNudgeById.get(nodeId) ?? 0; + if (lastNudgeAtMs > 0 && Date.now() - lastNudgeAtMs < NODE_WAKE_NUDGE_THROTTLE_MS) { + return withDuration({ sent: false, throttled: true, reason: "throttled" }); + } + + const registration = await loadApnsRegistration(nodeId); + if (!registration) { + return withDuration({ sent: false, throttled: false, reason: "no-registration" }); + } + const auth = await resolveApnsAuthConfigFromEnv(process.env); + if (!auth.ok) { + return withDuration({ + sent: false, + throttled: false, + reason: "no-auth", + apnsReason: auth.error, + }); + } + + try { + const result = await sendApnsAlert({ + auth: auth.value, + registration, + nodeId, + title: "OpenClaw needs a quick reopen", + body: "Tap to reopen OpenClaw and restore the node connection.", + }); + if (!result.ok) { + return withDuration({ + sent: false, + throttled: false, + reason: "apns-not-ok", + apnsStatus: result.status, + apnsReason: result.reason, + }); + } + nodeWakeNudgeById.set(nodeId, Date.now()); + return withDuration({ + sent: true, + throttled: false, + reason: "sent", + apnsStatus: result.status, + apnsReason: result.reason, + }); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + return withDuration({ + sent: false, + throttled: false, + reason: "send-error", + apnsReason: message, + }); + } +} + async function waitForNodeReconnect(params: { nodeId: string; context: { nodeRegistry: { get: (nodeId: string) => unknown } }; @@ -430,7 +558,7 @@ export const nodeHandlers: GatewayRequestHandlers = { ); }); }, - "node.invoke": async ({ params, respond, context, client }) => { + "node.invoke": async ({ params, respond, context, client, req }) => { if (!validateNodeInvokeParams(params)) { respondInvalidParams({ respond, @@ -472,12 +600,70 @@ export const nodeHandlers: GatewayRequestHandlers = { await respondUnavailableOnThrow(respond, async () => { let nodeSession = context.nodeRegistry.get(nodeId); if (!nodeSession) { - const wakeAvailable = await maybeWakeNodeWithApns(nodeId); - if (wakeAvailable) { - await waitForNodeReconnect({ nodeId, context }); + const wakeReqId = req.id; + const wakeFlowStartedAtMs = Date.now(); + context.logGateway.info( + `node wake start node=${nodeId} req=${wakeReqId} command=${command}`, + ); + + const wake = await maybeWakeNodeWithApns(nodeId); + context.logGateway.info( + `node wake stage=wake1 node=${nodeId} req=${wakeReqId} ` + + `available=${wake.available} throttled=${wake.throttled} ` + + `path=${wake.path} durationMs=${wake.durationMs} ` + + `apnsStatus=${wake.apnsStatus ?? -1} apnsReason=${wake.apnsReason ?? "-"}`, + ); + if (wake.available) { + const waitStartedAtMs = Date.now(); + const waitTimeoutMs = NODE_WAKE_RECONNECT_WAIT_MS; + const reconnected = await waitForNodeReconnect({ + nodeId, + context, + timeoutMs: waitTimeoutMs, + }); + const waitDurationMs = Math.max(0, Date.now() - waitStartedAtMs); + context.logGateway.info( + `node wake stage=wait1 node=${nodeId} req=${wakeReqId} ` + + `reconnected=${reconnected} timeoutMs=${waitTimeoutMs} durationMs=${waitDurationMs}`, + ); } nodeSession = context.nodeRegistry.get(nodeId); + if (!nodeSession && wake.available) { + const retryWake = await maybeWakeNodeWithApns(nodeId, { force: true }); + context.logGateway.info( + `node wake stage=wake2 node=${nodeId} req=${wakeReqId} force=true ` + + `available=${retryWake.available} throttled=${retryWake.throttled} ` + + `path=${retryWake.path} durationMs=${retryWake.durationMs} ` + + `apnsStatus=${retryWake.apnsStatus ?? -1} apnsReason=${retryWake.apnsReason ?? "-"}`, + ); + if (retryWake.available) { + const waitStartedAtMs = Date.now(); + const waitTimeoutMs = NODE_WAKE_RECONNECT_RETRY_WAIT_MS; + const reconnected = await waitForNodeReconnect({ + nodeId, + context, + timeoutMs: waitTimeoutMs, + }); + const waitDurationMs = Math.max(0, Date.now() - waitStartedAtMs); + context.logGateway.info( + `node wake stage=wait2 node=${nodeId} req=${wakeReqId} ` + + `reconnected=${reconnected} timeoutMs=${waitTimeoutMs} durationMs=${waitDurationMs}`, + ); + } + nodeSession = context.nodeRegistry.get(nodeId); + } if (!nodeSession) { + const totalDurationMs = Math.max(0, Date.now() - wakeFlowStartedAtMs); + const nudge = await maybeSendNodeWakeNudge(nodeId); + context.logGateway.info( + `node wake nudge node=${nodeId} req=${wakeReqId} sent=${nudge.sent} ` + + `throttled=${nudge.throttled} reason=${nudge.reason} durationMs=${nudge.durationMs} ` + + `apnsStatus=${nudge.apnsStatus ?? -1} apnsReason=${nudge.apnsReason ?? "-"}`, + ); + context.logGateway.warn( + `node wake done node=${nodeId} req=${wakeReqId} connected=false ` + + `reason=not_connected totalMs=${totalDurationMs}`, + ); respond( false, undefined, @@ -487,6 +673,11 @@ export const nodeHandlers: GatewayRequestHandlers = { ); return; } + + const totalDurationMs = Math.max(0, Date.now() - wakeFlowStartedAtMs); + context.logGateway.info( + `node wake done node=${nodeId} req=${wakeReqId} connected=true totalMs=${totalDurationMs}`, + ); } const cfg = loadConfig(); const allowlist = resolveNodeCommandAllowlist(cfg, nodeSession);